diff options
| field | value | date |
|---|---|---|
| author | Jan Sucan <jan@jansucan.com> | 2024-04-14 11:51:34 +0200 |
| committer | Ján Sučan <jan@jansucan.com> | 2024-05-04 15:46:47 +0200 |
| commit | 16ff70613a95d7f85f4cab09f2b3316f67e5901b (patch) | |
| tree | c7b7768dd048d2ca040ea09c2e9df554d6f1e5ad /src | |
| parent | 451be15d119d4d485d06f19f15d9a996cefb5288 (diff) | |
Move input and output buffering to classes
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/backup.cpp | 179 |
| -rw-r--r-- | src/buffered_file.cpp | 161 |
| -rw-r--r-- | src/buffered_file.h | 81 |
| -rw-r--r-- | src/main.cpp | 1 |
| -rw-r--r-- | src/restore.cpp | 152 |
5 files changed, 333 insertions, 241 deletions
diff --git a/src/backup.cpp b/src/backup.cpp index 99a9141..111d3f3 100644 --- a/src/backup.cpp +++ b/src/backup.cpp @@ -25,79 +25,12 @@ */ #include "backup.h" +#include "buffered_file.h" #include <cstring> #include <fstream> -typedef struct { - std::ifstream in_file; - std::ifstream ref_file; - std::ofstream out_file; - - std::unique_ptr<char[]> in_buffer; - std::unique_ptr<char[]> ref_buffer; - std::unique_ptr<char[]> out_buffer; - - size_t out_buffer_size; -} resources_backup_t; - static void check_files(const OptionsBackup &opts); -static void write_out_buffer(const char *const buffer, size_t size, - std::ofstream &file); - -static resources_backup_t -resources_allocate_for_backup(const OptionsBackup &opts) -{ - resources_backup_t res; - - res.in_file.open(opts.getInFilePath(), - std::ifstream::in | std::ifstream::binary); - if (!res.in_file) { - throw BackupError("cannot open input file"); - } - - res.ref_file.open(opts.getRefFilePath(), - std::ifstream::in | std::ifstream::binary); - if (!res.ref_file) { - throw BackupError("cannot open reference file"); - } - - /* When backing up, the output file is truncated to hold the - * new data - */ - res.out_file.open(opts.getOutFilePath(), std::ifstream::out | - std::ifstream::trunc | - std::ifstream::binary); - if (!res.out_file) { - throw BackupError("cannot open output file"); - } - - /* The output buffer contains also the offsets */ - res.out_buffer_size = - ((opts.getBufferSize() / opts.getSectorSize()) * sizeof(uint64_t)) + - opts.getBufferSize(); - - // TODO: separate function - try { - res.in_buffer = std::make_unique<char[]>(opts.getBufferSize()); - } catch (const std::bad_alloc &e) { - throw BackupError("cannot allocate buffer for input file data"); - } - - try { - res.ref_buffer = std::make_unique<char[]>(opts.getBufferSize()); - } catch (const std::bad_alloc &e) { - throw BackupError("cannot allocate buffer for reference file data"); - } - - try { - res.out_buffer = 
std::make_unique<char[]>(res.out_buffer_size); - } catch (const std::bad_alloc &e) { - throw BackupError("cannot allocate buffer for output file data"); - } - - return res; -} static void check_files(const OptionsBackup &opts) @@ -128,91 +61,57 @@ check_files(const OptionsBackup &opts) } } -static void -write_out_buffer(const char *const buffer, size_t size, std::ofstream &file) +void +backup(const OptionsBackup &opts) { - file.write(buffer, size); + check_files(opts); - if (!file) { - throw BackupError("cannot write to output file"); - } -} + BufferedFileReader in_file(opts.getInFilePath(), opts.getBufferSize()); + BufferedFileReader ref_file(opts.getRefFilePath(), opts.getBufferSize()); + BufferedFileWriter out_file(opts.getOutFilePath(), opts.getBufferSize()); -static size_t -read_sectors(std::ifstream &file, char *const buffer, uint32_t buffer_size, - uint32_t sector_size) -{ - file.read(buffer, buffer_size); - const size_t bytes_read = file.gcount(); + std::unique_ptr<char[]> in_buffer; + try { + in_buffer = std::make_unique<char[]>(opts.getSectorSize()); + } catch (const std::bad_alloc &e) { + throw BackupError("cannot allocate sector buffer for input file data"); + } - if (!file.good() && !file.eof()) { - throw BackupError("cannot read from file"); - } else if ((bytes_read % sector_size) != 0) { + std::unique_ptr<char[]> ref_buffer; + try { + ref_buffer = std::make_unique<char[]>(opts.getSectorSize()); + } catch (const std::bad_alloc &e) { throw BackupError( - "data read from input file is not multiple of sector size"); - } else { - return (bytes_read / sector_size); + "cannot allocate sector buffer for reference file data"); } -} - -void -backup(const OptionsBackup &opts) -{ - resources_backup_t res{resources_allocate_for_backup(opts)}; - check_files(opts); - - size_t out_buffer_index = 0; - uint64_t input_file_offset = 0; + uint64_t input_file_offset{0}; for (;;) { - /* Read the sectors from the input and reference files into the buffers - */ - const 
size_t in_sectors_read = - read_sectors(res.in_file, res.in_buffer.get(), opts.getBufferSize(), - opts.getSectorSize()); - const size_t ref_sectors_read = - read_sectors(res.ref_file, res.ref_buffer.get(), - opts.getBufferSize(), opts.getSectorSize()); + // Read sectors + const size_t in_read_size = + in_file.read(in_buffer.get(), opts.getSectorSize()); + const size_t ref_read_size = + ref_file.read(ref_buffer.get(), opts.getSectorSize()); - if ((in_sectors_read == 0) || (ref_sectors_read == 0)) { - break; - } else if (in_sectors_read != ref_sectors_read) { + if (in_read_size != ref_read_size) { throw BackupError( - "cannot read equal amount of sectors from the input files"); + "cannot read equal amount of bytes from the input files"); + } else if (in_read_size == 0) { + break; + } else if (in_read_size != opts.getSectorSize()) { + throw BackupError("cannot read full sectors from the input files"); } - /* Process the sectors in the buffers */ - for (size_t sector = 0; sector < in_sectors_read; ++sector) { - const size_t buffer_offset = sector * opts.getSectorSize(); - - if (memcmp(res.in_buffer.get() + buffer_offset, - res.ref_buffer.get() + buffer_offset, - opts.getSectorSize()) != 0) { - /* Backup the changed sector */ - if (out_buffer_index >= res.out_buffer_size) { - /* The output buffer is full. 
Write it to the output file */ - write_out_buffer(res.out_buffer.get(), out_buffer_index, - res.out_file); - out_buffer_index = 0; - } - /* Write the next backup record */ - const uint64_t o = htole64(input_file_offset); - memcpy(res.out_buffer.get() + out_buffer_index, - reinterpret_cast<const void *>(&o), sizeof(o)); - out_buffer_index += sizeof(o); - - memcpy(res.out_buffer.get() + out_buffer_index, - res.in_buffer.get() + buffer_offset, - opts.getSectorSize()); - out_buffer_index += opts.getSectorSize(); - } - - input_file_offset += opts.getSectorSize(); + // Check for difference + const bool differ = (memcmp(in_buffer.get(), ref_buffer.get(), + opts.getSectorSize()) != 0); + if (differ) { + // Backup sector + uint64_t o = htole64(input_file_offset); + out_file.write(reinterpret_cast<char *>(&o), sizeof(o)); + out_file.write(in_buffer.get(), opts.getSectorSize()); } - } - /* Write out the output buffer */ - if (out_buffer_index > 0) { - write_out_buffer(res.out_buffer.get(), out_buffer_index, res.out_file); + input_file_offset += opts.getSectorSize(); } } diff --git a/src/buffered_file.cpp b/src/buffered_file.cpp new file mode 100644 index 0000000..f5eb863 --- /dev/null +++ b/src/buffered_file.cpp @@ -0,0 +1,161 @@ +/* Copyright 2024 Ján Sučan <jan@jansucan.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "buffered_file.h" +#include "exception.h" + +#include <cstring> +#include <filesystem> +#include <fstream> + +BufferedFileReader::BufferedFileReader(std::filesystem::path path, + size_t buffer_capacity) + : m_buffer_offset(0), m_buffer_capacity(buffer_capacity) +{ + m_file.open(path, std::ifstream::in | std::ifstream::binary); + if (!m_file) { + throw BufferedFileError("cannot open input file"); + } + + try { + m_buffer = std::make_unique<char[]>(m_buffer_capacity); + } catch (const std::bad_alloc &e) { + throw BufferedFileError("cannot allocate buffer for input file data"); + } + + refill_buffer(); +}; + +size_t +BufferedFileReader::read(char *data, size_t data_size) +{ + const size_t size_left = m_buffer_size - m_buffer_offset; + if (data_size <= size_left) { + return read_buffer(data, data_size); + } else { + const size_t size_outside_buffer = data_size - size_left; + read_buffer(data, size_left); + const size_t read_outside = + read_file(data + size_left, size_outside_buffer); + refill_buffer(); + return size_left + read_outside; + } +}; + +size_t +BufferedFileReader::read_buffer(char *data, size_t data_size) +{ + // Assumes that the caller makes sure 
there is enough data in the buffer + // to read + memcpy(data, reinterpret_cast<char *>(m_buffer.get()) + m_buffer_offset, + data_size); + m_buffer_offset += data_size; + return data_size; +}; + +void +BufferedFileReader::refill_buffer() +{ + m_buffer_size = read_file(m_buffer.get(), m_buffer_capacity); + m_buffer_offset = 0; +}; + +size_t +BufferedFileReader::read_file(char *data, size_t data_size) +{ + m_file.read(data, data_size); + + if (!m_file.good() && !m_file.eof()) { + throw BufferedFileError("cannot read from file"); + } + + return m_file.gcount(); +}; + +BufferedFileWriter::BufferedFileWriter(std::filesystem::path path, + size_t buffer_capacity) + : m_buffer_size(0), m_buffer_capacity(buffer_capacity) +{ + /* When backing up, the output file is truncated to hold the + * new data + */ + m_file.open(path, std::ifstream::out | std::ifstream::trunc | + std::ifstream::binary); + if (!m_file) { + throw BufferedFileError("cannot open output file"); + } + + try { + m_buffer = std::make_unique<char[]>(m_buffer_capacity); + } catch (const std::bad_alloc &e) { + throw BufferedFileError("cannot allocate buffer for output file data"); + } +}; +BufferedFileWriter::~BufferedFileWriter() { flush_buffer(); }; + +void +BufferedFileWriter::write(const char *data, size_t data_size) +{ + size_t free{m_buffer_capacity - m_buffer_size}; + if (data_size <= free) { + // There is free space in the buffer + write_buffer(data, data_size); + } else { + // No free space + flush_buffer(); + if (data_size <= m_buffer_capacity) { + // Data fits into the buffer + write_buffer(data, data_size); + } else { + // Doesn't fit + write_file(data, data_size); + } + } +}; + +void +BufferedFileWriter::write_buffer(const char *data, size_t data_size) +{ + memcpy(reinterpret_cast<char *>(m_buffer.get()) + m_buffer_size, data, + data_size); + m_buffer_size += data_size; +}; + +void +BufferedFileWriter::flush_buffer() +{ + write_file(m_buffer.get(), m_buffer_size); + m_buffer_size = 0; +}; + +void 
+BufferedFileWriter::write_file(const char *data, size_t data_size) +{ + m_file.write(data, data_size); + if (!m_file) { + throw BufferedFileError("cannot write to output file"); + } +}; diff --git a/src/buffered_file.h b/src/buffered_file.h new file mode 100644 index 0000000..1bcbb73 --- /dev/null +++ b/src/buffered_file.h @@ -0,0 +1,81 @@ +/* Copyright 2024 Ján Sučan <jan@jansucan.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +#include "exception.h" + +#include <cstring> +#include <filesystem> +#include <fstream> + +class BufferedFileError : public DiffddError +{ + public: + explicit BufferedFileError(const std::string &message) + : DiffddError(message) + { + } +}; + +class BufferedFileReader +{ + public: + BufferedFileReader(std::filesystem::path path, size_t buffer_capacity); + virtual ~BufferedFileReader() = default; + + size_t read(char *data, size_t data_size); + + private: + std::ifstream m_file; + std::unique_ptr<char[]> m_buffer; + size_t m_buffer_offset; + size_t m_buffer_size; + const size_t m_buffer_capacity; + + size_t read_buffer(char *data, size_t data_size); + void refill_buffer(); + size_t read_file(char *data, size_t data_size); +}; + +class BufferedFileWriter +{ + public: + BufferedFileWriter(std::filesystem::path path, size_t buffer_capacity); + virtual ~BufferedFileWriter(); + + void write(const char *data, size_t data_size); + + private: + std::fstream m_file; + std::unique_ptr<char[]> m_buffer; + size_t m_buffer_size; + const size_t m_buffer_capacity; + + void write_buffer(const char *data, size_t data_size); + void flush_buffer(); + void write_file(const char *data, size_t data_size); +}; diff --git a/src/main.cpp b/src/main.cpp index 0810f04..8f6dac5 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -25,6 +25,7 @@ */ #include "backup.h" +#include "buffered_file.h" #include "options.h" #include "restore.h" diff --git a/src/restore.cpp b/src/restore.cpp index c9ace96..813d9ce 100644 --- a/src/restore.cpp +++ b/src/restore.cpp @@ -25,58 +25,13 @@ */ #include "restore.h" +#include "buffered_file.h" #include <filesystem> #include <fstream> -#include <iostream> - -typedef struct { - std::ifstream in_file; - std::fstream out_file; - - std::unique_ptr<char[]> in_buffer; - - size_t in_sector_size; - size_t in_buffer_size; -} resources_restore_t; - -static resources_restore_t -resources_allocate_for_restore(const OptionsRestore &opts) -{ - 
resources_restore_t res; - - res.in_file.open(opts.getInFilePath(), std::ios::in | std::ios::binary); - if (!res.in_file) { - throw RestoreError("cannot open input file"); - } - - /* When restoring, the file must be opened for writing and not - * truncated - */ - res.out_file.open(opts.getOutFilePath(), - std::ios::in | std::ios::out | std::ios::binary); - if (!res.out_file) { - throw RestoreError("cannot open output file"); - } - - /* Allocate the buffer for data from the input file */ - /* The input buffer contains also the offsets */ - res.in_sector_size = sizeof(uint64_t) + opts.getSectorSize(); - const size_t in_buffer_sector_count = - opts.getBufferSize() / res.in_sector_size; - res.in_buffer_size = in_buffer_sector_count * res.in_sector_size; - - try { - res.in_buffer = std::make_unique<char[]>(res.in_buffer_size); - } catch (const std::bad_alloc &e) { - throw RestoreError("cannot allocate buffer for input file data"); - } - - return res; -} static void -check_input_file(resources_restore_t &res, const OptionsRestore &opts) +check_input_file(const OptionsRestore &opts) { size_t in_size{0}; try { @@ -104,6 +59,12 @@ check_input_file(resources_restore_t &res, const OptionsRestore &opts) std::string(e.what())); } + std::ifstream in_file; + in_file.open(opts.getInFilePath(), std::ios::in | std::ios::binary); + if (!in_file) { + throw RestoreError("cannot open input file"); + } + uint64_t prev_out_offset = 0; bool is_first_reading = true; @@ -111,12 +72,11 @@ check_input_file(resources_restore_t &res, const OptionsRestore &opts) for (;;) { uint64_t out_offset; /* Read the next offset */ - res.in_file.read(reinterpret_cast<char *>(&out_offset), - sizeof(out_offset)); + in_file.read(reinterpret_cast<char *>(&out_offset), sizeof(out_offset)); - if (res.in_file.eof() && res.in_file.fail() && !res.in_file.bad()) { + if (in_file.eof() && in_file.fail() && !in_file.bad()) { break; - } else if (!res.in_file.good() && !res.in_file.eof()) { + } else if (!in_file.good() && 
!in_file.eof()) { throw RestoreError("cannot read from file"); } out_offset = le64toh(out_offset); @@ -127,8 +87,7 @@ check_input_file(resources_restore_t &res, const OptionsRestore &opts) } else if ((out_offset + opts.getSectorSize()) > out_size) { throw RestoreError( "a sector offset points past the end of the output file"); - } else if (!res.in_file.seekg(opts.getSectorSize(), - std::ios_base::cur)) { + } else if (!in_file.seekg(opts.getSectorSize(), std::ios_base::cur)) { throw RestoreError("cannot seek in input file"); } @@ -138,77 +97,68 @@ check_input_file(resources_restore_t &res, const OptionsRestore &opts) /* The input file must be read completely */ char c; - res.in_file.read(&c, 1); - if (res.in_file.gcount() != 0) { + in_file.read(&c, 1); + if (in_file.gcount() != 0) { throw RestoreError("input file is not valid"); } - res.in_file.clear(); + in_file.clear(); - /* The file must be prepared for the restoring */ - if (!res.in_file.seekg(0, std::ios_base::beg)) { - throw RestoreError("cannot seek in input file"); - } -} - -static size_t -read_sectors(std::ifstream &file, char *const buffer, uint32_t buffer_size, - uint32_t sector_size) -{ - file.read(buffer, buffer_size); - const size_t bytes_read = file.gcount(); - - if (!file.good() && !file.eof()) { - throw RestoreError("cannot read from file"); - } else if ((bytes_read % sector_size) != 0) { - throw RestoreError( - "data read from input file is not multiple of sector size"); - } else { - return (bytes_read / sector_size); - } + in_file.close(); } void restore(const OptionsRestore &opts) { - resources_restore_t res{resources_allocate_for_restore(opts)}; + check_input_file(opts); + + BufferedFileReader in_file(opts.getInFilePath(), opts.getBufferSize()); + + std::fstream out_file; + out_file.open(opts.getOutFilePath(), + std::ios::in | std::ios::out | std::ios::binary); + if (!out_file) { + throw RestoreError("cannot open output file"); + } - check_input_file(res, opts); + const size_t in_buffer_size = 
sizeof(uint64_t) + opts.getSectorSize(); + std::unique_ptr<char[]> in_buffer; + try { + in_buffer = std::make_unique<char[]>(in_buffer_size); + } catch (const std::bad_alloc &e) { + throw RestoreError("cannot allocate sector buffer for input file data"); + } /* Restore data from the differential image */ + size_t in_read_size = {0}; for (;;) { - /* Read data of the offset and the next sector */ - const size_t in_sectors_read = - read_sectors(res.in_file, res.in_buffer.get(), res.in_buffer_size, - res.in_sector_size); + in_read_size = in_file.read(in_buffer.get(), in_buffer_size); - if (in_sectors_read == 0) { + if (in_read_size == 0) { break; + } else if (in_read_size != in_buffer_size) { + throw RestoreError("cannot read from input file"); } - char *in_buffer = res.in_buffer.get(); - - for (size_t s = 0; s < in_sectors_read; ++s) { - const uint64_t out_offset = - le64toh(*(reinterpret_cast<uint64_t *>(in_buffer))); - in_buffer += sizeof(uint64_t); - - if (!res.out_file.seekp(out_offset, std::ios_base::beg)) { - throw RestoreError("cannot seek in output file"); - } + const uint64_t out_offset = + le64toh(*reinterpret_cast<uint64_t *>(in_buffer.get())); - if (!res.out_file.write(in_buffer, opts.getSectorSize())) { - throw RestoreError("cannot write to output file"); - } + if (!out_file.seekp(out_offset, std::ios_base::beg)) { + throw RestoreError("cannot seek in output file"); + } - in_buffer += opts.getSectorSize(); + if (!out_file.write(reinterpret_cast<char *>(in_buffer.get()) + + sizeof(uint64_t), + opts.getSectorSize())) { + throw RestoreError("cannot write to output file"); } } + out_file.close(); + /* The input file must be read completely */ char c; - res.in_file.read(&c, 1); - if (res.in_file.gcount() != 0) { + in_read_size = in_file.read(&c, 1); + if (in_read_size != 0) { throw RestoreError("input file is not valid"); } - res.in_file.clear(); } |
