diff options
| -rw-r--r-- | .pre-commit-config.yaml | 2 | ||||
| -rw-r--r-- | README.md | 22 | ||||
| -rw-r--r-- | src/backup.cpp | 433 | ||||
| -rw-r--r-- | src/buffered_file.cpp | 91 | ||||
| -rw-r--r-- | src/buffered_file.h | 11 | ||||
| -rw-r--r-- | src/format_v2.h | 119 | ||||
| -rw-r--r-- | src/options.cpp | 56 | ||||
| -rw-r--r-- | src/options.h | 3 | ||||
| -rw-r--r-- | src/restore.cpp | 139 | ||||
| -rw-r--r-- | tests/005-missing_argument_for_option.sh | 2 | ||||
| -rw-r--r-- | tests/006-incorrect_buffer_size.sh | 10 | ||||
| -rw-r--r-- | tests/007-incorrect_sector_size.sh | 15 | ||||
| -rw-r--r-- | tests/100-cannot_open_files.sh | 4 | ||||
| -rw-r--r-- | tests/200-input_and_reference_size_differs.sh | 15 | ||||
| -rw-r--r-- | tests/201-input_or_reference_size_is_not_multiple_of_sector_size.sh | 16 | ||||
| -rw-r--r-- | tests/300-incorrect_reference_file.sh | 37 | ||||
| -rw-r--r-- | tests/400-expected_backup_output.bin | bin | 1560 -> 39 bytes | |||
| -rw-r--r-- | tests/400-successful_backup_restore.sh | 4 |
18 files changed, 600 insertions, 379 deletions
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d2ed0f3..3e824ca 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,4 +20,4 @@ repos: rev: 0.8.0 hooks: - id: cppcheck - args: [--enable=all, --force, --suppress=missingIncludeSystem, --check-level=exhaustive, src] + args: [--enable=all, --force, --suppress=missingIncludeSystem, --inline-suppr, --check-level=exhaustive, src] @@ -10,17 +10,15 @@ This simple utility was created to reduce size of backup images of disk partitions. It is intended to be used in conjunction with ```dd``` or similar -utility. The primary concern is safe operation, so ```diff-dd``` performs many -checks of the input and output files. For example, the differential image file -is read twice when restoring it. Because of that, it is slower. +utility. ## Synopsis > diff-dd help -> diff-dd backup [-S SECTOR_SIZE] [-B BUFFER_SIZE] -i INFILE -b BASEFILE -o OUTFILE +> diff-dd backup [-B BUFFER_SIZE] -i INFILE -b BASEFILE -o OUTFILE -> diff-dd restore [-S SECTOR_SIZE] [-B BUFFER_SIZE] -d DIFFFILE -o OUTFILE +> diff-dd restore [-B BUFFER_SIZE] -d DIFFFILE -o OUTFILE ## Backup @@ -31,24 +29,20 @@ exist. Differential backup is created with: The ```INFILE``` is a path to the file to backup differentially, the ```BASEFILE``` is the full image, and the ```OUTFILE``` is the file to -which only the changed sectors of the ```INFILE```, compared to the -```BASEFILE```, and their offsets will be saved. +which the changed data of the ```INFILE```, compared to the +```BASEFILE```, their offsets, and sizes will be saved. ## Restore -The restoration means application of the changed sectors saved in the +The restoration means application of the changed data saved in the ```DIFFFILE```, which is the differential image, to the ```OUTFILE```: > diff-dd restore -d DIFFFILE -o OUTFILE ## Options -```-S``` sets the sector size by which the files will be processed -(default is 512 B). It can be used to control granularity of -differential backup. - -```-B``` sets the size of the buffer for the sectors of the input and -output file (default is 4 MiB). The input data is always buffered. The +```-B``` sets the size of the buffer for the data of the input and +output files (default is 4 MiB). The input data is always buffered. The output data are buffered only in backup mode. ## Example diff --git a/src/backup.cpp b/src/backup.cpp index bf8bfaf..4a76be5 100644 --- a/src/backup.cpp +++ b/src/backup.cpp @@ -1,4 +1,4 @@ -/* Copyright 2021 Ján Sučan <jan@jansucan.com> +/* Copyright 2024 Ján Sučan <jan@jansucan.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are @@ -26,91 +26,398 @@ #include "backup.h" #include "buffered_file.h" +#include "format_v2.h" -#include <cstring> -#include <fstream> +#include <algorithm> +#include <array> +#include <cassert> +#include <iostream> +#include <vector> -static void check_files(const OptionsBackup &opts); +class Page +{ + friend bool operator==(const Page &lhs, const Page &rhs); + + public: + Page() : m_start(0), m_end(0){}; + + Page(std::shared_ptr<char[]> data, uint64_t start, uint64_t end) + : m_data(data), m_start(start), m_end(end) + { + assert(m_start <= m_end); + }; + + std::shared_ptr<char[]> getData() const { return m_data; }; + uint64_t getStart() const { return m_start; }; + uint64_t getEnd() const { return m_end; }; + size_t getSize() const { return m_end - m_start; }; + bool isEmpty() const { return getSize() == 0; }; + + private: + std::shared_ptr<char[]> m_data; + uint64_t m_start; + uint64_t m_end; +}; + +bool +operator==(const Page &lhs, const Page &rhs) +{ + return (lhs.m_data == rhs.m_data) && (lhs.m_start == rhs.m_start) && + (lhs.m_end == rhs.m_end); +} + +class PagedStreamReader +{ + public: + PagedStreamReader(std::istream &istr, size_t page_size_bytes) + : m_page_size_bytes(page_size_bytes), m_istream(istr), + m_stream_pos_bytes(0), m_buffer_index(0) + { + try { + m_buffers[0] = std::shared_ptr<char[]>(new char[m_page_size_bytes]); + m_buffers[1] = std::shared_ptr<char[]>(new char[m_page_size_bytes]); + } catch (const std::bad_alloc &e) { + throw BufferedFileError( + "cannot allocate pages for input stream data"); + } + }; + + Page getNextPage() + { + m_buffer_index = (m_buffer_index + 1) % 2; + auto buf = m_buffers[m_buffer_index]; + // Buffer for new data must not in use + assert(buf.use_count() == 2); + + const size_t bytes_read{readFromStream(buf.get())}; + m_stream_pos_bytes += bytes_read; + + if (bytes_read == 0) { + buf = std::shared_ptr<char[]>(); + } + return Page{buf, m_stream_pos_bytes - bytes_read, m_stream_pos_bytes}; + } + + private: + const size_t m_page_size_bytes; + std::istream &m_istream; + uint64_t m_stream_pos_bytes; + std::array<std::shared_ptr<char[]>, 2> m_buffers; + unsigned m_buffer_index; + + size_t readFromStream(char *const data) + { + if (m_istream.eof()) { + return 0; + } + + m_istream.read(data, m_page_size_bytes); -static void -check_files(const OptionsBackup &opts) + if (!m_istream.good() && !m_istream.eof()) { + throw BufferedFileError("cannot read from stream"); + } + + return m_istream.gcount(); + } +}; + +enum class MergeState { + Finished, + Incomplete, +}; + +class Diff +{ + friend MergeState diffsTryMerge(Diff &diff_a, Diff &diff_b, + size_t max_merge_gap, size_t max_size); + + public: + explicit Diff(uint64_t start_end) + : m_pages{}, m_start{start_end}, m_end{start_end} + { + assert(m_start <= m_end); + }; + Diff(Page page, uint64_t start, uint64_t end) + : m_pages{page}, m_start{start}, m_end{end} + { + assert(m_start <= m_end); + }; + uint64_t getStart() const { return m_start; }; + uint64_t getEnd() const { return m_end; }; + size_t getSize() const { return m_end - m_start; }; + bool isEmpty() const { return getSize() == 0; }; + + std::vector<FormatV2::RecordData> getData() const + { + std::vector<FormatV2::RecordData> data{}; + + if (!m_pages[0].isEmpty() && m_pages[1].isEmpty()) { + // Only the first page + assert((m_start >= m_pages[0].getStart()) && + (m_start <= m_pages[0].getEnd()) && + (m_end >= m_pages[0].getStart()) && + (m_end <= m_pages[0].getEnd())); + + const uint64_t offset{m_start - m_pages[0].getStart()}; + auto data_first{std::shared_ptr<char[]>{ + m_pages[0].getData(), + static_cast<char *>(m_pages[0].getData().get()) + offset}}; + data.push_back(FormatV2::RecordData{getSize(), data_first}); + } else if (!m_pages[0].isEmpty() && !m_pages[1].isEmpty()) { + // Both pages + assert((m_start >= m_pages[0].getStart()) && + (m_start <= m_pages[0].getEnd()) && + (m_end >= m_pages[1].getStart()) && + (m_end <= m_pages[1].getEnd())); + + size_t size{m_pages[0].getEnd() - m_start}; + const uint64_t offset{m_start - m_pages[0].getStart()}; + auto data_first{std::shared_ptr<char[]>{ + m_pages[0].getData(), + static_cast<char *>(m_pages[0].getData().get()) + offset}}; + data.push_back(FormatV2::RecordData{size, data_first}); + + size = m_end - m_pages[1].getStart(); + data.push_back(FormatV2::RecordData{size, m_pages[1].getData()}); + } + + return data; + }; + + private: + std::array<Page, 2> m_pages; + uint64_t m_start; + uint64_t m_end; + + bool hasPage(size_t i) // cppcheck-suppress unusedPrivateFunction + { + return (i < m_pages.size()) && (m_pages[i].getData() != nullptr); + }; +}; + +MergeState +diffsTryMerge(Diff &diff_a, Diff &diff_b, size_t max_merge_gap, size_t max_size) { - size_t in_size{0}; - try { - in_size = std::filesystem::file_size(opts.getInFilePath()); - } catch (const std::exception &e) { - throw BackupError("cannot get size of input file: " + - std::string(e.what())); + if (diff_a.isEmpty()) { + // Do not merge to an empty diff + return MergeState::Finished; } - size_t base_size{0}; - try { - base_size = std::filesystem::file_size(opts.getBaseFilePath()); - } catch (const std::exception &e) { - throw BackupError("cannot get size of base file: " + - std::string(e.what())); + if (diff_b.isEmpty()) { + // Nothing to merge from an empty diff + return MergeState::Finished; } - /* Check sizes of the input file and the base file */ - if (in_size != base_size) { - throw BackupError("input file and base file differ in size"); - } else if ((in_size % opts.getSectorSize()) != 0) { - throw BackupError( - "size of input file and base file is not multiple of " + - std::to_string(opts.getSectorSize())); + assert(diff_a.getEnd() <= diff_b.getStart()); + const size_t gap{diff_b.getStart() - diff_a.getEnd()}; + if (gap > max_merge_gap) { + // B is too far away + return MergeState::Finished; } + + if ((diff_a.getSize() + gap) >= max_size) { + // No space in A + return MergeState::Finished; + } + + // Can be merged + + // Adjust the diff start and end offsets + + // There is always at least 1 byte free in A here + const size_t free{max_size - (diff_a.getSize() + gap)}; + const size_t to_merge{std::min(free, diff_b.getSize())}; + // There is always at least 1 byte to merge from B here + + // Enlarge A + diff_a.m_end += gap + to_merge; + // Shrink B + diff_b.m_start += to_merge; + + // Add B's page to A if needed + + // Non-empty A must have only the first, or both pages + assert(diff_a.hasPage(0)); + // Non-empty B must have only the first page + assert(diff_b.hasPage(0) && !diff_b.hasPage(1)); + + // If A has both pages, B's page must only be the same as A's second + // page. No setting of pages in A is needed in this case + assert(!diff_a.hasPage(1) || (diff_b.m_pages[0] == diff_a.m_pages[1])); + if (!diff_a.hasPage(1)) { + // If A has only the first page, B's page must only be the same as the + // A's first page or following it + const bool b_follows{ + (diff_b.m_pages[0].getData() != diff_a.m_pages[0].getData()) && + (diff_b.m_pages[0].getStart() == diff_a.m_pages[0].getEnd())}; + assert((diff_b.m_pages[0] == diff_a.m_pages[0]) || b_follows); + if (b_follows) { + diff_a.m_pages[1] = diff_b.m_pages[0]; + } + } + + return (diff_a.getSize() >= max_size) ? MergeState::Finished + : MergeState::Incomplete; } +class DiffFinder +{ + public: + DiffFinder(std::istream &old_stream, std::istream &new_stream, + uint32_t buffer_size, size_t max_merge_gap) + : m_old_page_reader(old_stream, buffer_size), + m_new_page_reader(new_stream, buffer_size), + m_diff_max_size(buffer_size), m_max_merge_gap(max_merge_gap), + m_offset_in_stream(0), m_diff(0), + m_search_state(SearchState::ReadPages){}; + + Diff findNextDiff() + { + for (;;) { + if (m_search_state == SearchState::ReadPages) { + m_old_page = m_old_page_reader.getNextPage(); + m_new_page = m_new_page_reader.getNextPage(); + assert(m_old_page.getStart() == m_new_page.getStart()); + + if (m_old_page.getSize() != m_new_page.getSize()) { + throw BackupError( + "cannot read the same amount of data from both files"); + } + + const bool end_of_stream{m_old_page.isEmpty() && + m_new_page.isEmpty()}; + if (end_of_stream) { + const Diff return_diff{m_diff}; + m_diff = Diff{m_offset_in_stream}; + return return_diff; + } + + m_search_state = SearchState::FindDiff; + + } else if (m_search_state == SearchState::FindDiff) { + Diff diff{findDiffInPages(m_old_page, m_new_page, + m_offset_in_stream)}; + m_offset_in_stream = diff.getEnd(); + + if (diff.isEmpty()) { + // End of pages. On the next call, read new pages. + m_old_page = Page{}; + m_new_page = Page{}; + m_search_state = SearchState::ReadPages; + } + + const MergeState merge_state{diffsTryMerge( + m_diff, diff, m_max_merge_gap, m_diff_max_size)}; + + if (merge_state == MergeState::Finished) { + const Diff return_diff{m_diff}; + m_diff = diff; + if (!return_diff.isEmpty()) { + return return_diff; + } + } + + } else { + assert(false); + } + } + }; + + private: + enum class SearchState { ReadPages, FindDiff }; + + PagedStreamReader m_old_page_reader; + PagedStreamReader m_new_page_reader; + const size_t m_diff_max_size; + const size_t m_max_merge_gap; + Page m_old_page; + Page m_new_page; + uint64_t m_offset_in_stream; + Diff m_diff; + SearchState m_search_state; + + Diff findDiffInPages(Page old_page, Page new_page, + uint64_t offset_in_stream) + { + const char *old_data{old_page.getData().get()}; + const char *new_data{new_page.getData().get()}; + const uint64_t data_size_bytes{old_page.getSize()}; + + assert(offset_in_stream >= new_page.getStart()); + size_t offset_in_pages{offset_in_stream - new_page.getStart()}; + + // Find offset of the first different byte + for (; offset_in_pages < data_size_bytes; ++offset_in_pages) { + if (new_data[offset_in_pages] != old_data[offset_in_pages]) { + break; + } + } + const size_t start_in_pages{offset_in_pages}; + + if (offset_in_pages < data_size_bytes) { + // Different byte found. Start searching for a same byte immediately + // after. + ++offset_in_pages; + } + + // Find offset of the first same byte + for (; offset_in_pages < data_size_bytes; ++offset_in_pages) { + if (new_data[offset_in_pages] == old_data[offset_in_pages]) { + break; + } + } + const size_t end_in_pages{offset_in_pages}; + + // In the case when no different byte is found, the end offset will be + // the same as the start offset + + const uint64_t start_in_stream{new_page.getStart() + start_in_pages}; + const uint64_t end_in_stream{new_page.getStart() + end_in_pages}; + if (start_in_stream == end_in_stream) { + return Diff{start_in_stream}; + } else { + return Diff{new_page, start_in_stream, end_in_stream}; + } + } +}; + void backup(const OptionsBackup &opts) { - check_files(opts); - - BufferedFileReader in_file(opts.getInFilePath(), opts.getBufferSize()); - BufferedFileReader base_file(opts.getBaseFilePath(), opts.getBufferSize()); - BufferedFileWriter out_file(opts.getOutFilePath(), opts.getBufferSize()); + std::ifstream in_istream{opts.getInFilePath(), + std::ifstream::in | std::ifstream::binary}; + if (!in_istream) { + throw BufferedFileError("cannot open input file"); + } - std::unique_ptr<char[]> in_buffer; - try { - in_buffer = std::make_unique<char[]>(opts.getSectorSize()); - } catch (const std::bad_alloc &e) { - throw BackupError("cannot allocate sector buffer for input file data"); + std::ifstream base_istream{opts.getBaseFilePath(), + std::ifstream::in | std::ifstream::binary}; + if (!base_istream) { + throw BufferedFileError("cannot open base file"); } - std::unique_ptr<char[]> base_buffer; - try { - base_buffer = std::make_unique<char[]>(opts.getSectorSize()); - } catch (const std::bad_alloc &e) { - throw BackupError("cannot allocate sector buffer for base file data"); + // When backing up, the output file is truncated to hold the new data + std::ofstream out_ostream{opts.getOutFilePath(), std::ofstream::out | + std::ofstream::trunc | + std::ofstream::binary}; + if (!out_ostream) { + throw BufferedFileError("cannot open output file"); } - uint64_t input_file_offset{0}; + DiffFinder diff_finder(base_istream, in_istream, opts.getBufferSize(), + FormatV2::RecordHeaderSize); + FormatV2::Writer diff_writer(out_ostream, opts.getBufferSize()); + for (;;) { - // Read sectors - const size_t in_read_size = - in_file.read(in_buffer.get(), opts.getSectorSize()); - const size_t base_read_size = - base_file.read(base_buffer.get(), opts.getSectorSize()); - - if (in_read_size != base_read_size) { - throw BackupError( - "cannot read equal amount of bytes from the input files"); - } else if (in_read_size == 0) { + const Diff diff{diff_finder.findNextDiff()}; + if (diff.isEmpty()) { break; - } else if (in_read_size != opts.getSectorSize()) { - throw BackupError("cannot read full sectors from the input files"); } - // Check for difference - const bool differ = (memcmp(in_buffer.get(), base_buffer.get(), - opts.getSectorSize()) != 0); - if (differ) { - // Backup sector - uint64_t o = htole64(input_file_offset); - out_file.write(reinterpret_cast<char *>(&o), sizeof(o)); - out_file.write(in_buffer.get(), opts.getSectorSize()); - } + diff_writer.writeDiffRecord(diff.getStart(), diff.getSize(), + diff.getData()); - input_file_offset += opts.getSectorSize(); + // Here, the diff is destructed and page data reference counters + // decremented } } diff --git a/src/buffered_file.cpp b/src/buffered_file.cpp index f5eb863..8d47eed 100644 --- a/src/buffered_file.cpp +++ b/src/buffered_file.cpp @@ -27,53 +27,71 @@ #include "buffered_file.h" #include "exception.h" +#include <algorithm> +#include <cassert> #include <cstring> #include <filesystem> #include <fstream> -BufferedFileReader::BufferedFileReader(std::filesystem::path path, +BufferedFileReader::BufferedFileReader(std::istream &istream, size_t buffer_capacity) - : m_buffer_offset(0), m_buffer_capacity(buffer_capacity) + : m_istream(istream), m_buffer_offset(0), m_buffer_size(0), + m_buffer_capacity(buffer_capacity) { - m_file.open(path, std::ifstream::in | std::ifstream::binary); - if (!m_file) { - throw BufferedFileError("cannot open input file"); - } - try { m_buffer = std::make_unique<char[]>(m_buffer_capacity); } catch (const std::bad_alloc &e) { throw BufferedFileError("cannot allocate buffer for input file data"); } - - refill_buffer(); }; size_t BufferedFileReader::read(char *data, size_t data_size) { - const size_t size_left = m_buffer_size - m_buffer_offset; - if (data_size <= size_left) { - return read_buffer(data, data_size); - } else { - const size_t size_outside_buffer = data_size - size_left; - read_buffer(data, size_left); - const size_t read_outside = - read_file(data + size_left, size_outside_buffer); + size_t retry_count{0}; + size_t offset{0}; + + while ((data_size > 0) && (retry_count < 2)) { + char *d; + const size_t r{tryRead(data_size, &d)}; + if (r == 0) { + ++retry_count; + continue; + } + + memcpy(data + offset, d, r); + offset += r; + data_size -= r; + } + + return offset; +} + +size_t +BufferedFileReader::tryRead(size_t data_size, char **return_data) +{ + const size_t size_left{m_buffer_size - m_buffer_offset}; + if (size_left == 0) { refill_buffer(); - return size_left + read_outside; + if (m_buffer_size == 0) { + return 0; + } } + // There is at least one byte in the buffer + const size_t size_read{read_buffer(data_size, return_data)}; + assert(size_read > 0); + return size_read; }; size_t -BufferedFileReader::read_buffer(char *data, size_t data_size) +BufferedFileReader::read_buffer(size_t data_size, char **return_data) { - // Assumes that the caller makes sure there is enough data in the buffer - // to read - memcpy(data, reinterpret_cast<char *>(m_buffer.get()) + m_buffer_offset, - data_size); - m_buffer_offset += data_size; - return data_size; + *return_data = static_cast<char *>(m_buffer.get()) + m_buffer_offset; + + const size_t size_left{m_buffer_size - m_buffer_offset}; + const size_t size_read{std::min(data_size, size_left)}; + m_buffer_offset += size_read; + return size_read; }; void @@ -86,28 +104,19 @@ BufferedFileReader::refill_buffer() size_t BufferedFileReader::read_file(char *data, size_t data_size) { - m_file.read(data, data_size); + m_istream.read(data, data_size); - if (!m_file.good() && !m_file.eof()) { + if (!m_istream.good() && !m_istream.eof()) { throw BufferedFileError("cannot read from file"); } - return m_file.gcount(); + return m_istream.gcount(); }; -BufferedFileWriter::BufferedFileWriter(std::filesystem::path path, +BufferedFileWriter::BufferedFileWriter(std::ostream &ostream, size_t buffer_capacity) - : m_buffer_size(0), m_buffer_capacity(buffer_capacity) + : m_ostream(ostream), m_buffer_size(0), m_buffer_capacity(buffer_capacity) { - /* When backing up, the output file is truncated to hold the - * new data - */ - m_file.open(path, std::ifstream::out | std::ifstream::trunc | - std::ifstream::binary); - if (!m_file) { - throw BufferedFileError("cannot open output file"); - } - try { m_buffer = std::make_unique<char[]>(m_buffer_capacity); } catch (const std::bad_alloc &e) { @@ -154,8 +163,8 @@ BufferedFileWriter::flush_buffer() void BufferedFileWriter::write_file(const char *data, size_t data_size) { - m_file.write(data, data_size); - if (!m_file) { + m_ostream.write(data, data_size); + if (!m_ostream) { throw BufferedFileError("cannot write to output file"); } }; diff --git a/src/buffered_file.h b/src/buffered_file.h index 1bcbb73..4e5219d 100644 --- a/src/buffered_file.h +++ b/src/buffered_file.h @@ -44,19 +44,20 @@ class BufferedFileError : public DiffddError class BufferedFileReader { public: - BufferedFileReader(std::filesystem::path path, size_t buffer_capacity); + BufferedFileReader(std::istream &istream, size_t buffer_capacity); virtual ~BufferedFileReader() = default; size_t read(char *data, size_t data_size); + size_t tryRead(size_t data_size, char **return_data); private: - std::ifstream m_file; + std::istream &m_istream; std::unique_ptr<char[]> m_buffer; size_t m_buffer_offset; size_t m_buffer_size; const size_t m_buffer_capacity; - size_t read_buffer(char *data, size_t data_size); + size_t read_buffer(size_t data_size, char **return_data); void refill_buffer(); size_t read_file(char *data, size_t data_size); }; @@ -64,13 +65,13 @@ class BufferedFileReader class BufferedFileWriter { public: - BufferedFileWriter(std::filesystem::path path, size_t buffer_capacity); + BufferedFileWriter(std::ostream &ostream, size_t buffer_capacity); virtual ~BufferedFileWriter(); void write(const char *data, size_t data_size); private: - std::fstream m_file; + std::ostream &m_ostream; std::unique_ptr<char[]> m_buffer; size_t m_buffer_size; const size_t m_buffer_capacity; diff --git a/src/format_v2.h b/src/format_v2.h new file mode 100644 index 0000000..708a252 --- /dev/null +++ b/src/format_v2.h @@ -0,0 +1,119 @@ +/* Copyright 2024 Ján Sučan <jan@jansucan.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <endian.h> + +#include <vector> + +namespace FormatV2 +{ + +const size_t RecordHeaderSize{sizeof(uint64_t) + sizeof(uint32_t)}; + +struct RecordData { + size_t size; + std::shared_ptr<char[]> data; +}; + +class Writer +{ + public: + Writer(std::ostream &ostream, size_t buffer_size) + : m_writer{BufferedFileWriter{ostream, buffer_size}} {}; + + void writeDiffRecord( + uint64_t offset, size_t size, + std::vector<RecordData> data) // cppcheck-suppress passedByValue + { + writeOffset(offset); + writeSize(size); + for (auto it = data.begin(); it != data.end(); ++it) { + writeData(*it); + } + } + + private: + BufferedFileWriter m_writer; + + void writeOffset(uint64_t offset) + { + uint64_t val{htobe64(offset)}; + m_writer.write(reinterpret_cast<char *>(&val), sizeof(val)); + }; + + void writeSize(size_t size) + { + uint32_t val{htobe32(size)}; + m_writer.write(reinterpret_cast<char *>(&val), sizeof(val)); + }; + + void writeData(RecordData data) + { + m_writer.write(reinterpret_cast<char *>(data.data.get()), data.size); + }; +}; + +class Reader +{ + public: + Reader(std::istream &istream, size_t buffer_size) + : m_reader{BufferedFileReader{istream, buffer_size}}, m_eof{false} {}; + + bool eof() { return m_eof; }; + + uint64_t readOffset() + { + uint64_t raw_offset; + const size_t r{m_reader.read(reinterpret_cast<char *>(&raw_offset), + sizeof(raw_offset))}; + if (r != sizeof(raw_offset)) { + m_eof = true; + } + return be64toh(raw_offset); + }; + + size_t readSize() + { + uint32_t raw_size; + const size_t r{m_reader.read(reinterpret_cast<char *>(&raw_size), + sizeof(raw_size))}; + if (r != sizeof(raw_size)) { + m_eof = true; + } + return be32toh(raw_size); + }; + + size_t readData(size_t size, char **return_data) + { + return m_reader.tryRead(size, return_data); + }; + + private: + BufferedFileReader m_reader; + bool m_eof; +}; + +} // namespace FormatV2 diff --git a/src/options.cpp b/src/options.cpp index 5b6ed15..c44fd06 100644 --- a/src/options.cpp +++ b/src/options.cpp @@ -35,17 +35,7 @@ */ #include "program_info.h" -Options::Options() - : sector_size{Options::DEFAULT_SECTOR_SIZE}, - buffer_size{Options::DEFAULT_BUFFER_SIZE} -{ -} - -uint32_t -Options::getSectorSize() const -{ - return sector_size; -} +Options::Options() : buffer_size{Options::DEFAULT_BUFFER_SIZE} {} uint32_t Options::getBufferSize() const @@ -86,11 +76,11 @@ OptionsRestore::getOutFilePath() const void OptionParser::printUsage() { - std::cout << "Usage: " << PROGRAM_NAME_STR << " backup [-S SECTOR_SIZE]"; + std::cout << "Usage: " << PROGRAM_NAME_STR << " backup"; std::cout << " [-B BUFFER_SIZE] -i INFILE -b BASEFILE -o OUTFILE" << std::endl; - std::cout << " Or: " << PROGRAM_NAME_STR << " restore [-S SECTOR_SIZE]"; + std::cout << " Or: " << PROGRAM_NAME_STR << " restore"; std::cout << "[-B BUFFER_SIZE] -d DIFFFILE -o OUTFILE" << std::endl; std::cout << " Or: " << PROGRAM_NAME_STR << " help" << std::endl; @@ -125,22 +115,17 @@ OptionParser::parseBackup(int argc, char **argv) argv += 1; int ch; - const char *arg_sector_size = NULL; const char *arg_buffer_size = NULL; const char *arg_input_file = NULL; const char *arg_base_file = NULL; const char *arg_output_file = NULL; - while ((ch = getopt(argc, argv, ":B:S:i:b:o:")) != -1) { + while ((ch = getopt(argc, argv, ":B:i:b:o:")) != -1) { switch (ch) { case 'B': arg_buffer_size = optarg; break; - case 'S': - arg_sector_size = optarg; - break; - case 'i': arg_input_file = optarg; break; @@ -165,20 +150,11 @@ OptionParser::parseBackup(int argc, char **argv) argc -= optind; /* Convert numbers in the arguments */ - if ((arg_sector_size != NULL) && - parse_unsigned(arg_sector_size, &(opts.sector_size))) { - throw OptionError("incorrect sector size"); - } else if ((arg_buffer_size != NULL) && - parse_unsigned(arg_buffer_size, &(opts.buffer_size))) { + if ((arg_buffer_size != NULL) && + parse_unsigned(arg_buffer_size, &(opts.buffer_size))) { throw OptionError("incorrect buffer size"); - } else if (opts.sector_size == 0) { - throw OptionError("sector size cannot be 0"); } else if (opts.buffer_size == 0) { throw OptionError("buffer size cannot be 0"); - } else if (opts.sector_size > opts.buffer_size) { - throw OptionError("sector size cannot larger than buffer size"); - } else if ((opts.buffer_size % opts.sector_size) != 0) { - throw OptionError("buffer size is not multiple of sector size"); } if (arg_input_file == NULL) { @@ -207,21 +183,16 @@ OptionParser::parseRestore(int argc, char **argv) argv += 1; int ch; - const char *arg_sector_size = NULL; const char *arg_buffer_size = NULL; const char *arg_diff_file = NULL; const char *arg_output_file = NULL; - while ((ch = getopt(argc, argv, ":B:S:d:o:")) != -1) { + while ((ch = getopt(argc, argv, ":B:d:o:")) != -1) { switch (ch) { case 'B': arg_buffer_size = optarg; break; - case 'S': - arg_sector_size = optarg; - break; - case 'd': arg_diff_file = optarg; break; @@ -242,20 +213,11 @@ OptionParser::parseRestore(int argc, char **argv) argc -= optind; /* Convert numbers in the arguments */ - if ((arg_sector_size != NULL) && - parse_unsigned(arg_sector_size, &(opts.sector_size))) { - throw OptionError("incorrect sector size"); - } else if ((arg_buffer_size != NULL) && - parse_unsigned(arg_buffer_size, &(opts.buffer_size))) { + if ((arg_buffer_size != NULL) && + parse_unsigned(arg_buffer_size, &(opts.buffer_size))) { throw OptionError("incorrect buffer size"); - } else if (opts.sector_size == 0) { - throw OptionError("sector size cannot be 0"); } else if (opts.buffer_size == 0) { throw OptionError("buffer size cannot be 0"); - } else if (opts.sector_size > opts.buffer_size) { - throw OptionError("sector size cannot larger than buffer size"); - } else if ((opts.buffer_size % opts.sector_size) != 0) { - throw OptionError("buffer size is not multiple of sector size"); } if (arg_diff_file == NULL) { diff --git a/src/options.h b/src/options.h index 08559e2..80cb8f8 100644 --- a/src/options.h +++ b/src/options.h @@ -42,17 +42,14 @@ class Options friend class OptionParser; public: - static const int DEFAULT_SECTOR_SIZE{512}; static const int DEFAULT_BUFFER_SIZE{4 * 1024 * 1024}; Options(); virtual ~Options() = default; - uint32_t getSectorSize() const; uint32_t getBufferSize() const; private: - uint32_t sector_size; uint32_t buffer_size; }; diff --git a/src/restore.cpp b/src/restore.cpp index 8a5b29f..a747d16 100644 --- a/src/restore.cpp +++ b/src/restore.cpp @@ -26,93 +26,23 @@ #include "restore.h" #include "buffered_file.h" +#include "format_v2.h" #include <filesystem> #include <fstream> - -static void -check_diff_file(const OptionsRestore &opts) -{ - size_t diff_size{0}; - try { - diff_size = std::filesystem::file_size(opts.getDiffFilePath()); - } catch (const std::exception &e) { - throw RestoreError("cannot get size of diff file: " + - std::string(e.what())); - } - - if (diff_size == 0) { - throw RestoreError("diff file is empty"); - } else if ((diff_size % (sizeof(uint64_t) + opts.getSectorSize())) != 0) { - /* The diff file must hold equally sized sectors and the - * offset of each of them - */ - throw RestoreError( - "diff file has size that cannot contain valid diff data"); - } - - size_t out_size{0}; - try { - out_size = std::filesystem::file_size(opts.getOutFilePath()); - } catch (const std::exception &e) { - throw RestoreError("cannot get size of output file: " + - std::string(e.what())); - } - - std::ifstream diff_file; - diff_file.open(opts.getDiffFilePath(), std::ios::in | std::ios::binary); - if (!diff_file) { - throw RestoreError("cannot open diff file"); - } - - uint64_t prev_out_offset = 0; - bool is_first_reading = true; - - /* Scan the diff file and check */ - for (;;) { - uint64_t out_offset; - /* Read the next offset */ - diff_file.read(reinterpret_cast<char *>(&out_offset), - sizeof(out_offset)); - - if (diff_file.eof() && diff_file.fail() && !diff_file.bad()) { - break; - } else if (!diff_file.good() && !diff_file.eof()) { - throw RestoreError("cannot read from file"); - } - out_offset = le64toh(out_offset); - - if (!is_first_reading && (out_offset <= prev_out_offset)) { - throw RestoreError( - "a sector offset points behind the previous offset"); - } else if ((out_offset + opts.getSectorSize()) > out_size) { - throw RestoreError( - "a sector offset points past the end of the output file"); - } else if (!diff_file.seekg(opts.getSectorSize(), std::ios_base::cur)) { - throw RestoreError("cannot seek in diff file"); - } - - is_first_reading = false; - prev_out_offset = out_offset; - } - - /* The diff file must be read completely */ - char c; - diff_file.read(&c, 1); - if (diff_file.gcount() != 0) { - throw RestoreError("diff file is not valid"); - } - diff_file.clear(); - - diff_file.close(); -} +#include <vector> void restore(const OptionsRestore &opts) { - check_diff_file(opts); + std::fstream diff_stream; + diff_stream.open(opts.getDiffFilePath(), + std::ifstream::in | std::ifstream::binary); + if (!diff_stream) { + throw RestoreError("cannot open diff file"); + } - BufferedFileReader diff_file(opts.getDiffFilePath(), opts.getBufferSize()); + FormatV2::Reader diff_reader(diff_stream, opts.getBufferSize()); std::fstream out_file; out_file.open(opts.getOutFilePath(), @@ -121,45 +51,34 @@ restore(const OptionsRestore &opts) throw RestoreError("cannot open output file"); } - const size_t diff_buffer_size = sizeof(uint64_t) + opts.getSectorSize(); - std::unique_ptr<char[]> diff_buffer; - try { - diff_buffer = std::make_unique<char[]>(diff_buffer_size); - } catch (const std::bad_alloc &e) { - throw RestoreError("cannot allocate sector buffer for diff file data"); - } - - /* Restore data from the differential image */ - size_t diff_read_size = {0}; for (;;) { - diff_read_size = diff_file.read(diff_buffer.get(), diff_buffer_size); - - if (diff_read_size == 0) { + const uint64_t offset{diff_reader.readOffset()}; + if (diff_reader.eof()) { break; - } else if (diff_read_size != diff_buffer_size) { - throw RestoreError("cannot read from diff file"); } - const uint64_t out_offset = - le64toh(*reinterpret_cast<uint64_t *>(diff_buffer.get())); - - if (!out_file.seekp(out_offset, std::ios_base::beg)) { + if (!out_file.seekp(offset, std::ios_base::beg)) { throw RestoreError("cannot seek in output file"); } - if (!out_file.write(reinterpret_cast<char *>(diff_buffer.get()) + - sizeof(uint64_t), - opts.getSectorSize())) { - throw RestoreError("cannot write to output file"); - } - } + uint64_t size{diff_reader.readSize()}; - out_file.close(); + while (size > 0) { + char *data; + const size_t r{diff_reader.readData(size, &data)}; + if (r == 0) { + break; + } - /* The diff file must be read completely */ - char c; - diff_read_size = diff_file.read(&c, 1); - if (diff_read_size != 0) { - throw RestoreError("diff file is not valid"); + if (!out_file.write(data, r)) { + throw RestoreError("cannot write to output file"); + } + + size -= r; + } + + if (size > 0) { + throw RestoreError("cannot read all the data of the record"); + } } } diff --git a/tests/005-missing_argument_for_option.sh b/tests/005-missing_argument_for_option.sh index b519513..26569bc 100644 --- a/tests/005-missing_argument_for_option.sh +++ b/tests/005-missing_argument_for_option.sh @@ -5,9 +5,7 @@ source ./assert.sh PROGRAM_EXEC="$1" assert "Usage" "missing argument for option '-B'" 1 $PROGRAM_EXEC backup -B -assert "Usage" "missing argument for option '-S'" 1 $PROGRAM_EXEC backup -S assert "Usage" "missing argument for option '-B'" 1 $PROGRAM_EXEC restore -B -assert "Usage" "missing argument for option '-S'" 1 $PROGRAM_EXEC restore -S exit 0 diff --git a/tests/006-incorrect_buffer_size.sh b/tests/006-incorrect_buffer_size.sh index 1dfbc24..9c9b4ea 100644 --- a/tests/006-incorrect_buffer_size.sh +++ b/tests/006-incorrect_buffer_size.sh @@ -4,12 +4,10 @@ source ./assert.sh PROGRAM_EXEC="$1" -assert "Usage" "incorrect sector size" 1 $PROGRAM_EXEC backup -S abc123 -i in -b base -o out -assert "Usage" "sector size cannot be 0" 1 $PROGRAM_EXEC backup -S 0 -i in -b base -o out -assert "Usage" "sector size cannot larger than buffer size" 1 $PROGRAM_EXEC backup -S 2 -B 1 -i in -b base -o out +assert "Usage" "incorrect buffer size" 1 $PROGRAM_EXEC backup -B abc123 -i in -b base -o out +assert "Usage" "buffer size cannot be 0" 1 $PROGRAM_EXEC backup -B 0 -i in -b base -o out -assert "Usage" "incorrect sector size" 1 $PROGRAM_EXEC restore -S abc123 -d diff -o out -assert "Usage" "sector size cannot be 0" 1 $PROGRAM_EXEC restore -S 0 -d diff -o out -assert "Usage" "sector size cannot larger than buffer size" 1 $PROGRAM_EXEC restore -S 2 -B 1 -d diff -o out +assert "Usage" "incorrect buffer size" 1 $PROGRAM_EXEC restore -B abc123 -d diff -o out +assert "Usage" "buffer size cannot be 0" 1 $PROGRAM_EXEC restore -B 0 -d diff -o out exit 0 diff --git a/tests/007-incorrect_sector_size.sh b/tests/007-incorrect_sector_size.sh deleted file mode 100644 index 5d8a689..0000000 --- a/tests/007-incorrect_sector_size.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -source ./assert.sh - -PROGRAM_EXEC="$1" - -assert "Usage" "incorrect buffer size" 1 $PROGRAM_EXEC backup -B abc123 -i in -b base -o out -assert "Usage" "buffer size cannot be 0" 1 $PROGRAM_EXEC backup -B 0 -i in -b base -o out -assert "Usage" "buffer size is not multiple of sector size" 1 $PROGRAM_EXEC backup -B 3 -S 2 -i in -b base -o out - -assert "Usage" "incorrect buffer size" 1 $PROGRAM_EXEC restore -B abc123 -d diff -o out -assert "Usage" "buffer size cannot be 0" 1 $PROGRAM_EXEC restore -B 0 -d diff -o out -assert "Usage" "buffer size is not multiple of sector size" 1 $PROGRAM_EXEC restore -B 3 -S 2 -d diff -o out - -exit 0 diff --git a/tests/100-cannot_open_files.sh b/tests/100-cannot_open_files.sh index 34cf3e0..e0a59d3 100644 --- a/tests/100-cannot_open_files.sh +++ b/tests/100-cannot_open_files.sh @@ -6,11 +6,11 @@ PROGRAM_EXEC="$1" rm -f input base out touch base out -assert "" "cannot get size of input file" 1 $PROGRAM_EXEC backup -i input -b base -o out +assert "" "cannot open input file" 1 $PROGRAM_EXEC backup -i input -b base -o out rm -f input base out touch input out -assert "" "cannot get size of base file" 1 $PROGRAM_EXEC backup -i input -b base -o out +assert "" "cannot open base file" 1 $PROGRAM_EXEC backup -i input -b base -o out rm -f input base out rmdir outdir 2>/dev/null diff --git a/tests/200-input_and_reference_size_differs.sh b/tests/200-input_and_reference_size_differs.sh deleted file mode 100644 index dceb63b..0000000 --- a/tests/200-input_and_reference_size_differs.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -source ./assert.sh - -PROGRAM_EXEC="$1" - -rm -f input base -dd if=/dev/zero of=input bs=500 count=1 1>/dev/null 2>&1 -dd if=/dev/zero of=base bs=501 count=1 1>/dev/null 2>&1 - -assert "" "input file and base file differ in size" 1 $PROGRAM_EXEC backup -i input -b base -o out - -rm -f input base out - -exit 0 diff --git a/tests/201-input_or_reference_size_is_not_multiple_of_sector_size.sh b/tests/201-input_or_reference_size_is_not_multiple_of_sector_size.sh deleted file mode 100644 index d7c6aa6..0000000 --- a/tests/201-input_or_reference_size_is_not_multiple_of_sector_size.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -source ./assert.sh - -PROGRAM_EXEC="$1" - -rm -f input base -dd if=/dev/zero of=input bs=513 count=1 1>/dev/null 2>&1 -dd if=/dev/zero of=base bs=513 count=1 1>/dev/null 2>&1 - -assert "" "size of input file and base file is not multiple of [0-9]" \ - 1 $PROGRAM_EXEC backup -S 512 -i input -b base -o out - -rm -f input base out - -exit 0 diff --git a/tests/300-incorrect_reference_file.sh b/tests/300-incorrect_reference_file.sh deleted file mode 100644 index c250541..0000000 --- a/tests/300-incorrect_reference_file.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash - -source ./assert.sh - -PROGRAM_EXEC="$1" - -rm -f diff out -touch diff out -assert "" "diff file is empty" 1 $PROGRAM_EXEC restore -d diff -o out - -dd if=/dev/zero of=diff bs=513 count=1 1>/dev/null 2>&1 -assert "" "diff file has size that cannot contain valid diff data" \ - 1 $PROGRAM_EXEC restore -S 512 -d diff -o out - -rm -f diff out -dd if=/dev/zero of=out bs=512 count=2 1>/dev/null 2>&1 -# Create a two-sector backup file -dd if=/dev/zero of=diff bs=$(( 512 + 8 )) count=2 1>/dev/null 2>&1 -# The first offset will be 2 -printf '\x02' | dd of=diff bs=1 count=1 seek=0 conv=notrunc 1>/dev/null 2>&1 -# The second offset will be 1 -printf '\x01' | dd of=diff bs=1 count=1 seek=520 conv=notrunc 1>/dev/null 2>&1 -assert "" "a sector offset points behind the previous offset" \ - 1 $PROGRAM_EXEC restore -S 512 -d diff -o out - -rm -f diff out -dd if=/dev/zero of=out bs=512 count=1 1>/dev/null 2>&1 -# Create a one-sector backup file -dd if=/dev/zero of=diff bs=$(( 512 + 8 )) count=2 1>/dev/null 2>&1 -# The first offset will be 1 -printf '\x01' | dd of=diff bs=1 count=1 seek=0 conv=notrunc 1>/dev/null 2>&1 -assert "" "a sector offset points past the end of the output file" \ - 1 $PROGRAM_EXEC restore -S 512 -d diff -o out - -rm -f diff out - -exit 0 diff --git a/tests/400-expected_backup_output.bin b/tests/400-expected_backup_output.bin Binary files differindex 10a3993..3df3237 100644 --- a/tests/400-expected_backup_output.bin +++ b/tests/400-expected_backup_output.bin diff --git a/tests/400-successful_backup_restore.sh b/tests/400-successful_backup_restore.sh index fb44dda..f1a07c5 100644 --- a/tests/400-successful_backup_restore.sh +++ b/tests/400-successful_backup_restore.sh @@ -29,7 +29,7 @@ printf '\xFF' | dd of=input bs=1 count=1 seek=$(( (512 * 3) - 1 )) conv=notrunc # The fourth sector will have the middle byte changed printf '\xFF' | dd of=input bs=1 count=1 seek=$(( (512 * 4) - (512 / 2) )) conv=notrunc 1>/dev/null 2>&1 -assert "" "" 0 $PROGRAM_EXEC backup -S 512 -i input -b base -o out +assert "" "" 0 $PROGRAM_EXEC backup -i input -b base -o out if ! files_are_the_same out 400-expected_backup_output.bin; then echo "assert: Backup output file differs from the expected one" @@ -44,7 +44,7 @@ if ! files_are_the_same base input; then exit 1 fi -assert "" "" 0 $PROGRAM_EXEC restore -S 512 -d out -o input +assert "" "" 0 $PROGRAM_EXEC restore -d out -o input if ! files_are_the_same input backedup_input; then echo "assert: Cannot restore the backup" |
