From 722a52b94e4d8ae73bc4ee92472dba8e7174a1ab Mon Sep 17 00:00:00 2001 From: Jan Sucan Date: Wed, 22 Jan 2025 17:04:24 +0100 Subject: Rename backup operation to create --- README.md | 10 +- src/backup.cpp | 391 ------------------------------- src/backup.h | 38 --- src/create.cpp | 391 +++++++++++++++++++++++++++++++ src/create.h | 38 +++ src/main.cpp | 6 +- src/options.cpp | 18 +- src/options.h | 8 +- tests/002-missing_arguments.sh | 2 +- tests/003-too_many_arguments.sh | 2 +- tests/004-unknown_option.sh | 2 +- tests/005-missing_argument_for_option.sh | 2 +- tests/006-incorrect_buffer_size.sh | 4 +- tests/100-cannot_open_files.sh | 6 +- tests/400-successful_backup_restore.sh | 2 +- 15 files changed, 460 insertions(+), 460 deletions(-) delete mode 100644 src/backup.cpp delete mode 100644 src/backup.h create mode 100644 src/create.cpp create mode 100644 src/create.h diff --git a/README.md b/README.md index 480b2df..57dfed3 100644 --- a/README.md +++ b/README.md @@ -18,16 +18,16 @@ utility. > diff-dd version -> diff-dd backup [-B BUFFER_SIZE] -i INFILE -b BASEFILE -o OUTFILE +> diff-dd create [-B BUFFER_SIZE] -i INFILE -b BASEFILE -o OUTFILE > diff-dd restore [-B BUFFER_SIZE] -d DIFFFILE -o OUTFILE -## Backup +## Create Using ```diff-dd ``` for backup requires the full backup image to exist. Differential backup is created with: -> diff-dd backup -i INFILE -b BASEFILE -o OUTFILE +> diff-dd create -i INFILE -b BASEFILE -o OUTFILE The ```INFILE``` is a path to the file to backup differentially, the ```BASEFILE``` is the full image, and the ```OUTFILE``` is the file to @@ -45,7 +45,7 @@ The restoration means application of the changed data saved in the ```-B``` sets the size of the buffer for the data of the input and output files (default is 4 MiB). The input data is always buffered. The -output data is buffered only in backup mode. +output data is not buffered in the restore mode. ## Example @@ -55,7 +55,7 @@ First, the full image of the partition to backup has to be created: When the user decides to create the differential image, he or she runs: -> diff-dd backup -i /dev/sda1 -b full.img -o diff.img +> diff-dd create -i /dev/sda1 -b full.img -o diff.img If a data accident happens, the partition can be restored by running: diff --git a/src/backup.cpp b/src/backup.cpp deleted file mode 100644 index 98898b9..0000000 --- a/src/backup.cpp +++ /dev/null @@ -1,391 +0,0 @@ -/* Copyright 2024 Ján Sučan - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "backup.h" -#include "buffered_stream.h" -#include "format_v2.h" - -#include -#include -#include -#include -#include - -class Page -{ - friend bool operator==(const Page &lhs, const Page &rhs); - - public: - Page() : m_start(0), m_end(0){}; - - Page(std::shared_ptr data, uint64_t start, uint64_t end) - : m_data(data), m_start(start), m_end(end) - { - assert(m_start <= m_end); - }; - - std::shared_ptr getData() const { return m_data; }; - uint64_t getStart() const { return m_start; }; - uint64_t getEnd() const { return m_end; }; - size_t getSize() const { return m_end - m_start; }; - bool isEmpty() const { return getSize() == 0; }; - - private: - std::shared_ptr m_data; - uint64_t m_start; - uint64_t m_end; -}; - -bool -operator==(const Page &lhs, const Page &rhs) -{ - return (lhs.m_data == rhs.m_data) && (lhs.m_start == rhs.m_start) && - (lhs.m_end == rhs.m_end); -} - -class PagedStreamReader -{ - public: - PagedStreamReader(std::istream &istr, size_t page_size_bytes) - : m_page_size_bytes(page_size_bytes), - m_reader(istr, page_size_bytes, 2), m_stream_pos_bytes(0){}; - - Page getNextPage() - { - const BufferedStream::DataPart dp{ - m_reader.readMultipart(m_page_size_bytes)}; - - m_stream_pos_bytes += dp.size; - - return Page{dp.data, m_stream_pos_bytes - dp.size, m_stream_pos_bytes}; - } - - private: - const size_t m_page_size_bytes; - BufferedStream::Reader m_reader; - uint64_t m_stream_pos_bytes; -}; - -enum class MergeState { - Finished, - Incomplete, -}; - -class Diff -{ - friend MergeState diffsTryMerge(Diff &diff_a, Diff &diff_b, - size_t max_merge_gap, size_t max_size); - - public: - explicit Diff(uint64_t start_end) - : m_pages{}, m_start{start_end}, m_end{start_end} - { - assert(m_start <= m_end); - }; - Diff(Page page, uint64_t start, uint64_t end) - : m_pages{page}, m_start{start}, m_end{end} - { - assert(m_start <= m_end); - }; - uint64_t getStart() const { return m_start; }; - uint64_t getEnd() const { return m_end; }; - size_t getSize() const { return m_end - m_start; }; - bool isEmpty() const { return getSize() == 0; }; - - std::vector getData() const - { - std::vector data{}; - - if (!m_pages[0].isEmpty() && m_pages[1].isEmpty()) { - // Only the first page - assert((m_start >= m_pages[0].getStart()) && - (m_start <= m_pages[0].getEnd()) && - (m_end >= m_pages[0].getStart()) && - (m_end <= m_pages[0].getEnd())); - - const uint64_t offset{m_start - m_pages[0].getStart()}; - auto data_first{std::shared_ptr{ - m_pages[0].getData(), - static_cast(m_pages[0].getData().get()) + offset}}; - data.push_back(FormatV2::RecordData{getSize(), data_first}); - } else if (!m_pages[0].isEmpty() && !m_pages[1].isEmpty()) { - // Both pages - assert((m_start >= m_pages[0].getStart()) && - (m_start <= m_pages[0].getEnd()) && - (m_end >= m_pages[1].getStart()) && - (m_end <= m_pages[1].getEnd())); - - size_t size{m_pages[0].getEnd() - m_start}; - const uint64_t offset{m_start - m_pages[0].getStart()}; - auto data_first{std::shared_ptr{ - m_pages[0].getData(), - static_cast(m_pages[0].getData().get()) + offset}}; - data.push_back(FormatV2::RecordData{size, data_first}); - - size = m_end - m_pages[1].getStart(); - data.push_back(FormatV2::RecordData{size, m_pages[1].getData()}); - } - - return data; - }; - - private: - std::array m_pages; - uint64_t m_start; - uint64_t m_end; - - bool hasPage(size_t i) // cppcheck-suppress unusedPrivateFunction - { - return (i < m_pages.size()) && (m_pages[i].getData() != nullptr); - }; -}; - -MergeState -diffsTryMerge(Diff &diff_a, Diff &diff_b, size_t max_merge_gap, size_t max_size) -{ - if (diff_a.isEmpty()) { - // Do not merge to an empty diff - return MergeState::Finished; - } - - if (diff_b.isEmpty()) { - // Nothing to merge from an empty diff - return MergeState::Finished; - } - - assert(diff_a.getEnd() <= diff_b.getStart()); - const size_t gap{diff_b.getStart() - diff_a.getEnd()}; - if (gap > max_merge_gap) { - // B is too far away - return MergeState::Finished; - } - - if ((diff_a.getSize() + gap) >= max_size) { - // No space in A - return MergeState::Finished; - } - - // Can be merged - - // Adjust the diff start and end offsets - - // There is always at least 1 byte free in A here - const size_t free{max_size - (diff_a.getSize() + gap)}; - const size_t to_merge{std::min(free, diff_b.getSize())}; - // There is always at least 1 byte to merge from B here - - // Enlarge A - diff_a.m_end += gap + to_merge; - // Shrink B - diff_b.m_start += to_merge; - - // Add B's page to A if needed - - // Non-empty A must have only the first, or both pages - assert(diff_a.hasPage(0)); - // Non-empty B must have only the first page - assert(diff_b.hasPage(0) && !diff_b.hasPage(1)); - - // If A has both pages, B's page must only be the same as A's second - // page. No setting of pages in A is needed in this case - assert(!diff_a.hasPage(1) || (diff_b.m_pages[0] == diff_a.m_pages[1])); - if (!diff_a.hasPage(1)) { - // If A has only the first page, B's page must only be the same as the - // A's first page or following it - const bool b_follows{ - (diff_b.m_pages[0].getData() != diff_a.m_pages[0].getData()) && - (diff_b.m_pages[0].getStart() == diff_a.m_pages[0].getEnd())}; - assert((diff_b.m_pages[0] == diff_a.m_pages[0]) || b_follows); - if (b_follows) { - diff_a.m_pages[1] = diff_b.m_pages[0]; - } - } - - return (diff_a.getSize() >= max_size) ? MergeState::Finished - : MergeState::Incomplete; -} - -class DiffFinder -{ - public: - DiffFinder(std::istream &old_stream, std::istream &new_stream, - uint32_t buffer_size, size_t max_merge_gap) - : m_old_page_reader(old_stream, buffer_size), - m_new_page_reader(new_stream, buffer_size), - m_diff_max_size(buffer_size), m_max_merge_gap(max_merge_gap), - m_offset_in_stream(0), m_diff(0), - m_search_state(SearchState::ReadPages){}; - - Diff findNextDiff() - { - for (;;) { - if (m_search_state == SearchState::ReadPages) { - m_old_page = m_old_page_reader.getNextPage(); - m_new_page = m_new_page_reader.getNextPage(); - assert(m_old_page.getStart() == m_new_page.getStart()); - - if (m_old_page.getSize() != m_new_page.getSize()) { - throw BackupError( - "cannot read the same amount of data from both files"); - } - - const bool end_of_stream{m_old_page.isEmpty() && - m_new_page.isEmpty()}; - if (end_of_stream) { - const Diff return_diff{m_diff}; - m_diff = Diff{m_offset_in_stream}; - return return_diff; - } - - m_search_state = SearchState::FindDiff; - - } else if (m_search_state == SearchState::FindDiff) { - Diff diff{findDiffInPages(m_old_page, m_new_page, - m_offset_in_stream)}; - m_offset_in_stream = diff.getEnd(); - - if (diff.isEmpty()) { - // End of pages. On the next call, read new pages. - m_old_page = Page{}; - m_new_page = Page{}; - m_search_state = SearchState::ReadPages; - } - - const MergeState merge_state{diffsTryMerge( - m_diff, diff, m_max_merge_gap, m_diff_max_size)}; - - if (merge_state == MergeState::Finished) { - const Diff return_diff{m_diff}; - m_diff = diff; - if (!return_diff.isEmpty()) { - return return_diff; - } - } - - } else { - assert(false); - } - } - }; - - private: - enum class SearchState { ReadPages, FindDiff }; - - PagedStreamReader m_old_page_reader; - PagedStreamReader m_new_page_reader; - const size_t m_diff_max_size; - const size_t m_max_merge_gap; - Page m_old_page; - Page m_new_page; - uint64_t m_offset_in_stream; - Diff m_diff; - SearchState m_search_state; - - Diff findDiffInPages(Page old_page, Page new_page, - uint64_t offset_in_stream) - { - const char *old_data{old_page.getData().get()}; - const char *new_data{new_page.getData().get()}; - const uint64_t data_size_bytes{old_page.getSize()}; - - assert(offset_in_stream >= new_page.getStart()); - size_t offset_in_pages{offset_in_stream - new_page.getStart()}; - - // Find offset of the first different byte - for (; offset_in_pages < data_size_bytes; ++offset_in_pages) { - if (new_data[offset_in_pages] != old_data[offset_in_pages]) { - break; - } - } - const size_t start_in_pages{offset_in_pages}; - - if (offset_in_pages < data_size_bytes) { - // Different byte found. Start searching for a same byte immediately - // after. - ++offset_in_pages; - } - - // Find offset of the first same byte - for (; offset_in_pages < data_size_bytes; ++offset_in_pages) { - if (new_data[offset_in_pages] == old_data[offset_in_pages]) { - break; - } - } - const size_t end_in_pages{offset_in_pages}; - - // In the case when no different byte is found, the end offset will be - // the same as the start offset - - const uint64_t start_in_stream{new_page.getStart() + start_in_pages}; - const uint64_t end_in_stream{new_page.getStart() + end_in_pages}; - if (start_in_stream == end_in_stream) { - return Diff{start_in_stream}; - } else { - return Diff{new_page, start_in_stream, end_in_stream}; - } - } -}; - -void -backup(const OptionsBackup &opts) -{ - std::ifstream in_istream{opts.getInFilePath(), - std::ifstream::in | std::ifstream::binary}; - if (!in_istream) { - throw BufferedStream::Error("cannot open input file"); - } - - std::ifstream base_istream{opts.getBaseFilePath(), - std::ifstream::in | std::ifstream::binary}; - if (!base_istream) { - throw BufferedStream::Error("cannot open base file"); - } - - // When backing up, the output file is truncated to hold the new data - std::ofstream out_ostream{opts.getOutFilePath(), std::ofstream::out | - std::ofstream::trunc | - std::ofstream::binary}; - if (!out_ostream) { - throw BufferedStream::Error("cannot open output file"); - } - - DiffFinder diff_finder(base_istream, in_istream, opts.getBufferSize(), - FormatV2::RecordHeaderSize); - FormatV2::Writer diff_writer(out_ostream, opts.getBufferSize()); - - for (;;) { - const Diff diff{diff_finder.findNextDiff()}; - if (diff.isEmpty()) { - break; - } - - diff_writer.writeDiffRecord(diff.getStart(), diff.getSize(), - diff.getData()); - - // Here, the diff is destructed and page data reference counters - // decremented - } -} diff --git a/src/backup.h b/src/backup.h deleted file mode 100644 index ed7bf1a..0000000 --- a/src/backup.h +++ /dev/null @@ -1,38 +0,0 @@ -/* Copyright 2021 Ján Sučan - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#pragma once - -#include "exception.h" -#include "options.h" - -class BackupError : public DiffddError -{ - public: - explicit BackupError(const std::string &message) : DiffddError(message) {} -}; - -void backup(const OptionsBackup &opts); diff --git a/src/create.cpp b/src/create.cpp new file mode 100644 index 0000000..460bdd1 --- /dev/null +++ b/src/create.cpp @@ -0,0 +1,391 @@ +/* Copyright 2024 Ján Sučan + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "create.h" +#include "buffered_stream.h" +#include "format_v2.h" + +#include +#include +#include +#include +#include + +class Page +{ + friend bool operator==(const Page &lhs, const Page &rhs); + + public: + Page() : m_start(0), m_end(0){}; + + Page(std::shared_ptr data, uint64_t start, uint64_t end) + : m_data(data), m_start(start), m_end(end) + { + assert(m_start <= m_end); + }; + + std::shared_ptr getData() const { return m_data; }; + uint64_t getStart() const { return m_start; }; + uint64_t getEnd() const { return m_end; }; + size_t getSize() const { return m_end - m_start; }; + bool isEmpty() const { return getSize() == 0; }; + + private: + std::shared_ptr m_data; + uint64_t m_start; + uint64_t m_end; +}; + +bool +operator==(const Page &lhs, const Page &rhs) +{ + return (lhs.m_data == rhs.m_data) && (lhs.m_start == rhs.m_start) && + (lhs.m_end == rhs.m_end); +} + +class PagedStreamReader +{ + public: + PagedStreamReader(std::istream &istr, size_t page_size_bytes) + : m_page_size_bytes(page_size_bytes), + m_reader(istr, page_size_bytes, 2), m_stream_pos_bytes(0){}; + + Page getNextPage() + { + const BufferedStream::DataPart dp{ + m_reader.readMultipart(m_page_size_bytes)}; + + m_stream_pos_bytes += dp.size; + + return Page{dp.data, m_stream_pos_bytes - dp.size, m_stream_pos_bytes}; + } + + private: + const size_t m_page_size_bytes; + BufferedStream::Reader m_reader; + uint64_t m_stream_pos_bytes; +}; + +enum class MergeState { + Finished, + Incomplete, +}; + +class Diff +{ + friend MergeState diffsTryMerge(Diff &diff_a, Diff &diff_b, + size_t max_merge_gap, size_t max_size); + + public: + explicit Diff(uint64_t start_end) + : m_pages{}, m_start{start_end}, m_end{start_end} + { + assert(m_start <= m_end); + }; + Diff(Page page, uint64_t start, uint64_t end) + : m_pages{page}, m_start{start}, m_end{end} + { + assert(m_start <= m_end); + }; + uint64_t getStart() const { return m_start; }; + uint64_t getEnd() const { return m_end; }; + size_t getSize() const { return m_end - m_start; }; + bool isEmpty() const { return getSize() == 0; }; + + std::vector getData() const + { + std::vector data{}; + + if (!m_pages[0].isEmpty() && m_pages[1].isEmpty()) { + // Only the first page + assert((m_start >= m_pages[0].getStart()) && + (m_start <= m_pages[0].getEnd()) && + (m_end >= m_pages[0].getStart()) && + (m_end <= m_pages[0].getEnd())); + + const uint64_t offset{m_start - m_pages[0].getStart()}; + auto data_first{std::shared_ptr{ + m_pages[0].getData(), + static_cast(m_pages[0].getData().get()) + offset}}; + data.push_back(FormatV2::RecordData{getSize(), data_first}); + } else if (!m_pages[0].isEmpty() && !m_pages[1].isEmpty()) { + // Both pages + assert((m_start >= m_pages[0].getStart()) && + (m_start <= m_pages[0].getEnd()) && + (m_end >= m_pages[1].getStart()) && + (m_end <= m_pages[1].getEnd())); + + size_t size{m_pages[0].getEnd() - m_start}; + const uint64_t offset{m_start - m_pages[0].getStart()}; + auto data_first{std::shared_ptr{ + m_pages[0].getData(), + static_cast(m_pages[0].getData().get()) + offset}}; + data.push_back(FormatV2::RecordData{size, data_first}); + + size = m_end - m_pages[1].getStart(); + data.push_back(FormatV2::RecordData{size, m_pages[1].getData()}); + } + + return data; + }; + + private: + std::array m_pages; + uint64_t m_start; + uint64_t m_end; + + bool hasPage(size_t i) // cppcheck-suppress unusedPrivateFunction + { + return (i < m_pages.size()) && (m_pages[i].getData() != nullptr); + }; +}; + +MergeState +diffsTryMerge(Diff &diff_a, Diff &diff_b, size_t max_merge_gap, size_t max_size) +{ + if (diff_a.isEmpty()) { + // Do not merge to an empty diff + return MergeState::Finished; + } + + if (diff_b.isEmpty()) { + // Nothing to merge from an empty diff + return MergeState::Finished; + } + + assert(diff_a.getEnd() <= diff_b.getStart()); + const size_t gap{diff_b.getStart() - diff_a.getEnd()}; + if (gap > max_merge_gap) { + // B is too far away + return MergeState::Finished; + } + + if ((diff_a.getSize() + gap) >= max_size) { + // No space in A + return MergeState::Finished; + } + + // Can be merged + + // Adjust the diff start and end offsets + + // There is always at least 1 byte free in A here + const size_t free{max_size - (diff_a.getSize() + gap)}; + const size_t to_merge{std::min(free, diff_b.getSize())}; + // There is always at least 1 byte to merge from B here + + // Enlarge A + diff_a.m_end += gap + to_merge; + // Shrink B + diff_b.m_start += to_merge; + + // Add B's page to A if needed + + // Non-empty A must have only the first, or both pages + assert(diff_a.hasPage(0)); + // Non-empty B must have only the first page + assert(diff_b.hasPage(0) && !diff_b.hasPage(1)); + + // If A has both pages, B's page must only be the same as A's second + // page. No setting of pages in A is needed in this case + assert(!diff_a.hasPage(1) || (diff_b.m_pages[0] == diff_a.m_pages[1])); + if (!diff_a.hasPage(1)) { + // If A has only the first page, B's page must only be the same as the + // A's first page or following it + const bool b_follows{ + (diff_b.m_pages[0].getData() != diff_a.m_pages[0].getData()) && + (diff_b.m_pages[0].getStart() == diff_a.m_pages[0].getEnd())}; + assert((diff_b.m_pages[0] == diff_a.m_pages[0]) || b_follows); + if (b_follows) { + diff_a.m_pages[1] = diff_b.m_pages[0]; + } + } + + return (diff_a.getSize() >= max_size) ? MergeState::Finished + : MergeState::Incomplete; +} + +class DiffFinder +{ + public: + DiffFinder(std::istream &old_stream, std::istream &new_stream, + uint32_t buffer_size, size_t max_merge_gap) + : m_old_page_reader(old_stream, buffer_size), + m_new_page_reader(new_stream, buffer_size), + m_diff_max_size(buffer_size), m_max_merge_gap(max_merge_gap), + m_offset_in_stream(0), m_diff(0), + m_search_state(SearchState::ReadPages){}; + + Diff findNextDiff() + { + for (;;) { + if (m_search_state == SearchState::ReadPages) { + m_old_page = m_old_page_reader.getNextPage(); + m_new_page = m_new_page_reader.getNextPage(); + assert(m_old_page.getStart() == m_new_page.getStart()); + + if (m_old_page.getSize() != m_new_page.getSize()) { + throw CreateError( + "cannot read the same amount of data from both files"); + } + + const bool end_of_stream{m_old_page.isEmpty() && + m_new_page.isEmpty()}; + if (end_of_stream) { + const Diff return_diff{m_diff}; + m_diff = Diff{m_offset_in_stream}; + return return_diff; + } + + m_search_state = SearchState::FindDiff; + + } else if (m_search_state == SearchState::FindDiff) { + Diff diff{findDiffInPages(m_old_page, m_new_page, + m_offset_in_stream)}; + m_offset_in_stream = diff.getEnd(); + + if (diff.isEmpty()) { + // End of pages. On the next call, read new pages. + m_old_page = Page{}; + m_new_page = Page{}; + m_search_state = SearchState::ReadPages; + } + + const MergeState merge_state{diffsTryMerge( + m_diff, diff, m_max_merge_gap, m_diff_max_size)}; + + if (merge_state == MergeState::Finished) { + const Diff return_diff{m_diff}; + m_diff = diff; + if (!return_diff.isEmpty()) { + return return_diff; + } + } + + } else { + assert(false); + } + } + }; + + private: + enum class SearchState { ReadPages, FindDiff }; + + PagedStreamReader m_old_page_reader; + PagedStreamReader m_new_page_reader; + const size_t m_diff_max_size; + const size_t m_max_merge_gap; + Page m_old_page; + Page m_new_page; + uint64_t m_offset_in_stream; + Diff m_diff; + SearchState m_search_state; + + Diff findDiffInPages(Page old_page, Page new_page, + uint64_t offset_in_stream) + { + const char *old_data{old_page.getData().get()}; + const char *new_data{new_page.getData().get()}; + const uint64_t data_size_bytes{old_page.getSize()}; + + assert(offset_in_stream >= new_page.getStart()); + size_t offset_in_pages{offset_in_stream - new_page.getStart()}; + + // Find offset of the first different byte + for (; offset_in_pages < data_size_bytes; ++offset_in_pages) { + if (new_data[offset_in_pages] != old_data[offset_in_pages]) { + break; + } + } + const size_t start_in_pages{offset_in_pages}; + + if (offset_in_pages < data_size_bytes) { + // Different byte found. Start searching for a same byte immediately + // after. + ++offset_in_pages; + } + + // Find offset of the first same byte + for (; offset_in_pages < data_size_bytes; ++offset_in_pages) { + if (new_data[offset_in_pages] == old_data[offset_in_pages]) { + break; + } + } + const size_t end_in_pages{offset_in_pages}; + + // In the case when no different byte is found, the end offset will be + // the same as the start offset + + const uint64_t start_in_stream{new_page.getStart() + start_in_pages}; + const uint64_t end_in_stream{new_page.getStart() + end_in_pages}; + if (start_in_stream == end_in_stream) { + return Diff{start_in_stream}; + } else { + return Diff{new_page, start_in_stream, end_in_stream}; + } + } +}; + +void +create(const OptionsCreate &opts) +{ + std::ifstream in_istream{opts.getInFilePath(), + std::ifstream::in | std::ifstream::binary}; + if (!in_istream) { + throw BufferedStream::Error("cannot open input file"); + } + + std::ifstream base_istream{opts.getBaseFilePath(), + std::ifstream::in | std::ifstream::binary}; + if (!base_istream) { + throw BufferedStream::Error("cannot open base file"); + } + + // When backing up, the output file is truncated to hold the new data + std::ofstream out_ostream{opts.getOutFilePath(), std::ofstream::out | + std::ofstream::trunc | + std::ofstream::binary}; + if (!out_ostream) { + throw BufferedStream::Error("cannot open output file"); + } + + DiffFinder diff_finder(base_istream, in_istream, opts.getBufferSize(), + FormatV2::RecordHeaderSize); + FormatV2::Writer diff_writer(out_ostream, opts.getBufferSize()); + + for (;;) { + const Diff diff{diff_finder.findNextDiff()}; + if (diff.isEmpty()) { + break; + } + + diff_writer.writeDiffRecord(diff.getStart(), diff.getSize(), + diff.getData()); + + // Here, the diff is destructed and page data reference counters + // decremented + } +} diff --git a/src/create.h b/src/create.h new file mode 100644 index 0000000..bbfc374 --- /dev/null +++ b/src/create.h @@ -0,0 +1,38 @@ +/* Copyright 2021 Ján Sučan + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include "exception.h" +#include "options.h" + +class CreateError : public DiffddError +{ + public: + explicit CreateError(const std::string &message) : DiffddError(message) {} +}; + +void create(const OptionsCreate &opts); diff --git a/src/main.cpp b/src/main.cpp index 499140f..364762d 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -24,7 +24,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "backup.h" +#include "create.h" #include "options.h" #include "restore.h" @@ -46,8 +46,8 @@ main(int argc, char **argv) OptionParser::printUsage(); } else if (OptionParser::isVersion(argc, argv)) { print_version(); - } else if (OptionParser::isBackup(argc, argv)) { - backup(OptionParser::parseBackup(argc, argv)); + } else if (OptionParser::isCreate(argc, argv)) { + create(OptionParser::parseCreate(argc, argv)); } else if (OptionParser::isRestore(argc, argv)) { restore(OptionParser::parseRestore(argc, argv)); } else { diff --git a/src/options.cpp b/src/options.cpp index a5912b6..9cf9d73 100644 --- a/src/options.cpp +++ b/src/options.cpp @@ -44,19 +44,19 @@ Options::getBufferSize() const } std::filesystem::path -OptionsBackup::getInFilePath() const +OptionsCreate::getInFilePath() const { return in_file_path; } std::filesystem::path -OptionsBackup::getBaseFilePath() const +OptionsCreate::getBaseFilePath() const { return base_file_path; } std::filesystem::path -OptionsBackup::getOutFilePath() const +OptionsCreate::getOutFilePath() const { return out_file_path; } @@ -76,7 +76,7 @@ OptionsRestore::getOutFilePath() const void OptionParser::printUsage() { - std::cout << "Usage: " << PROGRAM_NAME_STR << " backup"; + std::cout << "Usage: " << PROGRAM_NAME_STR << " create"; std::cout << " [-B BUFFER_SIZE] -i INFILE -b BASEFILE -o OUTFILE" << std::endl; @@ -101,9 +101,9 @@ OptionParser::isVersion(int argc, char **argv) } bool -OptionParser::isBackup(int argc, char **argv) +OptionParser::isCreate(int argc, char **argv) { - return isOperation(argc, argv, "backup"); + return isOperation(argc, argv, "create"); } bool @@ -112,10 +112,10 @@ OptionParser::isRestore(int argc, char **argv) return isOperation(argc, argv, "restore"); } -OptionsBackup -OptionParser::parseBackup(int argc, char **argv) +OptionsCreate +OptionParser::parseCreate(int argc, char **argv) { - OptionsBackup opts; + OptionsCreate opts; // Skip the executable name. Do not skip the operation name. getopt expects // to start at an argument immediately preceding the possible options. diff --git a/src/options.h b/src/options.h index ba7e7ac..593bcad 100644 --- a/src/options.h +++ b/src/options.h @@ -53,12 +53,12 @@ class Options uint32_t buffer_size; }; -class OptionsBackup : public Options +class OptionsCreate : public Options { friend class OptionParser; public: - virtual ~OptionsBackup() override = default; + virtual ~OptionsCreate() override = default; std::filesystem::path getInFilePath() const; std::filesystem::path getBaseFilePath() const; @@ -91,10 +91,10 @@ class OptionParser static void printUsage(); static bool isHelp(int argc, char **argv); static bool isVersion(int argc, char **argv); - static bool isBackup(int argc, char **argv); + static bool isCreate(int argc, char **argv); static bool isRestore(int argc, char **argv); ; - static OptionsBackup parseBackup(int argc, char **argv); + static OptionsCreate parseCreate(int argc, char **argv); static OptionsRestore parseRestore(int argc, char **argv); private: diff --git a/tests/002-missing_arguments.sh b/tests/002-missing_arguments.sh index c3aaa1b..2ebbd55 100644 --- a/tests/002-missing_arguments.sh +++ b/tests/002-missing_arguments.sh @@ -4,7 +4,7 @@ source ./assert.sh PROGRAM_EXEC="$1" -assert "Usage" "missing input file" 1 $PROGRAM_EXEC backup +assert "Usage" "missing input file" 1 $PROGRAM_EXEC create assert "Usage" "missing diff file" 1 $PROGRAM_EXEC restore exit 0 diff --git a/tests/003-too_many_arguments.sh b/tests/003-too_many_arguments.sh index 2891afd..dc53a69 100644 --- a/tests/003-too_many_arguments.sh +++ b/tests/003-too_many_arguments.sh @@ -4,7 +4,7 @@ source ./assert.sh PROGRAM_EXEC="$1" -assert "Usage" "too many arguments" 1 $PROGRAM_EXEC backup -i arg1 -b arg2 -o arg3 arg4 +assert "Usage" "too many arguments" 1 $PROGRAM_EXEC create -i arg1 -b arg2 -o arg3 arg4 assert "Usage" "too many arguments" 1 $PROGRAM_EXEC restore -d arg1 -o arg2 arg3 exit 0 diff --git a/tests/004-unknown_option.sh b/tests/004-unknown_option.sh index 452d904..9ed0d66 100644 --- a/tests/004-unknown_option.sh +++ b/tests/004-unknown_option.sh @@ -4,7 +4,7 @@ source ./assert.sh PROGRAM_EXEC="$1" -assert "Usage" "unknown option '-x'" 1 $PROGRAM_EXEC backup -x -i in -b base -o out +assert "Usage" "unknown option '-x'" 1 $PROGRAM_EXEC create -x -i in -b base -o out assert "Usage" "unknown option '-x'" 1 $PROGRAM_EXEC restore -x -d diff -o out exit 0 diff --git a/tests/005-missing_argument_for_option.sh b/tests/005-missing_argument_for_option.sh index 26569bc..6fdf18e 100644 --- a/tests/005-missing_argument_for_option.sh +++ b/tests/005-missing_argument_for_option.sh @@ -4,7 +4,7 @@ source ./assert.sh PROGRAM_EXEC="$1" -assert "Usage" "missing argument for option '-B'" 1 $PROGRAM_EXEC backup -B +assert "Usage" "missing argument for option '-B'" 1 $PROGRAM_EXEC create -B assert "Usage" "missing argument for option '-B'" 1 $PROGRAM_EXEC restore -B diff --git a/tests/006-incorrect_buffer_size.sh b/tests/006-incorrect_buffer_size.sh index 9c9b4ea..f104ebb 100644 --- a/tests/006-incorrect_buffer_size.sh +++ b/tests/006-incorrect_buffer_size.sh @@ -4,8 +4,8 @@ source ./assert.sh PROGRAM_EXEC="$1" -assert "Usage" "incorrect buffer size" 1 $PROGRAM_EXEC backup -B abc123 -i in -b base -o out -assert "Usage" "buffer size cannot be 0" 1 $PROGRAM_EXEC backup -B 0 -i in -b base -o out +assert "Usage" "incorrect buffer size" 1 $PROGRAM_EXEC create -B abc123 -i in -b base -o out +assert "Usage" "buffer size cannot be 0" 1 $PROGRAM_EXEC create -B 0 -i in -b base -o out assert "Usage" "incorrect buffer size" 1 $PROGRAM_EXEC restore -B abc123 -d diff -o out assert "Usage" "buffer size cannot be 0" 1 $PROGRAM_EXEC restore -B 0 -d diff -o out diff --git a/tests/100-cannot_open_files.sh b/tests/100-cannot_open_files.sh index e0a59d3..3ef74a6 100644 --- a/tests/100-cannot_open_files.sh +++ b/tests/100-cannot_open_files.sh @@ -6,18 +6,18 @@ PROGRAM_EXEC="$1" rm -f input base out touch base out -assert "" "cannot open input file" 1 $PROGRAM_EXEC backup -i input -b base -o out +assert "" "cannot open input file" 1 $PROGRAM_EXEC create -i input -b base -o out rm -f input base out touch input out -assert "" "cannot open base file" 1 $PROGRAM_EXEC backup -i input -b base -o out +assert "" "cannot open base file" 1 $PROGRAM_EXEC create -i input -b base -o out rm -f input base out rmdir outdir 2>/dev/null touch input base mkdir outdir chmod -w outdir -assert "" "cannot open output file" 1 $PROGRAM_EXEC backup -i input -b base -o outdir/out +assert "" "cannot open output file" 1 $PROGRAM_EXEC create -i input -b base -o outdir/out rm -f input base out rmdir outdir diff --git a/tests/400-successful_backup_restore.sh b/tests/400-successful_backup_restore.sh index f1a07c5..a10853b 100644 --- a/tests/400-successful_backup_restore.sh +++ b/tests/400-successful_backup_restore.sh @@ -29,7 +29,7 @@ printf '\xFF' | dd of=input bs=1 count=1 seek=$(( (512 * 3) - 1 )) conv=notrunc # The fourth sector will have the middle byte changed printf '\xFF' | dd of=input bs=1 count=1 seek=$(( (512 * 4) - (512 / 2) )) conv=notrunc 1>/dev/null 2>&1 -assert "" "" 0 $PROGRAM_EXEC backup -i input -b base -o out +assert "" "" 0 $PROGRAM_EXEC create -i input -b base -o out if ! files_are_the_same out 400-expected_backup_output.bin; then echo "assert: Backup output file differs from the expected one" -- cgit v1.2.3