about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/backup.cpp 433
-rw-r--r-- src/buffered_file.cpp 91
-rw-r--r-- src/buffered_file.h 11
-rw-r--r-- src/format_v2.h 119
-rw-r--r-- src/options.cpp 56
-rw-r--r-- src/options.h 3
-rw-r--r-- src/restore.cpp 139
7 files changed, 583 insertions, 269 deletions
diff --git a/src/backup.cpp b/src/backup.cpp
index bf8bfaf..4a76be5 100644
--- a/src/backup.cpp
+++ b/src/backup.cpp
@@ -1,4 +1,4 @@
-/* Copyright 2021 Ján Sučan <jan@jansucan.com>
+/* Copyright 2024 Ján Sučan <jan@jansucan.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -26,91 +26,398 @@
#include "backup.h"
#include "buffered_file.h"
+#include "format_v2.h"
-#include <cstring>
-#include <fstream>
+#include <algorithm>
+#include <array>
+#include <cassert>
+#include <iostream>
+#include <vector>
-static void check_files(const OptionsBackup &opts);
+class Page
+{
+ friend bool operator==(const Page &lhs, const Page &rhs);
+
+ public:
+ Page() : m_start(0), m_end(0){};
+
+ Page(std::shared_ptr<char[]> data, uint64_t start, uint64_t end)
+ : m_data(data), m_start(start), m_end(end)
+ {
+ assert(m_start <= m_end);
+ };
+
+ std::shared_ptr<char[]> getData() const { return m_data; };
+ uint64_t getStart() const { return m_start; };
+ uint64_t getEnd() const { return m_end; };
+ size_t getSize() const { return m_end - m_start; };
+ bool isEmpty() const { return getSize() == 0; };
+
+ private:
+ std::shared_ptr<char[]> m_data;
+ uint64_t m_start;
+ uint64_t m_end;
+};
+
+bool
+operator==(const Page &lhs, const Page &rhs)
+{
+ return (lhs.m_data == rhs.m_data) && (lhs.m_start == rhs.m_start) &&
+ (lhs.m_end == rhs.m_end);
+}
+
+class PagedStreamReader
+{
+ public:
+ PagedStreamReader(std::istream &istr, size_t page_size_bytes)
+ : m_page_size_bytes(page_size_bytes), m_istream(istr),
+ m_stream_pos_bytes(0), m_buffer_index(0)
+ {
+ try {
+ m_buffers[0] = std::shared_ptr<char[]>(new char[m_page_size_bytes]);
+ m_buffers[1] = std::shared_ptr<char[]>(new char[m_page_size_bytes]);
+ } catch (const std::bad_alloc &e) {
+ throw BufferedFileError(
+ "cannot allocate pages for input stream data");
+ }
+ };
+
+ Page getNextPage()
+ {
+ m_buffer_index = (m_buffer_index + 1) % 2;
+ auto buf = m_buffers[m_buffer_index];
+ // Buffer for new data must not be in use
+ assert(buf.use_count() == 2);
+
+ const size_t bytes_read{readFromStream(buf.get())};
+ m_stream_pos_bytes += bytes_read;
+
+ if (bytes_read == 0) {
+ buf = std::shared_ptr<char[]>();
+ }
+ return Page{buf, m_stream_pos_bytes - bytes_read, m_stream_pos_bytes};
+ }
+
+ private:
+ const size_t m_page_size_bytes;
+ std::istream &m_istream;
+ uint64_t m_stream_pos_bytes;
+ std::array<std::shared_ptr<char[]>, 2> m_buffers;
+ unsigned m_buffer_index;
+
+ size_t readFromStream(char *const data)
+ {
+ if (m_istream.eof()) {
+ return 0;
+ }
+
+ m_istream.read(data, m_page_size_bytes);
-static void
-check_files(const OptionsBackup &opts)
+ if (!m_istream.good() && !m_istream.eof()) {
+ throw BufferedFileError("cannot read from stream");
+ }
+
+ return m_istream.gcount();
+ }
+};
+
+enum class MergeState {
+ Finished,
+ Incomplete,
+};
+
+class Diff
+{
+ friend MergeState diffsTryMerge(Diff &diff_a, Diff &diff_b,
+ size_t max_merge_gap, size_t max_size);
+
+ public:
+ explicit Diff(uint64_t start_end)
+ : m_pages{}, m_start{start_end}, m_end{start_end}
+ {
+ assert(m_start <= m_end);
+ };
+ Diff(Page page, uint64_t start, uint64_t end)
+ : m_pages{page}, m_start{start}, m_end{end}
+ {
+ assert(m_start <= m_end);
+ };
+ uint64_t getStart() const { return m_start; };
+ uint64_t getEnd() const { return m_end; };
+ size_t getSize() const { return m_end - m_start; };
+ bool isEmpty() const { return getSize() == 0; };
+
+ std::vector<FormatV2::RecordData> getData() const
+ {
+ std::vector<FormatV2::RecordData> data{};
+
+ if (!m_pages[0].isEmpty() && m_pages[1].isEmpty()) {
+ // Only the first page
+ assert((m_start >= m_pages[0].getStart()) &&
+ (m_start <= m_pages[0].getEnd()) &&
+ (m_end >= m_pages[0].getStart()) &&
+ (m_end <= m_pages[0].getEnd()));
+
+ const uint64_t offset{m_start - m_pages[0].getStart()};
+ auto data_first{std::shared_ptr<char[]>{
+ m_pages[0].getData(),
+ static_cast<char *>(m_pages[0].getData().get()) + offset}};
+ data.push_back(FormatV2::RecordData{getSize(), data_first});
+ } else if (!m_pages[0].isEmpty() && !m_pages[1].isEmpty()) {
+ // Both pages
+ assert((m_start >= m_pages[0].getStart()) &&
+ (m_start <= m_pages[0].getEnd()) &&
+ (m_end >= m_pages[1].getStart()) &&
+ (m_end <= m_pages[1].getEnd()));
+
+ size_t size{m_pages[0].getEnd() - m_start};
+ const uint64_t offset{m_start - m_pages[0].getStart()};
+ auto data_first{std::shared_ptr<char[]>{
+ m_pages[0].getData(),
+ static_cast<char *>(m_pages[0].getData().get()) + offset}};
+ data.push_back(FormatV2::RecordData{size, data_first});
+
+ size = m_end - m_pages[1].getStart();
+ data.push_back(FormatV2::RecordData{size, m_pages[1].getData()});
+ }
+
+ return data;
+ };
+
+ private:
+ std::array<Page, 2> m_pages;
+ uint64_t m_start;
+ uint64_t m_end;
+
+ bool hasPage(size_t i) // cppcheck-suppress unusedPrivateFunction
+ {
+ return (i < m_pages.size()) && (m_pages[i].getData() != nullptr);
+ };
+};
+
+MergeState
+diffsTryMerge(Diff &diff_a, Diff &diff_b, size_t max_merge_gap, size_t max_size)
{
- size_t in_size{0};
- try {
- in_size = std::filesystem::file_size(opts.getInFilePath());
- } catch (const std::exception &e) {
- throw BackupError("cannot get size of input file: " +
- std::string(e.what()));
+ if (diff_a.isEmpty()) {
+ // Do not merge to an empty diff
+ return MergeState::Finished;
}
- size_t base_size{0};
- try {
- base_size = std::filesystem::file_size(opts.getBaseFilePath());
- } catch (const std::exception &e) {
- throw BackupError("cannot get size of base file: " +
- std::string(e.what()));
+ if (diff_b.isEmpty()) {
+ // Nothing to merge from an empty diff
+ return MergeState::Finished;
}
- /* Check sizes of the input file and the base file */
- if (in_size != base_size) {
- throw BackupError("input file and base file differ in size");
- } else if ((in_size % opts.getSectorSize()) != 0) {
- throw BackupError(
- "size of input file and base file is not multiple of " +
- std::to_string(opts.getSectorSize()));
+ assert(diff_a.getEnd() <= diff_b.getStart());
+ const size_t gap{diff_b.getStart() - diff_a.getEnd()};
+ if (gap > max_merge_gap) {
+ // B is too far away
+ return MergeState::Finished;
}
+
+ if ((diff_a.getSize() + gap) >= max_size) {
+ // No space in A
+ return MergeState::Finished;
+ }
+
+ // Can be merged
+
+ // Adjust the diff start and end offsets
+
+ // There is always at least 1 byte free in A here
+ const size_t free{max_size - (diff_a.getSize() + gap)};
+ const size_t to_merge{std::min(free, diff_b.getSize())};
+ // There is always at least 1 byte to merge from B here
+
+ // Enlarge A
+ diff_a.m_end += gap + to_merge;
+ // Shrink B
+ diff_b.m_start += to_merge;
+
+ // Add B's page to A if needed
+
+ // Non-empty A must have only the first, or both pages
+ assert(diff_a.hasPage(0));
+ // Non-empty B must have only the first page
+ assert(diff_b.hasPage(0) && !diff_b.hasPage(1));
+
+ // If A has both pages, B's page must be the same as A's second
+ // page. No pages need to be set in A in this case
+ assert(!diff_a.hasPage(1) || (diff_b.m_pages[0] == diff_a.m_pages[1]));
+ if (!diff_a.hasPage(1)) {
+ // If A has only the first page, B's page must either be the same as
+ // A's first page or immediately follow it
+ const bool b_follows{
+ (diff_b.m_pages[0].getData() != diff_a.m_pages[0].getData()) &&
+ (diff_b.m_pages[0].getStart() == diff_a.m_pages[0].getEnd())};
+ assert((diff_b.m_pages[0] == diff_a.m_pages[0]) || b_follows);
+ if (b_follows) {
+ diff_a.m_pages[1] = diff_b.m_pages[0];
+ }
+ }
+
+ return (diff_a.getSize() >= max_size) ? MergeState::Finished
+ : MergeState::Incomplete;
}
+class DiffFinder
+{
+ public:
+ DiffFinder(std::istream &old_stream, std::istream &new_stream,
+ uint32_t buffer_size, size_t max_merge_gap)
+ : m_old_page_reader(old_stream, buffer_size),
+ m_new_page_reader(new_stream, buffer_size),
+ m_diff_max_size(buffer_size), m_max_merge_gap(max_merge_gap),
+ m_offset_in_stream(0), m_diff(0),
+ m_search_state(SearchState::ReadPages){};
+
+ Diff findNextDiff()
+ {
+ for (;;) {
+ if (m_search_state == SearchState::ReadPages) {
+ m_old_page = m_old_page_reader.getNextPage();
+ m_new_page = m_new_page_reader.getNextPage();
+ assert(m_old_page.getStart() == m_new_page.getStart());
+
+ if (m_old_page.getSize() != m_new_page.getSize()) {
+ throw BackupError(
+ "cannot read the same amount of data from both files");
+ }
+
+ const bool end_of_stream{m_old_page.isEmpty() &&
+ m_new_page.isEmpty()};
+ if (end_of_stream) {
+ const Diff return_diff{m_diff};
+ m_diff = Diff{m_offset_in_stream};
+ return return_diff;
+ }
+
+ m_search_state = SearchState::FindDiff;
+
+ } else if (m_search_state == SearchState::FindDiff) {
+ Diff diff{findDiffInPages(m_old_page, m_new_page,
+ m_offset_in_stream)};
+ m_offset_in_stream = diff.getEnd();
+
+ if (diff.isEmpty()) {
+ // End of pages. On the next call, read new pages.
+ m_old_page = Page{};
+ m_new_page = Page{};
+ m_search_state = SearchState::ReadPages;
+ }
+
+ const MergeState merge_state{diffsTryMerge(
+ m_diff, diff, m_max_merge_gap, m_diff_max_size)};
+
+ if (merge_state == MergeState::Finished) {
+ const Diff return_diff{m_diff};
+ m_diff = diff;
+ if (!return_diff.isEmpty()) {
+ return return_diff;
+ }
+ }
+
+ } else {
+ assert(false);
+ }
+ }
+ };
+
+ private:
+ enum class SearchState { ReadPages, FindDiff };
+
+ PagedStreamReader m_old_page_reader;
+ PagedStreamReader m_new_page_reader;
+ const size_t m_diff_max_size;
+ const size_t m_max_merge_gap;
+ Page m_old_page;
+ Page m_new_page;
+ uint64_t m_offset_in_stream;
+ Diff m_diff;
+ SearchState m_search_state;
+
+ Diff findDiffInPages(Page old_page, Page new_page,
+ uint64_t offset_in_stream)
+ {
+ const char *old_data{old_page.getData().get()};
+ const char *new_data{new_page.getData().get()};
+ const uint64_t data_size_bytes{old_page.getSize()};
+
+ assert(offset_in_stream >= new_page.getStart());
+ size_t offset_in_pages{offset_in_stream - new_page.getStart()};
+
+ // Find offset of the first different byte
+ for (; offset_in_pages < data_size_bytes; ++offset_in_pages) {
+ if (new_data[offset_in_pages] != old_data[offset_in_pages]) {
+ break;
+ }
+ }
+ const size_t start_in_pages{offset_in_pages};
+
+ if (offset_in_pages < data_size_bytes) {
+ // Different byte found. Start searching for a same byte immediately
+ // after.
+ ++offset_in_pages;
+ }
+
+ // Find offset of the first same byte
+ for (; offset_in_pages < data_size_bytes; ++offset_in_pages) {
+ if (new_data[offset_in_pages] == old_data[offset_in_pages]) {
+ break;
+ }
+ }
+ const size_t end_in_pages{offset_in_pages};
+
+ // In the case when no different byte is found, the end offset will be
+ // the same as the start offset
+
+ const uint64_t start_in_stream{new_page.getStart() + start_in_pages};
+ const uint64_t end_in_stream{new_page.getStart() + end_in_pages};
+ if (start_in_stream == end_in_stream) {
+ return Diff{start_in_stream};
+ } else {
+ return Diff{new_page, start_in_stream, end_in_stream};
+ }
+ }
+};
+
void
backup(const OptionsBackup &opts)
{
- check_files(opts);
-
- BufferedFileReader in_file(opts.getInFilePath(), opts.getBufferSize());
- BufferedFileReader base_file(opts.getBaseFilePath(), opts.getBufferSize());
- BufferedFileWriter out_file(opts.getOutFilePath(), opts.getBufferSize());
+ std::ifstream in_istream{opts.getInFilePath(),
+ std::ifstream::in | std::ifstream::binary};
+ if (!in_istream) {
+ throw BufferedFileError("cannot open input file");
+ }
- std::unique_ptr<char[]> in_buffer;
- try {
- in_buffer = std::make_unique<char[]>(opts.getSectorSize());
- } catch (const std::bad_alloc &e) {
- throw BackupError("cannot allocate sector buffer for input file data");
+ std::ifstream base_istream{opts.getBaseFilePath(),
+ std::ifstream::in | std::ifstream::binary};
+ if (!base_istream) {
+ throw BufferedFileError("cannot open base file");
}
- std::unique_ptr<char[]> base_buffer;
- try {
- base_buffer = std::make_unique<char[]>(opts.getSectorSize());
- } catch (const std::bad_alloc &e) {
- throw BackupError("cannot allocate sector buffer for base file data");
+ // When backing up, the output file is truncated to hold the new data
+ std::ofstream out_ostream{opts.getOutFilePath(), std::ofstream::out |
+ std::ofstream::trunc |
+ std::ofstream::binary};
+ if (!out_ostream) {
+ throw BufferedFileError("cannot open output file");
}
- uint64_t input_file_offset{0};
+ DiffFinder diff_finder(base_istream, in_istream, opts.getBufferSize(),
+ FormatV2::RecordHeaderSize);
+ FormatV2::Writer diff_writer(out_ostream, opts.getBufferSize());
+
for (;;) {
- // Read sectors
- const size_t in_read_size =
- in_file.read(in_buffer.get(), opts.getSectorSize());
- const size_t base_read_size =
- base_file.read(base_buffer.get(), opts.getSectorSize());
-
- if (in_read_size != base_read_size) {
- throw BackupError(
- "cannot read equal amount of bytes from the input files");
- } else if (in_read_size == 0) {
+ const Diff diff{diff_finder.findNextDiff()};
+ if (diff.isEmpty()) {
break;
- } else if (in_read_size != opts.getSectorSize()) {
- throw BackupError("cannot read full sectors from the input files");
}
- // Check for difference
- const bool differ = (memcmp(in_buffer.get(), base_buffer.get(),
- opts.getSectorSize()) != 0);
- if (differ) {
- // Backup sector
- uint64_t o = htole64(input_file_offset);
- out_file.write(reinterpret_cast<char *>(&o), sizeof(o));
- out_file.write(in_buffer.get(), opts.getSectorSize());
- }
+ diff_writer.writeDiffRecord(diff.getStart(), diff.getSize(),
+ diff.getData());
- input_file_offset += opts.getSectorSize();
+ // Here, the diff is destructed and page data reference counters
+ // decremented
}
}
diff --git a/src/buffered_file.cpp b/src/buffered_file.cpp
index f5eb863..8d47eed 100644
--- a/src/buffered_file.cpp
+++ b/src/buffered_file.cpp
@@ -27,53 +27,71 @@
#include "buffered_file.h"
#include "exception.h"
+#include <algorithm>
+#include <cassert>
#include <cstring>
#include <filesystem>
#include <fstream>
-BufferedFileReader::BufferedFileReader(std::filesystem::path path,
+BufferedFileReader::BufferedFileReader(std::istream &istream,
size_t buffer_capacity)
- : m_buffer_offset(0), m_buffer_capacity(buffer_capacity)
+ : m_istream(istream), m_buffer_offset(0), m_buffer_size(0),
+ m_buffer_capacity(buffer_capacity)
{
- m_file.open(path, std::ifstream::in | std::ifstream::binary);
- if (!m_file) {
- throw BufferedFileError("cannot open input file");
- }
-
try {
m_buffer = std::make_unique<char[]>(m_buffer_capacity);
} catch (const std::bad_alloc &e) {
throw BufferedFileError("cannot allocate buffer for input file data");
}
-
- refill_buffer();
};
size_t
BufferedFileReader::read(char *data, size_t data_size)
{
- const size_t size_left = m_buffer_size - m_buffer_offset;
- if (data_size <= size_left) {
- return read_buffer(data, data_size);
- } else {
- const size_t size_outside_buffer = data_size - size_left;
- read_buffer(data, size_left);
- const size_t read_outside =
- read_file(data + size_left, size_outside_buffer);
+ size_t retry_count{0};
+ size_t offset{0};
+
+ while ((data_size > 0) && (retry_count < 2)) {
+ char *d;
+ const size_t r{tryRead(data_size, &d)};
+ if (r == 0) {
+ ++retry_count;
+ continue;
+ }
+
+ memcpy(data + offset, d, r);
+ offset += r;
+ data_size -= r;
+ }
+
+ return offset;
+}
+
+size_t
+BufferedFileReader::tryRead(size_t data_size, char **return_data)
+{
+ const size_t size_left{m_buffer_size - m_buffer_offset};
+ if (size_left == 0) {
refill_buffer();
- return size_left + read_outside;
+ if (m_buffer_size == 0) {
+ return 0;
+ }
}
+ // There is at least one byte in the buffer
+ const size_t size_read{read_buffer(data_size, return_data)};
+ assert(size_read > 0);
+ return size_read;
};
size_t
-BufferedFileReader::read_buffer(char *data, size_t data_size)
+BufferedFileReader::read_buffer(size_t data_size, char **return_data)
{
- // Assumes that the caller makes sure there is enough data in the buffer
- // to read
- memcpy(data, reinterpret_cast<char *>(m_buffer.get()) + m_buffer_offset,
- data_size);
- m_buffer_offset += data_size;
- return data_size;
+ *return_data = static_cast<char *>(m_buffer.get()) + m_buffer_offset;
+
+ const size_t size_left{m_buffer_size - m_buffer_offset};
+ const size_t size_read{std::min(data_size, size_left)};
+ m_buffer_offset += size_read;
+ return size_read;
};
void
@@ -86,28 +104,19 @@ BufferedFileReader::refill_buffer()
size_t
BufferedFileReader::read_file(char *data, size_t data_size)
{
- m_file.read(data, data_size);
+ m_istream.read(data, data_size);
- if (!m_file.good() && !m_file.eof()) {
+ if (!m_istream.good() && !m_istream.eof()) {
throw BufferedFileError("cannot read from file");
}
- return m_file.gcount();
+ return m_istream.gcount();
};
-BufferedFileWriter::BufferedFileWriter(std::filesystem::path path,
+BufferedFileWriter::BufferedFileWriter(std::ostream &ostream,
size_t buffer_capacity)
- : m_buffer_size(0), m_buffer_capacity(buffer_capacity)
+ : m_ostream(ostream), m_buffer_size(0), m_buffer_capacity(buffer_capacity)
{
- /* When backing up, the output file is truncated to hold the
- * new data
- */
- m_file.open(path, std::ifstream::out | std::ifstream::trunc |
- std::ifstream::binary);
- if (!m_file) {
- throw BufferedFileError("cannot open output file");
- }
-
try {
m_buffer = std::make_unique<char[]>(m_buffer_capacity);
} catch (const std::bad_alloc &e) {
@@ -154,8 +163,8 @@ BufferedFileWriter::flush_buffer()
void
BufferedFileWriter::write_file(const char *data, size_t data_size)
{
- m_file.write(data, data_size);
- if (!m_file) {
+ m_ostream.write(data, data_size);
+ if (!m_ostream) {
throw BufferedFileError("cannot write to output file");
}
};
diff --git a/src/buffered_file.h b/src/buffered_file.h
index 1bcbb73..4e5219d 100644
--- a/src/buffered_file.h
+++ b/src/buffered_file.h
@@ -44,19 +44,20 @@ class BufferedFileError : public DiffddError
class BufferedFileReader
{
public:
- BufferedFileReader(std::filesystem::path path, size_t buffer_capacity);
+ BufferedFileReader(std::istream &istream, size_t buffer_capacity);
virtual ~BufferedFileReader() = default;
size_t read(char *data, size_t data_size);
+ size_t tryRead(size_t data_size, char **return_data);
private:
- std::ifstream m_file;
+ std::istream &m_istream;
std::unique_ptr<char[]> m_buffer;
size_t m_buffer_offset;
size_t m_buffer_size;
const size_t m_buffer_capacity;
- size_t read_buffer(char *data, size_t data_size);
+ size_t read_buffer(size_t data_size, char **return_data);
void refill_buffer();
size_t read_file(char *data, size_t data_size);
};
@@ -64,13 +65,13 @@ class BufferedFileReader
class BufferedFileWriter
{
public:
- BufferedFileWriter(std::filesystem::path path, size_t buffer_capacity);
+ BufferedFileWriter(std::ostream &ostream, size_t buffer_capacity);
virtual ~BufferedFileWriter();
void write(const char *data, size_t data_size);
private:
- std::fstream m_file;
+ std::ostream &m_ostream;
std::unique_ptr<char[]> m_buffer;
size_t m_buffer_size;
const size_t m_buffer_capacity;
diff --git a/src/format_v2.h b/src/format_v2.h
new file mode 100644
index 0000000..708a252
--- /dev/null
+++ b/src/format_v2.h
@@ -0,0 +1,119 @@
+/* Copyright 2024 Ján Sučan <jan@jansucan.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <endian.h>
+
+#include <vector>
+
+namespace FormatV2
+{
+
+const size_t RecordHeaderSize{sizeof(uint64_t) + sizeof(uint32_t)};
+
+struct RecordData {
+ size_t size;
+ std::shared_ptr<char[]> data;
+};
+
+class Writer
+{
+ public:
+ Writer(std::ostream &ostream, size_t buffer_size)
+ : m_writer{BufferedFileWriter{ostream, buffer_size}} {};
+
+ void writeDiffRecord(
+ uint64_t offset, size_t size,
+ std::vector<RecordData> data) // cppcheck-suppress passedByValue
+ {
+ writeOffset(offset);
+ writeSize(size);
+ for (auto it = data.begin(); it != data.end(); ++it) {
+ writeData(*it);
+ }
+ }
+
+ private:
+ BufferedFileWriter m_writer;
+
+ void writeOffset(uint64_t offset)
+ {
+ uint64_t val{htobe64(offset)};
+ m_writer.write(reinterpret_cast<char *>(&val), sizeof(val));
+ };
+
+ void writeSize(size_t size)
+ {
+ uint32_t val{htobe32(size)};
+ m_writer.write(reinterpret_cast<char *>(&val), sizeof(val));
+ };
+
+ void writeData(RecordData data)
+ {
+ m_writer.write(reinterpret_cast<char *>(data.data.get()), data.size);
+ };
+};
+
+class Reader
+{
+ public:
+ Reader(std::istream &istream, size_t buffer_size)
+ : m_reader{BufferedFileReader{istream, buffer_size}}, m_eof{false} {};
+
+ bool eof() { return m_eof; };
+
+ uint64_t readOffset()
+ {
+ uint64_t raw_offset;
+ const size_t r{m_reader.read(reinterpret_cast<char *>(&raw_offset),
+ sizeof(raw_offset))};
+ if (r != sizeof(raw_offset)) {
+ m_eof = true;
+ }
+ return be64toh(raw_offset);
+ };
+
+ size_t readSize()
+ {
+ uint32_t raw_size;
+ const size_t r{m_reader.read(reinterpret_cast<char *>(&raw_size),
+ sizeof(raw_size))};
+ if (r != sizeof(raw_size)) {
+ m_eof = true;
+ }
+ return be32toh(raw_size);
+ };
+
+ size_t readData(size_t size, char **return_data)
+ {
+ return m_reader.tryRead(size, return_data);
+ };
+
+ private:
+ BufferedFileReader m_reader;
+ bool m_eof;
+};
+
+} // namespace FormatV2
diff --git a/src/options.cpp b/src/options.cpp
index 5b6ed15..c44fd06 100644
--- a/src/options.cpp
+++ b/src/options.cpp
@@ -35,17 +35,7 @@
*/
#include "program_info.h"
-Options::Options()
- : sector_size{Options::DEFAULT_SECTOR_SIZE},
- buffer_size{Options::DEFAULT_BUFFER_SIZE}
-{
-}
-
-uint32_t
-Options::getSectorSize() const
-{
- return sector_size;
-}
+Options::Options() : buffer_size{Options::DEFAULT_BUFFER_SIZE} {}
uint32_t
Options::getBufferSize() const
@@ -86,11 +76,11 @@ OptionsRestore::getOutFilePath() const
void
OptionParser::printUsage()
{
- std::cout << "Usage: " << PROGRAM_NAME_STR << " backup [-S SECTOR_SIZE]";
+ std::cout << "Usage: " << PROGRAM_NAME_STR << " backup";
std::cout << " [-B BUFFER_SIZE] -i INFILE -b BASEFILE -o OUTFILE"
<< std::endl;
- std::cout << " Or: " << PROGRAM_NAME_STR << " restore [-S SECTOR_SIZE]";
+ std::cout << " Or: " << PROGRAM_NAME_STR << " restore";
std::cout << "[-B BUFFER_SIZE] -d DIFFFILE -o OUTFILE" << std::endl;
std::cout << " Or: " << PROGRAM_NAME_STR << " help" << std::endl;
@@ -125,22 +115,17 @@ OptionParser::parseBackup(int argc, char **argv)
argv += 1;
int ch;
- const char *arg_sector_size = NULL;
const char *arg_buffer_size = NULL;
const char *arg_input_file = NULL;
const char *arg_base_file = NULL;
const char *arg_output_file = NULL;
- while ((ch = getopt(argc, argv, ":B:S:i:b:o:")) != -1) {
+ while ((ch = getopt(argc, argv, ":B:i:b:o:")) != -1) {
switch (ch) {
case 'B':
arg_buffer_size = optarg;
break;
- case 'S':
- arg_sector_size = optarg;
- break;
-
case 'i':
arg_input_file = optarg;
break;
@@ -165,20 +150,11 @@ OptionParser::parseBackup(int argc, char **argv)
argc -= optind;
/* Convert numbers in the arguments */
- if ((arg_sector_size != NULL) &&
- parse_unsigned(arg_sector_size, &(opts.sector_size))) {
- throw OptionError("incorrect sector size");
- } else if ((arg_buffer_size != NULL) &&
- parse_unsigned(arg_buffer_size, &(opts.buffer_size))) {
+ if ((arg_buffer_size != NULL) &&
+ parse_unsigned(arg_buffer_size, &(opts.buffer_size))) {
throw OptionError("incorrect buffer size");
- } else if (opts.sector_size == 0) {
- throw OptionError("sector size cannot be 0");
} else if (opts.buffer_size == 0) {
throw OptionError("buffer size cannot be 0");
- } else if (opts.sector_size > opts.buffer_size) {
- throw OptionError("sector size cannot larger than buffer size");
- } else if ((opts.buffer_size % opts.sector_size) != 0) {
- throw OptionError("buffer size is not multiple of sector size");
}
if (arg_input_file == NULL) {
@@ -207,21 +183,16 @@ OptionParser::parseRestore(int argc, char **argv)
argv += 1;
int ch;
- const char *arg_sector_size = NULL;
const char *arg_buffer_size = NULL;
const char *arg_diff_file = NULL;
const char *arg_output_file = NULL;
- while ((ch = getopt(argc, argv, ":B:S:d:o:")) != -1) {
+ while ((ch = getopt(argc, argv, ":B:d:o:")) != -1) {
switch (ch) {
case 'B':
arg_buffer_size = optarg;
break;
- case 'S':
- arg_sector_size = optarg;
- break;
-
case 'd':
arg_diff_file = optarg;
break;
@@ -242,20 +213,11 @@ OptionParser::parseRestore(int argc, char **argv)
argc -= optind;
/* Convert numbers in the arguments */
- if ((arg_sector_size != NULL) &&
- parse_unsigned(arg_sector_size, &(opts.sector_size))) {
- throw OptionError("incorrect sector size");
- } else if ((arg_buffer_size != NULL) &&
- parse_unsigned(arg_buffer_size, &(opts.buffer_size))) {
+ if ((arg_buffer_size != NULL) &&
+ parse_unsigned(arg_buffer_size, &(opts.buffer_size))) {
throw OptionError("incorrect buffer size");
- } else if (opts.sector_size == 0) {
- throw OptionError("sector size cannot be 0");
} else if (opts.buffer_size == 0) {
throw OptionError("buffer size cannot be 0");
- } else if (opts.sector_size > opts.buffer_size) {
- throw OptionError("sector size cannot larger than buffer size");
- } else if ((opts.buffer_size % opts.sector_size) != 0) {
- throw OptionError("buffer size is not multiple of sector size");
}
if (arg_diff_file == NULL) {
diff --git a/src/options.h b/src/options.h
index 08559e2..80cb8f8 100644
--- a/src/options.h
+++ b/src/options.h
@@ -42,17 +42,14 @@ class Options
friend class OptionParser;
public:
- static const int DEFAULT_SECTOR_SIZE{512};
static const int DEFAULT_BUFFER_SIZE{4 * 1024 * 1024};
Options();
virtual ~Options() = default;
- uint32_t getSectorSize() const;
uint32_t getBufferSize() const;
private:
- uint32_t sector_size;
uint32_t buffer_size;
};
diff --git a/src/restore.cpp b/src/restore.cpp
index 8a5b29f..a747d16 100644
--- a/src/restore.cpp
+++ b/src/restore.cpp
@@ -26,93 +26,23 @@
#include "restore.h"
#include "buffered_file.h"
+#include "format_v2.h"
#include <filesystem>
#include <fstream>
-
-static void
-check_diff_file(const OptionsRestore &opts)
-{
- size_t diff_size{0};
- try {
- diff_size = std::filesystem::file_size(opts.getDiffFilePath());
- } catch (const std::exception &e) {
- throw RestoreError("cannot get size of diff file: " +
- std::string(e.what()));
- }
-
- if (diff_size == 0) {
- throw RestoreError("diff file is empty");
- } else if ((diff_size % (sizeof(uint64_t) + opts.getSectorSize())) != 0) {
- /* The diff file must hold equally sized sectors and the
- * offset of each of them
- */
- throw RestoreError(
- "diff file has size that cannot contain valid diff data");
- }
-
- size_t out_size{0};
- try {
- out_size = std::filesystem::file_size(opts.getOutFilePath());
- } catch (const std::exception &e) {
- throw RestoreError("cannot get size of output file: " +
- std::string(e.what()));
- }
-
- std::ifstream diff_file;
- diff_file.open(opts.getDiffFilePath(), std::ios::in | std::ios::binary);
- if (!diff_file) {
- throw RestoreError("cannot open diff file");
- }
-
- uint64_t prev_out_offset = 0;
- bool is_first_reading = true;
-
- /* Scan the diff file and check */
- for (;;) {
- uint64_t out_offset;
- /* Read the next offset */
- diff_file.read(reinterpret_cast<char *>(&out_offset),
- sizeof(out_offset));
-
- if (diff_file.eof() && diff_file.fail() && !diff_file.bad()) {
- break;
- } else if (!diff_file.good() && !diff_file.eof()) {
- throw RestoreError("cannot read from file");
- }
- out_offset = le64toh(out_offset);
-
- if (!is_first_reading && (out_offset <= prev_out_offset)) {
- throw RestoreError(
- "a sector offset points behind the previous offset");
- } else if ((out_offset + opts.getSectorSize()) > out_size) {
- throw RestoreError(
- "a sector offset points past the end of the output file");
- } else if (!diff_file.seekg(opts.getSectorSize(), std::ios_base::cur)) {
- throw RestoreError("cannot seek in diff file");
- }
-
- is_first_reading = false;
- prev_out_offset = out_offset;
- }
-
- /* The diff file must be read completely */
- char c;
- diff_file.read(&c, 1);
- if (diff_file.gcount() != 0) {
- throw RestoreError("diff file is not valid");
- }
- diff_file.clear();
-
- diff_file.close();
-}
+#include <vector>
void
restore(const OptionsRestore &opts)
{
- check_diff_file(opts);
+ std::fstream diff_stream;
+ diff_stream.open(opts.getDiffFilePath(),
+ std::ifstream::in | std::ifstream::binary);
+ if (!diff_stream) {
+ throw RestoreError("cannot open diff file");
+ }
- BufferedFileReader diff_file(opts.getDiffFilePath(), opts.getBufferSize());
+ FormatV2::Reader diff_reader(diff_stream, opts.getBufferSize());
std::fstream out_file;
out_file.open(opts.getOutFilePath(),
@@ -121,45 +51,34 @@ restore(const OptionsRestore &opts)
throw RestoreError("cannot open output file");
}
- const size_t diff_buffer_size = sizeof(uint64_t) + opts.getSectorSize();
- std::unique_ptr<char[]> diff_buffer;
- try {
- diff_buffer = std::make_unique<char[]>(diff_buffer_size);
- } catch (const std::bad_alloc &e) {
- throw RestoreError("cannot allocate sector buffer for diff file data");
- }
-
- /* Restore data from the differential image */
- size_t diff_read_size = {0};
for (;;) {
- diff_read_size = diff_file.read(diff_buffer.get(), diff_buffer_size);
-
- if (diff_read_size == 0) {
+ const uint64_t offset{diff_reader.readOffset()};
+ if (diff_reader.eof()) {
break;
- } else if (diff_read_size != diff_buffer_size) {
- throw RestoreError("cannot read from diff file");
}
- const uint64_t out_offset =
- le64toh(*reinterpret_cast<uint64_t *>(diff_buffer.get()));
-
- if (!out_file.seekp(out_offset, std::ios_base::beg)) {
+ if (!out_file.seekp(offset, std::ios_base::beg)) {
throw RestoreError("cannot seek in output file");
}
- if (!out_file.write(reinterpret_cast<char *>(diff_buffer.get()) +
- sizeof(uint64_t),
- opts.getSectorSize())) {
- throw RestoreError("cannot write to output file");
- }
- }
+ uint64_t size{diff_reader.readSize()};
- out_file.close();
+ while (size > 0) {
+ char *data;
+ const size_t r{diff_reader.readData(size, &data)};
+ if (r == 0) {
+ break;
+ }
- /* The diff file must be read completely */
- char c;
- diff_read_size = diff_file.read(&c, 1);
- if (diff_read_size != 0) {
- throw RestoreError("diff file is not valid");
+ if (!out_file.write(data, r)) {
+ throw RestoreError("cannot write to output file");
+ }
+
+ size -= r;
+ }
+
+ if (size > 0) {
+ throw RestoreError("cannot read all the data of the record");
+ }
}
}