36#include <gtest/gtest.h>
51#if defined(__unix__) || defined(__APPLE__)
60 namespace fs = std::filesystem;
64 static std::atomic<unsigned long long>
counter{0};
65 const auto now = std::chrono::steady_clock::now().time_since_epoch().count();
66 const auto id = std::to_string(
now) +
"_" + std::to_string(
counter++);
67 const fs::path dir = fs::temp_directory_path() /
"aleph_file_btree_tests";
68 fs::create_directories(dir);
69 return dir / (
id +
".idx");
76 explicit TempFile(fs::path p) : path(
std::move(p)) {}
82 fs::remove(path.string() +
".wal",
ec);
83 fs::remove(path.string() +
".wal.tmp",
ec);
84 fs::remove(path.string() +
".journal",
ec);
85 fs::remove(path.string() +
".journal.tmp",
ec);
86 fs::remove(path.string() +
".lock",
ec);
87 fs::remove(path.string() +
".old",
ec);
88 fs::remove(path.string() +
".new",
ec);
97 for (
size_t i = 0; i < arr.
size(); ++i)
98 ret.push_back(arr[i]);
102#if defined(__unix__) || defined(__APPLE__)
105# if defined(__SANITIZE_THREAD__)
107# elif defined(__has_feature)
108# if __has_feature(thread_sanitizer)
118 template <
typename F>
165 const std::uint64_t
size,
166 const std::uint64_t root_page,
167 const std::uint64_t page_count,
168 const std::uint64_t free_page_head,
169 const std::uint64_t checkpoint_sequence,
190 const size_t page_bytes,
194 for (std::uint64_t page_id = 1; page_id <= page_count; ++page_id)
198 crc,
page_blob.data() + (page_id - 1) * page_bytes, page_bytes);
213 constexpr size_t page_bytes =
214 sizeof(std::uint8_t) +
sizeof(std::uint16_t) * 2 + 3
215 +
sizeof(std::uint64_t) +
sizeof(std::uint64_t) +
sizeof(
int) *
max_keys
216 +
sizeof(std::uint64_t) *
max_children +
sizeof(std::uint32_t);
221 std::array<char, Aleph::detail::Ordered_Tree_Snapshot_Magic_Size> magic = {};
222 in.read(magic.data(), magic.size());
225 auto read_u32 = [&](std::uint32_t & value)
227 in.read(
reinterpret_cast<char *
>(&value),
sizeof(value));
230 auto read_u64 = [&](std::uint64_t & value)
232 in.read(
reinterpret_cast<char *
>(&value),
sizeof(value));
235 auto read_u8 = [&](std::uint8_t & value)
237 in.read(
reinterpret_cast<char *
>(&value),
sizeof(value));
241 std::uint32_t version = 0;
260 std::uint64_t
size = 0;
261 std::uint64_t root_page = 0;
262 std::uint64_t page_count = 0;
263 std::uint64_t free_page_head = 0;
264 std::uint64_t checkpoint_sequence = 0;
273 std::vector<char>
page_blob(page_bytes * page_count);
277 std::ofstream
out(
wal_path, std::ios::binary | std::ios::trunc);
280 const auto wal_magic =
284 out.write(wal_magic.data(), wal_magic.size());
286 out.write(
reinterpret_cast<const char *
>(&key_size),
sizeof(key_size));
287 out.write(
reinterpret_cast<const char *
>(&min_degree),
sizeof(min_degree));
290 out.write(
reinterpret_cast<const char *
>(&
size),
sizeof(
size));
291 out.write(
reinterpret_cast<const char *
>(&root_page),
sizeof(root_page));
292 out.write(
reinterpret_cast<const char *
>(&page_count),
sizeof(page_count));
293 out.write(
reinterpret_cast<const char *
>(&free_page_head),
sizeof(free_page_head));
294 out.write(
reinterpret_cast<const char *
>(&checkpoint_sequence),
sizeof(checkpoint_sequence));
295 out.write(
reinterpret_cast<const char *
>(&page_count),
sizeof(page_count));
298 checkpoint_sequence, page_count);
301 for (std::uint64_t page_id = 1; page_id <= page_count; ++page_id)
303 out.write(
reinterpret_cast<const char *
>(&page_id),
sizeof(page_id));
304 out.write(
page_blob.data() + (page_id - 1) * page_bytes, page_bytes);
310 out.write(
reinterpret_cast<const char *
>(&checkpoint_sequence),
311 sizeof(checkpoint_sequence));
312 out.write(
reinterpret_cast<const char *
>(&page_count),
sizeof(page_count));
327 +
sizeof(std::uint32_t) * 2 +
sizeof(std::uint64_t)
328 +
sizeof(std::uint8_t) + 7 +
sizeof(std::uint64_t) * 2);
330 std::uint64_t page_count = 0;
331 in.read(
reinterpret_cast<char *
>(&page_count),
sizeof(page_count));
338 constexpr size_t header_bytes =
340 +
sizeof(std::uint64_t) * 5 +
sizeof(std::uint8_t) + 7;
341 constexpr size_t page_bytes =
342 sizeof(std::uint8_t) +
sizeof(std::uint16_t) * 2 + 3
343 +
sizeof(std::uint64_t) +
sizeof(std::uint64_t) +
sizeof(
int) * 5
344 +
sizeof(std::uint64_t) * 6 +
sizeof(std::uint32_t);
352 +
sizeof(std::uint32_t) * 2 +
sizeof(std::uint64_t)
353 +
sizeof(std::uint8_t) + 7 +
sizeof(std::uint64_t));
356 std::uint64_t root_page = 0;
357 in.read(
reinterpret_cast<char *
>(&root_page),
sizeof(root_page));
360 const auto offset =
static_cast<std::streamoff
>(
361 header_bytes + (root_page - 1) * page_bytes +
sizeof(std::uint8_t)
362 +
sizeof(std::uint16_t) * 2 + 3 +
sizeof(std::uint64_t) * 2);
366 std::array<unsigned char, Aleph::detail::Paged_Value_Codec<int>::encoded_size>
375 constexpr size_t page_bytes =
376 sizeof(std::uint8_t) +
sizeof(std::uint16_t) * 2 + 3
377 +
sizeof(std::uint64_t) +
sizeof(std::uint64_t) +
sizeof(
int) * 5
378 +
sizeof(std::uint64_t) * 6 +
sizeof(std::uint32_t);
382 std::vector<std::uint64_t>
page_ids;
386 auto read_u32 = [&](std::uint32_t & value)
388 in.read(
reinterpret_cast<char *
>(&value),
sizeof(value));
391 auto read_u64 = [&](std::uint64_t & value)
393 in.read(
reinterpret_cast<char *
>(&value),
sizeof(value));
396 auto read_u8 = [&](std::uint8_t & value)
398 in.read(
reinterpret_cast<char *
>(&value),
sizeof(value));
402 std::array<char, Aleph::detail::Ordered_Tree_Snapshot_Magic_Size> magic = {};
404 std::array<char, Aleph::detail::Ordered_Tree_Snapshot_Magic_Size>
407 std::uint32_t key_size = 0;
408 std::uint64_t min_degree = 0;
414 in.read(magic.data(), magic.size());
433 std::uint64_t page_id = 0;
436 in.seekg(
static_cast<std::streamoff
>(page_bytes), std::ios::cur);
446 const std::streamoff
offset,
const size_t size)
448 std::ifstream
in(src, std::ios::binary);
450 std::fstream
out(
dst, std::ios::binary | std::ios::in | std::ios::out);
453 std::vector<char> buffer(
size);
456 in.read(buffer.data(), buffer.size());
461 out.write(buffer.data(), buffer.size());
472 for (
int value : {40, 10, 90, 20, 70, 60, 30})
477 (std::vector<int>{10, 20, 30, 40, 60, 70, 90}));
483 (std::vector<int>{10, 20, 30, 40, 60, 70, 90}));
520 std::uint64_t first = 0;
521 std::uint64_t second = 0;
545 const std::vector<std::string>
expected = {
546 "alfa",
"beta",
"delta",
"epsilon",
"gama"
551 tree(
tmp.path.string(),
false);
568 std::optional<std::string>(
"delta"));
637 std::ofstream
out(
tmp.path.string() +
".wal",
638 std::ios::binary | std::ios::trunc);
640 out <<
"pending recovery";
651#if defined(__unix__) || defined(__APPLE__)
653 GTEST_SKIP() <<
"TSAN does not support these fork-based lock checks";
691 catch (
const std::runtime_error &)
704 GTEST_SKIP() <<
"fork-based lock validation is only available on Unix-like systems";
713 std::ofstream
out(
tmp.path.string() +
".lock",
714 std::ios::binary | std::ios::trunc);
752 std::ofstream
out(
tmp.path, std::ios::binary | std::ios::trunc);
753 out <<
"not a valid Aleph snapshot";
766 for (
int value : {10, 20, 30, 40, 50})
770 std::fstream
io(
tmp.path, std::ios::binary | std::ios::in | std::ios::out);
772 io.seekg(0, std::ios::end);
773 const auto end =
io.tellg();
775 io.seekg(end - std::streamoff(1));
780 byte ^=
static_cast<char>(0x5A);
781 io.seekp(end - std::streamoff(1));
793 const auto journal =
tmp.path.string() +
".journal";
797 for (
int value : {12, 6, 18, 3, 9, 15, 21})
803 fs::copy_file(
tmp.path,
journal, fs::copy_options::overwrite_existing,
ec);
807 std::ofstream
out(
tmp.path, std::ios::binary | std::ios::trunc);
815 (std::vector<int>{3, 6, 9, 12, 15, 18, 21}));
822 const auto wal =
tmp.path.string() +
".wal";
826 for (
int value : {12, 6, 18, 3, 9, 15, 21})
833 std::ofstream
out(
tmp.path, std::ios::binary | std::ios::trunc);
841 (std::vector<int>{3, 6, 9, 12, 15, 18, 21}));
848 const auto wal =
tmp.path.string() +
".wal";
852 for (
int value : {12, 6, 18, 3, 9, 15, 21})
858 std::fstream
wal_io(
wal, std::ios::binary | std::ios::in | std::ios::out);
860 wal_io.seekg(0, std::ios::end);
868 byte ^=
static_cast<char>(0x11);
875 std::ofstream
out(
tmp.path, std::ios::binary | std::ios::trunc);
886 constexpr size_t header_bytes =
888 +
sizeof(std::uint64_t) * 5 +
sizeof(std::uint8_t) + 7;
889 constexpr size_t page_bytes =
890 sizeof(std::uint8_t) +
sizeof(std::uint16_t) * 2 + 3
891 +
sizeof(std::uint64_t) +
sizeof(std::uint64_t) +
sizeof(
int) * 5
892 +
sizeof(std::uint64_t) * 6 +
sizeof(std::uint32_t);
895 const auto wal =
tmp.path.string() +
".wal";
901 for (
int value = 1; value <= 40; ++value)
930 static_cast<std::streamoff
>(header_bytes
944 const auto wal =
tmp.path.string() +
".wal";
949 for (
int value : {12, 6, 18, 3, 9, 15, 21})
971 (std::vector<int>{3, 6, 9, 12, 15, 18, 21, 24, 27}));
995 for (
int value = 1; value <= 80; ++value)
998 for (
int value : {1, 2, 3, 7, 8, 9, 15, 16, 17, 31, 32, 33, 63, 64, 65, 80})
1011 4, 5, 6, 10, 11, 12, 13, 14, 18, 19, 20, 21, 22, 23, 24, 25,
1012 26, 27, 28, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
1013 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
1014 61, 62, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79
Simple dynamic array with automatic resizing and functional operations.
constexpr size_t size() const noexcept
Return the number of elements stored in the stack.
Persistent page-managed B+ Tree stored in a single binary file.
bool insert(const Key &key)
Insert a key if it is not already present.
Persistent page-managed B-Tree stored in a single binary file.
bool verify() const
Verify structural B-Tree invariants across cached pages.
void set_auto_sync(const bool enabled) noexcept
Enable or disable automatic flushing of dirty pages.
bool contains(const Key &key) const
Return whether a key is present.
bool auto_sync_enabled() const noexcept
Return whether automatic synchronization is enabled.
Array< Key > keys() const
Materialize the tree contents in sorted order.
void checkpoint() const
Synonym for sync().
void sync() const
Flush the current tree image using redo WAL or full-image fallback.
bool remove(const Key &key)
Remove a key if present.
void reload()
Discard unsynchronized changes and reload pages from disk.
std::uint64_t checkpoint_sequence() const noexcept
Return the durable checkpoint sequence stored in the backing file.
bool insert(const Key &key)
Insert a key if it is not already present.
const long double offset[]
Offset values indexed by symbol string length (bounded by MAX_OFFSET_INDEX)
std::uint32_t crc32_add_bytes(std::uint32_t crc, const void *data, const size_t size) noexcept
constexpr std::array< char, Ordered_Tree_Snapshot_Magic_Size > ordered_tree_snapshot_magic(const char(&text)[N]) noexcept
std::uint32_t crc32_begin() noexcept
std::uint32_t crc32_add(std::uint32_t crc, const T &value) noexcept
constexpr size_t Ordered_Tree_Snapshot_Magic_Size
std::uint32_t crc32_finish(const std::uint32_t crc) noexcept
Main namespace for Aleph-w library functions.
size_t size(Node *root) noexcept
Divide_Conquer_DP_Result< Cost > divide_and_conquer_partition_dp(const size_t groups, const size_t n, Transition_Cost_Fn transition_cost, const Cost inf=dp_optimization_detail::default_inf< Cost >())
Optimize partition DP using divide-and-conquer optimization.
std::string code(Node *root)
Compute a string with the Lukasiewicz`s word of a tree.
std::vector< typename C::Item_Type > to_vector(const C &c)
Convert a container to a std::vector.
Page-managed persistent B-Tree with file-backed storage.
Page-managed persistent B+ Tree with file-backed storage.