Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
tpl_file_b_tree.H
Go to the documentation of this file.
1/*
2 Aleph_w
3
4 Data structures & Algorithms
5 version 2.0.0b
6 https://github.com/lrleon/Aleph-w
7
8 This file is part of Aleph-w library
9
10 Copyright (c) 2002-2026 Leandro Rabindranath Leon
11
12 Permission is hereby granted, free of charge, to any person obtaining a copy
13 of this software and associated documentation files (the "Software"), to deal
14 in the Software without restriction, including without limitation the rights
15 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16 copies of the Software, and to permit persons to whom the Software is
17 furnished to do so, subject to the following conditions:
18
19 The above copyright notice and this permission notice shall be included in all
20 copies or substantial portions of the Software.
21
22 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 SOFTWARE.
29*/
30
40# ifndef TPL_FILE_B_TREE_H
41# define TPL_FILE_B_TREE_H
42
43# include <array>
44# include <bit>
45# include <cstdint>
46# include <cstring>
47# include <filesystem>
48# include <fstream>
49# include <optional>
50# include <string>
51# include <type_traits>
52# include <utility>
53
54# include <ah-concepts.H>
55# include <ah-errors.H>
56# include <ahFunction.H>
57# include <tpl_array.H>
58# include <tpl_b_tree.H>
61# include <tpl_tree_snapshot.H>
62
63namespace Aleph
64{
151 template <typename Key,
152 class Compare = Aleph::less<Key>,
153 size_t MinDegree = 16,
157 {
158 static_assert(MinDegree >= 2, "File_B_Tree requires MinDegree >= 2");
159 static_assert(std::default_initializable<Key>,
160 "File_B_Tree requires default-initializable keys");
161 static_assert(std::copy_constructible<Key> and std::is_copy_assignable_v<Key>,
162 "File_B_Tree requires copyable keys");
164 "File_B_Tree requires a fixed-size key codec");
165 static_assert(std::same_as<decltype(Codec::decode(
166 static_cast<const unsigned char *>(nullptr))),
167 Key>,
168 "File_B_Tree codec must decode back to Key");
169
170 static constexpr size_t MaxKeys = 2 * MinDegree - 1;
171 static constexpr size_t MaxChildren = 2 * MinDegree;
172 static constexpr size_t MinKeys = MinDegree - 1;
173 static constexpr std::uint32_t LegacyFormatVersion = 1;
174 static constexpr std::uint32_t ChecksummedFormatVersion = 2;
175 static constexpr std::uint32_t HeaderCheckpointFormatVersion = 3;
176 static constexpr std::uint32_t NativeCheckpointFormatVersion = 4;
177 static constexpr std::uint32_t PortableFormatVersion = 5;
178 static constexpr std::uint32_t FormatVersion = PortableFormatVersion;
179 static constexpr std::uint32_t EncodedKeySize =
180 static_cast<std::uint32_t>(Codec::encoded_size);
181 static constexpr std::uint8_t PortableEncodingMarker = 3;
182 static constexpr std::uint8_t EndianMarker =
183 std::endian::native == std::endian::little ? 1 : 2;
184
185 using page_id_t = std::uint64_t;
186
187 enum class PageKind : std::uint8_t
188 {
189 Free = 0,
190 Internal = 1,
191 Leaf = 2
192 };
193
194 struct Page
195 {
196 std::uint8_t kind = static_cast<std::uint8_t>(PageKind::Free);
197 std::uint16_t key_count = 0;
198 std::uint16_t child_count = 0;
199 page_id_t link = 0; // next free page for free pages
200 std::uint64_t checkpoint_sequence = 0;
201 Key keys[MaxKeys] = {};
203 };
204
214
216 {
219 };
220
221 Compare cmp_;
222 std::string file_path_;
224 std::string journal_path_;
225 std::string journal_tmp_path_;
226 std::string wal_path_;
227 std::string wal_tmp_path_;
228 std::string lock_path_;
230 mutable std::fstream file_;
233 mutable bool header_dirty_ = false;
234 bool auto_sync_ = true;
235 mutable std::uint32_t storage_format_version_ = FormatVersion;
236 mutable std::uint64_t checkpoint_sequence_ = 0;
237
238 size_t size_ = 0;
241
242 [[nodiscard]] static constexpr const char * tree_kind() noexcept
243 {
244 return "File_B_Tree";
245 }
246
253
258
259 void ensure_writable(const char * operation) const
260 {
262 << "File_B_Tree::" << operation
263 << "(): file was opened in read-only mode";
264 }
265
266 [[nodiscard]] static constexpr auto file_magic() noexcept
267 {
268 return detail::ordered_tree_snapshot_magic("AlephFBTPg-v1");
269 }
270
271 [[nodiscard]] static constexpr auto wal_magic() noexcept
272 {
273 return detail::ordered_tree_snapshot_magic("AlephFBTWalv1");
274 }
275
276 [[nodiscard]] static constexpr auto wal_trailer_magic() noexcept
277 {
278 return detail::ordered_tree_snapshot_magic("AlephFBTCom1");
279 }
280
281 static constexpr std::uint32_t LegacyWalVersion = 1;
282 static constexpr std::uint32_t CommitTrailerWalVersion = 2;
283 static constexpr std::uint32_t NativeWalVersion = 3;
284 static constexpr std::uint32_t PortableWalVersion = 4;
285 static constexpr std::uint32_t WalVersion = PortableWalVersion;
286
287 [[nodiscard]] static constexpr size_t legacy_header_bytes() noexcept
288 {
289 return detail::Ordered_Tree_Snapshot_Magic_Size + sizeof(std::uint32_t) * 2
290 + sizeof(std::uint64_t) * 5 + sizeof(std::uint8_t) + 7;
291 }
292
293 [[nodiscard]] static constexpr size_t checksummed_header_bytes() noexcept
294 {
295 return legacy_header_bytes() + sizeof(std::uint32_t);
296 }
297
298 [[nodiscard]] static constexpr size_t header_bytes() noexcept
299 {
300 return checksummed_header_bytes() + sizeof(std::uint64_t);
301 }
302
303 [[nodiscard]] static constexpr size_t legacy_page_bytes() noexcept
304 {
305 return sizeof(std::uint8_t) + sizeof(std::uint16_t) * 2 + 3
306 + sizeof(page_id_t)
307 + sizeof(Key) * MaxKeys
308 + sizeof(page_id_t) * MaxChildren;
309 }
310
311 [[nodiscard]] static constexpr size_t checksummed_page_bytes() noexcept
312 {
313 return legacy_page_bytes() + sizeof(std::uint32_t);
314 }
315
316 [[nodiscard]] static constexpr size_t native_page_bytes() noexcept
317 {
318 return legacy_page_bytes() + sizeof(std::uint64_t) + sizeof(std::uint32_t);
319 }
320
321 [[nodiscard]] static constexpr size_t portable_page_bytes() noexcept
322 {
323 return sizeof(std::uint8_t) + sizeof(std::uint16_t) * 2 + 3
324 + sizeof(page_id_t) + sizeof(std::uint64_t)
325 + Codec::encoded_size * MaxKeys
326 + sizeof(page_id_t) * MaxChildren + sizeof(std::uint32_t);
327 }
328
329 [[nodiscard]] static constexpr size_t page_bytes() noexcept
330 {
331 return portable_page_bytes();
332 }
333
334 [[nodiscard]] static constexpr size_t serialized_header_bytes(
335 const std::uint32_t version) noexcept
336 {
337 return version >= HeaderCheckpointFormatVersion ? header_bytes()
340 }
341
342 [[nodiscard]] static constexpr size_t serialized_page_bytes(
343 const std::uint32_t version) noexcept
344 {
345 return version >= PortableFormatVersion ? portable_page_bytes()
349 }
350
351 [[nodiscard]] static constexpr bool version_uses_portable_codec(
352 const std::uint32_t version) noexcept
353 {
354 return version >= PortableFormatVersion;
355 }
356
357 [[nodiscard]] static constexpr std::uint8_t encoding_marker_for_version(
358 const std::uint32_t version) noexcept
359 {
360 return version_uses_portable_codec(version)
362 : EndianMarker;
363 }
364
365 [[nodiscard]] static constexpr std::uint32_t key_size_for_version(
366 const std::uint32_t version) noexcept
367 {
368 return version_uses_portable_codec(version)
370 : static_cast<std::uint32_t>(sizeof(Key));
371 }
372
373 [[nodiscard]] static constexpr std::array<char, 7>
374 reserved_bytes_for_version(const std::uint32_t version) noexcept
375 {
376 std::array<char, 7> reserved = {};
377 if (version_uses_portable_codec(version))
378 {
379 const auto codec_id = Codec::storage_id;
380 reserved[0] = static_cast<char>(codec_id & 0xFFu);
381 reserved[1] = static_cast<char>((codec_id >> 8) & 0xFFu);
382 reserved[2] = static_cast<char>((codec_id >> 16) & 0xFFu);
383 reserved[3] = static_cast<char>((codec_id >> 24) & 0xFFu);
384 }
385 return reserved;
386 }
387
388 [[nodiscard]] static constexpr Page make_page(const PageKind kind) noexcept
389 {
390 Page page;
391 page.kind = static_cast<std::uint8_t>(kind);
392 return page;
393 }
394
396 const std::string & source_path,
397 const std::uint32_t version)
398 {
399 if constexpr (not std::is_trivially_copyable_v<Key>)
401 << "File_B_Tree: file " << source_path
402 << " uses legacy native format version " << version
403 << ", which requires trivially copyable keys; reopen or rewrite it "
404 << "with portable format version " << PortableFormatVersion
405 << " or newer";
406 }
407
409 const std::string & source_path,
410 const std::uint32_t wal_version)
411 {
412 if constexpr (not std::is_trivially_copyable_v<Key>)
414 << "File_B_Tree: WAL " << source_path
415 << " uses legacy native format version " << wal_version
416 << ", which requires trivially copyable keys; recover it with a "
417 << "trivially copyable instantiation or discard the stale WAL";
418 }
419
420 [[nodiscard]] static constexpr bool is_leaf(const Page & page) noexcept
421 {
422 return page.kind == static_cast<std::uint8_t>(PageKind::Leaf);
423 }
424
425 [[nodiscard]] static constexpr bool is_internal(const Page & page) noexcept
426 {
427 return page.kind == static_cast<std::uint8_t>(PageKind::Internal);
428 }
429
430 [[nodiscard]] static constexpr bool is_free(const Page & page) noexcept
431 {
432 return page.kind == static_cast<std::uint8_t>(PageKind::Free);
433 }
434
436 {
437 pages_.empty();
439 pages_.append(Page{});
441 size_ = 0;
442 root_page_ = 0;
443 free_page_head_ = 0;
445 header_dirty_ = true;
447 }
448
449 void ensure_parent_dir() const
450 {
451 namespace fs = std::filesystem;
452
453 const fs::path path(file_path_);
454 if (not path.has_parent_path())
455 return;
456
457 std::error_code ec;
458 (void) fs::create_directories(path.parent_path(), ec);
460 << "File_B_Tree: cannot create directory "
461 << path.parent_path().string() << " for " << file_path_
462 << " (" << ec.message() << ")";
463 }
464
465 void open_storage(const bool truncate = false) const
466 {
467 if (truncate)
468 ensure_writable("open_storage");
469
470 if (not read_only())
472
473 if (file_.is_open())
474 file_.close();
475
476 if (truncate)
477 {
478 std::ofstream create(file_path_, std::ios::binary | std::ios::trunc);
479 ah_runtime_error_unless(create.good())
480 << "File_B_Tree: cannot create " << file_path_;
481 }
482
483 const auto mode = read_only()
484 ? (std::ios::binary | std::ios::in)
485 : (std::ios::binary | std::ios::in | std::ios::out);
486 file_.open(file_path_, mode);
488 << "File_B_Tree: cannot open " << file_path_;
489 }
490
492 {
493 journal_path_ = file_path_ + ".journal";
494 journal_tmp_path_ = file_path_ + ".journal.tmp";
495 wal_path_ = file_path_ + ".wal";
496 wal_tmp_path_ = file_path_ + ".wal.tmp";
497 lock_path_ = file_path_ + ".lock";
498 }
499
500 template <typename T>
501 static void write_pod(std::ostream & out,
502 const T & value,
503 const std::string & file_path,
504 const char * what)
505 {
506 out.write(reinterpret_cast<const char *>(&value), sizeof(value));
508 << "File_B_Tree: cannot write " << what << " to " << file_path;
509 }
510
511 template <typename T>
512 static T read_pod(std::istream & in,
513 const std::string & file_path,
514 const char * what)
515 {
516 T value{};
517 in.read(reinterpret_cast<char *>(&value), sizeof(value));
519 << "File_B_Tree: cannot read " << what << " from " << file_path;
520 return value;
521 }
522
523 static void write_native_key(std::ostream & out,
524 const Key & key,
525 const std::string & file_path,
526 const char * what)
527 {
528 if constexpr (std::is_trivially_copyable_v<Key>)
529 {
530 const auto bytes = std::bit_cast<std::array<char, sizeof(Key)>>(key);
531 out.write(bytes.data(), bytes.size());
533 << "File_B_Tree: cannot write " << what << " to " << file_path;
534 }
535 else
537 << "File_B_Tree: cannot write legacy native " << what << " to "
538 << file_path << " because legacy native formats require trivially "
539 << "copyable keys";
540 }
541
542 static void write_portable_key(std::ostream & out,
543 const Key & key,
544 const std::string & file_path,
545 const char * what)
546 {
547 std::array<unsigned char, Codec::encoded_size> bytes = {};
548 Codec::encode(key, bytes.data());
549 out.write(reinterpret_cast<const char *>(bytes.data()), bytes.size());
551 << "File_B_Tree: cannot write " << what << " to " << file_path;
552 }
553
554 static void write_key(std::ostream & out,
555 const Key & key,
556 const std::string & file_path,
557 const char * what)
558 {
560 }
561
562 [[nodiscard]] static Key read_native_key(std::istream & in,
563 const std::string & file_path,
564 const char * what)
565 {
566 if constexpr (std::is_trivially_copyable_v<Key>)
567 {
568 std::array<char, sizeof(Key)> bytes = {};
569 in.read(bytes.data(), bytes.size());
571 << "File_B_Tree: cannot read " << what << " from " << file_path;
572 return std::bit_cast<Key>(bytes);
573 }
574 else
575 {
577 << "File_B_Tree: cannot read legacy native " << what << " from "
578 << file_path << " because legacy native formats require trivially "
579 << "copyable keys";
580 return Key{};
581 }
582 }
583
584 [[nodiscard]] static Key read_portable_key(std::istream & in,
585 const std::string & file_path,
586 const char * what)
587 {
588 std::array<unsigned char, Codec::encoded_size> bytes = {};
589 in.read(reinterpret_cast<char *>(bytes.data()), bytes.size());
591 << "File_B_Tree: cannot read " << what << " from " << file_path;
592 return Codec::decode(bytes.data());
593 }
594
595 [[nodiscard]] static Key read_key(std::istream & in,
596 const std::string & file_path,
597 const char * what,
598 const std::uint32_t version)
599 {
600 return version_uses_portable_codec(version)
603 }
604
605 [[nodiscard]] static std::uint32_t add_key_to_crc(
606 const std::uint32_t crc,
607 const Key & key)
608 {
609 return Codec::add_to_crc(crc, key);
610 }
611
612 [[nodiscard]] static std::uint32_t add_native_key_to_crc(
613 const std::uint32_t crc,
614 const Key & key)
615 {
616 if constexpr (std::is_trivially_copyable_v<Key>)
617 return detail::crc32_add_bytes(crc, &key, sizeof(Key));
618 else
619 {
621 << "File_B_Tree: cannot checksum legacy native keys because "
622 << "legacy native formats require trivially copyable keys";
623 return crc;
624 }
625 }
626
627 [[nodiscard]] static std::uint32_t header_checksum_v2(
628 const std::uint32_t version,
629 const std::uint64_t size,
630 const page_id_t root_page,
631 const std::uint64_t page_count,
632 const page_id_t free_page_head) noexcept
633 {
634 std::uint32_t crc = detail::crc32_begin();
635 crc = detail::crc32_add(crc, version);
637 crc = detail::crc32_add(crc, static_cast<std::uint64_t>(MinDegree));
639 const auto reserved = reserved_bytes_for_version(version);
642 crc = detail::crc32_add(crc, root_page);
644 crc = detail::crc32_add(crc, free_page_head);
646 }
647
648 [[nodiscard]] static std::uint32_t header_checksum(
649 const std::uint32_t version,
650 const std::uint64_t size,
651 const page_id_t root_page,
652 const std::uint64_t page_count,
653 const page_id_t free_page_head,
654 const std::uint64_t checkpoint_sequence) noexcept
655 {
656 std::uint32_t crc = detail::crc32_begin();
657 crc = detail::crc32_add(crc, version);
659 crc = detail::crc32_add(crc, static_cast<std::uint64_t>(MinDegree));
661 const auto reserved = reserved_bytes_for_version(version);
664 crc = detail::crc32_add(crc, root_page);
666 crc = detail::crc32_add(crc, free_page_head);
669 }
670
671 [[nodiscard]] static std::uint32_t page_checksum_v3(const Page & page)
672 {
673 std::uint32_t crc = detail::crc32_begin();
674 const std::array<char, 3> padding = {};
675
679 crc = detail::crc32_add_bytes(crc, padding.data(), padding.size());
681 for (size_t i = 0; i < MaxKeys; ++i)
685 }
686
687 [[nodiscard]] static std::uint32_t page_checksum(const Page & page)
688 {
689 std::uint32_t crc = detail::crc32_begin();
690 const std::array<char, 3> padding = {};
691
695 crc = detail::crc32_add_bytes(crc, padding.data(), padding.size());
698 for (size_t i = 0; i < MaxKeys; ++i)
702 }
703
704 [[nodiscard]] static std::uint32_t page_checksum_v4(const Page & page)
705 {
706 std::uint32_t crc = detail::crc32_begin();
707 const std::array<char, 3> padding = {};
708
712 crc = detail::crc32_add_bytes(crc, padding.data(), padding.size());
715 for (size_t i = 0; i < MaxKeys; ++i)
719 }
720
721 [[nodiscard]] static std::uint32_t wal_record_checksum_add(
722 std::uint32_t crc,
723 const page_id_t page_id,
724 const Page & page)
725 {
726 const std::array<char, 3> padding = {};
727
728 crc = detail::crc32_add(crc, page_id);
732 crc = detail::crc32_add_bytes(crc, padding.data(), padding.size());
735 for (size_t i = 0; i < MaxKeys; ++i)
739 return crc;
740 }
741
742 [[nodiscard]] static std::uint32_t wal_record_checksum_add_v3(
743 std::uint32_t crc,
744 const page_id_t page_id,
745 const Page & page)
746 {
747 const std::array<char, 3> padding = {};
748
749 crc = detail::crc32_add(crc, page_id);
753 crc = detail::crc32_add_bytes(crc, padding.data(), padding.size());
756 for (size_t i = 0; i < MaxKeys; ++i)
760 return crc;
761 }
762
763 [[nodiscard]] static std::uint32_t wal_record_checksum_add_v2(
764 std::uint32_t crc,
765 const page_id_t page_id,
766 const Page & page)
767 {
768 const std::array<char, 3> padding = {};
769
770 crc = detail::crc32_add(crc, page_id);
774 crc = detail::crc32_add_bytes(crc, padding.data(), padding.size());
776 for (size_t i = 0; i < MaxKeys; ++i)
780 return crc;
781 }
782
783 static void write_page_record(std::ostream & out,
784 const Page & page,
785 const std::string & target_path)
786 {
787 write_pod(out, page.kind, target_path, "page kind");
788 write_pod(out, page.key_count, target_path, "page key count");
789 write_pod(out, page.child_count, target_path, "page child count");
790
791 const std::array<char, 3> padding = {};
792 out.write(padding.data(), padding.size());
794 << "File_B_Tree: cannot write page padding to " << target_path;
795
796 write_pod(out, page.link, target_path, "page link");
798 "page checkpoint sequence");
799
800 for (size_t i = 0; i < MaxKeys; ++i)
801 write_key(out, page.keys[i], target_path, "page key");
802
803 for (size_t i = 0; i < MaxChildren; ++i)
804 write_pod(out, page.children[i], target_path, "page child");
805
806 write_pod(out, page_checksum(page), target_path, "page checksum");
807 }
808
809 void seekp_to(const std::streamoff offset) const
810 {
811 file_.clear();
812 file_.seekp(offset);
814 << "File_B_Tree: cannot seek for writing in " << file_path_;
815 }
816
817 [[nodiscard]] std::streamoff current_page_offset(const page_id_t page_id) const noexcept
818 {
819 return static_cast<std::streamoff>(header_bytes()
820 + (page_id - 1) * page_bytes());
821 }
822
823 void write_header_to_storage(const std::uint64_t size,
824 const page_id_t root_page,
825 const std::uint64_t page_count,
826 const page_id_t free_page_head,
827 const std::uint64_t checkpoint_sequence) const
828 {
829 seekp_to(0);
830 const auto magic = file_magic();
831 file_.write(magic.data(), magic.size());
833 << "File_B_Tree: cannot write magic to " << file_path_;
834
836
837 write_pod(file_, FormatVersion, file_path_, "header version");
839 "header key size");
840 write_pod(file_, static_cast<std::uint64_t>(MinDegree), file_path_,
841 "header minimum degree");
843 "header encoding");
844 file_.write(reserved.data(), reserved.size());
846 << "File_B_Tree: cannot write reserved header bytes to " << file_path_;
847 write_pod(file_, size, file_path_, "header size");
848 write_pod(file_, root_page, file_path_, "header root page");
849 write_pod(file_, page_count, file_path_, "header page count");
850 write_pod(file_, free_page_head, file_path_, "header free page head");
852 "header checkpoint sequence");
855 free_page_head, checkpoint_sequence),
856 file_path_, "header checksum");
857 }
858
860 {
861 write_header_to_storage(static_cast<std::uint64_t>(size_), root_page_,
862 static_cast<std::uint64_t>(pages_.size() - 1),
864 }
865
867 const Page & page) const
868 {
871 }
872
874 {
875 size_t count = 0;
876 for (page_id_t page_id = 1; page_id < dirty_pages_.size(); ++page_id)
877 if (dirty_pages_[page_id] != 0)
878 ++count;
879 return count;
880 }
881
882 [[nodiscard]] static constexpr size_t wal_header_bytes() noexcept
883 {
884 return detail::Ordered_Tree_Snapshot_Magic_Size + sizeof(std::uint32_t) * 3
885 + sizeof(std::uint64_t) * 7 + sizeof(std::uint8_t) + 7;
886 }
887
888 [[nodiscard]] static constexpr std::uint32_t
895
896 [[nodiscard]] static constexpr size_t
897 wal_record_bytes(const std::uint32_t wal_version) noexcept
898 {
899 return sizeof(page_id_t)
901 }
902
903 [[nodiscard]] static std::uint32_t wal_header_checksum(
904 const std::uint32_t wal_version,
905 const std::uint64_t size,
906 const page_id_t root_page,
907 const std::uint64_t page_count,
908 const page_id_t free_page_head,
909 const std::uint64_t checkpoint_sequence,
910 const std::uint64_t dirty_count) noexcept
911 {
912 std::uint32_t crc = detail::crc32_begin();
916 crc = detail::crc32_add(crc, static_cast<std::uint64_t>(MinDegree));
919 const auto reserved =
923 crc = detail::crc32_add(crc, root_page);
925 crc = detail::crc32_add(crc, free_page_head);
929 }
930
931 [[nodiscard]] static constexpr size_t wal_trailer_bytes() noexcept
932 {
934 + sizeof(std::uint64_t) * 2 + sizeof(std::uint32_t);
935 }
936
937 void write_wal_to_path(const std::string & target_path,
938 const std::uint64_t checkpoint_sequence) const
939 {
940 std::ofstream out(target_path, std::ios::binary | std::ios::trunc);
942 << "File_B_Tree: cannot open " << target_path << " for writing";
943
944 const auto magic = wal_magic();
945 out.write(magic.data(), magic.size());
947 << "File_B_Tree: cannot write WAL magic to " << target_path;
948
949 const auto page_count = static_cast<std::uint64_t>(pages_.size() - 1);
950 const auto size = static_cast<std::uint64_t>(size_);
951 const auto dirty_count = static_cast<std::uint64_t>(dirty_page_count());
953 std::uint32_t payload_crc = detail::crc32_begin();
954
955 write_pod(out, WalVersion, target_path, "WAL version");
957 "WAL key size");
958 write_pod(out, static_cast<std::uint64_t>(MinDegree), target_path,
959 "WAL minimum degree");
961 "WAL encoding");
962 out.write(reserved.data(), reserved.size());
964 << "File_B_Tree: cannot write WAL reserved bytes to " << target_path;
965 write_pod(out, size, target_path, "WAL size");
966 write_pod(out, root_page_, target_path, "WAL root page");
967 write_pod(out, page_count, target_path, "WAL page count");
968 write_pod(out, free_page_head_, target_path, "WAL free page head");
970 "WAL checkpoint sequence");
971 write_pod(out, dirty_count, target_path, "WAL dirty count");
976 target_path, "WAL checksum");
977
978 for (page_id_t page_id = 1; page_id < dirty_pages_.size(); ++page_id)
979 if (dirty_pages_[page_id] != 0)
980 {
982 page(page_id));
983 write_pod(out, page_id, target_path, "WAL page id");
985 }
986
987 const auto trailer_magic = wal_trailer_magic();
988 out.write(trailer_magic.data(), trailer_magic.size());
990 << "File_B_Tree: cannot write WAL commit trailer to " << target_path;
992 "WAL trailer checkpoint sequence");
993 write_pod(out, dirty_count, target_path, "WAL trailer dirty count");
995 "WAL payload checksum");
996
997 out.flush();
999 << "File_B_Tree: cannot flush " << target_path;
1000 out.close();
1002 << "File_B_Tree: cannot close " << target_path;
1003
1006 }
1007
1008 [[nodiscard]] static Page read_page_from_stream(std::istream & in,
1009 const std::string & path,
1010 const std::uint32_t version)
1011 {
1012 Page page;
1013 page.kind = read_pod<std::uint8_t>(in, path, "page kind");
1014 page.key_count = read_pod<std::uint16_t>(in, path, "page key count");
1015 page.child_count = read_pod<std::uint16_t>(in, path, "page child count");
1016
1017 std::array<char, 3> padding = {};
1018 in.read(padding.data(), padding.size());
1020 << "File_B_Tree: cannot read page padding from " << path;
1021
1022 page.link = read_pod<page_id_t>(in, path, "page link");
1023
1024 if (version >= NativeCheckpointFormatVersion)
1026 read_pod<std::uint64_t>(in, path, "page checkpoint sequence");
1027
1028 for (size_t i = 0; i < MaxKeys; ++i)
1029 page.keys[i] = read_key(in, path, "page key", version);
1030
1031 for (size_t i = 0; i < MaxChildren; ++i)
1032 page.children[i] = read_pod<page_id_t>(in, path, "page child");
1033
1034 if (version >= HeaderCheckpointFormatVersion)
1035 {
1036 const auto stored = read_pod<std::uint32_t>(in, path, "page checksum");
1038 == (version >= PortableFormatVersion
1043 << "File_B_Tree: page checksum mismatch in " << path;
1044 }
1045
1046 return page;
1047 }
1048
1049 void write_full_image(std::ostream & out,
1050 const std::string & target_path,
1051 const std::uint64_t checkpoint_sequence) const
1052 {
1053 const auto magic = file_magic();
1054 out.write(magic.data(), magic.size());
1056 << "File_B_Tree: cannot write magic to " << target_path;
1057
1058 const auto page_count = static_cast<std::uint64_t>(pages_.size() - 1);
1059 const auto size = static_cast<std::uint64_t>(size_);
1061
1062 write_pod(out, FormatVersion, target_path, "header version");
1064 "header key size");
1065 write_pod(out, static_cast<std::uint64_t>(MinDegree), target_path,
1066 "header minimum degree");
1068 "header encoding");
1069 out.write(reserved.data(), reserved.size());
1071 << "File_B_Tree: cannot write reserved header bytes to " << target_path;
1072 write_pod(out, size, target_path, "header size");
1073 write_pod(out, root_page_, target_path, "header root page");
1074 write_pod(out, page_count, target_path, "header page count");
1075 write_pod(out, free_page_head_, target_path, "header free page head");
1077 "header checkpoint sequence");
1078 write_pod(out,
1081 target_path, "header checksum");
1082
1083 for (page_id_t page_id = 1; page_id < pages_.size(); ++page_id)
1085 }
1086
1087 void write_full_image_to_path(const std::string & target_path,
1088 const std::uint64_t checkpoint_sequence) const
1089 {
1090 namespace fs = std::filesystem;
1091 const fs::path path(target_path);
1092 if (path.has_parent_path())
1093 {
1094 std::error_code ec;
1095 (void) fs::create_directories(path.parent_path(), ec);
1097 << "File_B_Tree: cannot create directory "
1098 << path.parent_path().string() << " for " << target_path
1099 << " (" << ec.message() << ")";
1100 }
1101
1102 std::ofstream out(target_path, std::ios::binary | std::ios::trunc);
1104 << "File_B_Tree: cannot open " << target_path << " for writing";
1105
1107 out.flush();
1109 << "File_B_Tree: cannot flush " << target_path;
1110 out.close();
1112 << "File_B_Tree: cannot close " << target_path;
1113
1116 }
1117
1119 {
1121 {
1122 if (file_.is_open())
1123 file_.close();
1125 if (not file_.is_open())
1126 open_storage(false);
1129 return;
1130 }
1131
1132 if (not file_.is_open())
1133 open_storage(false);
1134
1136 for (page_id_t page_id = 1; page_id < dirty_pages_.size(); ++page_id)
1137 if (dirty_pages_[page_id] != 0)
1138 write_current_page_to_storage(page_id, page(page_id));
1139
1140 file_.flush();
1142 << "File_B_Tree: cannot flush " << file_path_;
1146 }
1147
1149 {
1150 namespace fs = std::filesystem;
1151 if (read_only())
1152 {
1154 and not fs::exists(wal_path_))
1155 << "File_B_Tree: cannot open read-only while WAL recovery sidecars "
1156 << "are pending for " << file_path_;
1157 return;
1158 }
1159
1161 if (not fs::exists(wal_path_))
1162 return;
1163
1164 std::ifstream in(wal_path_, std::ios::binary);
1166 << "File_B_Tree: cannot open WAL " << wal_path_ << " for reading";
1167
1168 const auto expected_magic = wal_magic();
1169 std::array<char, detail::Ordered_Tree_Snapshot_Magic_Size> magic = {};
1170 in.read(magic.data(), magic.size());
1172 << "File_B_Tree: cannot read WAL magic from " << wal_path_;
1174 << "File_B_Tree: incompatible WAL file " << wal_path_;
1175
1176 const auto wal_version = read_pod<std::uint32_t>(in, wal_path_, "WAL version");
1181 << "File_B_Tree: unsupported WAL version " << wal_version
1182 << " in " << wal_path_;
1185
1186 const auto key_size = read_pod<std::uint32_t>(in, wal_path_, "WAL key size");
1190 << "File_B_Tree: WAL " << wal_path_ << " was created for key size "
1191 << key_size << ", but this instantiation expects "
1193
1194 const auto min_degree =
1195 read_pod<std::uint64_t>(in, wal_path_, "WAL minimum degree");
1196 ah_runtime_error_unless(min_degree == MinDegree)
1197 << "File_B_Tree: WAL " << wal_path_
1198 << " was created with minimum degree " << min_degree
1199 << ", but this instantiation expects " << MinDegree;
1200
1201 const auto encoding = read_pod<std::uint8_t>(in, wal_path_, "WAL encoding");
1205 << "File_B_Tree: WAL " << wal_path_
1206 << " was created with an incompatible encoding";
1207
1208 std::array<char, 7> reserved = {};
1209 in.read(reserved.data(), reserved.size());
1211 << "File_B_Tree: cannot read WAL reserved bytes from " << wal_path_;
1215 << "File_B_Tree: WAL " << wal_path_
1216 << " was created with an incompatible key codec";
1217
1218 const auto wal_size = read_pod<std::uint64_t>(in, wal_path_, "WAL size");
1219 const auto wal_root_page =
1220 read_pod<page_id_t>(in, wal_path_, "WAL root page");
1221 const auto wal_page_count =
1222 read_pod<std::uint64_t>(in, wal_path_, "WAL page count");
1223 const auto wal_free_head =
1224 read_pod<page_id_t>(in, wal_path_, "WAL free page head");
1225 const auto wal_checkpoint_sequence =
1226 read_pod<std::uint64_t>(in, wal_path_, "WAL checkpoint sequence");
1227 const auto wal_dirty_count =
1228 read_pod<std::uint64_t>(in, wal_path_, "WAL dirty count");
1229 const auto wal_checksum =
1230 read_pod<std::uint32_t>(in, wal_path_, "WAL checksum");
1231
1237 << "File_B_Tree: WAL checksum mismatch in " << wal_path_;
1238
1239 std::error_code ec;
1240 const auto actual_size = fs::file_size(wal_path_, ec);
1242 << "File_B_Tree: cannot determine the size of WAL " << wal_path_
1243 << " (" << ec.message() << ")";
1245 const auto expected_size = wal_header_bytes()
1248 ah_runtime_error_unless(actual_size == expected_size)
1249 << "File_B_Tree: WAL size mismatch in " << wal_path_
1250 << " (expected " << expected_size << ", got " << actual_size << ")";
1251
1252 if (file_.is_open())
1253 file_.close();
1254
1255 if (not file_exists())
1256 open_storage(true);
1257 else
1258 open_storage(false);
1259
1261 std::uint32_t payload_crc = detail::crc32_begin();
1262 for (std::uint64_t i = 0; i < wal_dirty_count; ++i)
1263 {
1264 const auto page_id = read_pod<page_id_t>(in, wal_path_, "WAL page id");
1265 ah_runtime_error_unless(page_id > 0 and page_id <= wal_page_count)
1266 << "File_B_Tree: invalid WAL page id " << page_id
1267 << " in " << wal_path_;
1272 else if (wal_version >= NativeWalVersion)
1274 recovered);
1275 else if (has_commit_trailer)
1277 recovered);
1278 recovered_records.append(Wal_Record{page_id, recovered});
1279 }
1280
1282 {
1284 std::array<char, detail::Ordered_Tree_Snapshot_Magic_Size>
1285 trailer_magic = {};
1286 in.read(trailer_magic.data(), trailer_magic.size());
1288 << "File_B_Tree: cannot read WAL commit trailer from "
1289 << wal_path_;
1291 << "File_B_Tree: missing WAL commit trailer in " << wal_path_;
1292
1293 const auto trailer_checkpoint_sequence =
1295 "WAL trailer checkpoint sequence");
1296 const auto trailer_dirty_count =
1297 read_pod<std::uint64_t>(in, wal_path_, "WAL trailer dirty count");
1298 const auto trailer_payload_checksum =
1299 read_pod<std::uint32_t>(in, wal_path_, "WAL payload checksum");
1300
1303 << "File_B_Tree: WAL trailer checkpoint mismatch in "
1304 << wal_path_;
1306 << "File_B_Tree: WAL trailer dirty-count mismatch in "
1307 << wal_path_;
1310 << "File_B_Tree: WAL payload checksum mismatch in " << wal_path_;
1311 }
1312
1313 bool stale_wal = false;
1314 if (storage_format_version_on_disk() == std::optional<std::uint32_t>(FormatVersion))
1315 {
1316 // Verify the on-disk header already reflects wal_checkpoint_sequence
1317 // before treating the WAL as stale. When recovered_records is empty
1318 // (dirty_count == 0) the per-page loop below does not execute, which
1319 // would wrongly leave stale_wal=true even when the header write had
1320 // not yet been flushed to disk at crash time.
1321 bool header_current = false;
1322 try
1323 {
1326 disk.checkpoint_sequence >= wal_checkpoint_sequence;
1327 }
1328 catch (...) {}
1329
1330 if (header_current)
1331 {
1332 stale_wal = true;
1333 for (size_t i = 0; i < recovered_records.size(); ++i)
1334 {
1335 const auto persisted =
1337 if (not persisted.has_value()
1338 or persisted->checkpoint_sequence < wal_checkpoint_sequence)
1339 {
1340 stale_wal = false;
1341 break;
1342 }
1343 }
1344 }
1345 }
1346
1347 if (stale_wal)
1348 {
1350 return;
1351 }
1352
1355 for (size_t i = 0; i < recovered_records.size(); ++i)
1356 {
1357 const auto persisted =
1359 if (persisted.has_value()
1360 and persisted->checkpoint_sequence >= wal_checkpoint_sequence)
1361 continue;
1364 }
1365
1366 file_.flush();
1368 << "File_B_Tree: cannot flush recovered data to " << file_path_;
1373 }
1374
1376 {
1377 namespace fs = std::filesystem;
1379 if (read_only())
1380 {
1382 and not fs::exists(journal_path_))
1383 << "File_B_Tree: cannot open read-only while journal recovery "
1384 << "sidecars are pending for " << file_path_;
1385 return;
1386 }
1387
1389 if (not fs::exists(journal_path_))
1390 return;
1391
1393
1394 if (file_.is_open())
1395 file_.close();
1396
1399 }
1400
1401 [[nodiscard]] Loaded_Image read_image(const std::string & source_path) const
1402 {
1403 namespace fs = std::filesystem;
1404
1405 std::ifstream in(source_path, std::ios::binary);
1407 << "File_B_Tree: cannot open " << source_path << " for reading";
1408
1409 const auto expected_magic = file_magic();
1410 std::array<char, detail::Ordered_Tree_Snapshot_Magic_Size> magic = {};
1411 in.read(magic.data(), magic.size());
1413 << "File_B_Tree: cannot read header magic from " << source_path;
1414
1416 << "File_B_Tree: file " << source_path
1417 << " does not contain a compatible paged B-Tree";
1418
1419 const auto version =
1420 read_pod<std::uint32_t>(in, source_path, "header version");
1422 or version == ChecksummedFormatVersion
1425 or version == FormatVersion)
1426 << "File_B_Tree: unsupported format version " << version
1427 << " in " << source_path;
1428 if (version < PortableFormatVersion)
1430
1431 const auto key_size =
1432 read_pod<std::uint32_t>(in, source_path, "header key size");
1433 ah_runtime_error_unless(key_size == key_size_for_version(version))
1434 << "File_B_Tree: file " << source_path << " was created for key size "
1435 << key_size << ", but this instantiation expects "
1436 << key_size_for_version(version);
1437
1438 const auto min_degree =
1439 read_pod<std::uint64_t>(in, source_path, "header minimum degree");
1440 ah_runtime_error_unless(min_degree == MinDegree)
1441 << "File_B_Tree: file " << source_path
1442 << " was created with minimum degree " << min_degree
1443 << ", but this instantiation expects " << MinDegree;
1444
1445 const auto encoding =
1446 read_pod<std::uint8_t>(in, source_path, "header encoding");
1448 << "File_B_Tree: file " << source_path
1449 << " was created with an incompatible encoding";
1450
1451 std::array<char, 7> reserved = {};
1452 in.read(reserved.data(), reserved.size());
1454 << "File_B_Tree: cannot read reserved header bytes from " << source_path;
1456 << "File_B_Tree: file " << source_path
1457 << " was created with an incompatible key codec";
1458
1460 image.version = version;
1461 image.size = read_pod<std::uint64_t>(in, source_path, "header size");
1462 image.root_page = read_pod<page_id_t>(in, source_path, "header root page");
1463 const auto page_count =
1464 read_pod<std::uint64_t>(in, source_path, "header page count");
1465 image.free_page_head = read_pod<page_id_t>(in, source_path,
1466 "header free page head");
1467
1468 if (version >= HeaderCheckpointFormatVersion)
1469 {
1470 image.checkpoint_sequence =
1472 "header checkpoint sequence");
1474 "header checksum");
1476 version,
1477 static_cast<std::uint64_t>(image.size),
1478 image.root_page, page_count,
1479 image.free_page_head,
1480 image.checkpoint_sequence))
1481 << "File_B_Tree: header checksum mismatch in " << source_path;
1482 }
1483 else if (version >= ChecksummedFormatVersion)
1484 {
1486 "header checksum");
1488 version, static_cast<std::uint64_t>(image.size),
1489 image.root_page, page_count,
1490 image.free_page_head))
1491 << "File_B_Tree: header checksum mismatch in " << source_path;
1492 }
1493
1494 std::error_code ec;
1495 const auto actual_size = fs::file_size(source_path, ec);
1497 << "File_B_Tree: cannot determine the size of " << source_path
1498 << " (" << ec.message() << ")";
1499 const auto expected_size =
1501 ah_runtime_error_unless(actual_size == expected_size)
1502 << "File_B_Tree: file size mismatch in " << source_path
1503 << " (expected " << expected_size << ", got " << actual_size << ")";
1504
1505 image.pages.empty();
1506 image.pages.reserve(page_count + 1);
1507 image.pages.append(Page{});
1508 for (page_id_t page_id = 1; page_id <= page_count; ++page_id)
1509 image.pages.append(read_page_from_stream(in, source_path, version));
1510
1512 << "File_B_Tree: invalid root page id in " << source_path;
1513 ah_runtime_error_unless(image.free_page_head <= page_count)
1514 << "File_B_Tree: invalid free-list head in " << source_path;
1515
1516 return image;
1517 }
1518
1520 {
1522
1523 size_ = image.size;
1524 root_page_ = image.root_page;
1525 free_page_head_ = image.free_page_head;
1526 checkpoint_sequence_ = image.checkpoint_sequence;
1528 pages_ = std::move(image.pages);
1529
1531 dirty_pages_.reserve(pages_.size());
1532 for (size_t i = 0; i < pages_.size(); ++i)
1534
1535 header_dirty_ = false;
1537 << "File_B_Tree: structural verification failed for " << file_path_;
1538 open_storage(false);
1539 }
1540
1542 {
1543 ensure_writable("create_empty_file");
1545 open_storage(true);
1546 sync();
1547 }
1548
1549 [[nodiscard]] bool file_exists() const
1550 {
1551 return std::ifstream(file_path_, std::ios::binary).good();
1552 }
1553
1554 [[nodiscard]] std::optional<std::uint32_t> storage_format_version_on_disk() const
1555 {
1556 try
1557 {
1558 if (not file_exists())
1559 return std::nullopt;
1560
1561 std::ifstream in(file_path_, std::ios::binary);
1562 if (not in.good())
1563 return std::nullopt;
1564
1565 std::array<char, detail::Ordered_Tree_Snapshot_Magic_Size> magic = {};
1566 in.read(magic.data(), magic.size());
1567 if (not in.good() or magic != file_magic())
1568 return std::nullopt;
1569
1570 return read_pod<std::uint32_t>(in, file_path_, "header version");
1571 }
1572 catch (...)
1573 {
1574 return std::nullopt;
1575 }
1576 }
1577
1578 [[nodiscard]] std::optional<Page>
1580 {
1581 if (storage_format_version_on_disk() != std::optional<std::uint32_t>(FormatVersion))
1582 return std::nullopt;
1583
1584 if (not file_.is_open())
1585 open_storage(false);
1586
1587 file_.clear();
1588 file_.seekg(0, std::ios::end);
1589 if (not file_.good())
1590 return std::nullopt;
1591
1592 const auto file_size = file_.tellg();
1593 const auto offset = current_page_offset(page_id);
1594 if (file_size < 0
1595 or offset < 0
1596 or file_size - offset < static_cast<std::streamoff>(page_bytes()))
1597 return std::nullopt;
1598
1599 file_.clear();
1600 file_.seekg(offset);
1601 if (not file_.good())
1602 return std::nullopt;
1603
1604 try
1605 {
1607 }
1608 catch (...)
1609 {
1610 file_.clear();
1611 return std::nullopt;
1612 }
1613 }
1614
1616 {
1617 if (header_dirty_)
1618 return true;
1619
1620 for (page_id_t page_id = 1; page_id < dirty_pages_.size(); ++page_id)
1621 if (dirty_pages_[page_id] != 0)
1622 return true;
1623 return false;
1624 }
1625
1627 {
1628 header_dirty_ = false;
1629 for (page_id_t page_id = 1; page_id < dirty_pages_.size(); ++page_id)
1630 dirty_pages_(page_id) = 0;
1631 }
1632
1633 void mark_page_dirty(const page_id_t page_id)
1634 {
1636 dirty_pages_(page_id) = 1;
1637 }
1638
1640 const std::uint64_t checkpoint_sequence) noexcept
1641 {
1642 for (page_id_t page_id = 1; page_id < pages_.size(); ++page_id)
1644 }
1645
1647 {
1648 header_dirty_ = true;
1649 }
1650
1651 [[nodiscard]] Page & page(const page_id_t page_id)
1652 {
1653 ah_runtime_error_unless(page_id > 0 and page_id < pages_.size())
1654 << "File_B_Tree: invalid page id " << page_id;
1655 return pages_(page_id);
1656 }
1657
1658 [[nodiscard]] const Page & page(const page_id_t page_id) const
1659 {
1660 ah_runtime_error_unless(page_id > 0 and page_id < pages_.size())
1661 << "File_B_Tree: invalid page id " << page_id;
1662 return pages_(page_id);
1663 }
1664
1665 [[nodiscard]] bool equals(const Key & lhs, const Key & rhs) const
1666 {
1667 return not cmp_(lhs, rhs) and not cmp_(rhs, lhs);
1668 }
1669
1670 [[nodiscard]] bool strictly_sorted(const Page & page) const
1671 {
1672 if (page.key_count == 0)
1673 return true;
1674
1675 for (size_t i = 1; i < page.key_count; ++i)
1676 if (not cmp_(page.keys[i - 1], page.keys[i]))
1677 return false;
1678 return true;
1679 }
1680
1682 const Key & key) const
1683 {
1684 size_t idx = 0;
1685 while (idx < page.key_count and cmp_(page.keys[idx], key))
1686 ++idx;
1687 return idx;
1688 }
1689
1691 const Key & key) const
1692 {
1693 size_t idx = 0;
1694 while (idx < page.key_count
1695 and (cmp_(page.keys[idx], key) or equals(page.keys[idx], key)))
1696 ++idx;
1697 return idx;
1698 }
1699
1700 static void insert_key_at(Page & page, const size_t idx, const Key & key)
1701 {
1702 for (size_t i = page.key_count; i > idx; --i)
1703 page.keys[i] = page.keys[i - 1];
1704 page.keys[idx] = key;
1705 ++page.key_count;
1706 }
1707
1708 static Key erase_key_at(Page & page, const size_t idx)
1709 {
1710 const Key ret = page.keys[idx];
1711 for (size_t i = idx + 1; i < page.key_count; ++i)
1712 page.keys[i - 1] = page.keys[i];
1713 --page.key_count;
1714 return ret;
1715 }
1716
1717 static void insert_child_at(Page & page, const size_t idx,
1718 const page_id_t child_id)
1719 {
1720 for (size_t i = page.child_count; i > idx; --i)
1721 page.children[i] = page.children[i - 1];
1722 page.children[idx] = child_id;
1723 ++page.child_count;
1724 }
1725
1726 static page_id_t erase_child_at(Page & page, const size_t idx)
1727 {
1728 const page_id_t ret = page.children[idx];
1729 for (size_t i = idx + 1; i < page.child_count; ++i)
1730 page.children[i - 1] = page.children[i];
1731 --page.child_count;
1732 return ret;
1733 }
1734
1736 {
1737 if (free_page_head_ != 0)
1738 {
1739 const page_id_t page_id = free_page_head_;
1740 free_page_head_ = page(page_id).link;
1741 page(page_id) = make_page(kind);
1742 mark_page_dirty(page_id);
1744 return page_id;
1745 }
1746
1747 pages_.append(make_page(kind));
1749 mark_page_dirty(pages_.size() - 1);
1751 return pages_.size() - 1;
1752 }
1753
1754 void release_page(const page_id_t page_id)
1755 {
1758 page(page_id) = free_page;
1759 free_page_head_ = page_id;
1760 mark_page_dirty(page_id);
1762 }
1763
1764 [[nodiscard]] std::optional<Key> min_in(page_id_t page_id) const
1765 {
1766 if (page_id == 0)
1767 return std::nullopt;
1768
1769 while (is_internal(page(page_id)))
1770 {
1771 const Page & curr = page(page_id);
1772 if (curr.child_count == 0)
1773 return std::nullopt;
1774 page_id = curr.children[0];
1775 }
1776
1777 const Page & leaf = page(page_id);
1778 if (leaf.key_count == 0)
1779 return std::nullopt;
1780 return leaf.keys[0];
1781 }
1782
1783 [[nodiscard]] std::optional<Key> max_in(page_id_t page_id) const
1784 {
1785 if (page_id == 0)
1786 return std::nullopt;
1787
1788 while (is_internal(page(page_id)))
1789 {
1790 const Page & curr = page(page_id);
1791 if (curr.child_count == 0)
1792 return std::nullopt;
1793 page_id = curr.children[curr.child_count - 1];
1794 }
1795
1796 const Page & leaf = page(page_id);
1797 if (leaf.key_count == 0)
1798 return std::nullopt;
1799 return leaf.keys[leaf.key_count - 1];
1800 }
1801
1802 void split_child(const page_id_t parent_id, const size_t idx)
1803 {
1804 const page_id_t child_id = page(parent_id).children[idx];
1805 const bool child_is_leaf = is_leaf(page(child_id));
1806 const page_id_t right_id =
1808
1809 const Key median = page(child_id).keys[MinKeys];
1810
1811 {
1812 Page & child = page(child_id);
1813 Page & right = page(right_id);
1814
1815 for (size_t i = MinDegree; i < child.key_count; ++i)
1816 right.keys[right.key_count++] = child.keys[i];
1817 child.key_count = MinKeys;
1818
1819 if (not child_is_leaf)
1820 {
1821 for (size_t i = MinDegree; i < child.child_count; ++i)
1822 right.children[right.child_count++] = child.children[i];
1823 child.child_count = MinDegree;
1824 }
1825 }
1826
1827 insert_key_at(page(parent_id), idx, median);
1828 insert_child_at(page(parent_id), idx + 1, right_id);
1831 mark_page_dirty(parent_id);
1832 }
1833
1834 void insert_nonfull(const page_id_t page_id, const Key & key)
1835 {
1836 if (is_leaf(page(page_id)))
1837 {
1838 const size_t idx = lower_bound_index(page(page_id), key);
1839 insert_key_at(page(page_id), idx, key);
1840 mark_page_dirty(page_id);
1841 return;
1842 }
1843
1844 size_t idx = lower_bound_index(page(page_id), key);
1845 const page_id_t child_id = page(page_id).children[idx];
1846 if (page(child_id).key_count == MaxKeys)
1847 {
1848 split_child(page_id, idx);
1849 if (cmp_(page(page_id).keys[idx], key))
1850 ++idx;
1851 }
1852 insert_nonfull(page(page_id).children[idx], key);
1853 }
1854
1855 void borrow_from_prev(const page_id_t parent_id, const size_t idx)
1856 {
1857 const page_id_t child_id = page(parent_id).children[idx];
1858 const page_id_t left_id = page(parent_id).children[idx - 1];
1859
1860 insert_key_at(page(child_id), 0, page(parent_id).keys[idx - 1]);
1861 page(parent_id).keys[idx - 1] =
1863 erase_key_at(page(left_id), page(left_id).key_count - 1);
1864
1865 if (not is_leaf(page(left_id)))
1866 {
1868 page(left_id).children[page(left_id).child_count - 1]);
1869 erase_child_at(page(left_id), page(left_id).child_count - 1);
1870 }
1871
1872 mark_page_dirty(parent_id);
1875 }
1876
1877 void borrow_from_next(const page_id_t parent_id, const size_t idx)
1878 {
1879 const page_id_t child_id = page(parent_id).children[idx];
1880 const page_id_t right_id = page(parent_id).children[idx + 1];
1881
1882 page(child_id).keys[page(child_id).key_count++] = page(parent_id).keys[idx];
1883 page(parent_id).keys[idx] = page(right_id).keys[0];
1885
1886 if (not is_leaf(page(right_id)))
1887 {
1890 }
1891
1892 mark_page_dirty(parent_id);
1895 }
1896
1897 void merge_children(const page_id_t parent_id, const size_t idx)
1898 {
1899 const page_id_t left_id = page(parent_id).children[idx];
1900 const page_id_t right_id = page(parent_id).children[idx + 1];
1901
1902 Page & left = page(left_id);
1903 const Page right = page(right_id);
1904
1905 left.keys[left.key_count++] = page(parent_id).keys[idx];
1906 for (size_t i = 0; i < right.key_count; ++i)
1907 left.keys[left.key_count++] = right.keys[i];
1908
1909 if (not is_leaf(left))
1910 for (size_t i = 0; i < right.child_count; ++i)
1911 left.children[left.child_count++] = right.children[i];
1912
1913 erase_key_at(page(parent_id), idx);
1914 erase_child_at(page(parent_id), idx + 1);
1916 mark_page_dirty(parent_id);
1918 }
1919
1920 [[nodiscard]] size_t ensure_child_has_extra(const page_id_t parent_id,
1921 const size_t idx)
1922 {
1923 if (page(page(parent_id).children[idx]).key_count >= MinDegree)
1924 return idx;
1925
1926 if (idx > 0
1927 and page(page(parent_id).children[idx - 1]).key_count >= MinDegree)
1928 {
1929 borrow_from_prev(parent_id, idx);
1930 return idx;
1931 }
1932
1933 if (idx + 1 < page(parent_id).child_count
1934 and page(page(parent_id).children[idx + 1]).key_count >= MinDegree)
1935 {
1936 borrow_from_next(parent_id, idx);
1937 return idx;
1938 }
1939
1940 if (idx + 1 < page(parent_id).child_count)
1941 {
1942 merge_children(parent_id, idx);
1943 return idx;
1944 }
1945
1946 merge_children(parent_id, idx - 1);
1947 return idx - 1;
1948 }
1949
1950 [[nodiscard]] bool remove_from(const page_id_t page_id, const Key & key)
1951 {
1952 const size_t idx = lower_bound_index(page(page_id), key);
1953 const bool found =
1954 idx < page(page_id).key_count and equals(page(page_id).keys[idx], key);
1955
1956 if (found)
1957 {
1958 if (is_leaf(page(page_id)))
1959 {
1960 erase_key_at(page(page_id), idx);
1961 mark_page_dirty(page_id);
1962 return true;
1963 }
1964
1965 const page_id_t left_child = page(page_id).children[idx];
1966 const page_id_t right_child = page(page_id).children[idx + 1];
1967
1968 if (page(left_child).key_count >= MinDegree)
1969 {
1970 const Key pred = *max_in(left_child);
1971 page(page_id).keys[idx] = pred;
1972 mark_page_dirty(page_id);
1973 return remove_from(left_child, pred);
1974 }
1975
1976 if (page(right_child).key_count >= MinDegree)
1977 {
1978 const Key succ = *min_in(right_child);
1979 page(page_id).keys[idx] = succ;
1980 mark_page_dirty(page_id);
1981 return remove_from(right_child, succ);
1982 }
1983
1984 merge_children(page_id, idx);
1985 return remove_from(page(page_id).children[idx], key);
1986 }
1987
1988 if (is_leaf(page(page_id)))
1989 return false;
1990
1991 const size_t child_idx = ensure_child_has_extra(page_id, idx);
1992 return remove_from(page(page_id).children[child_idx], key);
1993 }
1994
1995 void collect_keys(const page_id_t page_id, Array<Key> & out) const
1996 {
1997 if (page_id == 0)
1998 return;
1999
2000 const Page & p = page(page_id);
2001 if (is_leaf(p))
2002 {
2003 for (size_t i = 0; i < p.key_count; ++i)
2004 out.append(p.keys[i]);
2005 return;
2006 }
2007
2008 for (size_t i = 0; i < p.key_count; ++i)
2009 {
2010 collect_keys(p.children[i], out);
2011 out.append(p.keys[i]);
2012 }
2014 }
2015
2016 [[nodiscard]] bool verify_node(const page_id_t page_id,
2017 const std::optional<Key> & min_key,
2018 const std::optional<Key> & max_key,
2019 const size_t depth,
2020 size_t & leaf_depth,
2021 size_t & counted,
2022 Array<unsigned char> & visited) const
2023 {
2024 if (page_id == 0 or page_id >= pages_.size())
2025 return false;
2026
2027 // Cycle detection: reject any page visited more than once in this traversal.
2028 if (visited[page_id] != 0)
2029 return false;
2030 visited[page_id] = 1;
2031
2032 const Page & p = page(page_id);
2033
2034 // Only Leaf and Internal pages are structurally valid here.
2035 if (not is_leaf(p) and not is_internal(p))
2036 return false;
2037
2038 if (page_id != root_page_)
2039 {
2040 if (p.key_count < MinKeys or p.key_count > MaxKeys)
2041 return false;
2042 }
2043 else if (p.key_count > MaxKeys)
2044 return false;
2045
2046 if (not strictly_sorted(p))
2047 return false;
2048
2049 if (min_key.has_value() and p.key_count > 0
2050 and not cmp_(*min_key, p.keys[0]))
2051 return false;
2052
2053 if (max_key.has_value() and p.key_count > 0
2054 and not cmp_(p.keys[p.key_count - 1], *max_key))
2055 return false;
2056
2057 counted += p.key_count;
2058
2059 if (is_leaf(p))
2060 {
2061 if (p.child_count != 0)
2062 return false;
2063 if (leaf_depth == static_cast<size_t>(-1))
2064 leaf_depth = depth;
2065 return leaf_depth == depth;
2066 }
2067
2068 if (p.child_count != p.key_count + 1)
2069 return false;
2070
2071 for (size_t i = 0; i < p.child_count; ++i)
2072 {
2073 if (p.children[i] == 0 or p.children[i] >= pages_.size())
2074 return false;
2075
2076 std::optional<Key> child_min = min_key;
2077 std::optional<Key> child_max = max_key;
2078 if (i > 0)
2079 child_min = p.keys[i - 1];
2080 if (i < p.key_count)
2081 child_max = p.keys[i];
2082
2084 depth + 1, leaf_depth, counted, visited))
2085 return false;
2086 }
2087
2088 return true;
2089 }
2090
2092 {
2093 if (auto_sync_)
2094 sync();
2095 }
2096
2097 public:
2098 using key_type = Key;
2102
2105
2115 explicit Gen_File_B_Tree(std::string file_path,
2116 const bool auto_sync = true,
2117 const Compare & cmp = Compare())
2119 auto_sync, cmp) {}
2120
2133 explicit Gen_File_B_Tree(std::string file_path,
2135 const bool auto_sync = true,
2136 const Compare & cmp = Compare())
2137 : cmp_(cmp),
2138 file_path_(std::move(file_path)),
2141 : auto_sync)
2142 {
2144 << "File_B_Tree requires a non-empty file path";
2145
2148 if (read_only())
2150 << "File_B_Tree: cannot open missing file " << file_path_
2151 << " in read-only mode";
2153
2154 if (file_exists())
2155 {
2156 open_storage(false);
2157 file_.seekg(0, std::ios::end);
2158 const std::streamoff file_size = file_.tellg();
2160 << "File_B_Tree: cannot determine the size of " << file_path_;
2161
2162 if (file_size == 0)
2163 {
2164 ensure_writable("constructor");
2166 }
2167 else
2169 }
2170 else
2172 }
2173
2183 explicit Gen_File_B_Tree(const char * file_path,
2184 const bool auto_sync = true,
2185 const Compare & cmp = Compare())
2186 : Gen_File_B_Tree(file_path == nullptr ? std::string()
2187 : std::string(file_path),
2188 auto_sync, cmp) {}
2189
2202 explicit Gen_File_B_Tree(const char * file_path,
2204 const bool auto_sync = true,
2205 const Compare & cmp = Compare())
2206 : Gen_File_B_Tree(file_path == nullptr ? std::string()
2207 : std::string(file_path),
2208 open_mode, auto_sync, cmp) {}
2209
2212
2215
2218
2221
2226 {
2227 if (file_.is_open())
2228 file_.close();
2229 }
2230
2239 [[nodiscard]] const Compare & key_comp() const noexcept { return cmp_; }
2240
2245 [[nodiscard]] const Compare & get_compare() const noexcept
2246 {
2247 return key_comp();
2248 }
2249
2254 [[nodiscard]] const std::string & file_path() const noexcept
2255 {
2256 return file_path_;
2257 }
2258
2263 [[nodiscard]] const std::string & get_file_path() const noexcept
2264 {
2265 return file_path();
2266 }
2267
2276
2282 {
2283 return read_only();
2284 }
2285
2291 {
2292 return auto_sync_;
2293 }
2294
2299 void set_auto_sync(const bool enabled) noexcept
2300 {
2301 auto_sync_ = enabled and not read_only();
2302 }
2303
2309 {
2310 return page_bytes();
2311 }
2312
2318 {
2319 return pages_.size() - 1;
2320 }
2321
2327 {
2328 return checkpoint_sequence_;
2329 }
2330
2334 void checkpoint() const
2335 {
2336 sync();
2337 }
2338
2368
2373 void reload()
2374 {
2377 << "File_B_Tree: backing file " << file_path_ << " no longer exists";
2379 }
2380
2385 [[nodiscard]] bool empty() const noexcept { return size_ == 0; }
2386
2391 [[nodiscard]] bool is_empty() const noexcept { return empty(); }
2392
2397 [[nodiscard]] size_t size() const noexcept { return size_; }
2398
2404 {
2405 size_t h = 0;
2406 for (page_id_t curr = root_page_; curr != 0; )
2407 {
2408 ++h;
2409 if (is_leaf(page(curr)))
2410 break;
2411 curr = page(curr).children[0];
2412 }
2413 return h;
2414 }
2415
2419 void clear()
2420 {
2421 ensure_writable("clear");
2424 }
2425
2431 [[nodiscard]] bool contains(const Key & key) const
2432 {
2433 page_id_t curr = root_page_;
2434 while (curr != 0)
2435 {
2436 const size_t idx = lower_bound_index(page(curr), key);
2437 if (idx < page(curr).key_count and equals(page(curr).keys[idx], key))
2438 return true;
2439 if (is_leaf(page(curr)))
2440 return false;
2441 curr = page(curr).children[idx];
2442 }
2443 return false;
2444 }
2445
2451 [[nodiscard]] bool search(const Key & key) const
2452 {
2453 return contains(key);
2454 }
2455
2463 bool insert(const Key & key)
2464 {
2465 ensure_writable("insert");
2466 if (contains(key))
2467 return false;
2468
2469 if (root_page_ == 0)
2470 {
2472 page(leaf_id).keys[0] = key;
2473 page(leaf_id).key_count = 1;
2475 ++size_;
2479 return true;
2480 }
2481
2482 if (page(root_page_).key_count == MaxKeys)
2483 {
2490 }
2491
2493 ++size_;
2496 return true;
2497 }
2498
2506 bool insert(Key && key)
2507 {
2508 Key copy = std::move(key);
2509 return insert(copy);
2510 }
2511
2518 bool remove(const Key & key)
2519 {
2520 ensure_writable("remove");
2521 if (root_page_ == 0)
2522 return false;
2523
2524 if (not remove_from(root_page_, key))
2525 return false;
2526
2527 --size_;
2529
2530 while (root_page_ != 0 and page(root_page_).key_count == 0)
2531 {
2532 if (is_leaf(page(root_page_)))
2533 {
2535 root_page_ = 0;
2537 break;
2538 }
2539
2540 if (page(root_page_).child_count == 0)
2541 {
2543 root_page_ = 0;
2545 break;
2546 }
2547
2552 }
2553
2555 return true;
2556 }
2557
2562 [[nodiscard]] std::optional<Key> min_key() const
2563 {
2564 return min_in(root_page_);
2565 }
2566
2571 [[nodiscard]] std::optional<Key> max_key() const
2572 {
2573 return max_in(root_page_);
2574 }
2575
2581 [[nodiscard]] std::optional<Key> lower_bound(const Key & key) const
2582 {
2583 page_id_t curr = root_page_;
2584 std::optional<Key> candidate;
2585
2586 while (curr != 0)
2587 {
2588 const size_t idx = lower_bound_index(page(curr), key);
2589 if (idx < page(curr).key_count)
2590 candidate = page(curr).keys[idx];
2591 if (is_leaf(page(curr)))
2592 return candidate;
2593 curr = page(curr).children[idx];
2594 }
2595
2596 return candidate;
2597 }
2598
2604 [[nodiscard]] std::optional<Key> upper_bound(const Key & key) const
2605 {
2606 page_id_t curr = root_page_;
2607 std::optional<Key> candidate;
2608
2609 while (curr != 0)
2610 {
2611 const size_t idx = upper_bound_index(page(curr), key);
2612 if (idx < page(curr).key_count)
2613 candidate = page(curr).keys[idx];
2614 if (is_leaf(page(curr)))
2615 return candidate;
2616 curr = page(curr).children[idx];
2617 }
2618
2619 return candidate;
2620 }
2621
2628 {
2630 out.reserve(size_ == 0 ? 1 : size_);
2632 return out;
2633 }
2634
2639 [[nodiscard]] bool verify() const
2640 {
2641 if (root_page_ == 0)
2642 return size_ == 0;
2643
2644 if (root_page_ >= pages_.size())
2645 return false;
2646
2647 size_t leaf_depth = static_cast<size_t>(-1);
2648 size_t counted = 0;
2649 Array<unsigned char> visited(pages_.size(), 0);
2650 return verify_node(root_page_, std::nullopt, std::nullopt, 1,
2651 leaf_depth, counted, visited)
2652 and counted == size_;
2653 }
2654 };
2655
2657 template <typename Key,
2658 class Compare = Aleph::less<Key>,
2659 size_t MinDegree = 16,
2662} // namespace Aleph
2663
2664# endif // TPL_FILE_B_TREE_H
C++20 concepts for constraining comparison functors.
Exception handling system with formatted messages for Aleph-w.
#define ah_runtime_error_unless(C)
Throws std::runtime_error if condition does NOT hold.
Definition ah-errors.H:250
#define ah_runtime_error()
Throws std::runtime_error unconditionally.
Definition ah-errors.H:282
Standard functor implementations and comparison objects.
long double h
Definition btreepic.C:154
Simple dynamic array with automatic resizing and functional operations.
Definition tpl_array.H:139
constexpr size_t size() const noexcept
Return the number of elements stored in the stack.
Definition tpl_array.H:351
void empty() noexcept
Empties the container.
Definition tpl_array.H:336
T & append(const T &data)
Append a copy of data
Definition tpl_array.H:245
void reserve(size_t cap)
Reserves cap cells into the array.
Definition tpl_array.H:315
Generic B-Tree with configurable minimum degree.
Definition tpl_b_tree.H:134
Persistent page-managed B-Tree stored in a single binary file.
static constexpr std::uint32_t NativeWalVersion
static Key erase_key_at(Page &page, const size_t idx)
Gen_File_B_Tree & operator=(const Gen_File_B_Tree &)=delete
Assignment is disabled.
void mark_page_dirty(const page_id_t page_id)
std::optional< Key > min_in(page_id_t page_id) const
static constexpr size_t wal_trailer_bytes() noexcept
static std::uint32_t add_native_key_to_crc(const std::uint32_t crc, const Key &key)
const std::string & get_file_path() const noexcept
Synonym for file_path().
std::optional< Key > max_in(page_id_t page_id) const
detail::Pid_File_Lock lock_
static constexpr std::uint32_t PortableFormatVersion
size_t page_count() const noexcept
Return the number of allocated pages currently tracked.
static Key read_portable_key(std::istream &in, const std::string &file_path, const char *what)
bool verify() const
Verify structural B-Tree invariants across cached pages.
static constexpr size_t header_bytes() noexcept
size_t lower_bound_index(const Page &page, const Key &key) const
void ensure_writable(const char *operation) const
void release_page(const page_id_t page_id)
Gen_File_B_Tree(std::string file_path, const bool auto_sync=true, const Compare &cmp=Compare())
Open or create a paged persistent B-Tree file for read-write access.
bool search(const Key &key) const
Synonym for contains().
void set_auto_sync(const bool enabled) noexcept
Enable or disable automatic flushing of dirty pages.
std::optional< Page > read_storage_page_if_current(const page_id_t page_id) const
void write_current_header_to_storage(const std::uint64_t checkpoint_sequence) const
static constexpr bool version_uses_portable_codec(const std::uint32_t version) noexcept
size_t dirty_page_count() const noexcept
static constexpr std::uint32_t NativeCheckpointFormatVersion
static constexpr std::uint32_t key_size_for_version(const std::uint32_t version) noexcept
void write_wal_to_path(const std::string &target_path, const std::uint64_t checkpoint_sequence) const
bool contains(const Key &key) const
Return whether a key is present.
~Gen_File_B_Tree() noexcept
Close the backing file.
static constexpr size_t serialized_page_bytes(const std::uint32_t version) noexcept
std::optional< Key > max_key() const
Return the maximum key, if any.
static constexpr const char * tree_kind() noexcept
static constexpr bool is_free(const Page &page) noexcept
Loaded_Image read_image(const std::string &source_path) const
void split_child(const page_id_t parent_id, const size_t idx)
static constexpr size_t checksummed_page_bytes() noexcept
static constexpr std::uint32_t EncodedKeySize
Array< unsigned char > dirty_pages_
static void write_portable_key(std::ostream &out, const Key &key, const std::string &file_path, const char *what)
static void ensure_legacy_native_format_supported(const std::string &source_path, const std::uint32_t version)
Gen_File_B_Tree(const char *file_path, const bool auto_sync=true, const Compare &cmp=Compare())
Open or create a paged persistent B-Tree file for read-write access.
std::optional< Key > lower_bound(const Key &key) const
Return the first key not less than key.
bool is_empty() const noexcept
Synonym for empty().
void clear()
Remove every key from the tree.
bool auto_sync_enabled() const noexcept
Return whether automatic synchronization is enabled.
static constexpr size_t page_bytes() noexcept
static Key read_native_key(std::istream &in, const std::string &file_path, const char *what)
void mark_header_dirty() const noexcept
std::optional< std::uint32_t > storage_format_version_on_disk() const
size_t height() const noexcept
Return the current height of the paged tree.
Gen_File_B_Tree(Gen_File_B_Tree &&)=delete
Moving is disabled.
static constexpr size_t checksummed_header_bytes() noexcept
static void insert_key_at(Page &page, const size_t idx, const Key &key)
static constexpr bool is_leaf(const Page &page) noexcept
static std::uint32_t wal_record_checksum_add_v2(std::uint32_t crc, const page_id_t page_id, const Page &page)
void merge_children(const page_id_t parent_id, const size_t idx)
static constexpr auto wal_trailer_magic() noexcept
const Page & page(const page_id_t page_id) const
void open_storage(const bool truncate=false) const
void write_full_image_to_path(const std::string &target_path, const std::uint64_t checkpoint_sequence) const
static constexpr std::uint32_t CommitTrailerWalVersion
static constexpr std::uint32_t WalVersion
Array< Key > keys() const
Materialize the tree contents in sorted order.
static std::uint32_t page_checksum_v3(const Page &page)
size_t ensure_child_has_extra(const page_id_t parent_id, const size_t idx)
static constexpr size_t legacy_page_bytes() noexcept
static constexpr auto Read_Only
static std::uint32_t page_checksum_v4(const Page &page)
static void write_key(std::ostream &out, const Key &key, const std::string &file_path, const char *what)
void write_header_to_storage(const std::uint64_t size, const page_id_t root_page, const std::uint64_t page_count, const page_id_t free_page_head, const std::uint64_t checkpoint_sequence) const
Gen_File_B_Tree(const char *file_path, const Paged_File_Open_Mode open_mode, const bool auto_sync=true, const Compare &cmp=Compare())
Open a paged persistent B-Tree file with an explicit access mode.
std::optional< Key > min_key() const
Return the minimum key, if any.
static constexpr std::uint32_t FormatVersion
bool verify_node(const page_id_t page_id, const std::optional< Key > &min_key, const std::optional< Key > &max_key, const size_t depth, size_t &leaf_depth, size_t &counted, Array< unsigned char > &visited) const
void seekp_to(const std::streamoff offset) const
static std::uint32_t header_checksum(const std::uint32_t version, const std::uint64_t size, const page_id_t root_page, const std::uint64_t page_count, const page_id_t free_page_head, const std::uint64_t checkpoint_sequence) noexcept
static Key read_key(std::istream &in, const std::string &file_path, const char *what, const std::uint32_t version)
static constexpr std::uint32_t PortableWalVersion
void borrow_from_prev(const page_id_t parent_id, const size_t idx)
static void write_native_key(std::ostream &out, const Key &key, const std::string &file_path, const char *what)
static constexpr auto file_magic() noexcept
static void write_page_record(std::ostream &out, const Page &page, const std::string &target_path)
size_t page_size_bytes() const noexcept
Return the fixed serialized page size.
static Page read_page_from_stream(std::istream &in, const std::string &path, const std::uint32_t version)
void borrow_from_next(const page_id_t parent_id, const size_t idx)
std::optional< Key > upper_bound(const Key &key) const
Return the first key strictly greater than key.
static constexpr size_t MinKeys
void write_full_image(std::ostream &out, const std::string &target_path, const std::uint64_t checkpoint_sequence) const
void checkpoint() const
Synonym for sync().
size_t size() const noexcept
Return the number of stored keys.
static constexpr bool is_internal(const Page &page) noexcept
static page_id_t erase_child_at(Page &page, const size_t idx)
void collect_keys(const page_id_t page_id, Array< Key > &out) const
static constexpr std::array< char, 7 > reserved_bytes_for_version(const std::uint32_t version) noexcept
Paged_File_Open_Mode open_mode_
void stamp_all_pages_for_checkpoint(const std::uint64_t checkpoint_sequence) noexcept
std::streamoff current_page_offset(const page_id_t page_id) const noexcept
bool empty() const noexcept
Return whether the tree has no keys.
const Compare & key_comp() const noexcept
Return the comparison object.
static constexpr std::uint32_t HeaderCheckpointFormatVersion
static constexpr size_t serialized_header_bytes(const std::uint32_t version) noexcept
static constexpr std::uint32_t wal_record_format_version(const std::uint32_t wal_version) noexcept
static void insert_child_at(Page &page, const size_t idx, const page_id_t child_id)
Paged_File_Open_Mode open_mode() const noexcept
Return the access mode used to open the file.
static constexpr std::uint8_t encoding_marker_for_version(const std::uint32_t version) noexcept
void sync() const
Flush the current tree image using redo WAL or full-image fallback.
Gen_File_B_Tree(const Gen_File_B_Tree &)=delete
Copying is disabled.
static constexpr size_t wal_header_bytes() noexcept
bool remove(const Key &key)
Remove a key if present.
void reload()
Discard unsynchronized changes and reload pages from disk.
const std::string & file_path() const noexcept
Return the backing file path.
static constexpr size_t wal_record_bytes(const std::uint32_t wal_version) noexcept
std::uint64_t checkpoint_sequence() const noexcept
Return the durable checkpoint sequence stored in the backing file.
Key key_type
Stored key type.
const Compare & get_compare() const noexcept
Return the comparison object.
static constexpr auto Read_Write
void insert_nonfull(const page_id_t page_id, const Key &key)
static void write_pod(std::ostream &out, const T &value, const std::string &file_path, const char *what)
static std::uint32_t wal_record_checksum_add(std::uint32_t crc, const page_id_t page_id, const Page &page)
static T read_pod(std::istream &in, const std::string &file_path, const char *what)
static constexpr std::uint32_t LegacyFormatVersion
static constexpr size_t MaxChildren
static std::uint32_t wal_header_checksum(const std::uint32_t wal_version, const std::uint64_t size, const page_id_t root_page, const std::uint64_t page_count, const page_id_t free_page_head, const std::uint64_t checkpoint_sequence, const std::uint64_t dirty_count) noexcept
static constexpr std::uint8_t PortableEncodingMarker
bool insert(Key &&key)
Insert a key by move if it is not already present.
page_id_t allocate_page(const PageKind kind)
static std::uint32_t page_checksum(const Page &page)
std::uint64_t checkpoint_sequence_
std::uint32_t storage_format_version_
static constexpr size_t MaxKeys
static std::uint32_t wal_record_checksum_add_v3(std::uint32_t crc, const page_id_t page_id, const Page &page)
detail::Pid_File_Lock_Mode lock_mode() const noexcept
bool strictly_sorted(const Page &page) const
static constexpr auto wal_magic() noexcept
static constexpr std::uint32_t ChecksummedFormatVersion
void clear_dirty_state() const noexcept
static constexpr size_t portable_page_bytes() noexcept
static constexpr std::uint32_t LegacyWalVersion
void write_current_page_to_storage(const page_id_t page_id, const Page &page) const
static constexpr std::uint8_t EndianMarker
static void ensure_legacy_native_wal_supported(const std::string &source_path, const std::uint32_t wal_version)
bool equals(const Key &lhs, const Key &rhs) const
bool has_pending_changes() const noexcept
bool remove_from(const page_id_t page_id, const Key &key)
static std::uint32_t add_key_to_crc(const std::uint32_t crc, const Key &key)
page_id_t page_id_type
Type of on-disk page identifiers.
static constexpr size_t legacy_header_bytes() noexcept
bool read_only() const noexcept
Gen_File_B_Tree(std::string file_path, const Paged_File_Open_Mode open_mode, const bool auto_sync=true, const Compare &cmp=Compare())
Open a paged persistent B-Tree file with an explicit access mode.
bool is_read_only() const noexcept
Return whether this handle is read-only.
size_t upper_bound_index(const Page &page, const Key &key) const
bool insert(const Key &key)
Insert a key if it is not already present.
static constexpr size_t native_page_bytes() noexcept
static std::uint32_t header_checksum_v2(const std::uint32_t version, const std::uint64_t size, const page_id_t root_page, const std::uint64_t page_count, const page_id_t free_page_head) noexcept
static constexpr Page make_page(const PageKind kind) noexcept
Page & page(const page_id_t page_id)
void apply_current_cache_to_storage(const std::uint64_t checkpoint_sequence) const
void acquire(const std::string &path, const char *kind, const Pid_File_Lock_Mode mode=Pid_File_Lock_Mode::Exclusive)
int cmp(const __gmp_expr< T, U > &expr1, const __gmp_expr< V, W > &expr2)
Definition gmpfrxx.h:4118
Freq_Node * pred
Predecessor node in level-order traversal.
const long double offset[]
Offset values indexed by symbol string length (bounded by MAX_OFFSET_INDEX)
std::uint32_t crc32_add_bytes(std::uint32_t crc, const void *data, const size_t size) noexcept
void sync_parent_directory(const std::string &path, const char *kind)
constexpr std::array< char, Ordered_Tree_Snapshot_Magic_Size > ordered_tree_snapshot_magic(const char(&text)[N]) noexcept
void copy_file_contents(const std::string &src, const std::string &dst, const char *kind)
void remove_file_if_exists(const std::string &path, const char *kind)
std::uint32_t crc32_begin() noexcept
std::uint32_t crc32_add(std::uint32_t crc, const T &value) noexcept
constexpr size_t Ordered_Tree_Snapshot_Magic_Size
void rename_file_atomic(const std::string &src, const std::string &dst, const char *kind)
std::uint32_t crc32_finish(const std::uint32_t crc) noexcept
void sync_file_by_path(const std::string &path, const char *kind)
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
and
Check uniqueness with explicit hash + equality functors.
Paged_File_Open_Mode
Open mode for paged persistent tree files.
Itor2 copy(Itor1 sourceBeg, const Itor1 &sourceEnd, Itor2 destBeg)
Copy elements from one range to another.
Definition ahAlgo.H:584
const T * median(const T &a, const T &b, const T &c, const Compare &cmp=Compare())
Return a pointer to the median value among three elements.
Definition ahUtils.H:84
Divide_Conquer_DP_Result< Cost > divide_and_conquer_partition_dp(const size_t groups, const size_t n, Transition_Cost_Fn transition_cost, const Cost inf=dp_optimization_detail::default_inf< Cost >())
Optimize partition DP using divide-and-conquer optimization.
std::decay_t< typename HeadC::Item_Type > T
Definition ah-zip.H:105
auto mode(const Container &data) -> std::decay_t< decltype(*std::begin(data))>
Compute the mode (most frequent value).
Definition stat_utils.H:456
Itor::difference_type count(const Itor &beg, const Itor &end, const T &value)
Count elements equal to a value.
Definition ahAlgo.H:127
STL namespace.
page_id_t children[MaxChildren]
Dynamic array container with automatic resizing.
B-Tree implementation with configurable minimum degree.
Helpers for durable paged-tree files.
Fixed-size portable codecs for paged persistent tree payloads.
Internal helpers for snapshot-backed persistent tree wrappers.