Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
tpl_file_bplus_tree.H
Go to the documentation of this file.
1/*
2 Aleph_w
3
4 Data structures & Algorithms
5 version 2.0.0b
6 https://github.com/lrleon/Aleph-w
7
8 This file is part of Aleph-w library
9
10 Copyright (c) 2002-2026 Leandro Rabindranath Leon
11
12 Permission is hereby granted, free of charge, to any person obtaining a copy
13 of this software and associated documentation files (the "Software"), to deal
14 in the Software without restriction, including without limitation the rights
15 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16 copies of the Software, and to permit persons to whom the Software is
17 furnished to do so, subject to the following conditions:
18
19 The above copyright notice and this permission notice shall be included in all
20 copies or substantial portions of the Software.
21
22 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 SOFTWARE.
29*/
30
40# ifndef TPL_FILE_BPLUS_TREE_H
41# define TPL_FILE_BPLUS_TREE_H
42
43# include <array>
44# include <bit>
45# include <cstdint>
46# include <cstring>
47# include <filesystem>
48# include <fstream>
49# include <optional>
50# include <string>
51# include <type_traits>
52# include <utility>
53
54# include <ah-concepts.H>
55# include <ah-errors.H>
56# include <ahFunction.H>
57# include <tpl_array.H>
60# include <tpl_sort_utils.H>
61# include <tpl_tree_snapshot.H>
62
63namespace Aleph
64{
151 template <typename Key,
152 class Compare = Aleph::less<Key>,
153 size_t MinDegree = 16,
157 {
158 static_assert(MinDegree >= 2, "File_BPlus_Tree requires MinDegree >= 2");
159 static_assert(std::default_initializable<Key>,
160 "File_BPlus_Tree requires default-initializable keys");
161 static_assert(std::copy_constructible<Key> and std::is_copy_assignable_v<Key>,
162 "File_BPlus_Tree requires copyable keys");
164 "File_BPlus_Tree requires a fixed-size key codec");
165 static_assert(std::same_as<decltype(Codec::decode(
166 static_cast<const unsigned char *>(nullptr))),
167 Key>,
168 "File_BPlus_Tree codec must decode back to Key");
169
170 static constexpr size_t MaxKeys = 2 * MinDegree - 1;
171 static constexpr size_t MaxChildren = 2 * MinDegree;
172 static constexpr size_t MinKeys = MinDegree - 1;
173 static constexpr std::uint32_t LegacyFormatVersion = 1;
174 static constexpr std::uint32_t ChecksummedFormatVersion = 2;
175 static constexpr std::uint32_t HeaderCheckpointFormatVersion = 3;
176 static constexpr std::uint32_t NativeCheckpointFormatVersion = 4;
177 static constexpr std::uint32_t PortableFormatVersion = 5;
178 static constexpr std::uint32_t FormatVersion = PortableFormatVersion;
179 static constexpr std::uint32_t EncodedKeySize =
180 static_cast<std::uint32_t>(Codec::encoded_size);
181 static constexpr std::uint8_t PortableEncodingMarker = 3;
182 static constexpr std::uint8_t EndianMarker =
183 std::endian::native == std::endian::little ? 1 : 2;
184
185 using page_id_t = std::uint64_t;
186
187 enum class PageKind : std::uint8_t
188 {
189 Free = 0,
190 Internal = 1,
191 Leaf = 2
192 };
193
194 struct Page
195 {
196 std::uint8_t kind = static_cast<std::uint8_t>(PageKind::Free);
197 std::uint16_t key_count = 0;
198 std::uint16_t child_count = 0;
199 page_id_t link = 0; // next leaf for leaves, next free page for free pages
200 std::uint64_t checkpoint_sequence = 0;
201 Key keys[MaxKeys] = {};
203 };
204
215
217 {
220 };
221
222 Compare cmp_;
223 std::string file_path_;
225 std::string journal_path_;
226 std::string journal_tmp_path_;
227 std::string wal_path_;
228 std::string wal_tmp_path_;
229 std::string lock_path_;
231 mutable std::fstream file_;
234 mutable bool header_dirty_ = false;
235 bool auto_sync_ = true;
236 mutable std::uint32_t storage_format_version_ = FormatVersion;
237 mutable std::uint64_t checkpoint_sequence_ = 0;
238
239 size_t size_ = 0;
243
244 [[nodiscard]] static constexpr const char * tree_kind() noexcept
245 {
246 return "File_BPlus_Tree";
247 }
248
255
260
261 void ensure_writable(const char * operation) const
262 {
264 << "File_BPlus_Tree::" << operation
265 << "(): file was opened in read-only mode";
266 }
267
268 [[nodiscard]] static constexpr auto file_magic() noexcept
269 {
270 return detail::ordered_tree_snapshot_magic("AlephBPTPg-v1");
271 }
272
273 [[nodiscard]] static constexpr auto wal_magic() noexcept
274 {
275 return detail::ordered_tree_snapshot_magic("AlephBPTWalv1");
276 }
277
278 [[nodiscard]] static constexpr auto wal_trailer_magic() noexcept
279 {
280 return detail::ordered_tree_snapshot_magic("AlephBPTCom1");
281 }
282
283 static constexpr std::uint32_t LegacyWalVersion = 1;
284 static constexpr std::uint32_t CommitTrailerWalVersion = 2;
285 static constexpr std::uint32_t NativeWalVersion = 3;
286 static constexpr std::uint32_t PortableWalVersion = 4;
287 static constexpr std::uint32_t WalVersion = PortableWalVersion;
288
289 [[nodiscard]] static constexpr size_t legacy_header_bytes() noexcept
290 {
291 return detail::Ordered_Tree_Snapshot_Magic_Size + sizeof(std::uint32_t) * 2
292 + sizeof(std::uint64_t) * 6 + sizeof(std::uint8_t) + 7;
293 }
294
295 [[nodiscard]] static constexpr size_t checksummed_header_bytes() noexcept
296 {
297 return legacy_header_bytes() + sizeof(std::uint32_t);
298 }
299
300 [[nodiscard]] static constexpr size_t header_bytes() noexcept
301 {
302 return checksummed_header_bytes() + sizeof(std::uint64_t);
303 }
304
305 [[nodiscard]] static constexpr size_t legacy_page_bytes() noexcept
306 {
307 return sizeof(std::uint8_t) + sizeof(std::uint16_t) * 2 + 3
308 + sizeof(page_id_t)
309 + sizeof(Key) * MaxKeys
310 + sizeof(page_id_t) * MaxChildren;
311 }
312
313 [[nodiscard]] static constexpr size_t checksummed_page_bytes() noexcept
314 {
315 return legacy_page_bytes() + sizeof(std::uint32_t);
316 }
317
318 [[nodiscard]] static constexpr size_t native_page_bytes() noexcept
319 {
320 return legacy_page_bytes() + sizeof(std::uint64_t) + sizeof(std::uint32_t);
321 }
322
323 [[nodiscard]] static constexpr size_t portable_page_bytes() noexcept
324 {
325 return sizeof(std::uint8_t) + sizeof(std::uint16_t) * 2 + 3
326 + sizeof(page_id_t) + sizeof(std::uint64_t)
327 + Codec::encoded_size * MaxKeys
328 + sizeof(page_id_t) * MaxChildren + sizeof(std::uint32_t);
329 }
330
331 [[nodiscard]] static constexpr size_t page_bytes() noexcept
332 {
333 return portable_page_bytes();
334 }
335
336 [[nodiscard]] static constexpr size_t serialized_header_bytes(
337 const std::uint32_t version) noexcept
338 {
339 return version >= HeaderCheckpointFormatVersion ? header_bytes()
342 }
343
344 [[nodiscard]] static constexpr size_t serialized_page_bytes(
345 const std::uint32_t version) noexcept
346 {
347 return version >= PortableFormatVersion ? portable_page_bytes()
351 }
352
353 [[nodiscard]] static constexpr bool version_uses_portable_codec(
354 const std::uint32_t version) noexcept
355 {
356 return version >= PortableFormatVersion;
357 }
358
359 [[nodiscard]] static constexpr std::uint8_t encoding_marker_for_version(
360 const std::uint32_t version) noexcept
361 {
362 return version_uses_portable_codec(version)
364 : EndianMarker;
365 }
366
367 [[nodiscard]] static constexpr std::uint32_t key_size_for_version(
368 const std::uint32_t version) noexcept
369 {
370 return version_uses_portable_codec(version)
372 : static_cast<std::uint32_t>(sizeof(Key));
373 }
374
375 [[nodiscard]] static constexpr std::array<char, 7>
376 reserved_bytes_for_version(const std::uint32_t version) noexcept
377 {
378 std::array<char, 7> reserved = {};
379 if (version_uses_portable_codec(version))
380 {
381 const auto codec_id = Codec::storage_id;
382 reserved[0] = static_cast<char>(codec_id & 0xFFu);
383 reserved[1] = static_cast<char>((codec_id >> 8) & 0xFFu);
384 reserved[2] = static_cast<char>((codec_id >> 16) & 0xFFu);
385 reserved[3] = static_cast<char>((codec_id >> 24) & 0xFFu);
386 }
387 return reserved;
388 }
389
390 [[nodiscard]] static constexpr Page make_page(const PageKind kind) noexcept
391 {
392 Page page;
393 page.kind = static_cast<std::uint8_t>(kind);
394 return page;
395 }
396
398 const std::string & source_path,
399 const std::uint32_t version)
400 {
401 if constexpr (not std::is_trivially_copyable_v<Key>)
403 << "File_BPlus_Tree: file " << source_path
404 << " uses legacy native format version " << version
405 << ", which requires trivially copyable keys; reopen or rewrite it "
406 << "with portable format version " << PortableFormatVersion
407 << " or newer";
408 }
409
411 const std::string & source_path,
412 const std::uint32_t wal_version)
413 {
414 if constexpr (not std::is_trivially_copyable_v<Key>)
416 << "File_BPlus_Tree: WAL " << source_path
417 << " uses legacy native format version " << wal_version
418 << ", which requires trivially copyable keys; recover it with a "
419 << "trivially copyable instantiation or discard the stale WAL";
420 }
421
422 [[nodiscard]] static constexpr bool is_leaf(const Page & page) noexcept
423 {
424 return page.kind == static_cast<std::uint8_t>(PageKind::Leaf);
425 }
426
427 [[nodiscard]] static constexpr bool is_internal(const Page & page) noexcept
428 {
429 return page.kind == static_cast<std::uint8_t>(PageKind::Internal);
430 }
431
432 [[nodiscard]] static constexpr bool is_free(const Page & page) noexcept
433 {
434 return page.kind == static_cast<std::uint8_t>(PageKind::Free);
435 }
436
438 {
439 pages_.empty();
441 pages_.append(Page{});
443 size_ = 0;
444 root_page_ = 0;
446 free_page_head_ = 0;
448 header_dirty_ = true;
450 }
451
452 void ensure_parent_dir() const
453 {
454 namespace fs = std::filesystem;
455
456 const fs::path path(file_path_);
457 if (not path.has_parent_path())
458 return;
459
460 std::error_code ec;
461 (void) fs::create_directories(path.parent_path(), ec);
463 << "File_BPlus_Tree: cannot create directory "
464 << path.parent_path().string() << " for " << file_path_
465 << " (" << ec.message() << ")";
466 }
467
468 void open_storage(const bool truncate = false) const
469 {
470 if (truncate)
471 ensure_writable("open_storage");
472
473 if (not read_only())
475
476 if (file_.is_open())
477 file_.close();
478
479 if (truncate)
480 {
481 std::ofstream create(file_path_, std::ios::binary | std::ios::trunc);
482 ah_runtime_error_unless(create.good())
483 << "File_BPlus_Tree: cannot create " << file_path_;
484 }
485
486 const auto mode = read_only()
487 ? (std::ios::binary | std::ios::in)
488 : (std::ios::binary | std::ios::in | std::ios::out);
489 file_.open(file_path_, mode);
491 << "File_BPlus_Tree: cannot open " << file_path_;
492 }
493
495 {
496 journal_path_ = file_path_ + ".journal";
497 journal_tmp_path_ = file_path_ + ".journal.tmp";
498 wal_path_ = file_path_ + ".wal";
499 wal_tmp_path_ = file_path_ + ".wal.tmp";
500 lock_path_ = file_path_ + ".lock";
501 }
502
503 template <typename T>
504 static void write_pod(std::ostream & out,
505 const T & value,
506 const std::string & file_path,
507 const char * what)
508 {
509 out.write(reinterpret_cast<const char *>(&value), sizeof(value));
511 << "File_BPlus_Tree: cannot write " << what << " to " << file_path;
512 }
513
514 template <typename T>
515 static T read_pod(std::istream & in,
516 const std::string & file_path,
517 const char * what)
518 {
519 T value{};
520 in.read(reinterpret_cast<char *>(&value), sizeof(value));
522 << "File_BPlus_Tree: cannot read " << what << " from " << file_path;
523 return value;
524 }
525
526 static void write_native_key(std::ostream & out,
527 const Key & key,
528 const std::string & file_path,
529 const char * what)
530 {
531 if constexpr (std::is_trivially_copyable_v<Key>)
532 {
533 const auto bytes = std::bit_cast<std::array<char, sizeof(Key)>>(key);
534 out.write(bytes.data(), bytes.size());
536 << "File_BPlus_Tree: cannot write " << what << " to "
537 << file_path;
538 }
539 else
541 << "File_BPlus_Tree: cannot write legacy native " << what << " to "
542 << file_path << " because legacy native formats require trivially "
543 << "copyable keys";
544 }
545
546 static void write_portable_key(std::ostream & out,
547 const Key & key,
548 const std::string & file_path,
549 const char * what)
550 {
551 std::array<unsigned char, Codec::encoded_size> bytes = {};
552 Codec::encode(key, bytes.data());
553 out.write(reinterpret_cast<const char *>(bytes.data()), bytes.size());
555 << "File_BPlus_Tree: cannot write " << what << " to " << file_path;
556 }
557
558 static void write_key(std::ostream & out,
559 const Key & key,
560 const std::string & file_path,
561 const char * what)
562 {
564 }
565
566 [[nodiscard]] static Key read_native_key(std::istream & in,
567 const std::string & file_path,
568 const char * what)
569 {
570 if constexpr (std::is_trivially_copyable_v<Key>)
571 {
572 std::array<char, sizeof(Key)> bytes = {};
573 in.read(bytes.data(), bytes.size());
575 << "File_BPlus_Tree: cannot read " << what << " from "
576 << file_path;
577 return std::bit_cast<Key>(bytes);
578 }
579 else
580 {
582 << "File_BPlus_Tree: cannot read legacy native " << what
583 << " from " << file_path
584 << " because legacy native formats require trivially copyable "
585 << "keys";
586 return Key{};
587 }
588 }
589
590 [[nodiscard]] static Key read_portable_key(std::istream & in,
591 const std::string & file_path,
592 const char * what)
593 {
594 std::array<unsigned char, Codec::encoded_size> bytes = {};
595 in.read(reinterpret_cast<char *>(bytes.data()), bytes.size());
597 << "File_BPlus_Tree: cannot read " << what << " from " << file_path;
598 return Codec::decode(bytes.data());
599 }
600
601 [[nodiscard]] static Key read_key(std::istream & in,
602 const std::string & file_path,
603 const char * what,
604 const std::uint32_t version)
605 {
606 return version_uses_portable_codec(version)
609 }
610
611 [[nodiscard]] static std::uint32_t add_key_to_crc(
612 const std::uint32_t crc,
613 const Key & key)
614 {
615 return Codec::add_to_crc(crc, key);
616 }
617
618 [[nodiscard]] static std::uint32_t add_native_key_to_crc(
619 const std::uint32_t crc,
620 const Key & key)
621 {
622 if constexpr (std::is_trivially_copyable_v<Key>)
623 return detail::crc32_add_bytes(crc, &key, sizeof(Key));
624 else
625 {
627 << "File_BPlus_Tree: cannot checksum legacy native keys because "
628 << "legacy native formats require trivially copyable keys";
629 return crc;
630 }
631 }
632
633 [[nodiscard]] static std::uint32_t header_checksum_v2(
634 const std::uint32_t version,
635 const std::uint64_t size,
636 const page_id_t root_page,
637 const page_id_t first_leaf_page,
638 const std::uint64_t page_count,
639 const page_id_t free_page_head) noexcept
640 {
641 std::uint32_t crc = detail::crc32_begin();
642 crc = detail::crc32_add(crc, version);
644 crc = detail::crc32_add(crc, static_cast<std::uint64_t>(MinDegree));
646 const auto reserved = reserved_bytes_for_version(version);
649 crc = detail::crc32_add(crc, root_page);
650 crc = detail::crc32_add(crc, first_leaf_page);
652 crc = detail::crc32_add(crc, free_page_head);
654 }
655
656 [[nodiscard]] static std::uint32_t header_checksum(
657 const std::uint32_t version,
658 const std::uint64_t size,
659 const page_id_t root_page,
660 const page_id_t first_leaf_page,
661 const std::uint64_t page_count,
662 const page_id_t free_page_head,
663 const std::uint64_t checkpoint_sequence) noexcept
664 {
665 std::uint32_t crc = detail::crc32_begin();
666 crc = detail::crc32_add(crc, version);
668 crc = detail::crc32_add(crc, static_cast<std::uint64_t>(MinDegree));
670 const auto reserved = reserved_bytes_for_version(version);
673 crc = detail::crc32_add(crc, root_page);
674 crc = detail::crc32_add(crc, first_leaf_page);
676 crc = detail::crc32_add(crc, free_page_head);
679 }
680
681 [[nodiscard]] static std::uint32_t page_checksum_v3(const Page & page)
682 {
683 std::uint32_t crc = detail::crc32_begin();
684 const std::array<char, 3> padding = {};
685
689 crc = detail::crc32_add_bytes(crc, padding.data(), padding.size());
691 for (size_t i = 0; i < MaxKeys; ++i)
695 }
696
697 [[nodiscard]] static std::uint32_t page_checksum(const Page & page)
698 {
699 std::uint32_t crc = detail::crc32_begin();
700 const std::array<char, 3> padding = {};
701
705 crc = detail::crc32_add_bytes(crc, padding.data(), padding.size());
708 for (size_t i = 0; i < MaxKeys; ++i)
712 }
713
714 [[nodiscard]] static std::uint32_t page_checksum_v4(const Page & page)
715 {
716 std::uint32_t crc = detail::crc32_begin();
717 const std::array<char, 3> padding = {};
718
722 crc = detail::crc32_add_bytes(crc, padding.data(), padding.size());
725 for (size_t i = 0; i < MaxKeys; ++i)
729 }
730
731 [[nodiscard]] static std::uint32_t wal_record_checksum_add(
732 std::uint32_t crc,
733 const page_id_t page_id,
734 const Page & page)
735 {
736 const std::array<char, 3> padding = {};
737
738 crc = detail::crc32_add(crc, page_id);
742 crc = detail::crc32_add_bytes(crc, padding.data(), padding.size());
745 for (size_t i = 0; i < MaxKeys; ++i)
749 return crc;
750 }
751
752 [[nodiscard]] static std::uint32_t wal_record_checksum_add_v3(
753 std::uint32_t crc,
754 const page_id_t page_id,
755 const Page & page)
756 {
757 const std::array<char, 3> padding = {};
758
759 crc = detail::crc32_add(crc, page_id);
763 crc = detail::crc32_add_bytes(crc, padding.data(), padding.size());
766 for (size_t i = 0; i < MaxKeys; ++i)
770 return crc;
771 }
772
773 [[nodiscard]] static std::uint32_t wal_record_checksum_add_v2(
774 std::uint32_t crc,
775 const page_id_t page_id,
776 const Page & page)
777 {
778 const std::array<char, 3> padding = {};
779
780 crc = detail::crc32_add(crc, page_id);
784 crc = detail::crc32_add_bytes(crc, padding.data(), padding.size());
786 for (size_t i = 0; i < MaxKeys; ++i)
790 return crc;
791 }
792
793 static void write_page_record(std::ostream & out,
794 const Page & page,
795 const std::string & target_path)
796 {
797 write_pod(out, page.kind, target_path, "page kind");
798 write_pod(out, page.key_count, target_path, "page key count");
799 write_pod(out, page.child_count, target_path, "page child count");
800
801 const std::array<char, 3> padding = {};
802 out.write(padding.data(), padding.size());
804 << "File_BPlus_Tree: cannot write page padding to " << target_path;
805
806 write_pod(out, page.link, target_path, "page link");
808 "page checkpoint sequence");
809
810 for (size_t i = 0; i < MaxKeys; ++i)
811 write_key(out, page.keys[i], target_path, "page key");
812
813 for (size_t i = 0; i < MaxChildren; ++i)
814 write_pod(out, page.children[i], target_path, "page child");
815
816 write_pod(out, page_checksum(page), target_path, "page checksum");
817 }
818
819 void seekp_to(const std::streamoff offset) const
820 {
821 file_.clear();
822 file_.seekp(offset);
824 << "File_BPlus_Tree: cannot seek for writing in " << file_path_;
825 }
826
827 [[nodiscard]] std::streamoff current_page_offset(const page_id_t page_id) const noexcept
828 {
829 return static_cast<std::streamoff>(header_bytes()
830 + (page_id - 1) * page_bytes());
831 }
832
833 void write_header_to_storage(const std::uint64_t size,
834 const page_id_t root_page,
835 const page_id_t first_leaf_page,
836 const std::uint64_t page_count,
837 const page_id_t free_page_head,
838 const std::uint64_t checkpoint_sequence) const
839 {
840 seekp_to(0);
841 const auto magic = file_magic();
842 file_.write(magic.data(), magic.size());
844 << "File_BPlus_Tree: cannot write magic to " << file_path_;
845
847
848 write_pod(file_, FormatVersion, file_path_, "header version");
850 "header key size");
851 write_pod(file_, static_cast<std::uint64_t>(MinDegree), file_path_,
852 "header minimum degree");
854 "header encoding");
855 file_.write(reserved.data(), reserved.size());
857 << "File_BPlus_Tree: cannot write reserved header bytes to "
858 << file_path_;
859 write_pod(file_, size, file_path_, "header size");
860 write_pod(file_, root_page, file_path_, "header root page");
861 write_pod(file_, first_leaf_page, file_path_, "header first leaf");
862 write_pod(file_, page_count, file_path_, "header page count");
863 write_pod(file_, free_page_head, file_path_, "header free page head");
865 "header checkpoint sequence");
867 header_checksum(FormatVersion, size, root_page, first_leaf_page,
868 page_count, free_page_head,
870 file_path_, "header checksum");
871 }
872
874 {
875 write_header_to_storage(static_cast<std::uint64_t>(size_), root_page_,
877 static_cast<std::uint64_t>(pages_.size() - 1),
879 }
880
882 const Page & page) const
883 {
886 }
887
889 {
890 size_t count = 0;
891 for (page_id_t page_id = 1; page_id < dirty_pages_.size(); ++page_id)
892 if (dirty_pages_[page_id] != 0)
893 ++count;
894 return count;
895 }
896
897 [[nodiscard]] static constexpr size_t wal_header_bytes() noexcept
898 {
899 return detail::Ordered_Tree_Snapshot_Magic_Size + sizeof(std::uint32_t) * 3
900 + sizeof(std::uint64_t) * 8 + sizeof(std::uint8_t) + 7;
901 }
902
903 [[nodiscard]] static constexpr std::uint32_t
910
911 [[nodiscard]] static constexpr size_t
912 wal_record_bytes(const std::uint32_t wal_version) noexcept
913 {
914 return sizeof(page_id_t)
916 }
917
918 [[nodiscard]] static std::uint32_t wal_header_checksum(
919 const std::uint32_t wal_version,
920 const std::uint64_t size,
921 const page_id_t root_page,
922 const page_id_t first_leaf_page,
923 const std::uint64_t page_count,
924 const page_id_t free_page_head,
925 const std::uint64_t checkpoint_sequence,
926 const std::uint64_t dirty_count) noexcept
927 {
928 std::uint32_t crc = detail::crc32_begin();
932 crc = detail::crc32_add(crc, static_cast<std::uint64_t>(MinDegree));
935 const auto reserved =
939 crc = detail::crc32_add(crc, root_page);
940 crc = detail::crc32_add(crc, first_leaf_page);
942 crc = detail::crc32_add(crc, free_page_head);
946 }
947
948 [[nodiscard]] static constexpr size_t wal_trailer_bytes() noexcept
949 {
951 + sizeof(std::uint64_t) * 2 + sizeof(std::uint32_t);
952 }
953
954 void write_wal_to_path(const std::string & target_path,
955 const std::uint64_t checkpoint_sequence) const
956 {
957 std::ofstream out(target_path, std::ios::binary | std::ios::trunc);
959 << "File_BPlus_Tree: cannot open " << target_path << " for writing";
960
961 const auto magic = wal_magic();
962 out.write(magic.data(), magic.size());
964 << "File_BPlus_Tree: cannot write WAL magic to " << target_path;
965
966 const auto page_count = static_cast<std::uint64_t>(pages_.size() - 1);
967 const auto size = static_cast<std::uint64_t>(size_);
968 const auto dirty_count = static_cast<std::uint64_t>(dirty_page_count());
970 std::uint32_t payload_crc = detail::crc32_begin();
971
972 write_pod(out, WalVersion, target_path, "WAL version");
974 "WAL key size");
975 write_pod(out, static_cast<std::uint64_t>(MinDegree), target_path,
976 "WAL minimum degree");
978 "WAL encoding");
979 out.write(reserved.data(), reserved.size());
981 << "File_BPlus_Tree: cannot write WAL reserved bytes to " << target_path;
982 write_pod(out, size, target_path, "WAL size");
983 write_pod(out, root_page_, target_path, "WAL root page");
984 write_pod(out, first_leaf_page_, target_path, "WAL first leaf");
985 write_pod(out, page_count, target_path, "WAL page count");
986 write_pod(out, free_page_head_, target_path, "WAL free page head");
988 "WAL checkpoint sequence");
989 write_pod(out, dirty_count, target_path, "WAL dirty count");
994 target_path, "WAL checksum");
995
996 for (page_id_t page_id = 1; page_id < dirty_pages_.size(); ++page_id)
997 if (dirty_pages_[page_id] != 0)
998 {
1000 page(page_id));
1001 write_pod(out, page_id, target_path, "WAL page id");
1003 }
1004
1005 const auto trailer_magic = wal_trailer_magic();
1006 out.write(trailer_magic.data(), trailer_magic.size());
1008 << "File_BPlus_Tree: cannot write WAL commit trailer to "
1009 << target_path;
1011 "WAL trailer checkpoint sequence");
1012 write_pod(out, dirty_count, target_path, "WAL trailer dirty count");
1014 "WAL payload checksum");
1015
1016 out.flush();
1018 << "File_BPlus_Tree: cannot flush " << target_path;
1019 out.close();
1021 << "File_BPlus_Tree: cannot close " << target_path;
1022
1025 }
1026
1027 [[nodiscard]] static Page read_page_from_stream(std::istream & in,
1028 const std::string & path,
1029 const std::uint32_t version)
1030 {
1031 Page page;
1032 page.kind = read_pod<std::uint8_t>(in, path, "page kind");
1033 page.key_count = read_pod<std::uint16_t>(in, path, "page key count");
1034 page.child_count = read_pod<std::uint16_t>(in, path, "page child count");
1035
1036 std::array<char, 3> padding = {};
1037 in.read(padding.data(), padding.size());
1039 << "File_BPlus_Tree: cannot read page padding from " << path;
1040
1041 page.link = read_pod<page_id_t>(in, path, "page link");
1042
1043 if (version >= NativeCheckpointFormatVersion)
1045 read_pod<std::uint64_t>(in, path, "page checkpoint sequence");
1046
1047 for (size_t i = 0; i < MaxKeys; ++i)
1048 page.keys[i] = read_key(in, path, "page key", version);
1049
1050 for (size_t i = 0; i < MaxChildren; ++i)
1051 page.children[i] = read_pod<page_id_t>(in, path, "page child");
1052
1053 if (version >= HeaderCheckpointFormatVersion)
1054 {
1055 const auto stored = read_pod<std::uint32_t>(in, path, "page checksum");
1057 == (version >= PortableFormatVersion
1062 << "File_BPlus_Tree: page checksum mismatch in " << path;
1063 }
1064
1065 return page;
1066 }
1067
1068 void write_full_image(std::ostream & out,
1069 const std::string & target_path,
1070 const std::uint64_t checkpoint_sequence) const
1071 {
1072 const auto magic = file_magic();
1073 out.write(magic.data(), magic.size());
1075 << "File_BPlus_Tree: cannot write magic to " << target_path;
1076
1077 const auto page_count = static_cast<std::uint64_t>(pages_.size() - 1);
1078 const auto size = static_cast<std::uint64_t>(size_);
1080
1081 write_pod(out, FormatVersion, target_path, "header version");
1083 "header key size");
1084 write_pod(out, static_cast<std::uint64_t>(MinDegree), target_path,
1085 "header minimum degree");
1087 "header encoding");
1088 out.write(reserved.data(), reserved.size());
1090 << "File_BPlus_Tree: cannot write reserved header bytes to "
1091 << target_path;
1092 write_pod(out, size, target_path, "header size");
1093 write_pod(out, root_page_, target_path, "header root page");
1094 write_pod(out, first_leaf_page_, target_path, "header first leaf");
1095 write_pod(out, page_count, target_path, "header page count");
1096 write_pod(out, free_page_head_, target_path, "header free page head");
1098 "header checkpoint sequence");
1099 write_pod(out,
1103 target_path, "header checksum");
1104
1105 for (page_id_t page_id = 1; page_id < pages_.size(); ++page_id)
1107 }
1108
1109 void write_full_image_to_path(const std::string & target_path,
1110 const std::uint64_t checkpoint_sequence) const
1111 {
1112 namespace fs = std::filesystem;
1113 const fs::path path(target_path);
1114 if (path.has_parent_path())
1115 {
1116 std::error_code ec;
1117 (void) fs::create_directories(path.parent_path(), ec);
1119 << "File_BPlus_Tree: cannot create directory "
1120 << path.parent_path().string() << " for " << target_path
1121 << " (" << ec.message() << ")";
1122 }
1123
1124 std::ofstream out(target_path, std::ios::binary | std::ios::trunc);
1126 << "File_BPlus_Tree: cannot open " << target_path << " for writing";
1127
1129 out.flush();
1131 << "File_BPlus_Tree: cannot flush " << target_path;
1132 out.close();
1134 << "File_BPlus_Tree: cannot close " << target_path;
1135
1138 }
1139
1141 {
1143 {
1144 if (file_.is_open())
1145 file_.close();
1147 if (not file_.is_open())
1148 open_storage(false);
1151 return;
1152 }
1153
1154 if (not file_.is_open())
1155 open_storage(false);
1156
1158 for (page_id_t page_id = 1; page_id < dirty_pages_.size(); ++page_id)
1159 if (dirty_pages_[page_id] != 0)
1160 write_current_page_to_storage(page_id, page(page_id));
1161
1162 file_.flush();
1164 << "File_BPlus_Tree: cannot flush " << file_path_;
1168 }
1169
1171 {
1172 namespace fs = std::filesystem;
1173 if (read_only())
1174 {
1176 and not fs::exists(wal_path_))
1177 << "File_BPlus_Tree: cannot open read-only while WAL recovery "
1178 << "sidecars are pending for " << file_path_;
1179 return;
1180 }
1181
1183 if (not fs::exists(wal_path_))
1184 return;
1185
1186 std::ifstream in(wal_path_, std::ios::binary);
1188 << "File_BPlus_Tree: cannot open WAL " << wal_path_ << " for reading";
1189
1190 const auto expected_magic = wal_magic();
1191 std::array<char, detail::Ordered_Tree_Snapshot_Magic_Size> magic = {};
1192 in.read(magic.data(), magic.size());
1194 << "File_BPlus_Tree: cannot read WAL magic from " << wal_path_;
1196 << "File_BPlus_Tree: incompatible WAL file " << wal_path_;
1197
1198 const auto wal_version = read_pod<std::uint32_t>(in, wal_path_, "WAL version");
1203 << "File_BPlus_Tree: unsupported WAL version " << wal_version
1204 << " in " << wal_path_;
1207
1208 const auto key_size = read_pod<std::uint32_t>(in, wal_path_, "WAL key size");
1212 << "File_BPlus_Tree: WAL " << wal_path_ << " was created for key size "
1213 << key_size << ", but this instantiation expects "
1215
1216 const auto min_degree =
1217 read_pod<std::uint64_t>(in, wal_path_, "WAL minimum degree");
1218 ah_runtime_error_unless(min_degree == MinDegree)
1219 << "File_BPlus_Tree: WAL " << wal_path_
1220 << " was created with minimum degree " << min_degree
1221 << ", but this instantiation expects " << MinDegree;
1222
1223 const auto encoding = read_pod<std::uint8_t>(in, wal_path_, "WAL encoding");
1227 << "File_BPlus_Tree: WAL " << wal_path_
1228 << " was created with an incompatible encoding";
1229
1230 std::array<char, 7> reserved = {};
1231 in.read(reserved.data(), reserved.size());
1233 << "File_BPlus_Tree: cannot read WAL reserved bytes from " << wal_path_;
1237 << "File_BPlus_Tree: WAL " << wal_path_
1238 << " was created with an incompatible key codec";
1239
1240 const auto wal_size = read_pod<std::uint64_t>(in, wal_path_, "WAL size");
1241 const auto wal_root_page =
1242 read_pod<page_id_t>(in, wal_path_, "WAL root page");
1243 const auto wal_first_leaf =
1244 read_pod<page_id_t>(in, wal_path_, "WAL first leaf");
1245 const auto wal_page_count =
1246 read_pod<std::uint64_t>(in, wal_path_, "WAL page count");
1247 const auto wal_free_head =
1248 read_pod<page_id_t>(in, wal_path_, "WAL free page head");
1249 const auto wal_checkpoint_sequence =
1250 read_pod<std::uint64_t>(in, wal_path_, "WAL checkpoint sequence");
1251 const auto wal_dirty_count =
1252 read_pod<std::uint64_t>(in, wal_path_, "WAL dirty count");
1253 const auto wal_checksum =
1254 read_pod<std::uint32_t>(in, wal_path_, "WAL checksum");
1255
1261 << "File_BPlus_Tree: WAL checksum mismatch in " << wal_path_;
1262
1263 std::error_code ec;
1264 const auto actual_size = fs::file_size(wal_path_, ec);
1266 << "File_BPlus_Tree: cannot determine the size of WAL " << wal_path_
1267 << " (" << ec.message() << ")";
1269 const auto expected_size = wal_header_bytes()
1272 ah_runtime_error_unless(actual_size == expected_size)
1273 << "File_BPlus_Tree: WAL size mismatch in " << wal_path_
1274 << " (expected " << expected_size << ", got " << actual_size << ")";
1275
1276 if (file_.is_open())
1277 file_.close();
1278
1279 if (not file_exists())
1280 open_storage(true);
1281 else
1282 open_storage(false);
1283
1285 std::uint32_t payload_crc = detail::crc32_begin();
1286 for (std::uint64_t i = 0; i < wal_dirty_count; ++i)
1287 {
1288 const auto page_id = read_pod<page_id_t>(in, wal_path_, "WAL page id");
1289 ah_runtime_error_unless(page_id > 0 and page_id <= wal_page_count)
1290 << "File_BPlus_Tree: invalid WAL page id " << page_id
1291 << " in " << wal_path_;
1296 recovered);
1297 else if (wal_version >= NativeWalVersion)
1299 recovered);
1300 else if (has_commit_trailer)
1302 recovered);
1303 recovered_records.append(Wal_Record{page_id, recovered});
1304 }
1305
1307 {
1309 std::array<char, detail::Ordered_Tree_Snapshot_Magic_Size>
1310 trailer_magic = {};
1311 in.read(trailer_magic.data(), trailer_magic.size());
1313 << "File_BPlus_Tree: cannot read WAL commit trailer from "
1314 << wal_path_;
1316 << "File_BPlus_Tree: missing WAL commit trailer in "
1317 << wal_path_;
1318
1319 const auto trailer_checkpoint_sequence =
1321 "WAL trailer checkpoint sequence");
1322 const auto trailer_dirty_count =
1323 read_pod<std::uint64_t>(in, wal_path_, "WAL trailer dirty count");
1324 const auto trailer_payload_checksum =
1325 read_pod<std::uint32_t>(in, wal_path_, "WAL payload checksum");
1326
1329 << "File_BPlus_Tree: WAL trailer checkpoint mismatch in "
1330 << wal_path_;
1332 << "File_BPlus_Tree: WAL trailer dirty-count mismatch in "
1333 << wal_path_;
1336 << "File_BPlus_Tree: WAL payload checksum mismatch in "
1337 << wal_path_;
1338 }
1339
1340 bool stale_wal = false;
1341 if (storage_format_version_on_disk() == std::optional<std::uint32_t>(FormatVersion))
1342 {
1343 stale_wal = true;
1344 for (size_t i = 0; i < recovered_records.size(); ++i)
1345 {
1346 const auto persisted =
1348 if (not persisted.has_value()
1349 or persisted->checkpoint_sequence < wal_checkpoint_sequence)
1350 {
1351 stale_wal = false;
1352 break;
1353 }
1354 }
1355 }
1356
1357 if (stale_wal)
1358 {
1360 return;
1361 }
1362
1366 for (size_t i = 0; i < recovered_records.size(); ++i)
1367 {
1368 const auto persisted =
1370 if (persisted.has_value()
1371 and persisted->checkpoint_sequence >= wal_checkpoint_sequence)
1372 continue;
1375 }
1376
1377 file_.flush();
1379 << "File_BPlus_Tree: cannot flush recovered data to " << file_path_;
1384 }
1385
1387 {
1388 namespace fs = std::filesystem;
1390 if (read_only())
1391 {
1393 and not fs::exists(journal_path_))
1394 << "File_BPlus_Tree: cannot open read-only while journal recovery "
1395 << "sidecars are pending for " << file_path_;
1396 return;
1397 }
1398
1400 if (not fs::exists(journal_path_))
1401 return;
1402
1404
1405 if (file_.is_open())
1406 file_.close();
1407
1410 }
1411
1412 [[nodiscard]] Loaded_Image read_image(const std::string & source_path) const
1413 {
1414 namespace fs = std::filesystem;
1415
1416 std::ifstream in(source_path, std::ios::binary);
1418 << "File_BPlus_Tree: cannot open " << source_path << " for reading";
1419
1420 const auto expected_magic = file_magic();
1421 std::array<char, detail::Ordered_Tree_Snapshot_Magic_Size> magic = {};
1422 in.read(magic.data(), magic.size());
1424 << "File_BPlus_Tree: cannot read header magic from " << source_path;
1425
1427 << "File_BPlus_Tree: file " << source_path
1428 << " does not contain a compatible paged B+ Tree";
1429
1430 const auto version =
1431 read_pod<std::uint32_t>(in, source_path, "header version");
1433 or version == ChecksummedFormatVersion
1436 or version == FormatVersion)
1437 << "File_BPlus_Tree: unsupported format version " << version
1438 << " in " << source_path;
1439 if (version < PortableFormatVersion)
1441
1442 const auto key_size =
1443 read_pod<std::uint32_t>(in, source_path, "header key size");
1444 ah_runtime_error_unless(key_size == key_size_for_version(version))
1445 << "File_BPlus_Tree: file " << source_path << " was created for key size "
1446 << key_size << ", but this instantiation expects "
1447 << key_size_for_version(version);
1448
1449 const auto min_degree =
1450 read_pod<std::uint64_t>(in, source_path, "header minimum degree");
1451 ah_runtime_error_unless(min_degree == MinDegree)
1452 << "File_BPlus_Tree: file " << source_path
1453 << " was created with minimum degree " << min_degree
1454 << ", but this instantiation expects " << MinDegree;
1455
1456 const auto encoding =
1457 read_pod<std::uint8_t>(in, source_path, "header encoding");
1459 << "File_BPlus_Tree: file " << source_path
1460 << " was created with an incompatible encoding";
1461
1462 std::array<char, 7> reserved = {};
1463 in.read(reserved.data(), reserved.size());
1465 << "File_BPlus_Tree: cannot read reserved header bytes from "
1466 << source_path;
1468 << "File_BPlus_Tree: file " << source_path
1469 << " was created with an incompatible key codec";
1470
1472 image.version = version;
1473 image.size = read_pod<std::uint64_t>(in, source_path, "header size");
1474 image.root_page = read_pod<page_id_t>(in, source_path, "header root page");
1475 image.first_leaf_page =
1476 read_pod<page_id_t>(in, source_path, "header first leaf");
1477 const auto page_count =
1478 read_pod<std::uint64_t>(in, source_path, "header page count");
1479 image.free_page_head =
1480 read_pod<page_id_t>(in, source_path, "header free page head");
1481
1482 if (version >= HeaderCheckpointFormatVersion)
1483 {
1484 image.checkpoint_sequence =
1486 "header checkpoint sequence");
1488 "header checksum");
1490 version,
1491 static_cast<std::uint64_t>(image.size),
1492 image.root_page, image.first_leaf_page,
1493 page_count, image.free_page_head,
1494 image.checkpoint_sequence))
1495 << "File_BPlus_Tree: header checksum mismatch in " << source_path;
1496 }
1497 else if (version >= ChecksummedFormatVersion)
1498 {
1500 "header checksum");
1502 version, static_cast<std::uint64_t>(image.size),
1503 image.root_page, image.first_leaf_page,
1504 page_count, image.free_page_head))
1505 << "File_BPlus_Tree: header checksum mismatch in " << source_path;
1506 }
1507
1508 std::error_code ec;
1509 const auto actual_size = fs::file_size(source_path, ec);
1511 << "File_BPlus_Tree: cannot determine the size of " << source_path
1512 << " (" << ec.message() << ")";
1513 const auto expected_size =
1515 ah_runtime_error_unless(actual_size == expected_size)
1516 << "File_BPlus_Tree: file size mismatch in " << source_path
1517 << " (expected " << expected_size << ", got " << actual_size << ")";
1518
1519 image.pages.empty();
1520 image.pages.reserve(page_count + 1);
1521 image.pages.append(Page{});
1522 for (page_id_t page_id = 1; page_id <= page_count; ++page_id)
1523 image.pages.append(read_page_from_stream(in, source_path, version));
1524
1526 << "File_BPlus_Tree: invalid root page id in " << source_path;
1527 ah_runtime_error_unless(image.first_leaf_page <= page_count)
1528 << "File_BPlus_Tree: invalid first leaf id in " << source_path;
1529 ah_runtime_error_unless(image.free_page_head <= page_count)
1530 << "File_BPlus_Tree: invalid free-list head in " << source_path;
1531
1532 return image;
1533 }
1534
1536 {
1538
1539 size_ = image.size;
1540 root_page_ = image.root_page;
1541 first_leaf_page_ = image.first_leaf_page;
1542 free_page_head_ = image.free_page_head;
1543 checkpoint_sequence_ = image.checkpoint_sequence;
1545 pages_ = std::move(image.pages);
1546
1548 dirty_pages_.reserve(pages_.size());
1549 for (size_t i = 0; i < pages_.size(); ++i)
1551
1552 header_dirty_ = false;
1554 << "File_BPlus_Tree: structural verification failed for " << file_path_;
1555 open_storage(false);
1556 }
1557
1559 {
1560 ensure_writable("create_empty_file");
1562 open_storage(true);
1563 sync();
1564 }
1565
1566 [[nodiscard]] bool file_exists() const
1567 {
1568 return std::ifstream(file_path_, std::ios::binary).good();
1569 }
1570
1571 [[nodiscard]] std::optional<std::uint32_t> storage_format_version_on_disk() const
1572 {
1573 try
1574 {
1575 if (not file_exists())
1576 return std::nullopt;
1577
1578 std::ifstream in(file_path_, std::ios::binary);
1579 if (not in.good())
1580 return std::nullopt;
1581
1582 std::array<char, detail::Ordered_Tree_Snapshot_Magic_Size> magic = {};
1583 in.read(magic.data(), magic.size());
1584 if (not in.good() or magic != file_magic())
1585 return std::nullopt;
1586
1587 return read_pod<std::uint32_t>(in, file_path_, "header version");
1588 }
1589 catch (...)
1590 {
1591 return std::nullopt;
1592 }
1593 }
1594
1595 [[nodiscard]] std::optional<Page>
1597 {
1598 if (storage_format_version_on_disk() != std::optional<std::uint32_t>(FormatVersion))
1599 return std::nullopt;
1600
1601 if (not file_.is_open())
1602 open_storage(false);
1603
1604 file_.clear();
1605 file_.seekg(0, std::ios::end);
1606 if (not file_.good())
1607 return std::nullopt;
1608
1609 const auto file_size = file_.tellg();
1610 const auto offset = current_page_offset(page_id);
1611 if (file_size < 0
1612 or offset < 0
1613 or file_size - offset < static_cast<std::streamoff>(page_bytes()))
1614 return std::nullopt;
1615
1616 file_.clear();
1617 file_.seekg(offset);
1618 if (not file_.good())
1619 return std::nullopt;
1620
1621 try
1622 {
1624 }
1625 catch (...)
1626 {
1627 file_.clear();
1628 return std::nullopt;
1629 }
1630 }
1631
1633 {
1634 if (header_dirty_)
1635 return true;
1636
1637 for (page_id_t page_id = 1; page_id < dirty_pages_.size(); ++page_id)
1638 if (dirty_pages_[page_id] != 0)
1639 return true;
1640 return false;
1641 }
1642
1644 {
1645 header_dirty_ = false;
1646 for (page_id_t page_id = 1; page_id < dirty_pages_.size(); ++page_id)
1647 dirty_pages_(page_id) = 0;
1648 }
1649
1650 void mark_page_dirty(const page_id_t page_id)
1651 {
1653 dirty_pages_(page_id) = 1;
1654 }
1655
1657 const std::uint64_t checkpoint_sequence) noexcept
1658 {
1659 for (page_id_t page_id = 1; page_id < pages_.size(); ++page_id)
1661 }
1662
1664 {
1665 header_dirty_ = true;
1666 }
1667
1668 [[nodiscard]] Page & page(const page_id_t page_id)
1669 {
1670 ah_runtime_error_unless(page_id > 0 and page_id < pages_.size())
1671 << "File_BPlus_Tree: invalid page id " << page_id;
1672 return pages_(page_id);
1673 }
1674
1675 [[nodiscard]] const Page & page(const page_id_t page_id) const
1676 {
1677 ah_runtime_error_unless(page_id > 0 and page_id < pages_.size())
1678 << "File_BPlus_Tree: invalid page id " << page_id;
1679 return pages_(page_id);
1680 }
1681
1682 [[nodiscard]] bool equals(const Key & lhs, const Key & rhs) const
1683 {
1684 return not cmp_(lhs, rhs) and not cmp_(rhs, lhs);
1685 }
1686
1687 [[nodiscard]] bool strictly_sorted(const Page & page) const
1688 {
1689 if (page.key_count == 0)
1690 return true;
1691
1692 for (size_t i = 1; i < page.key_count; ++i)
1693 if (not cmp_(page.keys[i - 1], page.keys[i]))
1694 return false;
1695 return true;
1696 }
1697
1699 const Key & key) const
1700 {
1701 size_t idx = 0;
1702 while (idx < page.key_count and cmp_(page.keys[idx], key))
1703 ++idx;
1704 return idx;
1705 }
1706
1708 const Key & key) const
1709 {
1710 size_t idx = 0;
1711 while (idx < page.key_count
1712 and (cmp_(page.keys[idx], key) or equals(page.keys[idx], key)))
1713 ++idx;
1714 return idx;
1715 }
1716
1717 [[nodiscard]] size_t child_index(const Page & page, const Key & key) const
1718 {
1719 return upper_bound_index(page, key);
1720 }
1721
1722 static void insert_key_at(Page & page, const size_t idx, const Key & key)
1723 {
1724 for (size_t i = page.key_count; i > idx; --i)
1725 page.keys[i] = page.keys[i - 1];
1726 page.keys[idx] = key;
1727 ++page.key_count;
1728 }
1729
1730 static void erase_key_at(Page & page, const size_t idx)
1731 {
1732 for (size_t i = idx + 1; i < page.key_count; ++i)
1733 page.keys[i - 1] = page.keys[i];
1734 --page.key_count;
1735 }
1736
1737 static void insert_child_at(Page & page, const size_t idx,
1738 const page_id_t child_id)
1739 {
1740 for (size_t i = page.child_count; i > idx; --i)
1741 page.children[i] = page.children[i - 1];
1742 page.children[idx] = child_id;
1743 ++page.child_count;
1744 }
1745
1746 static void erase_child_at(Page & page, const size_t idx)
1747 {
1748 for (size_t i = idx + 1; i < page.child_count; ++i)
1749 page.children[i - 1] = page.children[i];
1750 --page.child_count;
1751 }
1752
1754 {
1755 if (free_page_head_ != 0)
1756 {
1757 const page_id_t page_id = free_page_head_;
1758 free_page_head_ = page(page_id).link;
1759 page(page_id) = make_page(kind);
1760 mark_page_dirty(page_id);
1762 return page_id;
1763 }
1764
1765 pages_.append(make_page(kind));
1767 mark_page_dirty(pages_.size() - 1);
1769 return pages_.size() - 1;
1770 }
1771
1772 void release_page(const page_id_t page_id)
1773 {
1776 page(page_id) = free_page;
1777 free_page_head_ = page_id;
1778 mark_page_dirty(page_id);
1780 }
1781
1782 [[nodiscard]] Key subtree_min(const page_id_t page_id) const
1783 {
1784 page_id_t curr = page_id;
1785 while (is_internal(page(curr)))
1786 curr = page(curr).children[0];
1787
1788 ah_runtime_error_unless(is_leaf(page(curr)) and page(curr).key_count > 0)
1789 << "File_BPlus_Tree: invalid subtree minimum request";
1790 return page(curr).keys[0];
1791 }
1792
1793 void rebuild_separators(const page_id_t page_id)
1794 {
1795 if (not is_internal(page(page_id)))
1796 return;
1797
1798 Page & p = page(page_id);
1800 << "File_BPlus_Tree: internal page without children";
1801
1802 p.key_count = p.child_count - 1;
1803 for (size_t i = 1; i < p.child_count; ++i)
1804 p.keys[i - 1] = subtree_min(p.children[i]);
1805
1806 mark_page_dirty(page_id);
1807 }
1808
1810 {
1811 while (page_id != 0 and is_internal(page(page_id)))
1812 page_id = page(page_id).children[0];
1813 return page_id;
1814 }
1815
1817 {
1818 while (page_id != 0 and is_internal(page(page_id)))
1819 page_id = page(page_id).children[page(page_id).child_count - 1];
1820 return page_id;
1821 }
1822
1823 [[nodiscard]] page_id_t find_leaf_page(const Key & key) const
1824 {
1825 page_id_t curr = root_page_;
1826 while (curr != 0 and is_internal(page(curr)))
1827 curr = page(curr).children[child_index(page(curr), key)];
1828 return curr;
1829 }
1830
1831 void split_child(const page_id_t parent_id, const size_t idx)
1832 {
1833 const page_id_t child_id = page(parent_id).children[idx];
1834 const bool child_is_leaf = is_leaf(page(child_id));
1835 const page_id_t right_id =
1837
1838 if (child_is_leaf)
1839 {
1840 Page & child = page(child_id);
1841 Page & right = page(right_id);
1842
1843 for (size_t i = MinKeys; i < child.key_count; ++i)
1844 right.keys[right.key_count++] = child.keys[i];
1845
1846 child.key_count = MinKeys;
1847 right.link = child.link;
1848 child.link = right_id;
1849
1852 }
1853 else
1854 {
1855 Page & child = page(child_id);
1856 Page & right = page(right_id);
1857
1858 for (size_t i = MinDegree; i < child.child_count; ++i)
1859 right.children[right.child_count++] = child.children[i];
1860
1861 child.child_count = MinDegree;
1864 }
1865
1866 insert_child_at(page(parent_id), idx + 1, right_id);
1867 rebuild_separators(parent_id);
1868 }
1869
1870 void insert_nonfull(const page_id_t page_id, const Key & key)
1871 {
1872 if (is_leaf(page(page_id)))
1873 {
1874 const size_t idx = lower_bound_index(page(page_id), key);
1875 insert_key_at(page(page_id), idx, key);
1876 mark_page_dirty(page_id);
1877 return;
1878 }
1879
1880 size_t idx = child_index(page(page_id), key);
1881 const page_id_t child_id = page(page_id).children[idx];
1882 if (page(child_id).key_count == MaxKeys)
1883 {
1884 split_child(page_id, idx);
1885 idx = child_index(page(page_id), key);
1886 }
1887
1888 insert_nonfull(page(page_id).children[idx], key);
1889 rebuild_separators(page_id);
1890 }
1891
1892 void borrow_from_prev(const page_id_t parent_id, const size_t idx)
1893 {
1894 const page_id_t left_id = page(parent_id).children[idx - 1];
1895 const page_id_t child_id = page(parent_id).children[idx];
1896
1897 if (is_leaf(page(child_id)))
1898 {
1899 const Key borrowed = page(left_id).keys[page(left_id).key_count - 1];
1901 erase_key_at(page(left_id), page(left_id).key_count - 1);
1904 }
1905 else
1906 {
1910 erase_child_at(page(left_id), page(left_id).child_count - 1);
1913 }
1914
1915 rebuild_separators(parent_id);
1916 }
1917
1918 void borrow_from_next(const page_id_t parent_id, const size_t idx)
1919 {
1920 const page_id_t child_id = page(parent_id).children[idx];
1921 const page_id_t right_id = page(parent_id).children[idx + 1];
1922
1923 if (is_leaf(page(child_id)))
1924 {
1929 }
1930 else
1931 {
1936 }
1937
1938 rebuild_separators(parent_id);
1939 }
1940
1941 void merge_children(const page_id_t parent_id, const size_t left_idx)
1942 {
1943 const page_id_t left_id = page(parent_id).children[left_idx];
1944 const page_id_t right_id = page(parent_id).children[left_idx + 1];
1945
1946 if (is_leaf(page(left_id)))
1947 {
1948 Page & left = page(left_id);
1949 const Page right = page(right_id);
1950
1951 for (size_t i = 0; i < right.key_count; ++i)
1952 left.keys[left.key_count++] = right.keys[i];
1953 left.link = right.link;
1954
1957 {
1960 }
1961 }
1962 else
1963 {
1964 Page & left = page(left_id);
1965 const Page right = page(right_id);
1966
1967 for (size_t i = 0; i < right.child_count; ++i)
1968 left.children[left.child_count++] = right.children[i];
1970 }
1971
1972 erase_child_at(page(parent_id), left_idx + 1);
1973 rebuild_separators(parent_id);
1975 }
1976
1977 void fix_underflow(const page_id_t parent_id, const size_t idx)
1978 {
1979 const page_id_t child_id = page(parent_id).children[idx];
1980 if (page(child_id).key_count >= MinKeys)
1981 {
1982 rebuild_separators(parent_id);
1983 return;
1984 }
1985
1986 if (idx > 0
1987 and page(page(parent_id).children[idx - 1]).key_count > MinKeys)
1988 {
1989 borrow_from_prev(parent_id, idx);
1990 return;
1991 }
1992
1993 if (idx + 1 < page(parent_id).child_count
1994 and page(page(parent_id).children[idx + 1]).key_count > MinKeys)
1995 {
1996 borrow_from_next(parent_id, idx);
1997 return;
1998 }
1999
2000 if (idx + 1 < page(parent_id).child_count)
2001 merge_children(parent_id, idx);
2002 else
2003 merge_children(parent_id, idx - 1);
2004 }
2005
2006 [[nodiscard]] bool remove_from(const page_id_t page_id, const Key & key)
2007 {
2008 if (is_leaf(page(page_id)))
2009 {
2010 const size_t idx = lower_bound_index(page(page_id), key);
2011 if (idx >= page(page_id).key_count or not equals(page(page_id).keys[idx], key))
2012 return false;
2013
2014 erase_key_at(page(page_id), idx);
2015 mark_page_dirty(page_id);
2016 return true;
2017 }
2018
2019 const size_t idx = child_index(page(page_id), key);
2020 const page_id_t child_id = page(page_id).children[idx];
2021 if (not remove_from(child_id, key))
2022 return false;
2023
2024 fix_underflow(page_id, idx);
2025 rebuild_separators(page_id);
2026 return true;
2027 }
2028
2030 {
2031 if (size_ == 0)
2032 return first_leaf_page_ == 0;
2033
2034 if (first_leaf_page_ == 0)
2035 return false;
2036
2038 size_t steps = 0;
2039 size_t counted = 0;
2040 std::optional<Key> prev;
2041
2042 while (leaf_id != 0)
2043 {
2044 if (++steps > pages_.size())
2045 return false;
2046
2047 const Page & leaf = page(leaf_id);
2048 if (not is_leaf(leaf) or leaf.key_count == 0 or leaf.child_count != 0)
2049 return false;
2050
2051 for (size_t i = 0; i < leaf.key_count; ++i)
2052 {
2053 if (prev.has_value() and not cmp_(*prev, leaf.keys[i]))
2054 return false;
2055 prev = leaf.keys[i];
2056 ++counted;
2057 }
2058
2059 if (leaf.link != 0)
2060 {
2061 const Page & next = page(leaf.link);
2062 if (not is_leaf(next) or next.key_count == 0)
2063 return false;
2064 if (not cmp_(leaf.keys[leaf.key_count - 1], next.keys[0]))
2065 return false;
2066 }
2067
2068 leaf_id = leaf.link;
2069 }
2070
2071 return counted == size_;
2072 }
2073
2074 [[nodiscard]] bool verify_node(const page_id_t page_id,
2075 const std::optional<Key> & min_key,
2076 const std::optional<Key> & max_key,
2077 const size_t depth,
2078 size_t & leaf_depth,
2079 size_t & counted) const
2080 {
2081 const Page & p = page(page_id);
2082 if (is_free(p))
2083 return false;
2084
2085 if (page_id != root_page_)
2086 {
2087 if (p.key_count < MinKeys or p.key_count > MaxKeys)
2088 return false;
2089 }
2090 else if (p.key_count > MaxKeys)
2091 return false;
2092
2093 if (not strictly_sorted(p))
2094 return false;
2095
2096 if (is_leaf(p))
2097 {
2098 if (p.child_count != 0)
2099 return false;
2100
2101 if (min_key.has_value() and p.key_count > 0 and cmp_(p.keys[0], *min_key))
2102 return false;
2103
2104 if (max_key.has_value() and p.key_count > 0 and not cmp_(p.keys[p.key_count - 1], *max_key))
2105 return false;
2106
2107 counted += p.key_count;
2108
2109 if (leaf_depth == static_cast<size_t>(-1))
2110 leaf_depth = depth;
2111 return leaf_depth == depth;
2112 }
2113
2114 if (p.child_count != p.key_count + 1 or p.child_count == 0)
2115 return false;
2116
2117 for (size_t i = 1; i < p.child_count; ++i)
2118 if (not equals(p.keys[i - 1], subtree_min(p.children[i])))
2119 return false;
2120
2121 for (size_t i = 0; i < p.child_count; ++i)
2122 {
2123 std::optional<Key> child_min = min_key;
2124 std::optional<Key> child_max = max_key;
2125
2126 if (i > 0)
2127 child_min = p.keys[i - 1];
2128 if (i < p.key_count)
2129 child_max = p.keys[i];
2130
2132 depth + 1, leaf_depth, counted))
2133 return false;
2134 }
2135
2136 return true;
2137 }
2138
2140 {
2141 if (auto_sync_)
2142 sync();
2143 }
2144
2145 public:
2146 using key_type = Key;
2150
2153
2161 {
2162 const tree_type * tree_ = nullptr;
2164 size_t idx_ = 0;
2165 std::optional<Key> last_;
2166
2168 {
2169 while (tree_ != nullptr and leaf_id_ != 0)
2170 {
2171 const auto & leaf = tree_->page(leaf_id_);
2172 if (idx_ >= leaf.key_count)
2173 {
2174 leaf_id_ = leaf.link;
2175 idx_ = 0;
2176 continue;
2177 }
2178
2179 if (last_.has_value() and tree_->cmp_(*last_, leaf.keys[idx_]))
2180 {
2181 leaf_id_ = 0;
2182 idx_ = 0;
2183 }
2184 return;
2185 }
2186 }
2187
2188 public:
2193
2199 : tree_(&tree), leaf_id_(tree.first_leaf_page_), idx_(0)
2200 {
2201 skip_past_end();
2202 }
2203
2211 Iterator(const tree_type & tree, const Key & first, const Key & last)
2212 : tree_(&tree), leaf_id_(0), idx_(0), last_(last)
2213 {
2214 ah_invalid_argument_if(tree.cmp_(last, first))
2215 << "File_BPlus_Tree::Iterator(): invalid range ["
2216 << first << ", " << last << "]";
2217
2218 leaf_id_ = tree.find_leaf_page(first);
2219 if (leaf_id_ != 0)
2220 idx_ = tree.lower_bound_index(tree.page(leaf_id_), first);
2221 skip_past_end();
2222 }
2223
2229 {
2230 return tree_ != nullptr and leaf_id_ != 0;
2231 }
2232
2237 [[nodiscard]] const Key & get_curr() const
2238 {
2240 << "File_BPlus_Tree::Iterator::get_curr(): no current key";
2241 return tree_->page(leaf_id_).keys[idx_];
2242 }
2243
2248 void next_ne()
2249 {
2251 << "File_BPlus_Tree::Iterator::next_ne(): no current key";
2252 ++idx_;
2253 skip_past_end();
2254 }
2255
2260 void next()
2261 {
2262 next_ne();
2263 }
2264
2269 [[nodiscard]] const Key & operator * () const
2270 {
2271 return get_curr();
2272 }
2273
2278 [[nodiscard]] const Key * operator -> () const
2279 {
2280 return &get_curr();
2281 }
2282 };
2283
2293 explicit Gen_File_BPlus_Tree(std::string file_path,
2294 const bool auto_sync = true,
2295 const Compare & cmp = Compare())
2298 auto_sync, cmp) {}
2299
2312 explicit Gen_File_BPlus_Tree(std::string file_path,
2314 const bool auto_sync = true,
2315 const Compare & cmp = Compare())
2316 : cmp_(cmp),
2317 file_path_(std::move(file_path)),
2320 : auto_sync)
2321 {
2323 << "File_BPlus_Tree requires a non-empty file path";
2324
2327 if (read_only())
2329 << "File_BPlus_Tree: cannot open missing file " << file_path_
2330 << " in read-only mode";
2332
2333 if (file_exists())
2334 {
2335 open_storage(false);
2336 file_.seekg(0, std::ios::end);
2337 const std::streamoff file_size = file_.tellg();
2339 << "File_BPlus_Tree: cannot determine the size of " << file_path_;
2340
2341 if (file_size == 0)
2342 {
2343 ensure_writable("constructor");
2345 }
2346 else
2348 }
2349 else
2351 }
2352
2362 explicit Gen_File_BPlus_Tree(const char * file_path,
2363 const bool auto_sync = true,
2364 const Compare & cmp = Compare())
2365 : Gen_File_BPlus_Tree(file_path == nullptr ? std::string()
2366 : std::string(file_path),
2367 auto_sync, cmp) {}
2368
2381 explicit Gen_File_BPlus_Tree(const char * file_path,
2383 const bool auto_sync = true,
2384 const Compare & cmp = Compare())
2385 : Gen_File_BPlus_Tree(file_path == nullptr ? std::string()
2386 : std::string(file_path),
2387 open_mode, auto_sync, cmp) {}
2388
2391
2394
2397
2400
2405 {
2406 if (file_.is_open())
2407 file_.close();
2408 }
2409
2418 [[nodiscard]] const Compare & key_comp() const noexcept { return cmp_; }
2419
2424 [[nodiscard]] const Compare & get_compare() const noexcept
2425 {
2426 return key_comp();
2427 }
2428
2433 [[nodiscard]] const std::string & file_path() const noexcept
2434 {
2435 return file_path_;
2436 }
2437
2442 [[nodiscard]] const std::string & get_file_path() const noexcept
2443 {
2444 return file_path();
2445 }
2446
2455
2461 {
2462 return read_only();
2463 }
2464
2470 {
2471 return auto_sync_;
2472 }
2473
2478 void set_auto_sync(const bool enabled) noexcept
2479 {
2480 auto_sync_ = enabled;
2481 }
2482
2488 {
2489 return page_bytes();
2490 }
2491
2497 {
2498 return pages_.size() - 1;
2499 }
2500
2506 {
2507 return checkpoint_sequence_;
2508 }
2509
2513 void checkpoint() const
2514 {
2515 sync();
2516 }
2517
2547
2552 void reload()
2553 {
2556 << "File_BPlus_Tree: backing file " << file_path_ << " no longer exists";
2558 }
2559
2564 [[nodiscard]] bool empty() const noexcept { return size_ == 0; }
2565
2570 [[nodiscard]] bool is_empty() const noexcept { return empty(); }
2571
2576 [[nodiscard]] size_t size() const noexcept { return size_; }
2577
2583 {
2584 size_t h = 0;
2585 for (page_id_t curr = root_page_; curr != 0; )
2586 {
2587 ++h;
2588 if (is_leaf(page(curr)))
2589 break;
2590 curr = page(curr).children[0];
2591 }
2592 return h;
2593 }
2594
2598 void clear()
2599 {
2600 ensure_writable("clear");
2603 }
2604
2610 [[nodiscard]] bool contains(const Key & key) const
2611 {
2612 const page_id_t leaf_id = find_leaf_page(key);
2613 if (leaf_id == 0)
2614 return false;
2615
2616 const Page & leaf = page(leaf_id);
2617 const size_t idx = lower_bound_index(leaf, key);
2618 return idx < leaf.key_count and equals(leaf.keys[idx], key);
2619 }
2620
2626 [[nodiscard]] bool search(const Key & key) const
2627 {
2628 return contains(key);
2629 }
2630
2638 bool insert(const Key & key)
2639 {
2640 ensure_writable("insert");
2641 if (contains(key))
2642 return false;
2643
2644 if (root_page_ == 0)
2645 {
2647 page(leaf_id).keys[0] = key;
2648 page(leaf_id).key_count = 1;
2651 ++size_;
2655 return true;
2656 }
2657
2658 if (page(root_page_).key_count == MaxKeys)
2659 {
2666 }
2667
2669 ++size_;
2672 return true;
2673 }
2674
2682 bool insert(Key && key)
2683 {
2684 Key copy = std::move(key);
2685 return insert(copy);
2686 }
2687
2694 bool remove(const Key & key)
2695 {
2696 ensure_writable("remove");
2697 if (root_page_ == 0)
2698 return false;
2699
2700 if (not remove_from(root_page_, key))
2701 return false;
2702
2703 --size_;
2705
2706 if (is_leaf(page(root_page_)) and page(root_page_).key_count == 0)
2707 {
2709 root_page_ = 0;
2710 first_leaf_page_ = 0;
2712 }
2713 else if (is_internal(page(root_page_)) and page(root_page_).child_count == 1)
2714 {
2719 }
2720
2722 return true;
2723 }
2724
2729 [[nodiscard]] std::optional<Key> min_key() const
2730 {
2731 if (first_leaf_page_ == 0)
2732 return std::nullopt;
2733 return page(first_leaf_page_).keys[0];
2734 }
2735
2740 [[nodiscard]] std::optional<Key> max_key() const
2741 {
2742 if (root_page_ == 0)
2743 return std::nullopt;
2745 return page(leaf_id).keys[page(leaf_id).key_count - 1];
2746 }
2747
2753 [[nodiscard]] std::optional<Key> lower_bound(const Key & key) const
2754 {
2756 if (leaf_id == 0)
2757 return std::nullopt;
2758
2759 size_t idx = lower_bound_index(page(leaf_id), key);
2760 while (leaf_id != 0)
2761 {
2762 const Page & leaf = page(leaf_id);
2763 if (idx < leaf.key_count)
2764 return leaf.keys[idx];
2765 leaf_id = leaf.link;
2766 idx = 0;
2767 }
2768
2769 return std::nullopt;
2770 }
2771
2777 [[nodiscard]] std::optional<Key> upper_bound(const Key & key) const
2778 {
2780 if (leaf_id == 0)
2781 return std::nullopt;
2782
2783 size_t idx = upper_bound_index(page(leaf_id), key);
2784 while (leaf_id != 0)
2785 {
2786 const Page & leaf = page(leaf_id);
2787 if (idx < leaf.key_count)
2788 return leaf.keys[idx];
2789 leaf_id = leaf.link;
2790 idx = 0;
2791 }
2792
2793 return std::nullopt;
2794 }
2795
2801 {
2802 return Iterator(*this);
2803 }
2804
2812 [[nodiscard]] Iterator get_range_it(const Key & first, const Key & last) const
2813 {
2814 return Iterator(*this, first, last);
2815 }
2816
2825 [[nodiscard]] Array<Key> range(const Key & first, const Key & last) const
2826 {
2828 out.reserve(8);
2829 for (auto it = get_range_it(first, last); it.has_curr(); it.next_ne())
2830 out.append(it.get_curr());
2831
2832 return out;
2833 }
2834
2841 {
2843 out.reserve(size_ == 0 ? 1 : size_);
2844 for (auto it = get_it(); it.has_curr(); it.next_ne())
2845 out.append(it.get_curr());
2846
2847 return out;
2848 }
2849
2854 [[nodiscard]] bool verify() const
2855 {
2856 if (root_page_ == 0)
2857 return size_ == 0 and first_leaf_page_ == 0;
2858
2859 if (root_page_ >= pages_.size() or first_leaf_page_ >= pages_.size())
2860 return false;
2861
2863 return false;
2864
2865 size_t leaf_depth = static_cast<size_t>(-1);
2866 size_t counted = 0;
2867 return verify_node(root_page_, std::nullopt, std::nullopt, 1,
2868 leaf_depth, counted)
2869 and counted == size_
2871 }
2872 };
2873
2875 template <typename Key,
2876 class Compare = Aleph::less<Key>,
2877 size_t MinDegree = 16,
2880} // namespace Aleph
2881
2882# endif // TPL_FILE_BPLUS_TREE_H
C++20 concepts for constraining comparison functors.
Exception handling system with formatted messages for Aleph-w.
#define ah_runtime_error_unless(C)
Throws std::runtime_error if condition does NOT hold.
Definition ah-errors.H:250
#define ah_runtime_error()
Throws std::runtime_error unconditionally.
Definition ah-errors.H:282
#define ah_underflow_error_if(C)
Throws std::underflow_error if condition holds.
Definition ah-errors.H:368
#define ah_invalid_argument_if(C)
Throws std::invalid_argument if condition holds.
Definition ah-errors.H:639
Standard functor implementations and comparison objects.
long double h
Definition btreepic.C:154
Simple dynamic array with automatic resizing and functional operations.
Definition tpl_array.H:139
constexpr size_t size() const noexcept
Return the number of elements stored in the stack.
Definition tpl_array.H:351
void empty() noexcept
Empties the container.
Definition tpl_array.H:336
T & append(const T &data)
Append a copy of data
Definition tpl_array.H:245
void reserve(size_t cap)
Reserves cap cells into the array.
Definition tpl_array.H:315
Lazy iterator over ordered keys stored in leaf pages.
void next_ne()
Advance to the next key.
Iterator(const tree_type &tree, const Key &first, const Key &last)
Construct an iterator over the inclusive range [first, last].
bool has_curr() const noexcept
Return whether the iterator still points to a key.
const Key & get_curr() const
Return the current key.
const Key * operator->() const
Member-access shorthand for the current key.
Iterator() noexcept=default
Construct an empty iterator.
const Key & operator*() const
Dereference the iterator.
Persistent page-managed B+ Tree stored in a single binary file.
static void erase_child_at(Page &page, const size_t idx)
void write_current_page_to_storage(const page_id_t page_id, const Page &page) const
std::streamoff current_page_offset(const page_id_t page_id) const noexcept
static void write_pod(std::ostream &out, const T &value, const std::string &file_path, const char *what)
const Compare & key_comp() const noexcept
Return the comparison object.
static void insert_key_at(Page &page, const size_t idx, const Key &key)
static constexpr size_t MaxChildren
static std::uint32_t header_checksum_v2(const std::uint32_t version, const std::uint64_t size, const page_id_t root_page, const page_id_t first_leaf_page, const std::uint64_t page_count, const page_id_t free_page_head) noexcept
static constexpr size_t page_bytes() noexcept
static constexpr bool is_internal(const Page &page) noexcept
void apply_current_cache_to_storage(const std::uint64_t checkpoint_sequence) const
size_t child_index(const Page &page, const Key &key) const
bool is_read_only() const noexcept
Return whether this handle is read-only.
void mark_page_dirty(const page_id_t page_id)
bool insert(const Key &key)
Insert a key if it is not already present.
static constexpr size_t serialized_header_bytes(const std::uint32_t version) noexcept
Page & page(const page_id_t page_id)
static void ensure_legacy_native_format_supported(const std::string &source_path, const std::uint32_t version)
void insert_nonfull(const page_id_t page_id, const Key &key)
static Page read_page_from_stream(std::istream &in, const std::string &path, const std::uint32_t version)
void seekp_to(const std::streamoff offset) const
Gen_File_BPlus_Tree(Gen_File_BPlus_Tree &&)=delete
Moving is disabled.
size_t lower_bound_index(const Page &page, const Key &key) const
void write_wal_to_path(const std::string &target_path, const std::uint64_t checkpoint_sequence) const
page_id_t page_id_type
Type of on-disk page identifiers.
bool empty() const noexcept
Return whether the tree has no keys.
page_id_t rightmost_leaf_page(page_id_t page_id) const
static void ensure_legacy_native_wal_supported(const std::string &source_path, const std::uint32_t wal_version)
static constexpr std::array< char, 7 > reserved_bytes_for_version(const std::uint32_t version) noexcept
static constexpr std::uint32_t WalVersion
Gen_File_BPlus_Tree & operator=(const Gen_File_BPlus_Tree &)=delete
Assignment is disabled.
static void write_key(std::ostream &out, const Key &key, const std::string &file_path, const char *what)
static constexpr size_t wal_trailer_bytes() noexcept
const Page & page(const page_id_t page_id) const
static constexpr size_t checksummed_header_bytes() noexcept
static constexpr size_t checksummed_page_bytes() noexcept
static std::uint32_t wal_header_checksum(const std::uint32_t wal_version, const std::uint64_t size, const page_id_t root_page, const page_id_t first_leaf_page, const std::uint64_t page_count, const page_id_t free_page_head, const std::uint64_t checkpoint_sequence, const std::uint64_t dirty_count) noexcept
Key subtree_min(const page_id_t page_id) const
const std::string & file_path() const noexcept
Return the backing file path.
bool strictly_sorted(const Page &page) const
const std::string & get_file_path() const noexcept
Synonym for file_path().
void mark_header_dirty() const noexcept
static std::uint32_t wal_record_checksum_add(std::uint32_t crc, const page_id_t page_id, const Page &page)
static std::uint32_t add_native_key_to_crc(const std::uint32_t crc, const Key &key)
Iterator get_it() const noexcept
Return a lazy iterator over the full key order.
static Key read_native_key(std::istream &in, const std::string &file_path, const char *what)
Paged_File_Open_Mode open_mode() const noexcept
Return the access mode used to open the file.
page_id_t find_leaf_page(const Key &key) const
void release_page(const page_id_t page_id)
static constexpr std::uint32_t ChecksummedFormatVersion
bool verify_node(const page_id_t page_id, const std::optional< Key > &min_key, const std::optional< Key > &max_key, const size_t depth, size_t &leaf_depth, size_t &counted) const
static constexpr bool is_leaf(const Page &page) noexcept
static constexpr size_t MinKeys
Gen_File_BPlus_Tree(const char *file_path, const bool auto_sync=true, const Compare &cmp=Compare())
Open or create a paged persistent B+ Tree file for read-write access.
std::optional< std::uint32_t > storage_format_version_on_disk() const
static std::uint32_t wal_record_checksum_add_v3(std::uint32_t crc, const page_id_t page_id, const Page &page)
static Key read_key(std::istream &in, const std::string &file_path, const char *what, const std::uint32_t version)
static constexpr std::uint32_t wal_record_format_version(const std::uint32_t wal_version) noexcept
static constexpr size_t wal_record_bytes(const std::uint32_t wal_version) noexcept
std::optional< Key > max_key() const
Return the maximum key, if any.
static constexpr std::uint8_t EndianMarker
std::uint64_t checkpoint_sequence() const noexcept
Return the durable checkpoint sequence stored in the backing file.
void borrow_from_next(const page_id_t parent_id, const size_t idx)
static constexpr bool is_free(const Page &page) noexcept
void sync() const
Flush the current tree image using redo WAL or full-image fallback.
static constexpr const char * tree_kind() noexcept
static constexpr std::uint32_t key_size_for_version(const std::uint32_t version) noexcept
bool auto_sync_enabled() const noexcept
Return whether automatic synchronization is enabled.
Loaded_Image read_image(const std::string &source_path) const
static constexpr size_t portable_page_bytes() noexcept
bool verify() const
Verify structural B+ Tree invariants across cached pages.
static constexpr std::uint32_t PortableFormatVersion
static std::uint32_t add_key_to_crc(const std::uint32_t crc, const Key &key)
void clear_dirty_state() const noexcept
void write_header_to_storage(const std::uint64_t size, const page_id_t root_page, const page_id_t first_leaf_page, const std::uint64_t page_count, const page_id_t free_page_head, const std::uint64_t checkpoint_sequence) const
Gen_File_BPlus_Tree(std::string file_path, const Paged_File_Open_Mode open_mode, const bool auto_sync=true, const Compare &cmp=Compare())
Open a paged persistent B+ Tree file with an explicit access mode.
static T read_pod(std::istream &in, const std::string &file_path, const char *what)
bool remove(const Key &key)
Remove a key if present.
const Compare & get_compare() const noexcept
Return the comparison object.
static constexpr auto Read_Only
void open_storage(const bool truncate=false) const
Gen_File_BPlus_Tree(const char *file_path, const Paged_File_Open_Mode open_mode, const bool auto_sync=true, const Compare &cmp=Compare())
Open a paged persistent B+ Tree file with an explicit access mode.
static constexpr size_t legacy_header_bytes() noexcept
static std::uint32_t wal_record_checksum_add_v2(std::uint32_t crc, const page_id_t page_id, const Page &page)
static void write_native_key(std::ostream &out, const Key &key, const std::string &file_path, const char *what)
static constexpr size_t serialized_page_bytes(const std::uint32_t version) noexcept
void reload()
Discard unsynchronized changes and reload pages from disk.
static constexpr auto wal_trailer_magic() noexcept
bool search(const Key &key) const
Synonym for contains().
bool is_empty() const noexcept
Synonym for empty().
Array< unsigned char > dirty_pages_
Gen_File_BPlus_Tree(const Gen_File_BPlus_Tree &)=delete
Copying is disabled.
void clear()
Remove every key from the tree.
void rebuild_separators(const page_id_t page_id)
void write_current_header_to_storage(const std::uint64_t checkpoint_sequence) const
static constexpr std::uint32_t LegacyWalVersion
static std::uint32_t page_checksum_v3(const Page &page)
static std::uint32_t header_checksum(const std::uint32_t version, const std::uint64_t size, const page_id_t root_page, const page_id_t first_leaf_page, const std::uint64_t page_count, const page_id_t free_page_head, const std::uint64_t checkpoint_sequence) noexcept
std::optional< Key > min_key() const
Return the minimum key, if any.
static constexpr bool version_uses_portable_codec(const std::uint32_t version) noexcept
static constexpr std::uint8_t PortableEncodingMarker
static void erase_key_at(Page &page, const size_t idx)
size_t size() const noexcept
Return the number of stored keys.
void merge_children(const page_id_t parent_id, const size_t left_idx)
page_id_t leftmost_leaf_page(page_id_t page_id) const
Array< Key > range(const Key &first, const Key &last) const
Collect all keys in the inclusive range [first, last].
page_id_t allocate_page(const PageKind kind)
static constexpr size_t header_bytes() noexcept
Paged_File_Open_Mode open_mode_
static std::uint32_t page_checksum_v4(const Page &page)
static constexpr std::uint32_t CommitTrailerWalVersion
bool remove_from(const page_id_t page_id, const Key &key)
static constexpr std::uint32_t PortableWalVersion
static void write_portable_key(std::ostream &out, const Key &key, const std::string &file_path, const char *what)
~Gen_File_BPlus_Tree() noexcept
Close the backing file.
void set_auto_sync(const bool enabled) noexcept
Enable or disable automatic flushing of dirty pages.
bool read_only() const noexcept
static constexpr std::uint32_t HeaderCheckpointFormatVersion
static constexpr std::uint32_t EncodedKeySize
Gen_File_BPlus_Tree(std::string file_path, const bool auto_sync=true, const Compare &cmp=Compare())
Open or create a paged persistent B+ Tree file for read-write access.
size_t upper_bound_index(const Page &page, const Key &key) const
detail::Pid_File_Lock_Mode lock_mode() const noexcept
static constexpr size_t MaxKeys
size_t dirty_page_count() const noexcept
static constexpr size_t native_page_bytes() noexcept
void split_child(const page_id_t parent_id, const size_t idx)
static constexpr std::uint8_t encoding_marker_for_version(const std::uint32_t version) noexcept
static constexpr auto Read_Write
static constexpr size_t wal_header_bytes() noexcept
static constexpr std::uint32_t NativeWalVersion
size_t page_count() const noexcept
Return the number of allocated pages currently tracked.
static void write_page_record(std::ostream &out, const Page &page, const std::string &target_path)
void fix_underflow(const page_id_t parent_id, const size_t idx)
void checkpoint() const
Synonym for sync().
Array< Key > keys() const
Materialize the tree contents in sorted order.
static Key read_portable_key(std::istream &in, const std::string &file_path, const char *what)
static constexpr size_t legacy_page_bytes() noexcept
static constexpr auto wal_magic() noexcept
static constexpr std::uint32_t LegacyFormatVersion
void borrow_from_prev(const page_id_t parent_id, const size_t idx)
Iterator get_range_it(const Key &first, const Key &last) const
Return a lazy iterator over an inclusive key range.
bool contains(const Key &key) const
Return whether a key is present.
static constexpr std::uint32_t NativeCheckpointFormatVersion
bool insert(Key &&key)
Insert a key by move if it is not already present.
std::optional< Page > read_storage_page_if_current(const page_id_t page_id) const
std::optional< Key > upper_bound(const Key &key) const
Return the first key strictly greater than key.
void ensure_writable(const char *operation) const
size_t page_size_bytes() const noexcept
Return the fixed serialized page size.
bool has_pending_changes() const noexcept
size_t height() const noexcept
Return the current height of the paged tree.
void stamp_all_pages_for_checkpoint(const std::uint64_t checkpoint_sequence) noexcept
static void insert_child_at(Page &page, const size_t idx, const page_id_t child_id)
static constexpr auto file_magic() noexcept
void write_full_image_to_path(const std::string &target_path, const std::uint64_t checkpoint_sequence) const
std::optional< Key > lower_bound(const Key &key) const
Return the first key not less than key.
static constexpr Page make_page(const PageKind kind) noexcept
static std::uint32_t page_checksum(const Page &page)
static constexpr std::uint32_t FormatVersion
bool equals(const Key &lhs, const Key &rhs) const
void write_full_image(std::ostream &out, const std::string &target_path, const std::uint64_t checkpoint_sequence) const
void acquire(const std::string &path, const char *kind, const Pid_File_Lock_Mode mode=Pid_File_Lock_Mode::Exclusive)
int cmp(const __gmp_expr< T, U > &expr1, const __gmp_expr< V, W > &expr2)
Definition gmpfrxx.h:4118
const long double offset[]
Offset values indexed by symbol string length (bounded by MAX_OFFSET_INDEX)
std::uint32_t crc32_add_bytes(std::uint32_t crc, const void *data, const size_t size) noexcept
void sync_parent_directory(const std::string &path, const char *kind)
constexpr std::array< char, Ordered_Tree_Snapshot_Magic_Size > ordered_tree_snapshot_magic(const char(&text)[N]) noexcept
void copy_file_contents(const std::string &src, const std::string &dst, const char *kind)
void remove_file_if_exists(const std::string &path, const char *kind)
std::uint32_t crc32_begin() noexcept
std::uint32_t crc32_add(std::uint32_t crc, const T &value) noexcept
constexpr size_t Ordered_Tree_Snapshot_Magic_Size
void rename_file_atomic(const std::string &src, const std::string &dst, const char *kind)
std::uint32_t crc32_finish(const std::uint32_t crc) noexcept
void sync_file_by_path(const std::string &path, const char *kind)
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
and
Check uniqueness with explicit hash + equality functors.
Paged_File_Open_Mode
Open mode for paged persistent tree files.
Itor2 copy(Itor1 sourceBeg, const Itor1 &sourceEnd, Itor2 destBeg)
Copy elements from one range to another.
Definition ahAlgo.H:584
Divide_Conquer_DP_Result< Cost > divide_and_conquer_partition_dp(const size_t groups, const size_t n, Transition_Cost_Fn transition_cost, const Cost inf=dp_optimization_detail::default_inf< Cost >())
Optimize partition DP using divide-and-conquer optimization.
std::decay_t< typename HeadC::Item_Type > T
Definition ah-zip.H:105
void next()
Advance all underlying iterators (bounds-checked).
Definition ah-zip.H:171
auto mode(const Container &data) -> std::decay_t< decltype(*std::begin(data))>
Compute the mode (most frequent value).
Definition stat_utils.H:456
Itor::difference_type count(const Itor &beg, const Itor &end, const T &value)
Count elements equal to a value.
Definition ahAlgo.H:127
STL namespace.
Dynamic array container with automatic resizing.
Helpers for durable paged-tree files.
Fixed-size portable codecs for paged persistent tree payloads.
Comprehensive sorting algorithms and search utilities for Aleph-w.
Internal helpers for snapshot-backed persistent tree wrappers.