Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
tpl_file_b_map.H
Go to the documentation of this file.
1/*
2 Aleph_w
3
4 Data structures & Algorithms
5 version 2.0.0b
6 https://github.com/lrleon/Aleph-w
7
8 This file is part of Aleph-w library
9
10 Copyright (c) 2002-2026 Leandro Rabindranath Leon
11
12 Permission is hereby granted, free of charge, to any person obtaining a copy
13 of this software and associated documentation files (the "Software"), to deal
14 in the Software without restriction, including without limitation the rights
15 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16 copies of the Software, and to permit persons to whom the Software is
17 furnished to do so, subject to the following conditions:
18
19 The above copyright notice and this permission notice shall be included in all
20 copies or substantial portions of the Software.
21
22 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 SOFTWARE.
29*/
30
40# ifndef TPL_FILE_B_MAP_H
41# define TPL_FILE_B_MAP_H
42
43# include <array>
44# include <bit>
45# include <cstddef>
46# include <cstring>
47# include <exception>
48# include <optional>
49# include <ostream>
50# include <string>
51# include <type_traits>
52# include <utility>
53
54# include <ah-concepts.H>
55# include <ah-errors.H>
56# include <tpl_array.H>
57# include <tpl_file_b_tree.H>
58
59namespace Aleph
60{
89 template <typename Key,
90 typename Value,
91 class Compare = Aleph::less<Key>,
92 size_t MinDegree = 16,
97 {
100
101 struct Record
102 {
103 std::array<unsigned char,
104 key_codec::encoded_size + value_codec::encoded_size> bytes = {};
105
106 friend std::ostream & operator << (std::ostream & out, const Record &)
107 {
108 return out << "<record>";
109 }
110
111 [[nodiscard]] Key get_key() const
112 {
113 return key_codec::decode(bytes.data());
114 }
115
117 {
118 return value_codec::decode(bytes.data() + key_codec::encoded_size);
119 }
120 };
121
123 {
124 static constexpr std::uint32_t storage_id =
126 key_codec::storage_id),
127 value_codec::storage_id);
128 static constexpr size_t encoded_size =
129 key_codec::encoded_size + value_codec::encoded_size;
130
131 static void encode(const Record & record, unsigned char * out) noexcept
132 {
133 std::memcpy(out, record.bytes.data(), record.bytes.size());
134 }
135
136 [[nodiscard]] static Record decode(const unsigned char * in) noexcept
137 {
138 Record record;
139 std::memcpy(record.bytes.data(), in, record.bytes.size());
140 return record;
141 }
142
143 [[nodiscard]] static std::uint32_t add_to_crc(
144 const std::uint32_t crc,
145 const Record & record) noexcept
146 {
147 return detail::crc32_add_bytes(crc, record.bytes.data(),
148 record.bytes.size());
149 }
150 };
151
153 {
154 Compare cmp;
155
156 bool operator () (const Record & lhs, const Record & rhs) const
157 {
158 return cmp(lhs.get_key(), rhs.get_key());
159 }
160 };
161
164
166 "File_B_Map requires a fixed-size key codec");
168 "File_B_Map requires a fixed-size mapped-value codec");
169 static_assert(std::is_trivially_copyable_v<Record>,
170 "File_B_Map requires trivially copyable records");
171
173
174 [[nodiscard]] static Record make_probe(const Key & key)
175 {
176 Record record;
177 key_codec::encode(key, record.bytes.data());
178 return record;
179 }
180
181 [[nodiscard]] static Record make_record(const Key & key,
182 const Value & value)
183 {
184 Record record;
185 key_codec::encode(key, record.bytes.data());
186 value_codec::encode(value, record.bytes.data() + key_codec::encoded_size);
187 return record;
188 }
189
190 [[nodiscard]] static std::pair<Key, Value>
191 to_pair(const Record & record)
192 {
193 return std::pair<Key, Value>(record.get_key(), record.get_value());
194 }
195
198 {
200 out.reserve(records.size());
201 for (const auto & rec : records)
202 out.append(to_pair(rec));
203 return out;
204 }
205
206 [[nodiscard]] bool equals_key(const Key & lhs, const Key & rhs) const
207 {
208 const auto & cmp = tree_.key_comp().cmp;
209 return not cmp(lhs, rhs) and not cmp(rhs, lhs);
210 }
211
212 [[nodiscard]] std::optional<Record> find_record(const Key & key) const
213 {
214 const auto probe = make_probe(key);
215 auto candidate = tree_.lower_bound(probe);
216 if (candidate.has_value() and equals_key(candidate->get_key(), key))
217 return candidate;
218 return std::nullopt;
219 }
220
221 [[nodiscard]] static const char * validated_path(const char * p)
222 {
223 ah_invalid_argument_if(p == nullptr) << "Gen_File_B_Map: file_path must not be null";
224 return p;
225 }
226
227 public:
228 using key_type = Key;
230 using value_type = std::pair<Key, Value>;
233
234 static constexpr auto Read_Write = tree_type::Read_Write;
235 static constexpr auto Read_Only = tree_type::Read_Only;
236
237
246 explicit Gen_File_B_Map(std::string file_path,
247 const bool auto_sync = true,
248 const Compare & cmp = Compare())
250
260 explicit Gen_File_B_Map(std::string file_path,
262 const bool auto_sync = true,
263 const Compare & cmp = Compare())
265
276 explicit Gen_File_B_Map(const char * file_path,
277 const bool auto_sync = true,
278 const Compare & cmp = Compare())
280 {
281 ah_invalid_argument_if(file_path == nullptr) << "Gen_File_B_Map: file_path must not be null";
282 }
283
295 explicit Gen_File_B_Map(const char * file_path,
297 const bool auto_sync = true,
298 const Compare & cmp = Compare())
300 {
301 ah_invalid_argument_if(file_path == nullptr) << "Gen_File_B_Map: file_path must not be null";
302 }
303
306
309
312
315
321 [[nodiscard]] const std::string & file_path() const noexcept
322 {
323 return tree_.file_path();
324 }
325
335
342 {
343 return tree_.is_read_only();
344 }
345
352 {
353 return tree_.auto_sync_enabled();
354 }
355
361 void set_auto_sync(const bool enabled) noexcept
362 {
363 tree_.set_auto_sync(enabled);
364 }
365
371 [[nodiscard]] const Compare & key_comp() const noexcept
372 {
373 return tree_.key_comp().cmp;
374 }
375
381 [[nodiscard]] bool empty() const noexcept { return tree_.empty(); }
382
388 [[nodiscard]] size_t size() const noexcept { return tree_.size(); }
389
395 [[nodiscard]] size_t height() const noexcept { return tree_.height(); }
396
403 {
404 return tree_.page_size_bytes();
405 }
406
413 {
414 return tree_.page_count();
415 }
416
423 {
424 return tree_.checkpoint_sequence();
425 }
426
432 void sync() const
433 {
434 tree_.sync();
435 }
436
441 void checkpoint() const
442 {
444 }
445
450 void reload()
451 {
452 tree_.reload();
453 }
454
460 void clear()
461 {
462 tree_.clear();
463 }
464
471 [[nodiscard]] bool contains(const Key & key) const
472 {
473 return find_record(key).has_value();
474 }
475
482 [[nodiscard]] bool search(const Key & key) const
483 {
484 return contains(key);
485 }
486
493 [[nodiscard]] std::optional<Value> find(const Key & key) const
494 {
495 const auto record = find_record(key);
496 if (not record.has_value())
497 return std::nullopt;
498 return record->get_value();
499 }
500
508 [[nodiscard]] Value at(const Key & key) const
509 {
510 const auto value = find(key);
511 ah_domain_error_if(not value.has_value())
512 << "File_B_Map::at(): key not found";
513 return *value;
514 }
515
525 bool insert(const Key & key, const Value & value)
526 {
527 return tree_.insert(make_record(key, value));
528 }
529
547 bool insert_or_assign(const Key & key, const Value & value)
548 {
549 const auto probe = make_probe(key);
550 const auto record = make_record(key, value);
551 if (tree_.insert(record))
552 return true;
553 // Suppress intermediate syncs so the remove+insert pair is written as a
554 // single WAL-flushed unit; restore and sync once at the end.
555 const bool was_sync = auto_sync_enabled();
556 set_auto_sync(false);
557 try
558 {
559 // Retrieve the full record (key+value) before removing so we can
560 // rollback to it if the subsequent insert throws.
561 auto old_record = tree_.lower_bound(probe);
563 equals_key(old_record->get_key(), key))
564 << "File_B_Map::insert_or_assign(): existing record not found";
566 << "File_B_Map::insert_or_assign(): internal replace failed";
567 try
568 {
570 << "File_B_Map::insert_or_assign(): internal reinsert failed";
571 }
572 catch (...)
573 {
574 auto ep = std::current_exception();
575 try { tree_.insert(*old_record); } catch (...) { /* rollback failed; propagate original */ }
576 std::rethrow_exception(ep);
577 }
578 }
579 catch (...)
580 {
582 throw;
583 }
585 if (was_sync)
586 sync();
587 return false;
588 }
589
597 bool remove(const Key & key)
598 {
599 return tree_.remove(make_probe(key));
600 }
601
607 [[nodiscard]] std::optional<value_type> min_item() const
608 {
609 const auto record = tree_.min_key();
610 if (not record.has_value())
611 return std::nullopt;
612 return to_pair(*record);
613 }
614
620 [[nodiscard]] std::optional<value_type> max_item() const
621 {
622 const auto record = tree_.max_key();
623 if (not record.has_value())
624 return std::nullopt;
625 return to_pair(*record);
626 }
627
634 [[nodiscard]] std::optional<value_type> lower_bound(const Key & key) const
635 {
636 const auto record = tree_.lower_bound(make_probe(key));
637 if (not record.has_value())
638 return std::nullopt;
639 return to_pair(*record);
640 }
641
648 [[nodiscard]] std::optional<value_type> upper_bound(const Key & key) const
649 {
650 const auto record = tree_.upper_bound(make_probe(key));
651 if (not record.has_value())
652 return std::nullopt;
653 return to_pair(*record);
654 }
655
662 {
664 const auto records = tree_.keys();
665 out.reserve(records.size());
666 for (const auto & rec : records)
667 out.append(rec.get_key());
668 return out;
669 }
670
677 {
679 const auto records = tree_.keys();
680 out.reserve(records.size());
681 for (const auto & rec : records)
682 out.append(rec.get_value());
683 return out;
684 }
685
692 {
693 return to_pairs(tree_.keys());
694 }
695
696
702 [[nodiscard]] bool verify() const
703 {
704 return tree_.verify();
705 }
706 };
707
709 template <typename Key,
710 typename Value,
711 class Compare = Aleph::less<Key>,
712 size_t MinDegree = 16,
715 using File_B_Map = Gen_File_B_Map<Key, Value, Compare, MinDegree,
717} // namespace Aleph
718
719# endif // TPL_FILE_B_MAP_H
C++20 concepts for constraining comparison functors.
Exception handling system with formatted messages for Aleph-w.
#define ah_runtime_error_unless(C)
Throws std::runtime_error if condition does NOT hold.
Definition ah-errors.H:250
#define ah_domain_error_if(C)
Throws std::domain_error if condition holds.
Definition ah-errors.H:522
#define ah_invalid_argument_if(C)
Throws std::invalid_argument if condition holds.
Definition ah-errors.H:639
Simple dynamic array with automatic resizing and functional operations.
Definition tpl_array.H:139
void reserve(size_t cap)
Reserves cap cells into the array.
Definition tpl_array.H:315
Persistent ordered map backed by a paged File_B_Tree.
std::pair< Key, Value > value_type
static const char * validated_path(const char *p)
static constexpr auto Read_Only
const Compare & key_comp() const noexcept
Return the key comparator.
static std::pair< Key, Value > to_pair(const Record &record)
static constexpr auto Read_Write
std::uint64_t checkpoint_sequence() const noexcept
Return the latest durable checkpoint sequence.
std::optional< value_type > max_item() const
Return the largest stored key/value pair.
size_t size() const noexcept
Return the number of stored key/value pairs.
bool equals_key(const Key &lhs, const Key &rhs) const
Value at(const Key &key) const
Return the mapped value associated with a key.
bool verify() const
Verify the structural invariants of the backing tree.
size_t height() const noexcept
Return the current tree height.
Gen_File_B_Map(Gen_File_B_Map &&)=delete
Moving is disabled.
std::optional< value_type > lower_bound(const Key &key) const
Return the first key/value pair whose key is not less than key.
bool is_read_only() const noexcept
Return whether the map handle is read-only.
bool search(const Key &key) const
Synonym for contains().
size_t page_count() const noexcept
Return the number of allocated pages.
bool empty() const noexcept
Return whether the map is empty.
void checkpoint() const
Synonym for sync().
std::optional< value_type > upper_bound(const Key &key) const
Return the first key/value pair whose key is greater than key.
std::optional< Record > find_record(const Key &key) const
void clear()
Remove every key/value pair from the map.
Gen_File_B_Tree< Record, Record_Compare, MinDegree, Record_Codec > tree_impl_type
Gen_File_B_Map(const char *file_path, const bool auto_sync=true, const Compare &cmp=Compare())
Open or create a persistent B-Tree map for read-write access.
Array< Value > values() const
Materialize all mapped values in key order.
bool remove(const Key &key)
Remove a key and its mapped value, if present.
std::optional< Value > find(const Key &key) const
Return the mapped value associated with a key, if any.
Gen_File_B_Map(const Gen_File_B_Map &)=delete
Copying is disabled.
static Array< std::pair< Key, Value > > to_pairs(const Array< Record > &records)
void sync() const
Flush pending changes to the backing file.
bool contains(const Key &key) const
Return whether a key exists.
size_t page_size_bytes() const noexcept
Return the serialized page size of the backing tree.
open_mode_type open_mode() const noexcept
Return the open mode.
bool insert(const Key &key, const Value &value)
Insert a new key/value pair if the key is not present.
void set_auto_sync(const bool enabled) noexcept
Enable or disable automatic synchronization.
static Record make_probe(const Key &key)
Gen_File_B_Map(std::string file_path, const open_mode_type open_mode, const bool auto_sync=true, const Compare &cmp=Compare())
Open or create a persistent B-Tree map with an explicit mode.
typename tree_type::open_mode_type open_mode_type
Array< value_type > items() const
Materialize all key/value pairs in sorted order.
Gen_File_B_Map(const char *file_path, const open_mode_type open_mode, const bool auto_sync=true, const Compare &cmp=Compare())
Open or create a persistent B-Tree map with an explicit mode.
static Record make_record(const Key &key, const Value &value)
Gen_File_B_Map(std::string file_path, const bool auto_sync=true, const Compare &cmp=Compare())
Open or create a persistent B-Tree map for read-write access.
const std::string & file_path() const noexcept
Return the file path.
void reload()
Discard unsynchronized changes and reload the file.
bool auto_sync_enabled() const noexcept
Return whether automatic synchronization is enabled.
std::optional< value_type > min_item() const
Return the smallest stored key/value pair.
bool insert_or_assign(const Key &key, const Value &value)
Insert or overwrite a key/value pair.
Gen_File_B_Map & operator=(const Gen_File_B_Map &)=delete
Assignment is disabled.
Array< Key > keys() const
Materialize all keys in sorted order.
Persistent page-managed B-Tree stored in a single binary file.
size_t page_count() const noexcept
Return the number of allocated pages currently tracked.
bool verify() const
Verify structural B-Tree invariants across cached pages.
void set_auto_sync(const bool enabled) noexcept
Enable or disable automatic flushing of dirty pages.
std::optional< Key > max_key() const
Return the maximum key, if any.
std::optional< Key > lower_bound(const Key &key) const
Return the first key not less than key.
void clear()
Remove every key from the tree.
bool auto_sync_enabled() const noexcept
Return whether automatic synchronization is enabled.
size_t height() const noexcept
Return the current height of the paged tree.
Array< Key > keys() const
Materialize the tree contents in sorted order.
static constexpr auto Read_Only
std::optional< Key > min_key() const
Return the minimum key, if any.
size_t page_size_bytes() const noexcept
Return the fixed serialized page size.
std::optional< Key > upper_bound(const Key &key) const
Return the first key strictly greater than key.
void checkpoint() const
Synonym for sync().
size_t size() const noexcept
Return the number of stored keys.
bool empty() const noexcept
Return whether the tree has no keys.
const Compare & key_comp() const noexcept
Return the comparison object.
Paged_File_Open_Mode open_mode() const noexcept
Return the access mode used to open the file.
void sync() const
Flush the current tree image using redo WAL or full-image fallback.
bool remove(const Key &key)
Remove a key if present.
Paged_File_Open_Mode open_mode_type
File opening mode type.
void reload()
Discard unsynchronized changes and reload pages from disk.
const std::string & file_path() const noexcept
Return the backing file path.
std::uint64_t checkpoint_sequence() const noexcept
Return the durable checkpoint sequence stored in the backing file.
static constexpr auto Read_Write
bool is_read_only() const noexcept
Return whether this handle is read-only.
bool insert(const Key &key)
Insert a key if it is not already present.
int cmp(const __gmp_expr< T, U > &expr1, const __gmp_expr< V, W > &expr2)
Definition gmpfrxx.h:4118
std::uint32_t crc32_add_bytes(std::uint32_t crc, const void *data, const size_t size) noexcept
constexpr std::uint32_t mix_codec_id(const std::uint32_t seed, const std::uint32_t value) noexcept
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
and
Check uniqueness with explicit hash + equality functors.
Divide_Conquer_DP_Result< Cost > divide_and_conquer_partition_dp(const size_t groups, const size_t n, Transition_Cost_Fn transition_cost, const Cost inf=dp_optimization_detail::default_inf< Cost >())
Optimize partition DP using divide-and-conquer optimization.
STL namespace.
static Record decode(const unsigned char *in) noexcept
static constexpr std::uint32_t storage_id
static void encode(const Record &record, unsigned char *out) noexcept
static std::uint32_t add_to_crc(const std::uint32_t crc, const Record &record) noexcept
static constexpr size_t encoded_size
bool operator()(const Record &lhs, const Record &rhs) const
std::array< unsigned char, key_codec::encoded_size+value_codec::encoded_size > bytes
friend std::ostream & operator<<(std::ostream &out, const Record &)
Dynamic array container with automatic resizing.
Page-managed persistent B-Tree with file-backed storage.