Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
Huffman.H
Go to the documentation of this file.
1
2/*
3 Aleph_w
4
5 Data structures & Algorithms
6 version 2.0.0b
7 https://github.com/lrleon/Aleph-w
8
9 This file is part of Aleph-w library
10
11 Copyright (c) 2002-2026 Leandro Rabindranath Leon
12
13 Permission is hereby granted, free of charge, to any person obtaining a copy
14 of this software and associated documentation files (the "Software"), to deal
15 in the Software without restriction, including without limitation the rights
16 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 copies of the Software, and to permit persons to whom the Software is
18 furnished to do so, subject to the following conditions:
19
20 The above copyright notice and this permission notice shall be included in all
21 copies or substantial portions of the Software.
22
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 SOFTWARE.
30*/
31
32
44# ifndef HUFFMAN_H
45# define HUFFMAN_H
46
47
48# include <memory>
49# include <istream>
50# include <tpl_binNodeUtils.H>
51# include <tpl_treap.H>
52# include <tpl_binHeap.H>
53# include <tpl_dynMapTree.H>
54# include <bitArray.H>
55# include <ah-errors.H>
56
57
58namespace Aleph {
59
60struct Huffman_Node;
61
63
65
66struct Huffman_Node : public BinHeap<size_t>::Node
67{
69
71
72public:
73
76 {
77 /* empty */
78 }
79
82 {
83 /* empty */
84 }
85
86 ~Huffman_Node() { /* bin_node memory must not be released here */ }
87
88};
89
91
92static inline const size_t & get_freq(Huffman_Node * huffman_node) noexcept
93{
94 return huffman_node->get_key();
95}
96
97
98static inline void increase_freq(Huffman_Node * huffman_node) noexcept
99{
100 huffman_node->get_key()++;
101}
102
103
104static inline void set_freq(Huffman_Node * huffman_node, const size_t & freq)
105 noexcept
106{
107 huffman_node->get_key() = freq;
108}
109
111static inline bool is_leaf(BinNode<std::string> * p) noexcept
112{
113 return LLINK(p) == nullptr and RLINK(p) == nullptr;
114}
115
122{
127
129
130 std::string end_symbol;
131 size_t text_len;
132
134 {
135 if (is_leaf(p))
136 {
137 const std::string & str = p->get_key();
138 code_map.insert(str, BitArray(array));
139 return;
140 }
141 array.push(0); build_prefix_encoding(LLINK(p), array); array.pop();
142 array.push(1); build_prefix_encoding(RLINK(p), array); array.pop();
143 }
144
146 {
147 ah_domain_error_if(root == nullptr) << "Huffman encoding tree has not been generated";
148
149 BitArray array(0);
150 code_map.empty();
153 }
154
155 bool test_end(const std::string & str) const
156 {
157 if (end_symbol == "NO-END")
158 return false;
159 return end_symbol == str;
160 }
161
162 void update_freq(const std::string & str)
163 {
164 ah_domain_error_if(root != nullptr) << "Huffman encoding tree has already been generated";
165
166 ah_domain_error_if(test_end(str)) << "End symbol has already been inserted";
167
168 Huffman_Node * huffman_node = nullptr;
170 if (huffman_node_ptr == nullptr) // symbol defined previously?
171 { // No ==> create a new entry in symbol_map and insert it into heap
172 std::unique_ptr<BinNode<std::string> > bin_node_auto(new BinNode<std::string>(str));
173 huffman_node = static_cast<Huffman_Node*>
176 bin_node_auto.release();
177 }
178 else
179 huffman_node = huffman_node_ptr->second; // already defined, retrieve it
180
183 }
184
186 {
187 const size_t symbol_code_size = symbol_code.size();
188 for (size_t i = 0; i < symbol_code_size; ++i)
190 }
191
192 public:
193
196 : root(nullptr), freq_root(nullptr), end_symbol("NO-END"), text_len(0)
197 {
198 // empty
199 }
200
203
204 private:
205
206 struct Get_Key
207 {
208 std::string operator () (BinNode<std::string> * p) const noexcept
209 {
210 return is_leaf(p) ? p->get_key() : "";
211 }
212 };
213
214 struct Load_Key
215 {
216 void operator () (BinNode<std::string> * p, std::istream & input) const noexcept
217 {
218 if (is_leaf(p))
219 input >> p->get_key();
220 }
221 };
222
223 public:
224
236 void save_tree(std::ostream & output)
237 {
238 ah_domain_error_if(root == nullptr)
239 << "Huffman tree has not been generated";
240
243 prefix.save(output);
244
246 output << '\n';
247
249 }
250
269 void save_tree_in_array_of_chars(const std::string & array_name, std::ostream & output)
270 {
271 ah_domain_error_if(root == nullptr)
272 << "Huffman tree has not been generated";
273
276 }
277
280 {
281 ah_domain_error_if(root == nullptr)
282 << "Huffman tree has not been generated";
283
284 return root;
285 }
286
300 {
301 ah_domain_error_if(root != nullptr) << "Huffman encoding tree has already been generated";
302 ah_domain_error_if(heap.is_empty()) << "No symbols have been inserted";
303
304 freq_root = nullptr;
305
306 while (heap.size() > 1) // until only one node remains
307 {
310 BinNode <std::string> * bin_node = new BinNode <std::string>;
311 Huffman_Node * huffman_node = new Huffman_Node (bin_node);
312 LLINK(bin_node) = l_huffman_node->bin_node;
313 RLINK(bin_node) = r_huffman_node->bin_node;
314 const size_t new_freq = get_freq(l_huffman_node) +
317
318 if (with_freqs)
319 {
320 Freq_Node *& l_freq_node = l_huffman_node->freq_node;
321 if (l_freq_node == nullptr)
322 {
324 l_freq_node->get_key().first =
325 l_huffman_node->bin_node->get_key();
326 l_freq_node->get_key().second = l_huffman_node->get_key();
327 }
328
329 Freq_Node *& r_freq_node = r_huffman_node->freq_node;
330 if (r_freq_node == nullptr)
331 {
333 r_freq_node->get_key().first =
334 r_huffman_node->bin_node->get_key();
335 r_freq_node->get_key().second = r_huffman_node->get_key();
336 }
337
338 const std::string str = std::to_string(new_freq);
339 Freq_Node *& freq_node = huffman_node->freq_node;
340 freq_node = new Freq_Node;
341 freq_node->get_key().first = str;
342 freq_node->get_key().second = huffman_node->get_key();
343 LLINK(freq_node) = l_freq_node;
344 RLINK(freq_node) = r_freq_node;
345 }
346
347 delete l_huffman_node;
348 delete r_huffman_node;
350 } // the remaining node in heap is the prefix tree root
351
353 root = huffman_root->bin_node;
354
355 if (with_freqs)
356 freq_root = huffman_root->freq_node;
357
358 delete huffman_root;
359 build_encoding_map(); // build code map
360
361 return root;
362 }
363
390
393 {
394 ah_domain_error_if(freq_root == nullptr)
395 << "Huffman tree has not been generated";
396
397 return freq_root;
398 }
399
409 void set_freq(const std::string & str, const size_t & freq)
410 {
411 ah_domain_error_if(root != nullptr) << "Huffman encoding tree has already been generated";
412 ah_domain_error_if(test_end(str)) << "End symbol has already been inserted";
413
414 // Search symbol str
416 if (huffman_node_ptr != nullptr) // already defined?
417 {
418 std::string msg = "Frequency for symbol " + str + " has already set";
419 ah_domain_error() << msg; // Yes ==> this is an error!
420 }
421
422 std::unique_ptr<BinNode<std::string> > bin_node_auto(new BinNode<std::string>(str));
427 bin_node_auto.release();
428 }
429
430 private:
431
432 static const size_t Max_Token_Size = 256;
433
434 static void save_string_as_bytes(const std::string & str, std::ostream & output)
435 {
436 output << str.size() << " ";
437 for (unsigned char c : str)
438 output << static_cast<unsigned int>(c) << " ";
439 }
440
441 static std::string load_string_from_bytes(std::istream & input)
442 {
443 size_t len = 0;
444 input >> len;
445 ah_domain_error_if(not input) << "Malformed Huffman tree stream";
446 ah_domain_error_if(len > Max_Token_Size) << "Symbol too large in Huffman tree stream";
447
448 std::string str;
449 str.resize(len);
450 for (size_t i = 0; i < len; ++i)
451 {
452 unsigned int byte = 0;
453 input >> byte;
454 ah_domain_error_if(not input) << "Malformed Huffman tree stream";
455 ah_domain_error_if(byte > 255u) << "Invalid byte value in Huffman tree stream";
456 str[i] = static_cast<char>(static_cast<unsigned char>(byte));
457 }
458 return str;
459 }
460
461 static void save_leaf_keys_in_prefix(BinNode<std::string> * p, std::ostream & output)
462 {
464 return;
465
466 if (is_leaf(p))
467 {
469 output << '\n';
470 return;
471 }
472
475 }
476
477 static void load_leaf_keys_in_prefix(BinNode<std::string> * p, std::istream & input)
478 {
480 return;
481
482 if (is_leaf(p))
483 {
485 return;
486 }
487
490 }
491
492 void insert_end_symbol_node(const std::string & str)
493 {
494 std::unique_ptr<BinNode<std::string> > bin_node_auto(new BinNode<std::string>(str));
495
496 Huffman_Node * huffman_node = static_cast<Huffman_Node*>
499 bin_node_auto.release();
500 }
501
503 {
504 while (not heap.is_empty())
505 {
506 auto * node = static_cast<Huffman_Node*>(heap.getMin_ne());
507 delete node->bin_node;
508 delete node;
509 }
511 code_map.empty();
512 text_len = 0;
513 end_symbol = "NO-END";
514 }
515
516 public:
517
528 void read_input(char * input, const bool & with_freqs = false)
529 {
530 ah_domain_error_if(root != nullptr) << "Huffman encoding tree has already been generated";
531
532 char * curr_stream = input;
534 curr_token[1] = '\0';
535 text_len = 0;
536
537 while (*curr_stream != '\0')
538 {
539 curr_token[0] = *curr_stream++;
541 text_len++;
542 }
543
544 if (end_symbol == "NO-END")
546 else if (symbol_map.search(end_symbol) == nullptr)
549 }
550
561 void read_input(std::istream & input, const bool & with_freqs = false)
562 {
563 ah_domain_error_if(root != nullptr) << "Huffman encoding tree has already been generated";
564
565 char curr_token[2] = {'\0', '\0'};
566 char ch = '\0';
567 text_len = 0;
568 while (input.get(ch))
569 {
570 curr_token[0] = ch;
572 ++text_len;
573 }
574
575 if (end_symbol == "NO-END")
577 else if (symbol_map.search(end_symbol) == nullptr)
580 }
581
583 void set_end_of_stream(const std::string & str)
584 {
585 ah_domain_error_if(end_symbol != "NO-END") << "End symbol has already been inserted";
586
587 ah_domain_error_if(root != nullptr) << "Huffman encoding tree has already been generated";
588
589 ah_domain_error_if(symbol_map.search(str) != nullptr)
590 << "End symbol is already present as a normal symbol";
591
593 end_symbol = str;
594 }
595
597 const std::string & get_end_of_stream() const noexcept
598 {
599 return end_symbol;
600 }
601
612 size_t encode(char * input, BitArray & bit_stream)
613 {
614 ah_domain_error_if(root == nullptr)
615 << "Huffman tree has not been generated";
616
617 ah_domain_error_if(end_symbol == "NO-END")
618 << "End symbol has not been configured";
619
620 char * curr_stream = input;
622 curr_token[1] = '\0';
623 while (*curr_stream != '\0')
624 {
625 curr_token[0] = *curr_stream++;
627 }
629
630 return bit_stream.size();
631 }
632
643 size_t encode(std::istream & input, BitArray & bit_stream)
644 {
645 ah_domain_error_if(root == nullptr)
646 << "Huffman tree has not been generated";
647
648 ah_domain_error_if(end_symbol == "NO-END")
649 << "End symbol has not been configured";
650
651 char curr_token[2] = {'\0', '\0'};
652 char ch = '\0';
653 while (input.get(ch))
654 {
655 curr_token[0] = ch;
657 }
658
660 return bit_stream.size();
661 }
662};
663
670{
672 std::string end_symbol;
673 public:
674
682 Huffman_Decoder_Engine(BinNode<std::string> * p, const std::string & end)
683 : root(p), end_symbol(end)
684 {
685 // empty
686 }
687
690 {
691 ah_domain_error_if(root == nullptr)
692 << "Huffman tree has not been generated";
693
694 return root;
695 }
696
705 void decode(BitArray & bit_stream, std::ostream & output)
706 {
707 const size_t & bit_stream_len = bit_stream.size();
709 for (size_t i = 0; i < bit_stream_len; ++i)
710 {
711 if (bit_stream.read_bit(i) == 0)
712 p = LLINK(p);
713 else
714 p = RLINK(p);
715
716
717 ah_domain_error_if(p == nullptr)
718 << "Invalid bits sequence";
719
720 if (is_leaf(p)) // leaf?
721 { // yes ==> write symbol and reset to root
722 const std::string & symbol = p->get_key();
723 if (symbol == end_symbol) // end reached?
724 break;
725
726 output << symbol;
727 p = root; // reset to root, a new code will be read
728 }
729 }
730 }
731};
732
733} // end namespace Aleph
734
735# endif // HUFFMAN_H
Exception handling system with formatted messages for Aleph-w.
#define ah_domain_error()
Throws std::domain_error unconditionally.
Definition ah-errors.H:554
#define ah_domain_error_if(C)
Throws std::domain_error if condition holds.
Definition ah-errors.H:522
WeightedDigraph::Node Node
Space-efficient bit array implementation.
Node for binary search tree.
Key & get_key() noexcept
Contiguous array of bits.
Definition bitArray.H:189
void pop()
Removes the last bit of the array.
Definition bitArray.H:456
void load(std::istream &input)
Loads an array of bits from a file.
Definition bitArray.H:590
void push(const unsigned int value)
Inserts the value at the end of the array.
Definition bitArray.H:450
T & push(const T &item)
Definition htlist.H:1523
Pair * search(const Key &key) const noexcept
Collect all keys.
Pair * insert(const Key &key, const Data &data)
Insert a key-value pair.
Data & find(const Key &key)
Find the value associated with key.
void empty()
remove all elements from the set
void update(Node *p) noexcept
Actualiza prioridad de un nodo contenido en el heap.
bool is_empty() const noexcept
Node * getMin()
Elimina del heap el nodo de menor prioridad.
Node * getMin_ne() noexcept
Node * insert(Node *p) noexcept
Inserta un nodo en un heap.
const size_t & size() const noexcept
size_t size() const noexcept
Count the number of elements of the list.
Definition htlist.H:1319
BinNode< std::string > * root
Definition Huffman.H:671
void decode(BitArray &bit_stream, std::ostream &output)
Decode a bit stream.
Definition Huffman.H:705
Huffman_Decoder_Engine(BinNode< std::string > *p, const std::string &end)
Decoder constructor.
Definition Huffman.H:682
BinNode< std::string > *& get_root()
Returns the root of the Huffman decoding tree.
Definition Huffman.H:689
void insert_end_symbol_node(const std::string &str)
Definition Huffman.H:492
Freq_Node *& get_freq_root()
Returns the root of the frequency tree.
Definition Huffman.H:392
static void save_leaf_keys_in_prefix(BinNode< std::string > *p, std::ostream &output)
Definition Huffman.H:461
void update_freq(const std::string &str)
Definition Huffman.H:162
size_t encode(char *input, BitArray &bit_stream)
Encode the input text.
Definition Huffman.H:612
static std::string load_string_from_bytes(std::istream &input)
Definition Huffman.H:441
void load_tree(std::istream &input)
Load and build a binary tree from a stream.
Definition Huffman.H:374
static const size_t Max_Token_Size
Definition Huffman.H:432
BinNode< std::string > * root
Definition Huffman.H:123
void clear_build_state() noexcept
Definition Huffman.H:502
void save_tree(std::ostream &output)
Save a Huffman tree into a stream.
Definition Huffman.H:236
BinNode< std::string > *& get_root()
Returns the root of the Huffman decoding tree.
Definition Huffman.H:279
void set_freq(const std::string &str, const size_t &freq)
Define the frequency of a symbol.
Definition Huffman.H:409
static void save_string_as_bytes(const std::string &str, std::ostream &output)
Definition Huffman.H:434
void read_input(char *input, const bool &with_freqs=false)
Read a NUL-terminated character std::string, count frequencies and build the prefix tree.
Definition Huffman.H:528
void save_tree_in_array_of_chars(const std::string &array_name, std::ostream &output)
Generate C/C++ array declarations for a Huffman tree.
Definition Huffman.H:269
static void load_leaf_keys_in_prefix(BinNode< std::string > *p, std::istream &input)
Definition Huffman.H:477
void read_input(std::istream &input, const bool &with_freqs=false)
Read a stream, count frequencies and build the prefix tree.
Definition Huffman.H:561
bool test_end(const std::string &str) const
Definition Huffman.H:155
void set_end_of_stream(const std::string &str)
Defines the end-of-stream symbol.
Definition Huffman.H:583
~Huffman_Encoder_Engine()
Destructor - frees all allocated memory.
Definition Huffman.H:202
Huffman_Encoder_Engine()
Encoder constructor.
Definition Huffman.H:195
static void append_code(BitArray &bit_stream, const BitArray &symbol_code)
Definition Huffman.H:185
void build_prefix_encoding(BinNode< std::string > *p, BitArray &array)
Definition Huffman.H:133
const std::string & get_end_of_stream() const noexcept
Return the configured end-of-stream symbol.
Definition Huffman.H:597
size_t encode(std::istream &input, BitArray &bit_stream)
Encode the input text from a stream.
Definition Huffman.H:643
BinNode< std::string > * generate_huffman_tree(const bool &with_freqs=false)
Generate the Huffman prefix tree.
Definition Huffman.H:299
constexpr Node *& RLINK(Node *p) noexcept
Return the right tree of p.
void tree_to_bits(Node *root, BitArray &array)
Compute a bit code for the binary tree.
void destroyRec(Node *&root) noexcept
Free recursively all the memory occupied by the tree root
constexpr Node *& LLINK(Node *p) noexcept
Return a pointer to left subtree.
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
BinNode< std::pair< std::string, size_t > > Freq_Node
Definition Huffman.H:64
BinHeap< size_t > Huffman_Heap
Definition Huffman.H:90
DynMapTree< std::string, BitArray, Treap_Vtl > Code_Map
Definition Huffman.H:110
static void increase_freq(Huffman_Node *huffman_node) noexcept
Definition Huffman.H:98
static void prefix(Node *root, DynList< Node * > &acc)
static const size_t & get_freq(Huffman_Node *huffman_node) noexcept
Definition Huffman.H:92
static bool is_leaf(BinNode< std::string > *p) noexcept
Definition Huffman.H:111
DynList< T > maps(const C &c, Op op)
Classic map operation.
static void set_freq(Huffman_Node *huffman_node, const size_t &freq) noexcept
Definition Huffman.H:104
Node heap without virtual destructor.
std::string operator()(BinNode< std::string > *p) const noexcept
Definition Huffman.H:208
void operator()(BinNode< std::string > *p, std::istream &input) const noexcept
Definition Huffman.H:216
Huffman_Node(BinNode< std::string > *node)
Definition Huffman.H:80
BinNode< std::string > * bin_node
Definition Huffman.H:68
Freq_Node * freq_node
Definition Huffman.H:70
Binary heap implementation using tree structure.
Utility functions for binary tree operations.
Dynamic key-value map based on balanced binary search trees.
Treap: randomized BST combining tree and heap properties.
ofstream output
Definition writeHeap.C:213