Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
write_huffman.C
Go to the documentation of this file.
1
2/*
3 Aleph_w
4
5 Data structures & Algorithms
6 version 2.0.0b
7 https://github.com/lrleon/Aleph-w
8
9 This file is part of Aleph-w library
10
11 Copyright (c) 2002-2026 Leandro Rabindranath Leon
12
13 Permission is hereby granted, free of charge, to any person obtaining a copy
14 of this software and associated documentation files (the "Software"), to deal
15 in the Software without restriction, including without limitation the rights
16 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 copies of the Software, and to permit persons to whom the Software is
18 furnished to do so, subject to the following conditions:
19
20 The above copyright notice and this permission notice shall be included in all
21 copies or substantial portions of the Software.
22
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 SOFTWARE.
30*/
31
32
282# include <cstring>
283# include <iostream>
284# include <fstream>
285# include <tclap/CmdLine.h>
286# include <Huffman.H>
287# include <huffman_btreepic.H>
288
289using namespace std;
290
291// "Muerte de Antoñito el Camborio" by Federico García Lorca
292char poem[] =
293"Muerte De Antoñito El Camborio\n"
294" Federico Garcia Lorca\n"
295"\n"
296" Voces de muerte sonaron\n"
297"cerca del Guadalquivir.\n"
298"Voces antiguas que cercan\n"
299"voz de clavel varonil.\n"
300"Les clavo sobre las botas\n"
301"mordiscos de jabali.\n"
302"En la lucha daba saltos\n"
303"jabonados de delfin.\n"
304"Bano con sangre enemiga\n"
305"su corbata carmesi,\n"
306"pero eran cuatro punales\n"
307"y tuvo que sucumbir.\n"
308"Cuando las estrellas clavan\n"
309"rejones al agua gris,\n"
310"cuando los erales suenan\n"
311"veronicas de alheli,\n"
312"voces de muerte sonaron\n"
313"cerca del Guadalquivir.\n"
314"\n"
315" Antonio Torres Heredia,\n"
316"Camborio de dura crin,\n"
317"moreno de verde luna,\n"
318"voz de clavel varonil:\n"
319"quien te ha quitado la vida\n"
320"cerca del Guadalquivir?\n"
321"Mis cuatro primos Heredias\n"
322"hijos de Benameji.\n"
323"Lo que en otros no envidiaban,\n"
324"ya lo envidiaban en mi.\n"
325"Zapatos color corinto,\n"
326"medallones de marfil,\n"
327"y este cutis amasado\n"
328"con aceituna y jazmin.\n"
329"Ay Antonito el Camborio,\n"
330"digno de una Emperatriz!\n"
331"Acuerdate de la Virgen\n"
332"porque te vas a morir.\n"
333"Ay Federico Garcia,\n"
334"llama a la Guardia Civil!\n"
335"Ya mi talle se ha quebrado\n"
336"como cana de maiz.\n"
337"\n"
338" Tres golpes de sangre tuvo\n"
339"y se murio de perfil.\n"
340"Viva moneda que nunca\n"
341"se volvera a repetir.\n"
342"Un angel marchoso pone\n"
343"su cabeza en un cojin.\n"
344"Otros de rubor cansado,\n"
345"encendieron un candil.\n"
346"Y cuando los cuatro primos\n"
347"llegan a Benameji,\n"
348"voces de muerte cesaron\n"
349"cerca del Guadalquivir.\n";
350
354size_t read_and_encode(char* str,
357{
358 huffman_engine.read_input(str, true);
359 return huffman_engine.encode(str, bit_stream);
360}
361
365void write_verse(const char* text, const string& filename)
366{
367 ofstream verso(filename, ios::out);
368 verso << "\\begin{verse}" << endl;
369
370 const char* curr = text;
371 while (*curr != '\0')
372 {
373 if (*curr == '\n')
374 {
375 if (curr[1] == '\n')
376 {
377 verso << " \\\\" << endl
378 << "\\ \\" << endl
379 << endl;
380 curr++;
381 }
382 else if (curr[1] != '\0')
383 {
384 verso << " \\\\" << endl;
385 }
386 }
387 else
388 {
389 verso << *curr;
390 }
391 curr++;
392 }
393
394 verso << "\\end{verse}" << endl;
395}
396
397int main(int argc, char *argv[])
398{
399 try
400 {
401 TCLAP::CmdLine cmd("Huffman encoding demonstration", ' ', "1.0");
402
403 TCLAP::UnlabeledValueArg<string> fileArg("input",
404 "Input file to encode (optional, uses built-in poem if not provided)",
405 false, "", "filename");
406 cmd.add(fileArg);
407
408 TCLAP::ValueArg<string> outputArg("o", "output",
409 "Output tree file prefix",
410 false, "huffman", "prefix");
411 cmd.add(outputArg);
412
413 cmd.parse(argc, argv);
414
415 string inputFile = fileArg.getValue();
416 string outputPrefix = outputArg.getValue();
417
418 cout << "Huffman Encoding Example" << endl;
419 cout << "========================" << endl << endl;
420
421 ofstream huf_output(outputPrefix + ".Tree", ios::out);
423
425 BitArray code(0);
426 size_t code_len = 0;
427
428 if (inputFile.empty())
429 {
430 // Use built-in poem
431 cout << "Encoding built-in poem (Lorca)..." << endl;
432
434
435 // Generate Huffman tree visualization
436 huffman_to_btreepic(encoder.get_freq_root());
437
438 // Save tree in array format
439 {
440 ofstream output(outputPrefix + "_tree.h");
441 encoder.save_tree_in_array_of_chars(outputPrefix.c_str(), output);
442 }
443
444 // Decode and verify
445 cout << endl << "Decoded text:" << endl;
446 cout << "-------------" << endl;
447 Huffman_Decoder_Engine decoder(encoder.get_root(), "");
448 decoder.decode(code, cout);
449 cout << endl;
450
451 // Write LaTeX files
452 write_verse(poem, outputPrefix + "-verso.tex");
453
454 {
455 ofstream stat1(outputPrefix + "-stat1.tex", ios::out);
456 stat1 << "$" << strlen(poem) * 8 << "$";
457 }
458
459 {
460 ofstream stat2(outputPrefix + "-stat2.tex", ios::out);
461 stat2 << "$" << code_len << "$";
462 }
463
464 // Print statistics
465 size_t original_bits = strlen(poem) * 8;
466 cout << endl << "Compression Statistics:" << endl;
467 cout << "-----------------------" << endl;
468 cout << "Original size: " << original_bits << " bits" << endl;
469 cout << "Encoded size: " << code_len << " bits" << endl;
470 cout << "Compression ratio: "
471 << (100.0 * code_len / original_bits) << "%" << endl;
472 cout << "Space saved: "
473 << (100.0 - 100.0 * code_len / original_bits) << "%" << endl;
474
475 destroyRec(encoder.get_root());
476 destroyRec(encoder.get_freq_root());
477 }
478 else
479 {
480 // Encode file
481 cout << "Encoding file: " << inputFile << endl;
482
483 ifstream input1(inputFile, ios::in);
484 if (!input1)
485 {
486 cerr << "Error: cannot open file " << inputFile << endl;
487 return 1;
488 }
489
490 encoder.read_input(input1, true);
491 input1.close();
492
493 huffman_to_btreepic(encoder.get_freq_root(), true);
494
495 ifstream input2(inputFile, ios::in);
496 BitArray bit_stream(1024 * 1024 * 500); // 500MB max
497
499 input2.close();
500
501 // Get original file size
502 ifstream input3(inputFile, ios::ate);
503 size_t original_size = input3.tellg();
504 input3.close();
505
506 cout << endl << "Compression Statistics:" << endl;
507 cout << "-----------------------" << endl;
508 cout << "Original size: " << (original_size * 8) << " bits" << endl;
509 cout << "Encoded size: " << code_len << " bits" << endl;
510 cout << "Compression ratio: "
511 << (100.0 * code_len / (original_size * 8)) << "%" << endl;
512 }
513
514 cout << endl << "Done. Output files:" << endl;
515 cout << " - " << outputPrefix << ".Tree" << endl;
516 if (inputFile.empty())
517 {
518 cout << " - " << outputPrefix << "_tree.h" << endl;
519 cout << " - " << outputPrefix << "-verso.tex" << endl;
520 cout << " - " << outputPrefix << "-stat1.tex" << endl;
521 cout << " - " << outputPrefix << "-stat2.tex" << endl;
522 }
523 }
524 catch (TCLAP::ArgException &e)
525 {
526 cerr << "Error: " << e.error() << " for arg " << e.argId() << endl;
527 return 1;
528 }
529
530 return 0;
531}
Huffman coding for data compression.
int main()
Contiguous array of bits.
Definition bitArray.H:189
void empty() noexcept
empty the list
Definition htlist.H:1689
void destroyRec(Node *&root) noexcept
Free recursively all the memory occupied by the tree root
Huffman tree visualization for btreepic LaTeX package.
void huffman_to_btreepic(Freq_Node *p, const bool with_level_adjust=false)
Generate btreepic specification for a Huffman tree.
std::ostream * output_ptr
Output stream pointer for btreepic commands.
std::string code(Node *root)
Compute a string with the Lukasiewicz`s word of a tree.
DynList< T > maps(const C &c, Op op)
Classic map operation.
STL namespace.
ofstream output
Definition writeHeap.C:213
void write_verse(const char *text, const string &filename)
Write poem in LaTeX verse format.
char poem[]
size_t read_and_encode(char *str, Huffman_Encoder_Engine &huffman_engine, BitArray &bit_stream)
Encode string and return encoded bit stream length.