annotate HuffmanSet.cpp @ 38:7905fa8a3f1b

Test for empty string.
author Tom Fredrik Blenning Klaussen <bfg@blenning.no>
date Fri, 07 Sep 2012 13:07:46 +0200
parents c52a0627337c
children f711ddb56ae7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
21
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
1 #include "HuffmanString.hpp"
28
b2c2c2bf2bbd Refactor Exceptions into a separate directory.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 26
diff changeset
2 #include "Exception/NoSuchValueException.hpp"
b2c2c2bf2bbd Refactor Exceptions into a separate directory.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 26
diff changeset
3 #include "Exception/InvalidDataException.hpp"
21
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
4
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
5 #include <QtCore/QHash>
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
6
26
c0ddc978475a Remove debug info from HuffmanSet.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 21
diff changeset
7 HuffmanSet::HuffmanSet() : cutoff(256), numInserts(0), lut(0)
21
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
8 {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
9 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
10
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
11 void HuffmanSet::setCutoff(uint cutoff)
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
12 {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
13 this->cutoff = cutoff;
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
14 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
15
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
16 QStringList HuffmanSet::chunks(const QString& str)
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
17 {
37
c52a0627337c BUGFIX: Chunking got extra values at beginning and end.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 34
diff changeset
18 return str.split("", QString::SkipEmptyParts);
21
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
19 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
20
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
21 BitDecoder* HuffmanSet::createLut(const QMap<QString, uint>& freqTable)
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
22 {
34
fda70a362ed5 Remove whitespace.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 28
diff changeset
23 QMultiMap<uint, BitDecoder* > freqs;
21
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
24 for(QMap<QString, uint>::const_iterator it = freqTable.begin();
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
25 it != freqTable.end(); ++it) {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
26 freqs.insert(it.value(), new BitDecoder(it.key()));
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
27 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
28
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
29 if (freqs.size() == 1) {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
30 QList<uint> keys = freqs.keys();
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
31 return freqs.take(keys[0]);
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
32 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
33
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
34 QList<uint> keys = freqs.keys();
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
35
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
36 while (freqs.size() >= 2) {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
37 QList<uint> keys = freqs.keys();
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
38
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
39 BitDecoder* v0 = freqs.take(keys[0]);
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
40 BitDecoder* v1 = freqs.take(keys[1]);
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
41
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
42 BitDecoder* n = BitDecoder::merge(v0, v1);
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
43
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
44 freqs.insert(keys[0] + keys[1], n);
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
45 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
46 BitDecoder* retVal = freqs.values()[0];
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
47 return retVal;
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
48 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
49
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
50 QString HuffmanSet::decode(const QBitArray& bits) const
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
51 {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
52 return lut->decode(bits);
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
53 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
54
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
55 QBitArray HuffmanSet::encode(const QString& string, const QMap<QString, QBitArray>& encoder)
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
56 {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
57 QBitArray retVal;
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
58 QStringList c = chunks(string);
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
59 foreach(const QString& fragment, c) {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
60 if (encoder.contains(fragment))
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
61 retVal = BitDecoder::unite(retVal, encoder[fragment]);
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
62 else
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
63 throw InvalidDataException();
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
64 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
65 return retVal;
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
66 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
67
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
68 uint HuffmanSet::totalElements() const
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
69 {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
70 return newStrings.size() + map.size();
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
71 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
72
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
73 void HuffmanSet::rebuild()
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
74 {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
75 QMap<QString, uint> freqTable;
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
76
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
77 foreach(key_t key, map.keys()) {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
78 foreach(const QString& chunk, chunks(decode(map.value(key)))) {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
79 ++freqTable[chunk];
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
80 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
81 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
82 foreach(key_t key, newStrings.keys()) {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
83 foreach(const QString& chunk, chunks(newStrings.value(key))) {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
84 ++freqTable[chunk];
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
85 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
86 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
87
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
88 BitDecoder* newLut = createLut(freqTable);
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
89
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
90 encoder = newLut->createEncoder();
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
91
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
92 foreach(key_t key, map.keys()) {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
93 map.insert(key, encode(decode(map.value(key)), encoder));
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
94 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
95 foreach(key_t key, newStrings.keys()) {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
96 map.insert(key, encode(newStrings.value(key), encoder));
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
97 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
98 numInserts = 0;
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
99 delete lut;
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
100 lut = newLut;
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
101 newStrings.clear();
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
102 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
103
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
104 bool HuffmanSet::contains(key_t key) const
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
105 {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
106 return newStrings.contains(key) || map.contains(key);
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
107 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
108
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
109 HuffmanSet::key_t HuffmanSet::hash(const QString& str)
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
110 {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
111 key_t key = qHash(str);
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
112 while (contains(key) && value(key) != str)
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
113 ++key;
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
114 return key;
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
115 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
116
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
117 HuffmanSet::key_t HuffmanSet::insert(const QString& str)
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
118 {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
119 key_t key = hash(str);
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
120 if (!contains(key)) {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
121 try {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
122 QBitArray bits = encode(str, encoder);
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
123 map.insert(key, bits);
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
124 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
125 catch (InvalidDataException& e) {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
126 newStrings.insert(key, str);
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
127 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
128 if (++numInserts >= cutoff) {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
129 rebuild();
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
130 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
131 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
132 return key;
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
133 }
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
134
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
135 QString HuffmanSet::value(key_t key) const
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
136 {
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
137 if (map.contains(key))
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
138 return decode(map.value(key));
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
139 if (newStrings.contains(key))
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
140 return newStrings.value(key);
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
141 throw NoSuchValueException();
3bcdb8bb6914 Huffman representations.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
142 }