Mercurial > dedupe
comparison HuffmanSet.cpp @ 21:3bcdb8bb6914
Huffman representations.
| author | Tom Fredrik Blenning Klaussen <bfg@blenning.no> |
|---|---|
| date | Wed, 05 Sep 2012 21:54:53 +0200 |
| parents | |
| children | c0ddc978475a |
comparison
equal
deleted
inserted
replaced
| 20:754e12c927b3 | 21:3bcdb8bb6914 |
|---|---|
| 1 #include "HuffmanString.hpp" | |
| 2 | |
| 3 #include <QtCore/QHash> | |
| 4 #include "NoSuchValueException.hpp" | |
| 5 #include <QtCore/QDebug> | |
| 6 | |
| 7 #include "InvalidDataException.hpp" | |
| 8 | |
| 9 | |
| 10 HuffmanSet::HuffmanSet() : cutoff(128), numInserts(0), lut(0) | |
| 11 { | |
| 12 qDebug() << __FUNCTION__; | |
| 13 } | |
| 14 | |
| 15 void HuffmanSet::setCutoff(uint cutoff) | |
| 16 { | |
| 17 this->cutoff = cutoff; | |
| 18 } | |
| 19 | |
| 20 QStringList HuffmanSet::chunks(const QString& str) | |
| 21 { | |
| 22 return str.split(""); | |
| 23 } | |
| 24 | |
| 25 BitDecoder* HuffmanSet::createLut(const QMap<QString, uint>& freqTable) | |
| 26 { | |
| 27 QMultiMap<uint, BitDecoder* > freqs; | |
| 28 for(QMap<QString, uint>::const_iterator it = freqTable.begin(); | |
| 29 it != freqTable.end(); ++it) { | |
| 30 freqs.insert(it.value(), new BitDecoder(it.key())); | |
| 31 } | |
| 32 | |
| 33 if (freqs.size() == 1) { | |
| 34 QList<uint> keys = freqs.keys(); | |
| 35 return freqs.take(keys[0]); | |
| 36 } | |
| 37 | |
| 38 QList<uint> keys = freqs.keys(); | |
| 39 | |
| 40 while (freqs.size() >= 2) { | |
| 41 QList<uint> keys = freqs.keys(); | |
| 42 | |
| 43 BitDecoder* v0 = freqs.take(keys[0]); | |
| 44 BitDecoder* v1 = freqs.take(keys[1]); | |
| 45 | |
| 46 BitDecoder* n = BitDecoder::merge(v0, v1); | |
| 47 | |
| 48 freqs.insert(keys[0] + keys[1], n); | |
| 49 } | |
| 50 BitDecoder* retVal = freqs.values()[0]; | |
| 51 return retVal; | |
| 52 } | |
| 53 | |
| 54 QString HuffmanSet::decode(const QBitArray& bits) const | |
| 55 { | |
| 56 return lut->decode(bits); | |
| 57 } | |
| 58 | |
| 59 QBitArray HuffmanSet::encode(const QString& string, const QMap<QString, QBitArray>& encoder) | |
| 60 { | |
| 61 QBitArray retVal; | |
| 62 QStringList c = chunks(string); | |
| 63 foreach(const QString& fragment, c) { | |
| 64 if (encoder.contains(fragment)) | |
| 65 retVal = BitDecoder::unite(retVal, encoder[fragment]); | |
| 66 else | |
| 67 throw InvalidDataException(); | |
| 68 } | |
| 69 return retVal; | |
| 70 } | |
| 71 | |
| 72 uint HuffmanSet::totalElements() const | |
| 73 { | |
| 74 return newStrings.size() + map.size(); | |
| 75 } | |
| 76 | |
| 77 void HuffmanSet::rebuild() | |
| 78 { | |
| 79 qDebug() << __FUNCTION__ << totalElements() << numInserts << newStrings.size(); | |
| 80 | |
| 81 QMap<key_t, QBitArray> newVals; | |
| 82 | |
| 83 QMap<QString, uint> freqTable; | |
| 84 | |
| 85 foreach(key_t key, map.keys()) { | |
| 86 foreach(const QString& chunk, chunks(decode(map.value(key)))) { | |
| 87 ++freqTable[chunk]; | |
| 88 } | |
| 89 } | |
| 90 foreach(key_t key, newStrings.keys()) { | |
| 91 foreach(const QString& chunk, chunks(newStrings.value(key))) { | |
| 92 ++freqTable[chunk]; | |
| 93 } | |
| 94 } | |
| 95 | |
| 96 BitDecoder* newLut = createLut(freqTable); | |
| 97 | |
| 98 encoder = newLut->createEncoder(); | |
| 99 | |
| 100 foreach(key_t key, map.keys()) { | |
| 101 map.insert(key, encode(decode(map.value(key)), encoder)); | |
| 102 } | |
| 103 foreach(key_t key, newStrings.keys()) { | |
| 104 map.insert(key, encode(newStrings.value(key), encoder)); | |
| 105 } | |
| 106 numInserts = 0; | |
| 107 delete lut; | |
| 108 lut = newLut; | |
| 109 newStrings.clear(); | |
| 110 } | |
| 111 | |
| 112 bool HuffmanSet::contains(key_t key) const | |
| 113 { | |
| 114 return newStrings.contains(key) || map.contains(key); | |
| 115 } | |
| 116 | |
| 117 HuffmanSet::key_t HuffmanSet::hash(const QString& str) | |
| 118 { | |
| 119 key_t key = qHash(str); | |
| 120 while (contains(key) && value(key) != str) | |
| 121 ++key; | |
| 122 return key; | |
| 123 } | |
| 124 | |
| 125 HuffmanSet::key_t HuffmanSet::insert(const QString& str) | |
| 126 { | |
| 127 key_t key = hash(str); | |
| 128 if (!contains(key)) { | |
| 129 try { | |
| 130 QBitArray bits = encode(str, encoder); | |
| 131 map.insert(key, bits); | |
| 132 } | |
| 133 catch (InvalidDataException& e) { | |
| 134 newStrings.insert(key, str); | |
| 135 } | |
| 136 if (++numInserts >= cutoff) { | |
| 137 rebuild(); | |
| 138 } | |
| 139 } | |
| 140 return key; | |
| 141 } | |
| 142 | |
| 143 QString HuffmanSet::value(key_t key) const | |
| 144 { | |
| 145 if (map.contains(key)) | |
| 146 return decode(map.value(key)); | |
| 147 if (newStrings.contains(key)) | |
| 148 return newStrings.value(key); | |
| 149 throw NoSuchValueException(); | |
| 150 } |
