Mercurial > dedupe
comparison HuffmanSet.cpp @ 55:19b2a2d98788
Use define to multiplex between different types of decoders.
Expand the API.
| author | Tom Fredrik Blenning Klaussen <bfg@blenning.no> |
|---|---|
| date | Thu, 13 Sep 2012 23:47:29 +0200 |
| parents | f8d0ea827db3 |
| children | e5fa379d4030 |
comparison
equal
deleted
inserted
replaced
| 54:f339499ecd79 | 55:19b2a2d98788 |
|---|---|
| 6 #include "Exception/InvalidDataException.hpp" | 6 #include "Exception/InvalidDataException.hpp" |
| 7 #include "Exception/NoSuchValueException.hpp" | 7 #include "Exception/NoSuchValueException.hpp" |
| 8 | 8 |
| 9 #include <QtCore/QHash> | 9 #include <QtCore/QHash> |
| 10 | 10 |
| 11 HuffmanSet::HuffmanSet() : cutoff(256), numInserts(0), lut(0) | 11 HuffmanSet::HuffmanSet() : cutoff(1024), numInserts(0), lut(0) |
| 12 { | 12 { |
| 13 } | |
| 14 | |
| 15 HuffmanSet::~HuffmanSet() | |
| 16 { | |
| 17 delete lut; | |
| 13 } | 18 } |
| 14 | 19 |
| 15 void HuffmanSet::setCutoff(uint cutoff) | 20 void HuffmanSet::setCutoff(uint cutoff) |
| 16 { | 21 { |
| 17 this->cutoff = cutoff; | 22 this->cutoff = cutoff; |
| 23 } | |
| 24 | |
| 25 void HuffmanSet::setAutoRebuild(bool autoRebuild) | |
| 26 { | |
| 27 this->autoRebuild = autoRebuild; | |
| 18 } | 28 } |
| 19 | 29 |
| 20 QStringList HuffmanSet::chunks(const QString& str) | 30 QStringList HuffmanSet::chunks(const QString& str) |
| 21 { | 31 { |
| 22 return str.split("", QString::SkipEmptyParts); | 32 return str.split("", QString::SkipEmptyParts); |
| 72 uint HuffmanSet::totalElements() const | 82 uint HuffmanSet::totalElements() const |
| 73 { | 83 { |
| 74 return newStrings.size() + map.size(); | 84 return newStrings.size() + map.size(); |
| 75 } | 85 } |
| 76 | 86 |
| 77 void HuffmanSet::rebuild() | 87 void HuffmanSet::generateFreqTable(QMap<QString, uint>& freqTable) const |
| 78 { | 88 { |
| 79 QMap<QString, uint> freqTable; | |
| 80 | |
| 81 foreach(key_t key, map.keys()) { | 89 foreach(key_t key, map.keys()) { |
| 82 foreach(const QString& chunk, chunks(decode(map.value(key)))) { | 90 foreach(const QString& chunk, chunks(decode(map.value(key)))) { |
| 83 ++freqTable[chunk]; | 91 ++freqTable[chunk]; |
| 84 } | 92 } |
| 85 } | 93 } |
| 86 foreach(key_t key, newStrings.keys()) { | 94 foreach(key_t key, newStrings.keys()) { |
| 87 foreach(const QString& chunk, chunks(newStrings.value(key))) { | 95 foreach(const QString& chunk, chunks(newStrings.value(key))) { |
| 88 ++freqTable[chunk]; | 96 ++freqTable[chunk]; |
| 89 } | 97 } |
| 90 } | 98 } |
| 99 } | |
| 100 | |
| 101 QMap<QString, uint> HuffmanSet::generateFreqTable() const | |
| 102 { | |
| 103 QMap<QString, uint> freqTable; | |
| 104 generateFreqTable(freqTable); | |
| 105 return freqTable; | |
| 106 } | |
| 107 | |
| 108 | |
| 109 void HuffmanSet::rebuild() | |
| 110 { | |
| 111 QMap<QString, uint> freqTable; | |
| 112 | |
| 113 generateFreqTable(freqTable); | |
| 91 | 114 |
| 92 BitDecoder* newLut = createLut(freqTable); | 115 BitDecoder* newLut = createLut(freqTable); |
| 93 | 116 |
| 94 encoder = newLut->createEncoder(); | 117 encoder = newLut->createEncoder(); |
| 95 | 118 |
| 99 foreach(key_t key, newStrings.keys()) { | 122 foreach(key_t key, newStrings.keys()) { |
| 100 map.insert(key, encode(newStrings.value(key), encoder)); | 123 map.insert(key, encode(newStrings.value(key), encoder)); |
| 101 } | 124 } |
| 102 numInserts = 0; | 125 numInserts = 0; |
| 103 delete lut; | 126 delete lut; |
| 104 /* | 127 #if FASTENCODE |
| 105 lut = new FastBitDecoder(encoder); | 128 lut = new FastBitDecoder(encoder); |
| 106 delete newLut; | 129 delete newLut; |
| 107 */ | 130 #else |
| 108 lut = newLut; | 131 lut = newLut; |
| 132 #endif //FASTENCODE | |
| 109 newStrings.clear(); | 133 newStrings.clear(); |
| 110 } | 134 } |
| 111 | 135 |
| 112 bool HuffmanSet::contains(key_t key) const | 136 bool HuffmanSet::contains(key_t key) const |
| 113 { | 137 { |
| 131 map.insert(key, bits); | 155 map.insert(key, bits); |
| 132 } | 156 } |
| 133 catch (InvalidDataException& e) { | 157 catch (InvalidDataException& e) { |
| 134 newStrings.insert(key, str); | 158 newStrings.insert(key, str); |
| 135 } | 159 } |
| 136 if (++numInserts >= cutoff) { | 160 if ((++numInserts >= cutoff) && autoRebuild) { |
| 137 rebuild(); | 161 rebuild(); |
| 138 } | 162 } |
| 139 } | 163 } |
| 140 return key; | 164 return key; |
| 141 } | 165 } |
