Mercurial > dedupe
view HuffmanSet.cpp @ 64:b9515dc35fe4
Make sure no file has greater linewidth than 80.
| author | Tom Fredrik Blenning Klaussen <bfg@blenning.no> |
|---|---|
| date | Fri, 14 Sep 2012 22:50:45 +0200 |
| parents | e5fa379d4030 |
| children |
line wrap: on
line source
#include "HuffmanString.hpp" #include "FastBitDecoder.hpp" #include "BitDecoder.hpp" #include "Exception/InvalidDataException.hpp" #include "Exception/NoSuchValueException.hpp" #include <QtCore/QHash> #include <QtCore/QStringList> HuffmanSet::HuffmanSet() : cutoff(1024), numInserts(0), lut(0) { } HuffmanSet::~HuffmanSet() { delete lut; } void HuffmanSet::setCutoff(uint cutoff) { this->cutoff = cutoff; } void HuffmanSet::setAutoRebuild(bool autoRebuild) { this->autoRebuild = autoRebuild; } QStringList HuffmanSet::chunks(const QString& str) { return str.split("", QString::SkipEmptyParts); } BitDecoder* HuffmanSet::createLut(const QMap<QString, uint>& freqTable) { QMultiMap<uint, BitDecoder* > freqs; for(QMap<QString, uint>::const_iterator it = freqTable.begin(); it != freqTable.end(); ++it) { freqs.insert(it.value(), new BitDecoder(it.key())); } if (freqs.size() == 1) { QList<uint> keys = freqs.keys(); return freqs.take(keys[0]); } QList<uint> keys = freqs.keys(); while (freqs.size() >= 2) { QList<uint> keys = freqs.keys(); BitDecoder* v0 = freqs.take(keys[0]); BitDecoder* v1 = freqs.take(keys[1]); BitDecoder* n = BitDecoder::merge(v0, v1); freqs.insert(keys[0] + keys[1], n); } BitDecoder* retVal = freqs.values()[0]; return retVal; } QString HuffmanSet::decode(const BitArray& bits) const { return lut->decode(bits); } BitArray HuffmanSet::encode(const QString& string, const QMap<QString, BitArray>& encoder) { BitArray retVal; QStringList c = chunks(string); foreach(const QString& fragment, c) { if (encoder.contains(fragment)) retVal = BitDecoder::unite(retVal, encoder[fragment]); else throw InvalidDataException(); } return retVal; } uint HuffmanSet::totalElements() const { return newStrings.size() + map.size(); } void HuffmanSet::generateFreqTable(QMap<QString, uint>& freqTable) const { foreach(key_t key, map.keys()) { foreach(const QString& chunk, chunks(decode(map.value(key)))) { ++freqTable[chunk]; } } foreach(key_t key, newStrings.keys()) { foreach(const QString& chunk, chunks(newStrings.value(key))) { ++freqTable[chunk]; } } } QMap<QString, uint> HuffmanSet::generateFreqTable() const { QMap<QString, uint> freqTable; generateFreqTable(freqTable); return freqTable; } void HuffmanSet::rebuild() { QMap<QString, uint> freqTable; generateFreqTable(freqTable); BitDecoder* newLut = createLut(freqTable); encoder = newLut->createEncoder(); foreach(key_t key, map.keys()) { map.insert(key, encode(decode(map.value(key)), encoder)); } foreach(key_t key, newStrings.keys()) { map.insert(key, encode(newStrings.value(key), encoder)); } numInserts = 0; delete lut; #if FASTENCODE lut = new FastBitDecoder(encoder); delete newLut; #else lut = newLut; #endif //FASTENCODE newStrings.clear(); } bool HuffmanSet::contains(key_t key) const { return newStrings.contains(key) || map.contains(key); } HuffmanSet::key_t HuffmanSet::hash(const QString& str) { key_t key = qHash(str); while (contains(key) && value(key) != str) ++key; return key; } HuffmanSet::key_t HuffmanSet::insert(const QString& str) { key_t key = hash(str); if (!contains(key)) { try { BitArray bits = encode(str, encoder); map.insert(key, bits); } catch (InvalidDataException& e) { newStrings.insert(key, str); } if ((++numInserts >= cutoff) && autoRebuild) { rebuild(); } } return key; } QString HuffmanSet::value(key_t key) const { if (map.contains(key)) return decode(map.value(key)); if (newStrings.contains(key)) return newStrings.value(key); throw NoSuchValueException(); }
