Mercurial > dedupe
view HuffmanSet.cpp @ 40:f711ddb56ae7
Sort up includes.
| author | Tom Fredrik Blenning Klaussen <bfg@blenning.no> |
|---|---|
| date | Fri, 07 Sep 2012 13:32:33 +0200 |
| parents | c52a0627337c |
| children | f8d0ea827db3 |
line wrap: on
line source
#include "HuffmanString.hpp" #include "Exception/InvalidDataException.hpp" #include "Exception/NoSuchValueException.hpp" #include <QtCore/QHash> HuffmanSet::HuffmanSet() : cutoff(256), numInserts(0), lut(0) { } void HuffmanSet::setCutoff(uint cutoff) { this->cutoff = cutoff; } QStringList HuffmanSet::chunks(const QString& str) { return str.split("", QString::SkipEmptyParts); } BitDecoder* HuffmanSet::createLut(const QMap<QString, uint>& freqTable) { QMultiMap<uint, BitDecoder* > freqs; for(QMap<QString, uint>::const_iterator it = freqTable.begin(); it != freqTable.end(); ++it) { freqs.insert(it.value(), new BitDecoder(it.key())); } if (freqs.size() == 1) { QList<uint> keys = freqs.keys(); return freqs.take(keys[0]); } QList<uint> keys = freqs.keys(); while (freqs.size() >= 2) { QList<uint> keys = freqs.keys(); BitDecoder* v0 = freqs.take(keys[0]); BitDecoder* v1 = freqs.take(keys[1]); BitDecoder* n = BitDecoder::merge(v0, v1); freqs.insert(keys[0] + keys[1], n); } BitDecoder* retVal = freqs.values()[0]; return retVal; } QString HuffmanSet::decode(const QBitArray& bits) const { return lut->decode(bits); } QBitArray HuffmanSet::encode(const QString& string, const QMap<QString, QBitArray>& encoder) { QBitArray retVal; QStringList c = chunks(string); foreach(const QString& fragment, c) { if (encoder.contains(fragment)) retVal = BitDecoder::unite(retVal, encoder[fragment]); else throw InvalidDataException(); } return retVal; } uint HuffmanSet::totalElements() const { return newStrings.size() + map.size(); } void HuffmanSet::rebuild() { QMap<QString, uint> freqTable; foreach(key_t key, map.keys()) { foreach(const QString& chunk, chunks(decode(map.value(key)))) { ++freqTable[chunk]; } } foreach(key_t key, newStrings.keys()) { foreach(const QString& chunk, chunks(newStrings.value(key))) { ++freqTable[chunk]; } } BitDecoder* newLut = createLut(freqTable); encoder = newLut->createEncoder(); foreach(key_t key, map.keys()) { map.insert(key, encode(decode(map.value(key)), encoder)); } foreach(key_t key, newStrings.keys()) { map.insert(key, encode(newStrings.value(key), encoder)); } numInserts = 0; delete lut; lut = newLut; newStrings.clear(); } bool HuffmanSet::contains(key_t key) const { return newStrings.contains(key) || map.contains(key); } HuffmanSet::key_t HuffmanSet::hash(const QString& str) { key_t key = qHash(str); while (contains(key) && value(key) != str) ++key; return key; } HuffmanSet::key_t HuffmanSet::insert(const QString& str) { key_t key = hash(str); if (!contains(key)) { try { QBitArray bits = encode(str, encoder); map.insert(key, bits); } catch (InvalidDataException& e) { newStrings.insert(key, str); } if (++numInserts >= cutoff) { rebuild(); } } return key; } QString HuffmanSet::value(key_t key) const { if (map.contains(key)) return decode(map.value(key)); if (newStrings.contains(key)) return newStrings.value(key); throw NoSuchValueException(); }
