diff HuffmanSet.cpp @ 21:3bcdb8bb6914

Huffman representations.
author Tom Fredrik Blenning Klaussen <bfg@blenning.no>
date Wed, 05 Sep 2012 21:54:53 +0200
parents
children c0ddc978475a
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/HuffmanSet.cpp	Wed Sep 05 21:54:53 2012 +0200
@@ -0,0 +1,150 @@
+#include "HuffmanString.hpp"
+
+#include <QtCore/QHash>
+#include "NoSuchValueException.hpp"
+#include <QtCore/QDebug>
+
+#include "InvalidDataException.hpp"
+
+
+HuffmanSet::HuffmanSet() : cutoff(128), numInserts(0), lut(0)
+{
+  qDebug() << __FUNCTION__;
+}
+
+void HuffmanSet::setCutoff(uint cutoff)
+{
+  this->cutoff = cutoff;
+}
+
+QStringList HuffmanSet::chunks(const QString& str)
+{
+  return str.split("");
+}
+
+BitDecoder* HuffmanSet::createLut(const QMap<QString, uint>& freqTable)
+{
+  QMultiMap<uint, BitDecoder* > freqs;    
+  for(QMap<QString, uint>::const_iterator it = freqTable.begin();
+      it != freqTable.end(); ++it) {
+    freqs.insert(it.value(), new BitDecoder(it.key()));
+  }
+
+  if (freqs.size() == 1) {
+    QList<uint> keys = freqs.keys();
+    return freqs.take(keys[0]);
+  }
+
+  QList<uint> keys = freqs.keys();
+
+  while (freqs.size() >= 2) {
+    QList<uint> keys = freqs.keys();
+
+    BitDecoder* v0 = freqs.take(keys[0]);
+    BitDecoder* v1 = freqs.take(keys[1]);
+
+    BitDecoder* n = BitDecoder::merge(v0, v1);
+
+    freqs.insert(keys[0] + keys[1], n);
+  }
+  BitDecoder* retVal = freqs.values()[0];
+  return retVal;
+}
+
+QString HuffmanSet::decode(const QBitArray& bits) const
+{
+  return lut->decode(bits);
+}
+
+QBitArray HuffmanSet::encode(const QString& string, const QMap<QString, QBitArray>& encoder)
+{
+  QBitArray retVal;
+  QStringList c = chunks(string);
+  foreach(const QString& fragment, c) {
+    if (encoder.contains(fragment))
+      retVal = BitDecoder::unite(retVal, encoder[fragment]);
+    else
+      throw InvalidDataException();
+  }
+  return retVal;
+}
+
+uint HuffmanSet::totalElements() const
+{
+  return newStrings.size() + map.size();
+}
+
+void HuffmanSet::rebuild()
+{
+  qDebug() << __FUNCTION__ << totalElements() << numInserts << newStrings.size();
+
+  QMap<key_t, QBitArray> newVals;
+
+  QMap<QString, uint> freqTable;
+
+  foreach(key_t key, map.keys()) {
+    foreach(const QString& chunk, chunks(decode(map.value(key)))) {
+      ++freqTable[chunk];
+    }
+  }
+  foreach(key_t key, newStrings.keys()) {
+    foreach(const QString& chunk, chunks(newStrings.value(key))) {
+      ++freqTable[chunk];
+    }
+  }
+
+  BitDecoder* newLut = createLut(freqTable);
+
+  encoder = newLut->createEncoder();
+
+  foreach(key_t key, map.keys()) {
+    map.insert(key, encode(decode(map.value(key)), encoder));
+  }
+  foreach(key_t key, newStrings.keys()) {
+    map.insert(key, encode(newStrings.value(key), encoder));
+  }
+  numInserts = 0;
+  delete lut;
+  lut = newLut;
+  newStrings.clear();
+}
+
+bool HuffmanSet::contains(key_t key) const
+{
+  return newStrings.contains(key) || map.contains(key);
+}
+
+HuffmanSet::key_t HuffmanSet::hash(const QString& str)
+{
+  key_t key = qHash(str);
+  while (contains(key) && value(key) != str)
+    ++key;
+  return key;
+}
+
+HuffmanSet::key_t HuffmanSet::insert(const QString& str)
+{
+  key_t key = hash(str);
+  if (!contains(key)) {
+    try {
+      QBitArray bits = encode(str, encoder);
+      map.insert(key, bits);
+    }
+    catch (InvalidDataException& e) {
+      newStrings.insert(key, str);
+    }
+    if (++numInserts >= cutoff) {
+      rebuild();
+    }
+  }
+  return key;
+}
+
+QString HuffmanSet::value(key_t key) const
+{
+  if (map.contains(key))
+    return decode(map.value(key));
+  if (newStrings.contains(key))
+    return newStrings.value(key);
+  throw NoSuchValueException();
+}