changeset 21:3bcdb8bb6914

Huffman representations.
author Tom Fredrik Blenning Klaussen <bfg@blenning.no>
date Wed, 05 Sep 2012 21:54:53 +0200
parents 754e12c927b3
children d62f708ad88b
files HuffmanSet.cpp HuffmanSet.hpp HuffmanString.cpp HuffmanString.hpp TestHuffmanString.cpp
diffstat 5 files changed, 261 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/HuffmanSet.cpp	Wed Sep 05 21:54:53 2012 +0200
@@ -0,0 +1,150 @@
+#include "HuffmanString.hpp"
+
+#include <QtCore/QHash>
+#include "NoSuchValueException.hpp"
+#include <QtCore/QDebug>
+
+#include "InvalidDataException.hpp"
+
+
+HuffmanSet::HuffmanSet() : cutoff(128), numInserts(0), lut(0)
+{
+  qDebug() << __FUNCTION__;
+}
+
+void HuffmanSet::setCutoff(uint cutoff)
+{
+  this->cutoff = cutoff;
+}
+
+QStringList HuffmanSet::chunks(const QString& str)
+{
+  return str.split("");
+}
+
+BitDecoder* HuffmanSet::createLut(const QMap<QString, uint>& freqTable)
+{
+  QMultiMap<uint, BitDecoder* > freqs;    
+  for(QMap<QString, uint>::const_iterator it = freqTable.begin();
+      it != freqTable.end(); ++it) {
+    freqs.insert(it.value(), new BitDecoder(it.key()));
+  }
+
+  if (freqs.size() == 1) {
+    QList<uint> keys = freqs.keys();
+    return freqs.take(keys[0]);
+  }
+
+  QList<uint> keys = freqs.keys();
+
+  while (freqs.size() >= 2) {
+    QList<uint> keys = freqs.keys();
+
+    BitDecoder* v0 = freqs.take(keys[0]);
+    BitDecoder* v1 = freqs.take(keys[1]);
+
+    BitDecoder* n = BitDecoder::merge(v0, v1);
+
+    freqs.insert(keys[0] + keys[1], n);
+  }
+  BitDecoder* retVal = freqs.values()[0];
+  return retVal;
+}
+
+QString HuffmanSet::decode(const QBitArray& bits) const
+{
+  return lut->decode(bits);
+}
+
+QBitArray HuffmanSet::encode(const QString& string, const QMap<QString, QBitArray>& encoder)
+{
+  QBitArray retVal;
+  QStringList c = chunks(string);
+  foreach(const QString& fragment, c) {
+    if (encoder.contains(fragment))
+      retVal = BitDecoder::unite(retVal, encoder[fragment]);
+    else
+      throw InvalidDataException();
+  }
+  return retVal;
+}
+
+uint HuffmanSet::totalElements() const
+{
+  return newStrings.size() + map.size();
+}
+
+void HuffmanSet::rebuild()
+{
+  qDebug() << __FUNCTION__ << totalElements() << numInserts << newStrings.size();
+
+  QMap<key_t, QBitArray> newVals;
+
+  QMap<QString, uint> freqTable;
+
+  foreach(key_t key, map.keys()) {
+    foreach(const QString& chunk, chunks(decode(map.value(key)))) {
+      ++freqTable[chunk];
+    }
+  }
+  foreach(key_t key, newStrings.keys()) {
+    foreach(const QString& chunk, chunks(newStrings.value(key))) {
+      ++freqTable[chunk];
+    }
+  }
+
+  BitDecoder* newLut = createLut(freqTable);
+
+  encoder = newLut->createEncoder();
+
+  foreach(key_t key, map.keys()) {
+    map.insert(key, encode(decode(map.value(key)), encoder));
+  }
+  foreach(key_t key, newStrings.keys()) {
+    map.insert(key, encode(newStrings.value(key), encoder));
+  }
+  numInserts = 0;
+  delete lut;
+  lut = newLut;
+  newStrings.clear();
+}
+
+bool HuffmanSet::contains(key_t key) const
+{
+  return newStrings.contains(key) || map.contains(key);
+}
+
+HuffmanSet::key_t HuffmanSet::hash(const QString& str)
+{
+  key_t key = qHash(str);
+  while (contains(key) && value(key) != str)
+    ++key;
+  return key;
+}
+
+HuffmanSet::key_t HuffmanSet::insert(const QString& str)
+{
+  key_t key = hash(str);
+  if (!contains(key)) {
+    try {
+      QBitArray bits = encode(str, encoder);
+      map.insert(key, bits);
+    }
+    catch (InvalidDataException& e) {
+      newStrings.insert(key, str);
+    }
+    if (++numInserts >= cutoff) {
+      rebuild();
+    }
+  }
+  return key;
+}
+
+QString HuffmanSet::value(key_t key) const
+{
+  if (map.contains(key))
+    return decode(map.value(key));
+  if (newStrings.contains(key))
+    return newStrings.value(key);
+  throw NoSuchValueException();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/HuffmanSet.hpp	Wed Sep 05 21:54:53 2012 +0200
@@ -0,0 +1,39 @@
+#ifndef HUFFMANSET_HPP
+#define HUFFMANSET_HPP
+
+#include <QtCore/QMap>
+#include <QtCore/QString>
+#include <QtCore/QStringList>
+#include <QtCore/QBitArray>
+
+#include "BitDecoder.hpp"
+
+class HuffmanSet {
+public:
+  typedef uint key_t;
+
+private:
+  QMap<key_t, QString> newStrings;
+  QMap<key_t, QBitArray> map;
+  QMap<QString, QBitArray> encoder;
+  uint cutoff;
+  uint numInserts;
+  BitDecoder* lut;
+
+
+public:
+  HuffmanSet();
+  void setCutoff(uint cutoff);
+  static QStringList chunks(const QString& str);
+  BitDecoder* createLut(const QMap<QString, uint>& freqTable);
+  QString decode(const QBitArray& bits) const;
+  static QBitArray encode(const QString& string, const QMap<QString, QBitArray>& encoder);
+  void rebuild();
+  bool contains(key_t key) const;
+  uint totalElements() const;
+  key_t hash(const QString& str);
+  key_t insert(const QString& str);
+  QString value(key_t key) const;
+};
+
+#endif //HUFFMANSET_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/HuffmanString.cpp	Wed Sep 05 21:54:53 2012 +0200
@@ -0,0 +1,33 @@
+#include "HuffmanString.hpp"
+
+HuffmanSet* HuffmanString::set = 0;
+
+HuffmanString::HuffmanString(const QString& str, HuffmanSet* set)
+{
+  set = this->set;
+  if (!set)
+    set = new HuffmanSet();
+  this->set = set;
+  
+  key = set->insert(str);
+}
+
+QString HuffmanString::toString() const
+{
+  return set->value(key);
+}
+
+HuffmanString::operator QString() const
+{
+  return toString();
+}
+
+bool HuffmanString::operator<(const HuffmanString& rhs) const
+{
+  return this->toString() < rhs.toString();
+}
+
+bool HuffmanString::operator==(const HuffmanString& rhs) const
+{
+  return this->toString() == rhs.toString();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/HuffmanString.hpp	Wed Sep 05 21:54:53 2012 +0200
@@ -0,0 +1,19 @@
+#ifndef HUFFMANSTRING_HPP
+#define HUFFMANSTRING_HPP
+
+#include <QtCore/QString>
+#include "HuffmanSet.hpp"
+
+class HuffmanString {
+  static HuffmanSet* set;
+  HuffmanSet::key_t key;
+public:
+
+  HuffmanString(const QString& str = QString(), HuffmanSet* set = NULL);
+  QString toString() const;
+  operator QString() const;
+  bool operator<(const HuffmanString& rhs) const;
+  bool operator==(const HuffmanString& rhs) const;
+};
+
+#endif //HUFFMANSTRING_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TestHuffmanString.cpp	Wed Sep 05 21:54:53 2012 +0200
@@ -0,0 +1,20 @@
+#include "HuffmanString.hpp"
+#include "TestFramework.hpp"
+
+#include <QtCore/QDebug>
+
+BOOST_AUTO_TEST_CASE( TestSimple )
+{
+  HuffmanSet set;
+  set.setCutoff(1);
+
+  QList<HuffmanString> strList;
+  for (uint n = 0; n < 100; ++n) {
+    QString str = QString("test%1").arg(n);
+    strList << HuffmanString(str, &set);
+    for (uint i = 0; i <= n; ++i) {
+      QString str = QString("test%1").arg(i);
+      BOOST_REQUIRE_EQUAL(strList[i], str);
+    }
+  }
+}