changeset 46:877327e9061a

N-Tree for decoding.
author Tom Fredrik Blenning Klaussen <bfg@blenning.no>
date Mon, 10 Sep 2012 21:31:10 +0200
parents 41cc0d8ac77f
children b23f04d4a276
files CMakeLists.txt FastBitDecoder.cpp FastBitDecoder.hpp TestFastBitDecoder.cpp
diffstat 4 files changed, 204 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/CMakeLists.txt	Mon Sep 10 21:29:43 2012 +0200
+++ b/CMakeLists.txt	Mon Sep 10 21:31:10 2012 +0200
@@ -38,6 +38,7 @@
 	EditDistance.cpp
 	Exception/IOException.cpp
 	Exception/SQLException.cpp
+	FastBitDecoder.cpp
 	FileDBLink.cpp
 	HuffmanSet.cpp
 	HuffmanString.cpp
@@ -80,6 +81,7 @@
 
 
 SET(CMAKE_CXX_FLAGS "-O3 -Wall")
+#SET(CMAKE_CXX_FLAGS "-g2 -Wall -fno-inline")
 
 ADD_EXECUTABLE(DeDupe Apps/DeDupe.cpp ${SOURCES} ${MOC_SOURCES})
 TARGET_LINK_LIBRARIES(DeDupe ${QT_LIBRARIES} ${SQLITE3_LIBRARIES} ${Boost_LIBRARIES})
@@ -91,6 +93,7 @@
 NEW_TEST(TestBitDecoder)
 NEW_TEST(TestDBCache)
 NEW_TEST(TestEditDistance)
+NEW_TEST(TestFastBitDecoder)
 NEW_TEST(TestHuffmanString)
 NEW_TEST(TestRBTree)
 NEW_TEST(TestSqliteDBLink)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/FastBitDecoder.cpp	Mon Sep 10 21:31:10 2012 +0200
@@ -0,0 +1,103 @@
+#include "FastBitDecoder.hpp"
+
+#include "BitDecoder.hpp"
+
+#include <QtCore/QDebug>
+
+#include <cassert>
+
+unsigned char FastBitDecoder::getPaddedChar(const QBitArray& bits, uint offset)
+{
+  unsigned char retVal = 0;
+  size_t n = std::min(8u, bits.size() - offset);
+  for (size_t i = 0; i < n; ++i) {
+    retVal |= (bits.testBit(i + offset) ? 1 : 0) << (7 - i) ;
+  }
+  return retVal;
+}
+
+QBitArray FastBitDecoder::removeFirst(const QBitArray& bits)
+{
+  size_t N = bits.size();
+  assert(N - 8 > 0);
+  size_t n = N - 8;
+  QBitArray retVal(n);
+  for (size_t i = 0; i < n; ++i) {
+    retVal.setBit(i, bits.at(i + 8));
+  }
+  return retVal;
+}
+
+
+void FastBitDecoder::fill()
+{
+  for (size_t i = 0; i < N; ++i) {
+    if (decoder[i] != 0) {
+      decoder[i]->fill();
+    }
+    else if (data[i] == 0) {
+      assert(data[i - 1]);
+      data[i] = data[i - 1];
+      numBits[i] = numBits[i - 1];
+    }
+  }
+}
+
+void FastBitDecoder::blank()
+{
+  for (size_t i = 0; i < N; ++i) {
+    decoder[i] = 0;
+    data[i] = 0;
+    numBits[i] = 0;
+  }
+}
+
+FastBitDecoder::FastBitDecoder()
+{
+  blank();
+}
+
+
+FastBitDecoder::FastBitDecoder(const QMap<QString, QBitArray>& encoder)
+{
+  blank();
+  for(QMap<QString, QBitArray>::const_iterator it =  encoder.begin();
+      it != encoder.end(); ++it) {
+    insert(it.value(), it.key());
+  }
+  fill();
+}
+
+
+void FastBitDecoder::insert(const QBitArray& key, const QString& value)
+{
+  unsigned char l = getPaddedChar(key);
+  if (key.size() <= 8) {
+    data[l] = new QString(value);
+    numBits[l] = key.size();
+  }
+  else {
+    if (!decoder[l])
+      decoder[l] = new FastBitDecoder();
+    decoder[l]->insert(removeFirst(key), value);
+    numBits[l] = 8;
+  }  
+}
+
+
+uint FastBitDecoder::decode(QString& resString, const QBitArray& bits, uint offset) const
+{
+  unsigned char l = getPaddedChar(bits, offset);
+  if (data[l]) {
+    resString.append(*data[l]);
+    return numBits[l];
+  }
+  else {
+    return decoder[l]->decode(resString, bits, offset + 8) + 8;
+  }
+}
+
+QString FastBitDecoder::decode(const QString& bits) const
+{
+  return decode(BitDecoder::bitsFromString(bits));
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/FastBitDecoder.hpp	Mon Sep 10 21:31:10 2012 +0200
@@ -0,0 +1,40 @@
+#ifndef FASTBITDECODER_HPP
+#define FASTBITDECODER_HPP
+
+#include <QtCore/QString>
+#include <QtCore/QMap>
+#include <QtCore/QBitArray>
+
+class FastBitDecoder {
+  static const size_t N = 256;
+  FastBitDecoder* decoder[N];
+  unsigned char numBits[N];
+  QString* data[N];
+
+  static unsigned char getPaddedChar(const QBitArray& bitArray, uint offset = 0);
+  static QBitArray removeFirst(const QBitArray& bits);
+
+  void fill();
+  void blank();
+  FastBitDecoder();
+  void insert(const QBitArray& key, const QString& value);
+  uint decode(QString& resString, const QBitArray& bits, uint offset) const;
+
+public:
+  FastBitDecoder(const QMap<QString, QBitArray>& encoder);
+  QString decode(const QBitArray& bits) const
+  {
+    QString combined;
+    uint n = bits.size();
+    //Just a qualified overestimate guess on what we will need.
+    combined.reserve(n/4);
+    for (uint decodedBits = 0; decodedBits < n; decodedBits += decode(combined, bits, decodedBits));
+    combined.squeeze();
+    return combined;
+  }
+  QString decode(const QString& bits) const;
+
+
+};
+
+#endif //BITDECODER_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TestFastBitDecoder.cpp	Mon Sep 10 21:31:10 2012 +0200
@@ -0,0 +1,58 @@
+#include "FastBitDecoder.hpp"
+#include "TestFramework.hpp"
+
+#include "BitDecoder.hpp"
+
+
+BOOST_AUTO_TEST_CASE( TestSimple )
+{
+  BitDecoder* up = new BitDecoder("a");
+  BitDecoder* down = new BitDecoder("b");
+
+  BitDecoder* full = BitDecoder::merge(down, up);
+
+  BOOST_REQUIRE(full->data().isNull());
+  BOOST_REQUIRE_EQUAL(up->data(), "a");
+  BOOST_REQUIRE_EQUAL(down->data(), "b");
+
+  FastBitDecoder fast(full->createEncoder());
+
+  BOOST_REQUIRE_EQUAL(fast.decode("1"), "a");
+  BOOST_REQUIRE_EQUAL(fast.decode("0"), "b");
+  BOOST_REQUIRE_EQUAL(fast.decode("1111"), "aaaa");
+  BOOST_REQUIRE_EQUAL(fast.decode("0000"), "bbbb");
+  BOOST_REQUIRE_EQUAL(fast.decode("1101"), "aaba");
+  BOOST_REQUIRE_EQUAL(fast.decode("1111"), "aaaa");
+}
+
+BOOST_AUTO_TEST_CASE( TestLong )
+{
+  BitDecoder* up = new BitDecoder("b");
+  BitDecoder* down = new BitDecoder("a");
+  for (int i = 0; i < 12; ++i) {
+    down = BitDecoder::merge(down, up);
+    up = new BitDecoder(QString('c' + i));
+  }
+
+  BitDecoder* full = BitDecoder::merge(down, up);
+
+  BOOST_REQUIRE(full->data().isNull());
+
+  FastBitDecoder fast(full->createEncoder());
+
+  BOOST_REQUIRE_EQUAL(fast.decode("1"), "n");
+  BOOST_REQUIRE_EQUAL(fast.decode("01"), "m");
+  BOOST_REQUIRE_EQUAL(fast.decode("001"), "l");
+  BOOST_REQUIRE_EQUAL(fast.decode("0001"), "k");
+  BOOST_REQUIRE_EQUAL(fast.decode("00001"), "j");
+  BOOST_REQUIRE_EQUAL(fast.decode("000001"), "i");
+  BOOST_REQUIRE_EQUAL(fast.decode("0000001"), "h");
+  BOOST_REQUIRE_EQUAL(fast.decode("00000001"), "g");
+  BOOST_REQUIRE_EQUAL(fast.decode("000000001"), "f");
+  BOOST_REQUIRE_EQUAL(fast.decode("0000000001"), "e");
+  BOOST_REQUIRE_EQUAL(fast.decode("00000000001"), "d");
+  BOOST_REQUIRE_EQUAL(fast.decode("000000000001"), "c");
+  BOOST_REQUIRE_EQUAL(fast.decode("0000000000001"), "b");
+  BOOST_REQUIRE_EQUAL(fast.decode("0000000000000"), "a");
+
+}