# HG changeset patch # User Tom Fredrik Blenning Klaussen # Date 1347305470 -7200 # Node ID 877327e9061aef61fb9094d8e30facd4631446a1 # Parent 41cc0d8ac77fe22701a0b4c373142ac3968505bc N-Tree for decoding. diff -r 41cc0d8ac77f -r 877327e9061a CMakeLists.txt --- a/CMakeLists.txt Mon Sep 10 21:29:43 2012 +0200 +++ b/CMakeLists.txt Mon Sep 10 21:31:10 2012 +0200 @@ -38,6 +38,7 @@ EditDistance.cpp Exception/IOException.cpp Exception/SQLException.cpp + FastBitDecoder.cpp FileDBLink.cpp HuffmanSet.cpp HuffmanString.cpp @@ -80,6 +81,7 @@ SET(CMAKE_CXX_FLAGS "-O3 -Wall") +#SET(CMAKE_CXX_FLAGS "-g2 -Wall -fno-inline") ADD_EXECUTABLE(DeDupe Apps/DeDupe.cpp ${SOURCES} ${MOC_SOURCES}) TARGET_LINK_LIBRARIES(DeDupe ${QT_LIBRARIES} ${SQLITE3_LIBRARIES} ${Boost_LIBRARIES}) @@ -91,6 +93,7 @@ NEW_TEST(TestBitDecoder) NEW_TEST(TestDBCache) NEW_TEST(TestEditDistance) +NEW_TEST(TestFastBitDecoder) NEW_TEST(TestHuffmanString) NEW_TEST(TestRBTree) NEW_TEST(TestSqliteDBLink) diff -r 41cc0d8ac77f -r 877327e9061a FastBitDecoder.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/FastBitDecoder.cpp Mon Sep 10 21:31:10 2012 +0200 @@ -0,0 +1,103 @@ +#include "FastBitDecoder.hpp" + +#include "BitDecoder.hpp" + +#include + +#include + +unsigned char FastBitDecoder::getPaddedChar(const QBitArray& bits, uint offset) +{ + unsigned char retVal = 0; + size_t n = std::min(8u, bits.size() - offset); + for (size_t i = 0; i < n; ++i) { + retVal |= (bits.testBit(i + offset) ? 1 : 0) << (7 - i) ; + } + return retVal; +} + +QBitArray FastBitDecoder::removeFirst(const QBitArray& bits) +{ + size_t N = bits.size(); + assert(N - 8 > 0); + size_t n = N - 8; + QBitArray retVal(n); + for (size_t i = 0; i < n; ++i) { + retVal.setBit(i, bits.at(i + 8)); + } + return retVal; +} + + +void FastBitDecoder::fill() +{ + for (size_t i = 0; i < N; ++i) { + if (decoder[i] != 0) { + decoder[i]->fill(); + } + else if (data[i] == 0) { + assert(data[i - 1]); + data[i] = data[i - 1]; + numBits[i] = numBits[i - 1]; + } + } +} + +void FastBitDecoder::blank() +{ + for (size_t i = 0; i < N; ++i) { + decoder[i] = 0; + data[i] = 0; + numBits[i] = 0; + } +} + +FastBitDecoder::FastBitDecoder() +{ + blank(); +} + + +FastBitDecoder::FastBitDecoder(const QMap& encoder) +{ + blank(); + for(QMap::const_iterator it = encoder.begin(); + it != encoder.end(); ++it) { + insert(it.value(), it.key()); + } + fill(); +} + + +void FastBitDecoder::insert(const QBitArray& key, const QString& value) +{ + unsigned char l = getPaddedChar(key); + if (key.size() <= 8) { + data[l] = new QString(value); + numBits[l] = key.size(); + } + else { + if (!decoder[l]) + decoder[l] = new FastBitDecoder(); + decoder[l]->insert(removeFirst(key), value); + numBits[l] = 8; + } +} + + +uint FastBitDecoder::decode(QString& resString, const QBitArray& bits, uint offset) const +{ + unsigned char l = getPaddedChar(bits, offset); + if (data[l]) { + resString.append(*data[l]); + return numBits[l]; + } + else { + return decoder[l]->decode(resString, bits, offset + 8) + 8; + } +} + +QString FastBitDecoder::decode(const QString& bits) const +{ + return decode(BitDecoder::bitsFromString(bits)); +} diff -r 41cc0d8ac77f -r 877327e9061a FastBitDecoder.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/FastBitDecoder.hpp Mon Sep 10 21:31:10 2012 +0200 @@ -0,0 +1,40 @@ +#ifndef FASTBITDECODER_HPP +#define FASTBITDECODER_HPP + +#include +#include +#include + +class FastBitDecoder { + static const size_t N = 256; + FastBitDecoder* decoder[N]; + unsigned char numBits[N]; + QString* data[N]; + + static unsigned char getPaddedChar(const QBitArray& bitArray, uint offset = 0); + static QBitArray removeFirst(const QBitArray& bits); + + void fill(); + void blank(); + FastBitDecoder(); + void insert(const QBitArray& key, const QString& value); + uint decode(QString& resString, const QBitArray& bits, uint offset) const; + +public: + FastBitDecoder(const QMap& encoder); + QString decode(const QBitArray& bits) const + { + QString combined; + uint n = bits.size(); + //Just a qualified overestimate guess on what we will need. + combined.reserve(n/4); + for (uint decodedBits = 0; decodedBits < n; decodedBits += decode(combined, bits, decodedBits)); + combined.squeeze(); + return combined; + } + QString decode(const QString& bits) const; + + +}; + +#endif //BITDECODER_HPP diff -r 41cc0d8ac77f -r 877327e9061a TestFastBitDecoder.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/TestFastBitDecoder.cpp Mon Sep 10 21:31:10 2012 +0200 @@ -0,0 +1,58 @@ +#include "FastBitDecoder.hpp" +#include "TestFramework.hpp" + +#include "BitDecoder.hpp" + + +BOOST_AUTO_TEST_CASE( TestSimple ) +{ + BitDecoder* up = new BitDecoder("a"); + BitDecoder* down = new BitDecoder("b"); + + BitDecoder* full = BitDecoder::merge(down, up); + + BOOST_REQUIRE(full->data().isNull()); + BOOST_REQUIRE_EQUAL(up->data(), "a"); + BOOST_REQUIRE_EQUAL(down->data(), "b"); + + FastBitDecoder fast(full->createEncoder()); + + BOOST_REQUIRE_EQUAL(fast.decode("1"), "a"); + BOOST_REQUIRE_EQUAL(fast.decode("0"), "b"); + BOOST_REQUIRE_EQUAL(fast.decode("1111"), "aaaa"); + BOOST_REQUIRE_EQUAL(fast.decode("0000"), "bbbb"); + BOOST_REQUIRE_EQUAL(fast.decode("1101"), "aaba"); + BOOST_REQUIRE_EQUAL(fast.decode("1111"), "aaaa"); +} + +BOOST_AUTO_TEST_CASE( TestLong ) +{ + BitDecoder* up = new BitDecoder("b"); + BitDecoder* down = new BitDecoder("a"); + for (int i = 0; i < 12; ++i) { + down = BitDecoder::merge(down, up); + up = new BitDecoder(QString('c' + i)); + } + + BitDecoder* full = BitDecoder::merge(down, up); + + BOOST_REQUIRE(full->data().isNull()); + + FastBitDecoder fast(full->createEncoder()); + + BOOST_REQUIRE_EQUAL(fast.decode("1"), "n"); + BOOST_REQUIRE_EQUAL(fast.decode("01"), "m"); + BOOST_REQUIRE_EQUAL(fast.decode("001"), "l"); + BOOST_REQUIRE_EQUAL(fast.decode("0001"), "k"); + BOOST_REQUIRE_EQUAL(fast.decode("00001"), "j"); + BOOST_REQUIRE_EQUAL(fast.decode("000001"), "i"); + BOOST_REQUIRE_EQUAL(fast.decode("0000001"), "h"); + BOOST_REQUIRE_EQUAL(fast.decode("00000001"), "g"); + BOOST_REQUIRE_EQUAL(fast.decode("000000001"), "f"); + BOOST_REQUIRE_EQUAL(fast.decode("0000000001"), "e"); + BOOST_REQUIRE_EQUAL(fast.decode("00000000001"), "d"); + BOOST_REQUIRE_EQUAL(fast.decode("000000000001"), "c"); + BOOST_REQUIRE_EQUAL(fast.decode("0000000000001"), "b"); + BOOST_REQUIRE_EQUAL(fast.decode("0000000000000"), "a"); + +}