Mercurial > dedupe
changeset 46:877327e9061a
N-Tree for decoding.
| author | Tom Fredrik Blenning Klaussen <bfg@blenning.no> |
|---|---|
| date | Mon, 10 Sep 2012 21:31:10 +0200 |
| parents | 41cc0d8ac77f |
| children | b23f04d4a276 |
| files | CMakeLists.txt FastBitDecoder.cpp FastBitDecoder.hpp TestFastBitDecoder.cpp |
| diffstat | 4 files changed, 204 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/CMakeLists.txt Mon Sep 10 21:29:43 2012 +0200 +++ b/CMakeLists.txt Mon Sep 10 21:31:10 2012 +0200 @@ -38,6 +38,7 @@ EditDistance.cpp Exception/IOException.cpp Exception/SQLException.cpp + FastBitDecoder.cpp FileDBLink.cpp HuffmanSet.cpp HuffmanString.cpp @@ -80,6 +81,7 @@ SET(CMAKE_CXX_FLAGS "-O3 -Wall") +#SET(CMAKE_CXX_FLAGS "-g2 -Wall -fno-inline") ADD_EXECUTABLE(DeDupe Apps/DeDupe.cpp ${SOURCES} ${MOC_SOURCES}) TARGET_LINK_LIBRARIES(DeDupe ${QT_LIBRARIES} ${SQLITE3_LIBRARIES} ${Boost_LIBRARIES}) @@ -91,6 +93,7 @@ NEW_TEST(TestBitDecoder) NEW_TEST(TestDBCache) NEW_TEST(TestEditDistance) +NEW_TEST(TestFastBitDecoder) NEW_TEST(TestHuffmanString) NEW_TEST(TestRBTree) NEW_TEST(TestSqliteDBLink)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/FastBitDecoder.cpp Mon Sep 10 21:31:10 2012 +0200 @@ -0,0 +1,103 @@ +#include "FastBitDecoder.hpp" + +#include "BitDecoder.hpp" + +#include <QtCore/QDebug> + +#include <cassert> + +unsigned char FastBitDecoder::getPaddedChar(const QBitArray& bits, uint offset) +{ + unsigned char retVal = 0; + size_t n = std::min(8u, bits.size() - offset); + for (size_t i = 0; i < n; ++i) { + retVal |= (bits.testBit(i + offset) ? 1 : 0) << (7 - i) ; + } + return retVal; +} + +QBitArray FastBitDecoder::removeFirst(const QBitArray& bits) +{ + size_t N = bits.size(); + assert(N - 8 > 0); + size_t n = N - 8; + QBitArray retVal(n); + for (size_t i = 0; i < n; ++i) { + retVal.setBit(i, bits.at(i + 8)); + } + return retVal; +} + + +void FastBitDecoder::fill() +{ + for (size_t i = 0; i < N; ++i) { + if (decoder[i] != 0) { + decoder[i]->fill(); + } + else if (data[i] == 0) { + assert(data[i - 1]); + data[i] = data[i - 1]; + numBits[i] = numBits[i - 1]; + } + } +} + +void FastBitDecoder::blank() +{ + for (size_t i = 0; i < N; ++i) { + decoder[i] = 0; + data[i] = 0; + numBits[i] = 0; + } +} + +FastBitDecoder::FastBitDecoder() +{ + blank(); +} + + +FastBitDecoder::FastBitDecoder(const QMap<QString, QBitArray>& encoder) +{ + blank(); + for(QMap<QString, QBitArray>::const_iterator it = encoder.begin(); + it != encoder.end(); ++it) { + insert(it.value(), it.key()); + } + fill(); +} + + +void FastBitDecoder::insert(const QBitArray& key, const QString& value) +{ + unsigned char l = getPaddedChar(key); + if (key.size() <= 8) { + data[l] = new QString(value); + numBits[l] = key.size(); + } + else { + if (!decoder[l]) + decoder[l] = new FastBitDecoder(); + decoder[l]->insert(removeFirst(key), value); + numBits[l] = 8; + } +} + + +uint FastBitDecoder::decode(QString& resString, const QBitArray& bits, uint offset) const +{ + unsigned char l = getPaddedChar(bits, offset); + if (data[l]) { + resString.append(*data[l]); + return numBits[l]; + } + else { + return decoder[l]->decode(resString, bits, offset + 8) + 8; + } +} + +QString FastBitDecoder::decode(const QString& bits) const +{ + return decode(BitDecoder::bitsFromString(bits)); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/FastBitDecoder.hpp Mon Sep 10 21:31:10 2012 +0200 @@ -0,0 +1,40 @@ +#ifndef FASTBITDECODER_HPP +#define FASTBITDECODER_HPP + +#include <QtCore/QString> +#include <QtCore/QMap> +#include <QtCore/QBitArray> + +class FastBitDecoder { + static const size_t N = 256; + FastBitDecoder* decoder[N]; + unsigned char numBits[N]; + QString* data[N]; + + static unsigned char getPaddedChar(const QBitArray& bitArray, uint offset = 0); + static QBitArray removeFirst(const QBitArray& bits); + + void fill(); + void blank(); + FastBitDecoder(); + void insert(const QBitArray& key, const QString& value); + uint decode(QString& resString, const QBitArray& bits, uint offset) const; + +public: + FastBitDecoder(const QMap<QString, QBitArray>& encoder); + QString decode(const QBitArray& bits) const + { + QString combined; + uint n = bits.size(); + //Just a qualified overestimate guess on what we will need. + combined.reserve(n/4); + for (uint decodedBits = 0; decodedBits < n; decodedBits += decode(combined, bits, decodedBits)); + combined.squeeze(); + return combined; + } + QString decode(const QString& bits) const; + + +}; + +#endif //BITDECODER_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/TestFastBitDecoder.cpp Mon Sep 10 21:31:10 2012 +0200 @@ -0,0 +1,58 @@ +#include "FastBitDecoder.hpp" +#include "TestFramework.hpp" + +#include "BitDecoder.hpp" + + +BOOST_AUTO_TEST_CASE( TestSimple ) +{ + BitDecoder* up = new BitDecoder("a"); + BitDecoder* down = new BitDecoder("b"); + + BitDecoder* full = BitDecoder::merge(down, up); + + BOOST_REQUIRE(full->data().isNull()); + BOOST_REQUIRE_EQUAL(up->data(), "a"); + BOOST_REQUIRE_EQUAL(down->data(), "b"); + + FastBitDecoder fast(full->createEncoder()); + + BOOST_REQUIRE_EQUAL(fast.decode("1"), "a"); + BOOST_REQUIRE_EQUAL(fast.decode("0"), "b"); + BOOST_REQUIRE_EQUAL(fast.decode("1111"), "aaaa"); + BOOST_REQUIRE_EQUAL(fast.decode("0000"), "bbbb"); + BOOST_REQUIRE_EQUAL(fast.decode("1101"), "aaba"); + BOOST_REQUIRE_EQUAL(fast.decode("1111"), "aaaa"); +} + +BOOST_AUTO_TEST_CASE( TestLong ) +{ + BitDecoder* up = new BitDecoder("b"); + BitDecoder* down = new BitDecoder("a"); + for (int i = 0; i < 12; ++i) { + down = BitDecoder::merge(down, up); + up = new BitDecoder(QString('c' + i)); + } + + BitDecoder* full = BitDecoder::merge(down, up); + + BOOST_REQUIRE(full->data().isNull()); + + FastBitDecoder fast(full->createEncoder()); + + BOOST_REQUIRE_EQUAL(fast.decode("1"), "n"); + BOOST_REQUIRE_EQUAL(fast.decode("01"), "m"); + BOOST_REQUIRE_EQUAL(fast.decode("001"), "l"); + BOOST_REQUIRE_EQUAL(fast.decode("0001"), "k"); + BOOST_REQUIRE_EQUAL(fast.decode("00001"), "j"); + BOOST_REQUIRE_EQUAL(fast.decode("000001"), "i"); + BOOST_REQUIRE_EQUAL(fast.decode("0000001"), "h"); + BOOST_REQUIRE_EQUAL(fast.decode("00000001"), "g"); + BOOST_REQUIRE_EQUAL(fast.decode("000000001"), "f"); + BOOST_REQUIRE_EQUAL(fast.decode("0000000001"), "e"); + BOOST_REQUIRE_EQUAL(fast.decode("00000000001"), "d"); + BOOST_REQUIRE_EQUAL(fast.decode("000000000001"), "c"); + BOOST_REQUIRE_EQUAL(fast.decode("0000000000001"), "b"); + BOOST_REQUIRE_EQUAL(fast.decode("0000000000000"), "a"); + +}
