Mercurial > dedupe
diff EditDistance.cpp @ 0:a3834af36579
Working with memory backend.
| author | Tom Fredrik Blenning Klaussen <bfg@blenning.no> |
|---|---|
| date | Mon, 20 Aug 2012 15:49:48 +0200 |
| parents | |
| children | b5943e4bf676 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/EditDistance.cpp Mon Aug 20 15:49:48 2012 +0200 @@ -0,0 +1,80 @@ +#include "EditDistance.hpp" + +#include <QtCore/QList> + +#define CharComparer(A, B) (QChar(A) == QChar(B)) + +EditDistance::cacheType EditDistance::cache; + +QString EditDistance::removeDiacritics(QString in) +{ + QString out; + foreach(QChar c, in) { + if (c.decompositionTag() == QChar::NoDecomposition) { + out.append(c); + } + else { + QString tmp = c.decomposition(); + out.append(tmp[0]); + } + } + return out; +} + +int EditDistance::Compute(QString a, QString b, bool remove) { + if (remove) { + a=removeDiacritics(a); + b=removeDiacritics(b); + } + + OrderedPair<QString> lup(a, b); + + boost::optional<int> res = cache.value(lup); + if (res) + return *res; + + + // Allocate distance matrix + QList<QList<int> > d; + QList<int> temp; + for (int i=0;i<b.size()+1;i++) { + temp.append(0); + } + for (int i=0;i<a.size()+1;i++) { + d.append(temp); + } +#if 0 + // Get character comparer + CharComparer isEqual = (ignoreCase) ? + (CharComparer)CharCompareIgnoreCase : CharCompare; +#endif + // Compute distance + for (int i = 0; i <= a.size(); i++) + d[i][0] = i; + for (int j = 0; j <= b.size(); j++) + d[0][j] = j; + for (int i = 1; i <= a.size(); i++) + { + for (int j = 1; j <= b.size(); j++) + { + if (CharComparer(a.at(i-1), b.at(j - 1))) + { + // No change required + d[i][j] = d[i - 1][j - 1]; + } + else + { + d[i][ j] = + std::min(d[i - 1][ j] + 1, // Deletion + std::min(d[i][ j - 1] + 1, // Insertion + d[i - 1][ j - 1] + 1)); // Substitution + } + } + } + + // Return final value + int retVal = d[a.size()][ b.size()]; + cache.insert(lup, retVal); + + return retVal; +}
