Mercurial > dedupe
view EditDistance.cpp @ 115:404795616b1e default tip
Added a lot of common files to ignore
| author | Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no> |
|---|---|
| date | Sat, 25 Mar 2017 17:43:57 +0100 |
| parents | 9744ec195be3 |
| children |
line wrap: on
line source
#include "EditDistance.hpp" #include <boost/numeric/ublas/matrix.hpp> #define CharComparer(A, B) (QChar(A) == QChar(B)) void EditDistance::removeDiacriticsNoCopy(QString& in) { for(QString::iterator c = in.begin(); c != in.end(); ++c) { if (c->decompositionTag() != QChar::NoDecomposition) { QString tmp = c->decomposition(); *c = tmp[0]; } } } QString EditDistance::removeDiacritics(const QString& in) { QString out = in; removeDiacriticsNoCopy(out); return out; } int EditDistance::Compute(QString a, QString b, bool remove) { if (remove) { removeDiacriticsNoCopy(a); removeDiacriticsNoCopy(b); } uint s1 = a.size(); uint s2 = b.size(); // Allocate distance matrix boost::numeric::ublas::matrix<int> d(s1 + 1, s2 + 1); d.clear(); // Compute distance for (uint i = 0; i <= s1; ++i) d(i, 0) = i; for (uint j = 0; j <= s2; ++j) d(0, j) = j; for (uint i = 1; i <= s1; ++i) { for (uint j = 1; j <= s2; ++j) { if (CharComparer(a.at(i - 1), b.at(j - 1))) { // No change required d(i, j) = d(i - 1, j - 1); } else { d(i, j) = std::min(d(i - 1, j) + 1, // Deletion std::min(d(i, j - 1) + 1, // Insertion d(i - 1, j - 1) + 1)); // Substitution } } } // Return final value int retVal = d(s1, s2); return retVal; }
