# HG changeset patch # User Tom Fredrik Blenning Klaussen # Date 1347196620 -7200 # Node ID e0898020af08bfe672158c669e4bd7d3ad6966af # Parent f711ddb56ae7e4de85e5dbd54f7eaa01ace9c67d Faster computation of EditDistance, by more efficient datastructure. diff -r f711ddb56ae7 -r e0898020af08 EditDistance.cpp --- a/EditDistance.cpp Fri Sep 07 13:32:33 2012 +0200 +++ b/EditDistance.cpp Sun Sep 09 15:17:00 2012 +0200 @@ -3,7 +3,7 @@ #include "CompileTimeConstants.h" #include "ConfigurationProcessing.hpp" -#include +#include #define CharComparer(A, B) (QChar(A) == QChar(B)) @@ -31,6 +31,9 @@ b = removeDiacritics(b); } + if ( a == b) + return 0; + OrderedPair lup(a, b); if (cache == 0) { @@ -41,47 +44,35 @@ if (res) return *res; + uint s1 = a.size(); + uint s2 = b.size(); // Allocate distance matrix - QList > d; - QList temp; - for (int i=0;i d(s1 + 1, s2 + 1); + d.clear(); + // Compute distance - for (int i = 0; i <= a.size(); i++) - d[i][0] = i; - for (int j = 0; j <= b.size(); j++) - d[0][j] = j; - for (int i = 1; i <= a.size(); i++) - { - for (int j = 1; j <= b.size(); j++) - { - if (CharComparer(a.at(i-1), b.at(j - 1))) - { - // No change required - d[i][j] = d[i - 1][j - 1]; - } - else - { - d[i][ j] = - std::min(d[i - 1][ j] + 1, // Deletion - std::min(d[i][ j - 1] + 1, // Insertion - d[i - 1][ j - 1] + 1)); // Substitution - } - } + for (uint i = 0; i <= s1; ++i) + d(i, 0) = i; + for (uint j = 0; j <= s2; ++j) + d(0, j) = j; + for (uint i = 1; i <= s1; ++i) { + for (uint j = 1; j <= s2; ++j) { + if (CharComparer(a.at(i - 1), b.at(j - 1))) { + // No change required + d(i, j) = d(i - 1, j - 1); + } + else { + d(i, j) = + std::min(d(i - 1, j) + 1, // Deletion + std::min(d(i, j - 1) + 1, // Insertion + d(i - 1, j - 1) + 1)); // Substitution + } } + } // Return final value - int retVal = d[a.size()][ b.size()]; + int retVal = d(s1, s2); cache->insert(lup, retVal); return retVal;