Mercurial > dedupe
comparison EditDistance.cpp @ 78:9744ec195be3
Encapsulate EditDistance with caching.
| field | value |
|---|---|
| author | Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no> |
| date | Thu, 10 Oct 2013 01:07:52 +0200 |
| parents | 4c283daa42c7 |
| children | |
comparison (legend: equal, deleted, inserted, replaced)
| 77:a827f3687c4a | 78:9744ec195be3 |
|---|---|
| 1 #include "EditDistance.hpp" | 1 #include "EditDistance.hpp" |
| 2 | |
| 3 #include "CompileTimeConstants.h" | |
| 4 #include "ConfigurationProcessing.hpp" | |
| 5 | 2 |
| 6 #include <boost/numeric/ublas/matrix.hpp> | 3 #include <boost/numeric/ublas/matrix.hpp> |
| 7 | 4 |
| 8 #define CharComparer(A, B) (QChar(A) == QChar(B)) | 5 #define CharComparer(A, B) (QChar(A) == QChar(B)) |
| 9 | |
| 10 EditDistance::cacheType* EditDistance::cache = 0; | |
| 11 //EditDistance::cacheType EditDistance::cache; | |
| 12 | 6 |
| 13 void EditDistance::removeDiacriticsNoCopy(QString& in) | 7 void EditDistance::removeDiacriticsNoCopy(QString& in) |
| 14 { | 8 { |
| 15 for(QString::iterator c = in.begin(); | 9 for(QString::iterator c = in.begin(); |
| 16 c != in.end(); ++c) { | 10 c != in.end(); ++c) { |
| 31 int EditDistance::Compute(QString a, QString b, bool remove) { | 25 int EditDistance::Compute(QString a, QString b, bool remove) { |
| 32 if (remove) { | 26 if (remove) { |
| 33 removeDiacriticsNoCopy(a); | 27 removeDiacriticsNoCopy(a); |
| 34 removeDiacriticsNoCopy(b); | 28 removeDiacriticsNoCopy(b); |
| 35 } | 29 } |
| 36 | |
| 37 if ( a == b) | |
| 38 return 0; | |
| 39 | |
| 40 OrderedPair<UniqueString> lup(a, b); | |
| 41 | |
| 42 if (cache == 0) { | |
| 43 QString cacheLocation = processSetupVariables(EDITDISTANCE_CACHE_LOCATION); | |
| 44 EditDistance::cache = new cacheType(cacheLocation, "EditLUT"); | |
| 45 } | |
| 46 boost::optional<int> res = cache->value(lup); | |
| 47 if (res) | |
| 48 return *res; | |
| 49 | 30 |
| 50 uint s1 = a.size(); | 31 uint s1 = a.size(); |
| 51 uint s2 = b.size(); | 32 uint s2 = b.size(); |
| 52 | 33 |
| 53 // Allocate distance matrix | 34 // Allocate distance matrix |
| 74 } | 55 } |
| 75 } | 56 } |
| 76 | 57 |
| 77 // Return final value | 58 // Return final value |
| 78 int retVal = d(s1, s2); | 59 int retVal = d(s1, s2); |
| 79 cache->insert(lup, retVal); | |
| 80 | 60 |
| 81 return retVal; | 61 return retVal; |
| 82 } | 62 } |
