Mercurial > dedupe
comparison EditDistance.cpp @ 42:4c283daa42c7
Optimize diacritics removal.
| author | Tom Fredrik Blenning Klaussen <bfg@blenning.no> |
|---|---|
| date | Sun, 09 Sep 2012 16:09:52 +0200 |
| parents | e0898020af08 |
| children | 9744ec195be3 |
comparison
Legend: equal, deleted, inserted, replaced
| 41:e0898020af08 | 42:4c283daa42c7 |
|---|---|
| 8 #define CharComparer(A, B) (QChar(A) == QChar(B)) | 8 #define CharComparer(A, B) (QChar(A) == QChar(B)) |
| 9 | 9 |
| 10 EditDistance::cacheType* EditDistance::cache = 0; | 10 EditDistance::cacheType* EditDistance::cache = 0; |
| 11 //EditDistance::cacheType EditDistance::cache; | 11 //EditDistance::cacheType EditDistance::cache; |
| 12 | 12 |
| 13 QString EditDistance::removeDiacritics(QString in) | 13 void EditDistance::removeDiacriticsNoCopy(QString& in) |
| 14 { | 14 { |
| 15 QString out; | 15 for(QString::iterator c = in.begin(); |
| 16 foreach(QChar c, in) { | 16 c != in.end(); ++c) { |
| 17 if (c.decompositionTag() == QChar::NoDecomposition) { | 17 if (c->decompositionTag() != QChar::NoDecomposition) { |
| 18 out.append(c); | 18 QString tmp = c->decomposition(); |
| 19 } | 19 *c = tmp[0]; |
| 20 else { | |
| 21 QString tmp = c.decomposition(); | |
| 22 out.append(tmp[0]); | |
| 23 } | 20 } |
| 24 } | 21 } |
| | 22 } |
| | 23 |
| | 24 QString EditDistance::removeDiacritics(const QString& in) |
| | 25 { |
| | 26 QString out = in; |
| | 27 removeDiacriticsNoCopy(out); |
| 25 return out; | 28 return out; |
| 26 } | 29 } |
| 27 | 30 |
| 28 int EditDistance::Compute(QString a, QString b, bool remove) { | 31 int EditDistance::Compute(QString a, QString b, bool remove) { |
| 29 if (remove) { | 32 if (remove) { |
| 30 a = removeDiacritics(a); | 33 removeDiacriticsNoCopy(a); |
| 31 b = removeDiacritics(b); | 34 removeDiacriticsNoCopy(b); |
| 32 } | 35 } |
| 33 | 36 |
| 34 if ( a == b) | 37 if ( a == b) |
| 35 return 0; | 38 return 0; |
| 36 | 39 |
