Mercurial > dedupe
annotate EditDistance.cpp @ 40:f711ddb56ae7
Sort up includes.
| author | Tom Fredrik Blenning Klaussen <bfg@blenning.no> |
|---|---|
| date | Fri, 07 Sep 2012 13:32:33 +0200 |
| parents | fda70a362ed5 |
| children | e0898020af08 |
| rev | line source |
|---|---|
|
0
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
1 #include "EditDistance.hpp" |
|
40
f711ddb56ae7
Sort up includes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
34
diff
changeset
|
2 |
|
16
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
9
diff
changeset
|
3 #include "CompileTimeConstants.h" |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
9
diff
changeset
|
4 #include "ConfigurationProcessing.hpp" |
|
0
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
5 |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
6 #include <QtCore/QList> |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
7 |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
8 #define CharComparer(A, B) (QChar(A) == QChar(B)) |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
9 |
|
16
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
9
diff
changeset
|
10 EditDistance::cacheType* EditDistance::cache = 0; |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
9
diff
changeset
|
11 //EditDistance::cacheType EditDistance::cache; |
|
0
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
12 |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
13 QString EditDistance::removeDiacritics(QString in) |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
14 { |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
15 QString out; |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
16 foreach(QChar c, in) { |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
17 if (c.decompositionTag() == QChar::NoDecomposition) { |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
18 out.append(c); |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
19 } |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
20 else { |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
21 QString tmp = c.decomposition(); |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
22 out.append(tmp[0]); |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
23 } |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
24 } |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
25 return out; |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
26 } |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
27 |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
28 int EditDistance::Compute(QString a, QString b, bool remove) { |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
29 if (remove) { |
|
9
b5943e4bf676
Fix up header includes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
0
diff
changeset
|
30 a = removeDiacritics(a); |
|
b5943e4bf676
Fix up header includes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
0
diff
changeset
|
31 b = removeDiacritics(b); |
|
0
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
32 } |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
33 |
|
16
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
9
diff
changeset
|
34 OrderedPair<UniqueString> lup(a, b); |
|
0
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
35 |
|
16
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
9
diff
changeset
|
36 if (cache == 0) { |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
9
diff
changeset
|
37 QString cacheLocation = processSetupVariables(EDITDISTANCE_CACHE_LOCATION); |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
9
diff
changeset
|
38 EditDistance::cache = new cacheType(cacheLocation, "EditLUT"); |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
9
diff
changeset
|
39 } |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
9
diff
changeset
|
40 boost::optional<int> res = cache->value(lup); |
|
0
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
41 if (res) |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
42 return *res; |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
43 |
|
34
fda70a362ed5
Remove whitespace.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
44 |
|
0
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
45 // Allocate distance matrix |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
46 QList<QList<int> > d; |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
47 QList<int> temp; |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
48 for (int i=0;i<b.size()+1;i++) { |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
49 temp.append(0); |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
50 } |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
51 for (int i=0;i<a.size()+1;i++) { |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
52 d.append(temp); |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
53 } |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
54 #if 0 |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
55 // Get character comparer |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
56 CharComparer isEqual = (ignoreCase) ? |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
57 (CharComparer)CharCompareIgnoreCase : CharCompare; |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
58 #endif |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
59 // Compute distance |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
60 for (int i = 0; i <= a.size(); i++) |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
61 d[i][0] = i; |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
62 for (int j = 0; j <= b.size(); j++) |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
63 d[0][j] = j; |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
64 for (int i = 1; i <= a.size(); i++) |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
65 { |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
66 for (int j = 1; j <= b.size(); j++) |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
67 { |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
68 if (CharComparer(a.at(i-1), b.at(j - 1))) |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
69 { |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
70 // No change required |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
71 d[i][j] = d[i - 1][j - 1]; |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
72 } |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
73 else |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
74 { |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
75 d[i][ j] = |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
76 std::min(d[i - 1][ j] + 1, // Deletion |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
77 std::min(d[i][ j - 1] + 1, // Insertion |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
78 d[i - 1][ j - 1] + 1)); // Substitution |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
79 } |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
80 } |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
81 } |
|
34
fda70a362ed5
Remove whitespace.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
82 |
|
0
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
83 // Return final value |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
84 int retVal = d[a.size()][ b.size()]; |
|
16
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
9
diff
changeset
|
85 cache->insert(lup, retVal); |
|
0
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
86 |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
87 return retVal; |
|
a3834af36579
Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
88 } |
