annotate EditDistance.cpp @ 18:fcb7a71a22c1

Make it possible to turn off asserting for the RBTree template header.
author Tom Fredrik Blenning Klaussen <bfg@blenning.no>
date Wed, 29 Aug 2012 20:04:05 +0200
parents 06166d6c083b
children fda70a362ed5
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
1 #include "EditDistance.hpp"
16
06166d6c083b Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 9
diff changeset
2 #include "CompileTimeConstants.h"
06166d6c083b Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 9
diff changeset
3 #include "ConfigurationProcessing.hpp"
0
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
4
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
5 #include <QtCore/QList>
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
6
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
7 #define CharComparer(A, B) (QChar(A) == QChar(B))
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
8
16
06166d6c083b Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 9
diff changeset
9 EditDistance::cacheType* EditDistance::cache = 0;
06166d6c083b Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 9
diff changeset
10 //EditDistance::cacheType EditDistance::cache;
0
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
11
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
12 QString EditDistance::removeDiacritics(QString in)
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
13 {
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
14 QString out;
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
15 foreach(QChar c, in) {
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
16 if (c.decompositionTag() == QChar::NoDecomposition) {
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
17 out.append(c);
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
18 }
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
19 else {
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
20 QString tmp = c.decomposition();
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
21 out.append(tmp[0]);
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
22 }
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
23 }
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
24 return out;
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
25 }
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
26
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
27 int EditDistance::Compute(QString a, QString b, bool remove) {
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
28 if (remove) {
9
b5943e4bf676 Fix up header includes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 0
diff changeset
29 a = removeDiacritics(a);
b5943e4bf676 Fix up header includes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 0
diff changeset
30 b = removeDiacritics(b);
0
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
31 }
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
32
16
06166d6c083b Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 9
diff changeset
33 OrderedPair<UniqueString> lup(a, b);
0
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
34
16
06166d6c083b Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 9
diff changeset
35 if (cache == 0) {
06166d6c083b Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 9
diff changeset
36 QString cacheLocation = processSetupVariables(EDITDISTANCE_CACHE_LOCATION);
06166d6c083b Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 9
diff changeset
37 EditDistance::cache = new cacheType(cacheLocation, "EditLUT");
06166d6c083b Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 9
diff changeset
38 }
06166d6c083b Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 9
diff changeset
39 boost::optional<int> res = cache->value(lup);
0
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
40 if (res)
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
41 return *res;
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
42
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
43
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
44 // Allocate distance matrix
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
45 QList<QList<int> > d;
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
46 QList<int> temp;
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
47 for (int i=0;i<b.size()+1;i++) {
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
48 temp.append(0);
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
49 }
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
50 for (int i=0;i<a.size()+1;i++) {
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
51 d.append(temp);
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
52 }
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
53 #if 0
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
54 // Get character comparer
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
55 CharComparer isEqual = (ignoreCase) ?
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
56 (CharComparer)CharCompareIgnoreCase : CharCompare;
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
57 #endif
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
58 // Compute distance
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
59 for (int i = 0; i <= a.size(); i++)
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
60 d[i][0] = i;
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
61 for (int j = 0; j <= b.size(); j++)
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
62 d[0][j] = j;
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
63 for (int i = 1; i <= a.size(); i++)
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
64 {
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
65 for (int j = 1; j <= b.size(); j++)
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
66 {
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
67 if (CharComparer(a.at(i-1), b.at(j - 1)))
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
68 {
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
69 // No change required
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
70 d[i][j] = d[i - 1][j - 1];
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
71 }
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
72 else
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
73 {
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
74 d[i][ j] =
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
75 std::min(d[i - 1][ j] + 1, // Deletion
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
76 std::min(d[i][ j - 1] + 1, // Insertion
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
77 d[i - 1][ j - 1] + 1)); // Substitution
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
78 }
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
79 }
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
80 }
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
81
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
82 // Return final value
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
83 int retVal = d[a.size()][ b.size()];
16
06166d6c083b Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 9
diff changeset
84 cache->insert(lup, retVal);
0
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
85
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
86 return retVal;
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
87 }