annotate EditDistance.cpp @ 63:dd086ec3220d

More removal of unnecessary includes.
author Tom Fredrik Blenning Klaussen <bfg@blenning.no>
date Fri, 14 Sep 2012 21:10:03 +0200
parents 4c283daa42c7
children 9744ec195be3
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
1 #include "EditDistance.hpp"
40
f711ddb56ae7 Sort up includes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 34
diff changeset
2
16
06166d6c083b Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 9
diff changeset
3 #include "CompileTimeConstants.h"
06166d6c083b Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 9
diff changeset
4 #include "ConfigurationProcessing.hpp"
0
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
5
41
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
6 #include <boost/numeric/ublas/matrix.hpp>
0
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
7
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
8 #define CharComparer(A, B) (QChar(A) == QChar(B))
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
9
16
06166d6c083b Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 9
diff changeset
10 EditDistance::cacheType* EditDistance::cache = 0;
06166d6c083b Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 9
diff changeset
11 //EditDistance::cacheType EditDistance::cache;
0
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
12
42
4c283daa42c7 Optimize diacritics removal.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 41
diff changeset
13 void EditDistance::removeDiacriticsNoCopy(QString& in)
0
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
14 {
42
4c283daa42c7 Optimize diacritics removal.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 41
diff changeset
15 for(QString::iterator c = in.begin();
4c283daa42c7 Optimize diacritics removal.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 41
diff changeset
16 c != in.end(); ++c) {
4c283daa42c7 Optimize diacritics removal.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 41
diff changeset
17 if (c->decompositionTag() != QChar::NoDecomposition) {
4c283daa42c7 Optimize diacritics removal.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 41
diff changeset
18 QString tmp = c->decomposition();
4c283daa42c7 Optimize diacritics removal.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 41
diff changeset
19 *c = tmp[0];
0
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
20 }
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
21 }
42
4c283daa42c7 Optimize diacritics removal.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 41
diff changeset
22 }
4c283daa42c7 Optimize diacritics removal.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 41
diff changeset
23
4c283daa42c7 Optimize diacritics removal.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 41
diff changeset
24 QString EditDistance::removeDiacritics(const QString& in)
4c283daa42c7 Optimize diacritics removal.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 41
diff changeset
25 {
4c283daa42c7 Optimize diacritics removal.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 41
diff changeset
26 QString out = in;
4c283daa42c7 Optimize diacritics removal.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 41
diff changeset
27 removeDiacriticsNoCopy(out);
0
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
28 return out;
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
29 }
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
30
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
31 int EditDistance::Compute(QString a, QString b, bool remove) {
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
32 if (remove) {
42
4c283daa42c7 Optimize diacritics removal.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 41
diff changeset
33 removeDiacriticsNoCopy(a);
4c283daa42c7 Optimize diacritics removal.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 41
diff changeset
34 removeDiacriticsNoCopy(b);
0
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
35 }
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
36
41
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
37 if ( a == b)
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
38 return 0;
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
39
16
06166d6c083b Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 9
diff changeset
40 OrderedPair<UniqueString> lup(a, b);
0
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
41
16
06166d6c083b Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 9
diff changeset
42 if (cache == 0) {
06166d6c083b Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 9
diff changeset
43 QString cacheLocation = processSetupVariables(EDITDISTANCE_CACHE_LOCATION);
06166d6c083b Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 9
diff changeset
44 EditDistance::cache = new cacheType(cacheLocation, "EditLUT");
06166d6c083b Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 9
diff changeset
45 }
06166d6c083b Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 9
diff changeset
46 boost::optional<int> res = cache->value(lup);
0
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
47 if (res)
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
48 return *res;
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
49
41
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
50 uint s1 = a.size();
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
51 uint s2 = b.size();
34
fda70a362ed5 Remove whitespace.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 16
diff changeset
52
0
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
53 // Allocate distance matrix
41
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
54 boost::numeric::ublas::matrix<int> d(s1 + 1, s2 + 1);
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
55 d.clear();
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
56
0
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
57 // Compute distance
41
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
58 for (uint i = 0; i <= s1; ++i)
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
59 d(i, 0) = i;
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
60 for (uint j = 0; j <= s2; ++j)
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
61 d(0, j) = j;
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
62 for (uint i = 1; i <= s1; ++i) {
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
63 for (uint j = 1; j <= s2; ++j) {
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
64 if (CharComparer(a.at(i - 1), b.at(j - 1))) {
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
65 // No change required
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
66 d(i, j) = d(i - 1, j - 1);
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
67 }
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
68 else {
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
69 d(i, j) =
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
70 std::min(d(i - 1, j) + 1, // Deletion
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
71 std::min(d(i, j - 1) + 1, // Insertion
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
72 d(i - 1, j - 1) + 1)); // Substitution
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
73 }
0
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
74 }
41
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
75 }
34
fda70a362ed5 Remove whitespace.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 16
diff changeset
76
0
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
77 // Return final value
41
e0898020af08 Faster computation of EditDistance, by more efficient datastructure.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 40
diff changeset
78 int retVal = d(s1, s2);
16
06166d6c083b Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents: 9
diff changeset
79 cache->insert(lup, retVal);
0
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
80
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
81 return retVal;
a3834af36579 Working with memory backend.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff changeset
82 }