Mercurial > dedupe
annotate CachedEditDistance.hpp @ 78:9744ec195be3
Encapsulate EditDistance with caching.
| author | Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no> |
|---|---|
| date | Thu, 10 Oct 2013 01:07:52 +0200 |
| parents | |
| children |
| rev | line source |
|---|---|
|
78
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
1 #ifndef CACHEDEDITDISTANCE_HPP |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
2 #define CACHEDEDITDISTANCE_HPP |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
3 |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
4 #include "DBCache.hpp" |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
5 |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
6 #include "EditDistance.hpp" |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
7 |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
8 template<typename Value> |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
9 struct InsertRegulator<OrderedPair<UniqueString>, Value > |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
10 { |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
11 uint n; |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
12 void start() |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
13 { |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
14 n = 0; |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
15 HuffmanString::getSet().setAutoRebuild(false); |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
16 } |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
17 |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
18 static void finish() |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
19 { |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
20 HuffmanString::getSet().rebuild(); |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
21 HuffmanString::getSet().setAutoRebuild(true); |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
22 } |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
23 |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
24 void next() |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
25 { |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
26 if (++n == 2048) |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
27 HuffmanString::getSet().rebuild(); |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
28 } |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
29 }; |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
30 |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
31 class CachedEditDistance { |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
32 protected: |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
33 typedef DBCache<OrderedPair<UniqueString>, int, true> cacheType; |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
34 public: |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
35 static int Compute(QString a, QString b, bool removeDiacritics = false); |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
36 static void removeDiacriticsNoCopy(QString& in) |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
37 { |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
38 EditDistance::removeDiacriticsNoCopy(in); |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
39 } |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
40 static QString removeDiacritics(const QString& in) |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
41 { |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
42 return EditDistance::removeDiacritics(in); |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
43 } |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
44 |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
45 static cacheType* cache; |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
46 }; |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
47 |
|
9744ec195be3
Encapsulate EditDistance with caching.
Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
parents:
diff
changeset
|
48 #endif //CACHEDEDITDISTANCE_HPP |
