Mercurial > dedupe
annotate UniqueString.hpp @ 38:7905fa8a3f1b
Test for empty string.
| author | Tom Fredrik Blenning Klaussen <bfg@blenning.no> |
|---|---|
| date | Fri, 07 Sep 2012 13:07:46 +0200 |
| parents | fda70a362ed5 |
| children | b9515dc35fe4 |
| rev | line source |
|---|---|
|
16
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
1 #ifndef UNIQUESTRING_HPP |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
2 #define UNIQUESTRING_HPP |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
3 |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
4 #if 0 |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
5 #include <QtCore/QMap> |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
6 #include <QtCore/QString> |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
7 |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
8 class UniqueString : public QString |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
9 { |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
10 private: |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
11 static QMap<QString, QString> map; |
|
34
fda70a362ed5
Remove whitespace.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
31
diff
changeset
|
12 |
|
16
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
13 public: |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
14 UniqueString(const QString& str) |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
15 { |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
16 if (!map.contains(str)) { |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
17 map.insert(str, str); |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
18 } |
|
34
fda70a362ed5
Remove whitespace.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
31
diff
changeset
|
19 QString::operator=(map.value(str)); |
|
16
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
20 } |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
21 }; |
|
24
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
22 #elseif 0 |
|
16
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
23 #include "RBTree.hpp" |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
24 #include <QtCore/QString> |
|
24
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
25 #include <QtCore/QStringList> |
|
31
bf3dce7fedcb
Remove all references to QDebug
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
24
diff
changeset
|
26 |
|
16
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
27 |
|
24
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
28 class UniqueString |
|
16
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
29 { |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
30 private: |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
31 static RBTree<QString> map; |
|
24
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
32 QString myString; |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
33 static QMap<QString, int> lup; |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
34 static uint numInserts; |
|
34
fda70a362ed5
Remove whitespace.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
31
diff
changeset
|
35 |
|
16
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
36 public: |
|
24
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
37 QStringList chunk(const QString& str) |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
38 { |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
39 QStringList list = str.split(""); |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
40 list << QString(); |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
41 return list; |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
42 } |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
43 |
|
16
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
44 UniqueString(const QString& str) |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
45 { |
|
24
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
46 //QString str = assemble(in_str); |
|
16
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
47 boost::optional<QString> present = map.find(str); |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
48 if (!present) { |
|
24
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
49 QStringList chunks = chunk(str); |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
50 foreach (const QString& ch, chunks) { |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
51 ++lup[ch]; |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
52 } |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
53 if (++numInserts % 100 == 0) { |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
54 foreach(const QString& key, lup.keys()) { |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
55 qDebug() << key << ":" << lup[key]; |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
56 } |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
57 } |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
58 QString tmpStr = str; |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
59 tmpStr.squeeze(); |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
60 map.insert(tmpStr); |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
61 present = tmpStr; |
|
16
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
62 //qDebug() << map.optimal_depth() << " : " << map.avg_depth() << " : " << map.depth(); |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
63 } |
|
24
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
64 myString = *present; |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
65 } |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
66 |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
67 operator const QString() const |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
68 { |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
69 return myString; |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
70 } |
|
16
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
71 |
|
24
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
72 bool operator<(const UniqueString& rhs) const |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
73 { |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
74 return myString < rhs.myString; |
|
16
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
75 } |
|
24
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
76 |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
77 bool operator==(const UniqueString& rhs) const |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
78 { |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
79 return myString == rhs.myString; |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
80 } |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
81 }; |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
82 #else |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
83 #include "HuffmanString.hpp" |
|
d0502678429c
Multiplex between different stringtypes.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
16
diff
changeset
|
84 typedef HuffmanString UniqueString; |
|
16
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
85 #endif |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
86 |
|
06166d6c083b
Add configuration processing.
Tom Fredrik Blenning Klaussen <bfg@blenning.no>
parents:
diff
changeset
|
87 #endif //UNIQUESTRING_HPP |
