changeset 78:9744ec195be3

Encapsulate EditDistance with caching.
author Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
date Thu, 10 Oct 2013 01:07:52 +0200
parents a827f3687c4a
children 114be42a612c
files CMakeLists.txt CachedEditDistance.cpp CachedEditDistance.hpp DataController.cpp EditDistance.cpp EditDistance.hpp FileDBLink.cpp TestDataBase.hpp TestMemoryDBLink.cpp TestSQLGenerator.cpp
diffstat 10 files changed, 94 insertions(+), 58 deletions(-) [+]
line wrap: on
line diff
--- a/CMakeLists.txt	Sat Feb 16 19:00:54 2013 +0100
+++ b/CMakeLists.txt	Thu Oct 10 01:07:52 2013 +0200
@@ -69,6 +69,7 @@
 SET(CLASS_SOURCES
 	BitArray.cpp
 	BitDecoder.cpp
+	CachedEditDistance.cpp
 	ConfigurationProcessing.cpp
 	DataController.cpp
 	EditDistance.cpp
@@ -89,6 +90,7 @@
 SET(CLASS_HEADERS
 	BitArray.hpp
 	BitDecoder.hpp
+	CachedEditDistance.hpp
 	ConfigurationProcessing.hpp
 	DataController.hpp
 	EditDistance.hpp
@@ -220,7 +222,7 @@
 					  coverage_test.info 'moc_*'
 					  --output-file coverage.preprocessed2
 					  DEPENDS coverage_test.info
-					  COMMENT "Removing \"moc_\"-files" 
+					  COMMENT "Removing \"moc_\"-files"
 )
 
 
@@ -237,7 +239,7 @@
 ADD_CUSTOM_TARGET(coverage_presentation genhtml -q
 				    ${CMAKE_CURRENT_BINARY_DIR}/coverage.preprocessed
 				    --output-directory
-				    ${CMAKE_CURRENT_BINARY_DIR}/coverage_presentation 
+				    ${CMAKE_CURRENT_BINARY_DIR}/coverage_presentation
 				    DEPENDS
 				    ${CMAKE_CURRENT_BINARY_DIR}/coverage.preprocessed
 )
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CachedEditDistance.cpp	Thu Oct 10 01:07:52 2013 +0200
@@ -0,0 +1,33 @@
+#include "CachedEditDistance.hpp"
+
+#include "CompileTimeConstants.h"
+#include "ConfigurationProcessing.hpp"
+
+CachedEditDistance::cacheType* CachedEditDistance::cache = 0;
+//CachedEditDistance::cacheType CachedEditDistance::cache;
+
+// Edit distance between a and b, consulting the "EditLUT" cache first.
+// remove: strip diacritics from both strings before comparing them.
+int CachedEditDistance::Compute(QString a, QString b, bool remove)
+{
+  if (remove) {
+    removeDiacriticsNoCopy(a);
+    removeDiacriticsNoCopy(b);
+  }
+  if (a == b) { // identical strings: answered without touching the cache
+    return 0;
+  }
+
+  OrderedPair<UniqueString> key(a, b);
+  if (!cache) { // the cache object is created on first use
+    QString location = processSetupVariables(EDITDISTANCE_CACHE_LOCATION);
+    cache = new cacheType(location, "EditLUT");
+  }
+  boost::optional<int> hit = cache->value(key);
+  if (hit) { return *hit; }
+
+  // Miss: diacritics were handled above, so pass false here.
+  int distance = EditDistance::Compute(a, b, false);
+  cache->insert(key, distance);
+  return distance;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CachedEditDistance.hpp	Thu Oct 10 01:07:52 2013 +0200
@@ -0,0 +1,48 @@
+#ifndef CACHEDEDITDISTANCE_HPP
+#define CACHEDEDITDISTANCE_HPP
+
+#include "DBCache.hpp"
+
+#include "EditDistance.hpp"
+
+// Insert regulator for OrderedPair<UniqueString> keys of any Value type.
+template<typename Value>
+struct InsertRegulator<OrderedPair<UniqueString>, Value >
+{
+  uint n; // number of next() calls since the last start()
+  // Switch auto-rebuild of the HuffmanString set off; restart the count.
+  void start() {
+    HuffmanString::getSet().setAutoRebuild(false);
+    n = 0;
+  }
+  // Rebuild when the count reaches 2048. NOTE(review): n is not reset
+  // here, so this triggers once per start() -- confirm that is intended.
+  void next() {
+    ++n;
+    if (n == 2048) { HuffmanString::getSet().rebuild(); }
+  }
+  // Rebuild the set, then switch auto-rebuild back on.
+  static void finish() {
+    HuffmanString::getSet().rebuild();
+    HuffmanString::getSet().setAutoRebuild(true);
+  }
+};
+
+// Front end to EditDistance that keeps computed distances in a DBCache.
+class CachedEditDistance {
+protected:
+  typedef DBCache<OrderedPair<UniqueString>, int, true> cacheType;
+public:
+  // Cache instance; starts out null and is allocated by Compute().
+  static cacheType* cache;
+
+  // Distance between a and b, looked up in / stored to the cache.
+  static int Compute(QString a, QString b, bool removeDiacritics = false);
+  // Both helpers below simply forward to EditDistance.
+  static void removeDiacriticsNoCopy(QString& in)
+  { EditDistance::removeDiacriticsNoCopy(in); }
+  static QString removeDiacritics(const QString& in)
+  { return EditDistance::removeDiacritics(in); }
+};
+
+#endif //CACHEDEDITDISTANCE_HPP
--- a/DataController.cpp	Sat Feb 16 19:00:54 2013 +0100
+++ b/DataController.cpp	Thu Oct 10 01:07:52 2013 +0200
@@ -2,7 +2,7 @@
 
 #include "CompileTimeConstants.h"
 #include "ConfigurationProcessing.hpp"
-#include "EditDistance.hpp"
+#include "CachedEditDistance.hpp"
 #include "SqliteDBLink.hpp"
 
 #include "Exception/PermissionException.hpp"
@@ -267,7 +267,7 @@
       int absoluteCutoff = line->name().length() * editDistanceCutoff;
       foreach(QSharedPointer<FileDBLink::DBInfo> dup, elems) {
 	if(dup != line) {
-	  int distance = EditDistance::Compute(line->name(), dup->name());
+	  int distance = CachedEditDistance::Compute(line->name(), dup->name());
 
 	  if (distance <= absoluteCutoff) {
 	    oList.insert(distance, dup);
@@ -486,7 +486,7 @@
 
   if (showGUI) {
     setupGUI();
-  
+
     QTimer* populator = new QTimer(this);
     populator->setSingleShot(true);
     populator->setInterval(50);
--- a/EditDistance.cpp	Sat Feb 16 19:00:54 2013 +0100
+++ b/EditDistance.cpp	Thu Oct 10 01:07:52 2013 +0200
@@ -1,15 +1,9 @@
 #include "EditDistance.hpp"
 
-#include "CompileTimeConstants.h"
-#include "ConfigurationProcessing.hpp"
-
 #include <boost/numeric/ublas/matrix.hpp>
 
 #define CharComparer(A, B) (QChar(A) == QChar(B))
 
-EditDistance::cacheType* EditDistance::cache = 0;
-//EditDistance::cacheType EditDistance::cache;
-
 void EditDistance::removeDiacriticsNoCopy(QString& in)
 {
   for(QString::iterator c = in.begin();
@@ -34,19 +28,6 @@
     removeDiacriticsNoCopy(b);
   }
 
-  if ( a == b)
-    return 0;
-
-  OrderedPair<UniqueString> lup(a, b);
-
-  if (cache == 0) {
-    QString cacheLocation = processSetupVariables(EDITDISTANCE_CACHE_LOCATION);
-    EditDistance::cache = new cacheType(cacheLocation, "EditLUT");
-  }
-  boost::optional<int> res = cache->value(lup);
-  if (res)
-    return *res;
-
   uint s1 = a.size();
   uint s2 = b.size();
 
@@ -76,7 +57,6 @@
 
   // Return final value
   int retVal = d(s1, s2);
-  cache->insert(lup, retVal);
 
   return retVal;
 }
--- a/EditDistance.hpp	Sat Feb 16 19:00:54 2013 +0100
+++ b/EditDistance.hpp	Thu Oct 10 01:07:52 2013 +0200
@@ -1,40 +1,13 @@
 #ifndef EDITDISTANCE_HPP
 #define EDITDISTANCE_HPP
 
-#include "DBCache.hpp"
-
-template<typename Value>
-struct InsertRegulator<OrderedPair<UniqueString>, Value >
-{
-  uint n;
-  void start()
-  {
-    n = 0;
-    HuffmanString::getSet().setAutoRebuild(false);
-  }
-
-  static void finish()
-  {
-    HuffmanString::getSet().rebuild();
-    HuffmanString::getSet().setAutoRebuild(true);
-  }
-
-  void next()
-  {
-    if (++n == 2048)
-      HuffmanString::getSet().rebuild();
-  }
-};
+#include <QtCore/QString>
 
 class EditDistance {
-protected:
-  typedef DBCache<OrderedPair<UniqueString>, int, true> cacheType;
 public:
   static int Compute(QString a, QString b, bool removeDiacritics = false);
   static void removeDiacriticsNoCopy(QString& in);
   static QString removeDiacritics(const QString& in);
-
-  static cacheType* cache;
 };
 
 #endif //EDITDISTANCE_HPP
--- a/FileDBLink.cpp	Sat Feb 16 19:00:54 2013 +0100
+++ b/FileDBLink.cpp	Thu Oct 10 01:07:52 2013 +0200
@@ -1,6 +1,6 @@
 #include "FileDBLink.hpp"
 
-#include "EditDistance.hpp"
+#include "CachedEditDistance.hpp"
 
 #include "Exception/PermissionException.hpp"
 
@@ -142,7 +142,7 @@
     if (info == *it2)
       continue;
     QString p2 = (*it2)->name();
-    int dist = EditDistance::Compute(p1, p2, false);
+    int dist = CachedEditDistance::Compute(p1, p2, false);
     if (dist < minDist) {
       minDist = dist;
       other = (*it2)->path();
--- a/TestDataBase.hpp	Sat Feb 16 19:00:54 2013 +0100
+++ b/TestDataBase.hpp	Thu Oct 10 01:07:52 2013 +0200
@@ -9,7 +9,7 @@
   QSqlDatabase getDatabase();
   TestDatabase();
   ~TestDatabase();
-  
+
 private:
   const QString connectionName;
   QSqlDatabase *db;
--- a/TestMemoryDBLink.cpp	Sat Feb 16 19:00:54 2013 +0100
+++ b/TestMemoryDBLink.cpp	Thu Oct 10 01:07:52 2013 +0200
@@ -61,5 +61,5 @@
       prev = info->path();
     }
   }
-  
+
 }
--- a/TestSQLGenerator.cpp	Sat Feb 16 19:00:54 2013 +0100
+++ b/TestSQLGenerator.cpp	Thu Oct 10 01:07:52 2013 +0200
@@ -171,7 +171,7 @@
     .arg(fieldCreateString2);
 
   BOOST_REQUIRE(query.exec(createQuery));
-  
+
   QString queryString = QString("INSERT into %1 (%2, %3) VALUES(%4, %5);")
     .arg(tableName)
     .arg(SQLGenerator<int>::fieldName("value1"))