changeset 41:e0898020af08

Faster computation of EditDistance, by more efficient datastructure.
author Tom Fredrik Blenning Klaussen <bfg@blenning.no>
date Sun, 09 Sep 2012 15:17:00 +0200
parents f711ddb56ae7
children 4c283daa42c7
files EditDistance.cpp
diffstat 1 files changed, 27 insertions(+), 36 deletions(-) [+]
line wrap: on
line diff
--- a/EditDistance.cpp	Fri Sep 07 13:32:33 2012 +0200
+++ b/EditDistance.cpp	Sun Sep 09 15:17:00 2012 +0200
@@ -3,7 +3,7 @@
 #include "CompileTimeConstants.h"
 #include "ConfigurationProcessing.hpp"
 
-#include <QtCore/QList>
+#include <boost/numeric/ublas/matrix.hpp>
 
 #define CharComparer(A, B) (QChar(A) == QChar(B))
 
@@ -31,6 +31,9 @@
     b = removeDiacritics(b);
   }
 
+  if ( a == b)
+    return 0;
+
   OrderedPair<UniqueString> lup(a, b);
 
   if (cache == 0) {
@@ -41,47 +44,35 @@
   if (res)
     return *res;
 
+  uint s1 = a.size();
+  uint s2 = b.size();
 
   // Allocate distance matrix
-  QList<QList<int> > d;
-  QList<int> temp;
-  for (int i=0;i<b.size()+1;i++) {
-    temp.append(0);
-  }
-  for (int i=0;i<a.size()+1;i++) {
-    d.append(temp);
-  }
-#if 0
-  // Get character comparer
-  CharComparer isEqual = (ignoreCase) ?
-    (CharComparer)CharCompareIgnoreCase : CharCompare;
-#endif
+  boost::numeric::ublas::matrix<int> d(s1 + 1, s2 + 1);
+  d.clear();
+
   // Compute distance
-  for (int i = 0; i <= a.size(); i++)
-    d[i][0] = i;
-  for (int j = 0; j <= b.size(); j++)
-    d[0][j] = j;
-  for (int i = 1; i <= a.size(); i++)
-    {
-      for (int j = 1; j <= b.size(); j++)
-	{
-	  if (CharComparer(a.at(i-1), b.at(j - 1)))
-	    {
-	      // No change required
-	      d[i][j] = d[i - 1][j - 1];
-	    }
-	  else
-	    {
-	      d[i][ j] =
-		std::min(d[i - 1][ j] + 1,    // Deletion
-			 std::min(d[i][ j - 1] + 1,    // Insertion
-				  d[i - 1][ j - 1] + 1));       // Substitution
-	    }
-	}
+  for (uint i = 0; i <= s1; ++i)
+    d(i, 0) = i;
+  for (uint j = 0; j <= s2; ++j)
+    d(0, j) = j;
+  for (uint i = 1; i <= s1; ++i) {
+    for (uint j = 1; j <= s2; ++j) {
+      if (CharComparer(a.at(i - 1), b.at(j - 1))) {
+	// No change required
+	d(i, j) = d(i - 1, j - 1);
+      }
+      else {
+	d(i, j) =
+	  std::min(d(i - 1, j) + 1,    // Deletion
+		   std::min(d(i, j - 1) + 1,    // Insertion
+			    d(i - 1, j - 1) + 1));       // Substitution
+      }
     }
+  }
 
   // Return final value
-  int retVal = d[a.size()][ b.size()];
+  int retVal = d(s1, s2);
   cache->insert(lup, retVal);
 
   return retVal;