view EditDistance.cpp @ 16:06166d6c083b

Add configuration processing. Cache DB values Add a custom RBTree to save space. Track multiple DB connections properly. More testing. Add ValueExistsException.
author Tom Fredrik Blenning Klaussen <bfg@blenning.no>
date Tue, 28 Aug 2012 18:58:02 +0200
parents b5943e4bf676
children fda70a362ed5
line wrap: on
line source

#include "EditDistance.hpp"
#include "CompileTimeConstants.h"
#include "ConfigurationProcessing.hpp"

#include <QtCore/QList>

#define CharComparer(A, B) (QChar(A) == QChar(B))

EditDistance::cacheType* EditDistance::cache = 0;
//EditDistance::cacheType EditDistance::cache;

QString EditDistance::removeDiacritics(QString in)
{
  QString out;
  foreach(QChar c, in) {
    if (c.decompositionTag() == QChar::NoDecomposition) {
      out.append(c);
    }
    else {
      QString tmp = c.decomposition();
      out.append(tmp[0]);
    }
  }
  return out;
}

int EditDistance::Compute(QString a, QString b, bool remove) {
  if (remove) {
    a = removeDiacritics(a);
    b = removeDiacritics(b);
  }

  OrderedPair<UniqueString> lup(a, b);

  if (cache == 0) {
    QString cacheLocation = processSetupVariables(EDITDISTANCE_CACHE_LOCATION);
    EditDistance::cache = new cacheType(cacheLocation, "EditLUT");
  }
  boost::optional<int> res = cache->value(lup);
  if (res)
    return *res;

  
  // Allocate distance matrix
  QList<QList<int> > d;
  QList<int> temp;
  for (int i=0;i<b.size()+1;i++) {
    temp.append(0);
  }
  for (int i=0;i<a.size()+1;i++) {
    d.append(temp);
  }
#if 0
  // Get character comparer
  CharComparer isEqual = (ignoreCase) ?
    (CharComparer)CharCompareIgnoreCase : CharCompare;
#endif
  // Compute distance
  for (int i = 0; i <= a.size(); i++)
    d[i][0] = i;
  for (int j = 0; j <= b.size(); j++)
    d[0][j] = j;
  for (int i = 1; i <= a.size(); i++)
    {
      for (int j = 1; j <= b.size(); j++)
	{
	  if (CharComparer(a.at(i-1), b.at(j - 1)))
	    {
	      // No change required
	      d[i][j] = d[i - 1][j - 1];
	    }
	  else
	    {
	      d[i][ j] =
		std::min(d[i - 1][ j] + 1,    // Deletion
			 std::min(d[i][ j - 1] + 1,    // Insertion
				  d[i - 1][ j - 1] + 1));       // Substitution
	    }
	}
    }
  
  // Return final value
  int retVal = d[a.size()][ b.size()];
  cache->insert(lup, retVal);

  return retVal;
}