view EditDistance.cpp @ 78:9744ec195be3

Encapsulate EditDistance with caching.
author Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
date Thu, 10 Oct 2013 01:07:52 +0200
parents 4c283daa42c7
children
line wrap: on
line source

#include "EditDistance.hpp"

#include <boost/numeric/ublas/matrix.hpp>

#define CharComparer(A, B) (QChar(A) == QChar(B))

void EditDistance::removeDiacriticsNoCopy(QString& in)
{
  for(QString::iterator c = in.begin();
      c != in.end(); ++c) {
    if (c->decompositionTag() != QChar::NoDecomposition) {
      QString tmp = c->decomposition();
      *c = tmp[0];
    }
  }
}

QString EditDistance::removeDiacritics(const QString& in)
{
  QString out = in;
  removeDiacriticsNoCopy(out);
  return out;
}

int EditDistance::Compute(QString a, QString b, bool remove) {
  if (remove) {
    removeDiacriticsNoCopy(a);
    removeDiacriticsNoCopy(b);
  }

  uint s1 = a.size();
  uint s2 = b.size();

  // Allocate distance matrix
  boost::numeric::ublas::matrix<int> d(s1 + 1, s2 + 1);
  d.clear();

  // Compute distance
  for (uint i = 0; i <= s1; ++i)
    d(i, 0) = i;
  for (uint j = 0; j <= s2; ++j)
    d(0, j) = j;
  for (uint i = 1; i <= s1; ++i) {
    for (uint j = 1; j <= s2; ++j) {
      if (CharComparer(a.at(i - 1), b.at(j - 1))) {
	// No change required
	d(i, j) = d(i - 1, j - 1);
      }
      else {
	d(i, j) =
	  std::min(d(i - 1, j) + 1,    // Deletion
		   std::min(d(i, j - 1) + 1,    // Insertion
			    d(i - 1, j - 1) + 1));       // Substitution
      }
    }
  }

  // Return final value
  int retVal = d(s1, s2);

  return retVal;
}