view FileDBLink.cpp @ 104:6bc013d5788b

Avoid unnecessary updates. Fix problems with wrong subset being selected for update with prefix. Fix some problems with too much verbosity in debug statements.
author Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
date Sat, 15 Feb 2014 13:34:10 +0100
parents c7da835ea912
children
line wrap: on
line source

#include "FileDBLink.hpp"

#include "CachedEditDistance.hpp"

#include "Exception/PermissionException.hpp"

#include <QtCore/QtConcurrentMap>

#include <boost/bind.hpp>

// Issues one final commit() (with its default arguments) when the link is
// destroyed.
// NOTE(review): if commit() is virtual, a call made from this destructor
// resolves to FileDBLink's own version, not a subclass override -- confirm
// that this is what is wanted.
FileDBLink::~FileDBLink()
{
  this->commit();
}

void FileDBLink::updateIfModified(const QString& path, bool lazy)
{
  QFileInfo fileinfo(path);
  FileDBLink::DBStatus status = existsWithMtime(path, fileinfo.lastModified());

  switch (status) {
  case FileDBLink::NONE: {
    addFile(fileinfo, lazy);
    break;
  }
  case FileDBLink::MTIME_DIFFERENT: {
    updateFile(fileinfo, lazy);
  }
  default: {
  }
  }
}

/**
 * Adds a file given its individual attributes.
 *
 * Unless @p lazy is set, the file's hash is computed with computeHash()
 * first; in lazy mode an empty hash is passed on instead. The actual
 * insertion is delegated to the overload that also takes the hash.
 */
void FileDBLink::addFile(const QString& path, quint64 size,
                         const QDateTime& lastModified, bool lazy)
{
  QByteArray hash = lazy ? QByteArray() : computeHash(path);
  addFile(path, size, lastModified, hash, lazy);
}

void FileDBLink::addFile(const QFileInfo& fileinfo, bool lazy)
{
  addFile(fileinfo.absoluteFilePath(), fileinfo.size(),
	  fileinfo.lastModified(), lazy);
}

/**
 * Computes the digest of a file's contents.
 *
 * The file is read in chunks of 32 KiB so that it never has to be held in
 * memory as a whole.
 *
 * @param path      file to hash.
 * @param algorithm digest algorithm handed to QCryptographicHash.
 * @return the raw digest bytes.
 * @throws PermissionException if opening fails with QFile::PermissionsError.
 * @throws IOException         if opening fails for any other reason, or if a
 *                             read fails part-way through the file.
 */
QByteArray FileDBLink::computeHash(const QString& path,
                                   QCryptographicHash::Algorithm algorithm)
{
  static const qint64 buffersize = 32768;

  QFile file(path);
  if (!file.open(QIODevice::ReadOnly)) {
    QString errorMsg = path + ": " + file.errorString();

    switch (file.error()) {
    case QFile::PermissionsError:
      throw PermissionException(errorMsg);
    default:
      throw IOException(errorMsg);
    }
  }

  QCryptographicHash hash(algorithm);
  while (!file.atEnd()) {
    const QByteArray chunk = file.read(buffersize);
    if (chunk.isEmpty()) {
      // An empty chunk before end-of-file means no progress can be made.
      // Report a read error instead of returning a digest of partial data,
      // and never spin forever on a device that keeps yielding nothing.
      if (file.error() != QFile::NoError) {
        QString errorMsg = path + ": " + file.errorString();
        throw IOException(errorMsg);
      }
      break;
    }
    hash.addData(chunk);
  }
  return hash.result();
}

/**
 * Fills in missing checksums for every known file of the given size.
 *
 * Entries returned by filesWithSize() whose checksum is still empty get
 * their hash computed and are written back through updateFile(); entries
 * that already carry a checksum are left untouched.
 *
 * @return true if at least one file of that size is known (whether or not
 *         anything had to be updated), false otherwise.
 */
bool FileDBLink::updateAllWithSize(quint64 size)
{
  const QList<dbinf_ptr_t> sameSize = filesWithSize(size);
  if (sameSize.empty())
    return false;

  for (QList<dbinf_ptr_t>::const_iterator it = sameSize.constBegin();
       it != sameSize.constEnd(); ++it) {
    const dbinf_ptr_t& entry = *it;
    if (!entry->checksum().isEmpty())
      continue;

    QByteArray digest = computeHash(entry->path());
    updateFile(entry->path(), entry->size(), entry->mtime(), digest);
  }
  return true;
}

void FileDBLink::updateFile(const QFileInfo& fileinfo, bool lazy)
{
  updateFile(fileinfo.absoluteFilePath(), fileinfo.size(),
	     fileinfo.lastModified(), lazy);
}

/**
 * Updates a file entry given its individual attributes.
 *
 * Unless @p lazy is set, the file's hash is recomputed with computeHash()
 * first; in lazy mode an empty hash is passed on instead. The actual update
 * is delegated to the overload that also takes the hash.
 */
void FileDBLink::updateFile(const QString& path, quint64 size,
                            const QDateTime& lastModified, bool lazy)
{
  QByteArray hash = lazy ? QByteArray() : computeHash(path);
  updateFile(path, size, lastModified, hash, lazy);
}

/**
 * Returns the entries below @p prefix ordered by the requested criterion.
 *
 * @param prefix   passed through to values() / computedValues().
 * @param order    sort key. PATH returns the entries in the order in which
 *                 they were obtained; every other key orders them through a
 *                 QMultiMap keyed on the corresponding attribute.
 * @param extended when true the entries come from computedValues(), otherwise
 *                 from values(). EDIT ordering requires extended entries.
 *
 * Calls abort() if @p order matches none of the handled keys.
 */
const QList<FileDBLink::dbinf_ptr_t >
FileDBLink::sortOn(const QString& prefix, SORTORDER order, bool extended)
{
  typedef QList<dbinf_ptr_t >::const_iterator entry_iterator;

  const QList<dbinf_ptr_t > entries =
    extended ? computedValues(prefix) : values(prefix);
  const entry_iterator last = entries.constEnd();

  switch (order) {
  case PATH: {
    // No re-ordering needed: hand back the entries as they were fetched.
    return entries;
  }
  case SIZE: {
    QMultiMap<quint64, dbinf_ptr_t > bySize;
    for (entry_iterator it = entries.constBegin(); it != last; ++it)
      bySize.insert((*it)->size(), *it);
    return bySize.values();
  }
  case MTIME: {
    QMultiMap<QDateTime, dbinf_ptr_t > byMtime;
    for (entry_iterator it = entries.constBegin(); it != last; ++it)
      byMtime.insert((*it)->mtime(), *it);
    return byMtime.values();
  }
  case CHECKSUM: {
    QMultiMap<QByteArray, dbinf_ptr_t > byChecksum;
    for (entry_iterator it = entries.constBegin(); it != last; ++it)
      byChecksum.insert((*it)->checksum(), *it);
    return byChecksum.values();
  }
  case EDIT: {
    // NOTE(review): this assert is the only guard; the cast result below is
    // dereferenced without a null check.
    assert(extended);
    QMultiMap<int, dbinf_ptr_t > byDistance;
    for (entry_iterator it = entries.constBegin(); it != last; ++it) {
      const QSharedPointer<ExtendedDBInfo> ext =
        it->dynamicCast<ExtendedDBInfo>();
      byDistance.insert(ext->editDistance(), *it);
    }
    return byDistance.values();
  }
  }
  abort();
}

/**
 * Builds the extended record for one entry.
 *
 * The name of @p info is compared, via CachedEditDistance::Compute(), with
 * the name of every other element of @p entries. The path of the first
 * element with the smallest distance is stored, together with that distance,
 * in a new ExtendedDBInfo copied from @p info.
 *
 * If @p entries holds no element other than @p info, the stored path is empty
 * and the distance keeps its initial value of 100000.
 */
FileDBLink::dbinf_ptr_t
FileDBLink::computedValue(const dbinf_ptr_t& info,
                          const QList<dbinf_ptr_t >& entries)
{
  QString ownName = info->name();
  int bestDistance = 100000;
  QString nearestPath;

  foreach (const dbinf_ptr_t& candidate, entries) {
    if (candidate != info) {
      QString otherName = candidate->name();
      const int distance =
        CachedEditDistance::Compute(ownName, otherName, false);
      if (distance < bestDistance) {
        bestDistance = distance;
        nearestPath = candidate->path();
      }
    }
  }
  return dbinf_ptr_t(new ExtendedDBInfo(*info, nearestPath, bestDistance));
}

/**
 * Returns the extended record of every entry below @p prefix.
 *
 * Each entry is run through computedValue() against the full entry list, so
 * the number of name comparisons grows quadratically with the entry count.
 * The compile-time switch below selects between mapping the entries through
 * QtConcurrent::blockingMapped() and a plain sequential loop.
 */
const QList<FileDBLink::dbinf_ptr_t>
FileDBLink::computedValues(const QString& prefix) const
{
  const QList<dbinf_ptr_t > entries = values(prefix);

#if 1
  // Concurrent variant.
  return QtConcurrent::blockingMapped(entries,
                                      boost::bind(&FileDBLink::computedValue,
                                                  _1,
                                                  entries));
#else
  // Sequential variant.
  QList<dbinf_ptr_t > extendedEntries;
  foreach (const dbinf_ptr_t& entry, entries)
    extendedEntries.push_back(computedValue(entry, entries));
  return extendedEntries;
#endif
}


/**
 * Returns every entry below @p prefix whose recorded size equals @p size.
 *
 * The entries are obtained from values(prefix) and filtered linearly; the
 * relative order of the matches is the order in which values() yields them.
 */
const QList<FileDBLink::dbinf_ptr_t>
FileDBLink::filesWithSize(quint64 size, const QString& prefix) const
{
  QList<dbinf_ptr_t> retVal;
  const QList<dbinf_ptr_t> vals = values(prefix);
  // The path of each entry used to be copied into an unused local here; only
  // the size is needed for the filter.
  foreach (const dbinf_ptr_t& val, vals) {
    if (val->size() == size)
      retVal << val;
  }
  return retVal;
}

/**
 * Looks up the entry whose path equals @p path.
 *
 * Performs a linear scan over values() and returns the first match, or a
 * default-constructed (null) pointer when no entry has that path.
 */
FileDBLink::dbinf_ptr_t FileDBLink::value(const QString& path) const
{
  const QList<dbinf_ptr_t> all = values();
  for (QList<dbinf_ptr_t>::const_iterator it = all.constBegin();
       it != all.constEnd(); ++it) {
    if ((*it)->path() == path)
      return *it;
  }
  return dbinf_ptr_t();
}

// This implementation of commit() has nothing to persist: it ignores both
// arguments and always reports success.
// NOTE(review): presumably subclasses with a real backing store override
// this -- confirm against the class declaration.
bool FileDBLink::commit(const QString& /* unused */, bool /* unused */)
{
  return true;
}