Mercurial > dedupe
view FileDBLink.cpp @ 91:a5788991ca9f
Refactor lazy update, with one common routine.
| author | Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no> |
|---|---|
| date | Mon, 21 Oct 2013 16:21:54 +0200 |
| parents | af7962f3274b |
| children | f49023c61dac |
line wrap: on
line source
#include "FileDBLink.hpp"
#include "CachedEditDistance.hpp"
#include "Exception/PermissionException.hpp"

#include <QtCore/QtConcurrentMap>

#include <boost/bind.hpp>

#include <cassert>
#include <cstdlib>

// Brings the database entry for `path` in line with the file on disk.
//
// The file's current modification time is passed to existsWithMtime():
//  - NONE            -> the file is added via addFile(),
//  - MTIME_DIFFERENT -> the existing entry is refreshed via updateFile(),
//  - any other status -> nothing is done.
// `lazy` is forwarded unchanged to addFile()/updateFile().
void FileDBLink::updateIfModified(const QString& path, bool lazy)
{
  QFileInfo fileinfo(path);
  FileDBLink::DBStatus status = existsWithMtime(path, fileinfo.lastModified());

  switch (status) {
  case FileDBLink::NONE: {
    addFile(fileinfo, lazy);
    break;
  }
  case FileDBLink::MTIME_DIFFERENT: {
    updateFile(fileinfo, lazy);
    // Explicit break: do not rely on the default branch staying empty.
    break;
  }
  default: {
    // Every other status is deliberately left untouched.
  }
  }
}

// Adds `path` with the given size and modification time.
//
// When `lazy` is true no hash is computed and an empty QByteArray is stored
// instead; otherwise the hash is computed with computeHash(), which may throw.
void FileDBLink::addFile(const QString& path, quint64 size,
                         const QDateTime& lastModified, bool lazy)
{
  QByteArray hash;
  //if (!lazy || updateAllWithSize(size))
  if (!lazy)
    hash = computeHash(path);

  addFile(path, size, lastModified, hash);
}

// Convenience overload: takes path, size and mtime from `fileinfo`.
void FileDBLink::addFile(const QFileInfo& fileinfo, bool lazy)
{
  addFile(fileinfo.absoluteFilePath(), fileinfo.size(),
          fileinfo.lastModified(), lazy);
}

// Computes the digest of the contents of the file at `path` using `algorithm`.
//
// The file is read in chunks of `buffersize` bytes.
//
// Throws PermissionException if the file cannot be opened because of a
// permissions error, and IOException for any other open failure or when a
// read returns no data before the end of the file has been reached.
QByteArray FileDBLink::computeHash(const QString& path,
                                   QCryptographicHash::Algorithm algorithm)
{
  const static uint buffersize = 32768;

  QCryptographicHash hash(algorithm);
  QFile file(path);

  if (!file.open(QIODevice::ReadOnly)) {
    QString errorMsg = path + ": " + file.errorString();
    switch (file.error()) {
    case QFile::PermissionsError:
      throw PermissionException(errorMsg);
    default:
      throw IOException(errorMsg);
    }
  }

  while (!file.atEnd()) {
    const QByteArray chunk = file.read(buffersize);
    if (chunk.isEmpty()) {
      // read() signals errors only through an empty result. Without this
      // check a failing read would either loop forever or silently produce
      // a digest of a truncated file.
      QString errorMsg = path + ": " + file.errorString();
      throw IOException(errorMsg);
    }
    hash.addData(chunk);
  }

  return hash.result();
}

// Computes and stores the hash for every entry of the given size whose
// checksum is still empty.
//
// Returns true if at least one entry with that size exists (whether or not
// any of them needed hashing), false otherwise.
bool FileDBLink::updateAllWithSize(quint64 size)
{
  const QList<dbinf_ptr_t> others = filesWithSize(size);
  if (others.empty())
    return false;

  foreach (const dbinf_ptr_t& other, others) {
    if (other->checksum().isEmpty()) {
      QByteArray ohash = computeHash(other->path());
      updateFile(other->path(), other->size(), other->mtime(), ohash);
    }
  }
  return true;
}

// Convenience overload: takes path, size and mtime from `fileinfo`.
void FileDBLink::updateFile(const QFileInfo& fileinfo, bool lazy)
{
  updateFile(fileinfo.absoluteFilePath(), fileinfo.size(),
             fileinfo.lastModified(), lazy);
}

// Updates the entry for `path` with the given size and modification time.
//
// When `lazy` is true no hash is computed and an empty QByteArray is stored
// instead; otherwise the hash is computed with computeHash(), which may throw.
void FileDBLink::updateFile(const QString& path, quint64 size,
                            const QDateTime& lastModified, bool lazy)
{
  QByteArray hash;
  //if (!lazy || updateAllWithSize(size))
  if (!lazy)
    hash = computeHash(path);

  updateFile(path, size, lastModified, hash);
}

// Returns the entries below `prefix`, ordered according to `order`.
//
// With `extended` set the entries come from computedValues(), otherwise from
// values(). Ordering by EDIT requires `extended` to be true. For SIZE, MTIME,
// CHECKSUM and EDIT the entries are ordered by ascending key through a
// QMultiMap; for PATH the list is returned in the order it was obtained.
// An `order` outside the handled enumerators aborts the program.
const QList<FileDBLink::dbinf_ptr_t > FileDBLink::sortOn(const QString& prefix,
                                                         SORTORDER order,
                                                         bool extended)
{
  QList<dbinf_ptr_t > list =
    (extended) ? computedValues(prefix) : values(prefix);

  switch (order) {
  case PATH: {
    // No re-ordering is applied here.
    // NOTE(review): presumably values() already yields path order - confirm.
    return list;
  }
  case SIZE: {
    QMultiMap<quint64, dbinf_ptr_t > oList;
    foreach (const dbinf_ptr_t& info, list) {
      oList.insert(info->size(), info);
    }
    return oList.values();
  }
  case MTIME: {
    QMultiMap<QDateTime, dbinf_ptr_t > oList;
    foreach (const dbinf_ptr_t& info, list) {
      oList.insert(info->mtime(), info);
    }
    return oList.values();
  }
  case CHECKSUM: {
    QMultiMap<QByteArray, dbinf_ptr_t > oList;
    foreach (const dbinf_ptr_t& info, list) {
      oList.insert(info->checksum(), info);
    }
    return oList.values();
  }
  case EDIT: {
    assert(extended);
    QMultiMap<int, dbinf_ptr_t > oList;
    foreach (const dbinf_ptr_t& info, list) {
      QSharedPointer<ExtendedDBInfo> ptr;
      ptr = info.dynamicCast<ExtendedDBInfo>();
      // The cast yields a null pointer for entries that are not
      // ExtendedDBInfo; catch that before dereferencing.
      assert(!ptr.isNull());
      oList.insert(ptr->editDistance(), info);
    }
    return oList.values();
  }
  }

  // Only reached when `order` is not one of the enumerators handled above.
  abort();
}

// Builds an ExtendedDBInfo for `info` holding the path of, and the edit
// distance to, the entry in `entries` whose name is closest to info's name.
//
// `info` itself is skipped. If no other entry yields a distance below the
// initial bound of 100000, the resulting path is empty and the distance is
// that bound.
FileDBLink::dbinf_ptr_t FileDBLink::computedValue(const dbinf_ptr_t& info,
                                                  const QList<dbinf_ptr_t >& entries)
{
  QString p1 = info->name();
  int minDist = 100000;
  QString other;

  for (QList<dbinf_ptr_t >::const_iterator it2 = entries.begin();
       it2 != entries.end(); ++it2) {
    if (info == *it2)
      continue;

    QString p2 = (*it2)->name();
    int dist = CachedEditDistance::Compute(p1, p2, false);
    if (dist < minDist) {
      minDist = dist;
      other = (*it2)->path();
    }
  }

  return dbinf_ptr_t(new ExtendedDBInfo(*info, other, minDist));
}

// Returns, for every entry below `prefix`, the result of computedValue()
// evaluated against the full set of entries below that prefix.
const QList<FileDBLink::dbinf_ptr_t> FileDBLink::computedValues(const QString& prefix) const
{
  QList<dbinf_ptr_t > list;
  QList<dbinf_ptr_t > entries = values(prefix);

#if 1
  // Map every entry through computedValue(); the call blocks until all
  // results are available.
  list = QtConcurrent::blockingMapped(entries,
                                      boost::bind(&FileDBLink::computedValue,
                                                  _1, entries));
#else
  for (QList<dbinf_ptr_t >::const_iterator it1 = entries.begin();
       it1 != entries.end(); ++it1) {
    dbinf_ptr_t ext = computedValue(*it1, entries);
    list.push_back(ext);
  }
#endif

  return list;
}

// Returns the entries below `prefix` whose size equals `size`.
const QList<FileDBLink::dbinf_ptr_t> FileDBLink::filesWithSize(quint64 size,
                                                               const QString& prefix) const
{
  QList<dbinf_ptr_t> retVal;
  const QList<dbinf_ptr_t> vals = values(prefix);

  foreach (const dbinf_ptr_t& val, vals) {
    if (val->size() == size) {
      retVal << val;
    }
  }
  return retVal;
}

// Returns the entry whose path equals `path`, or a null pointer if there is
// none. The lookup is a linear scan over values().
FileDBLink::dbinf_ptr_t FileDBLink::value(const QString& path) const
{
  const QList<dbinf_ptr_t> vals = values();

  foreach (const dbinf_ptr_t& val, vals) {
    QString vpath = val->path();
    if (vpath == path) {
      return val;
    }
  }
  return dbinf_ptr_t();
}
