Mercurial > dedupe
changeset 85:1f9e27a0bd7f
Allow for lazy calculation of checksums: skip computing them if only one file of a given size exists.
| author | Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no> |
|---|---|
| date | Thu, 10 Oct 2013 15:55:30 +0200 |
| parents | 848496a57039 |
| children | af7962f3274b |
| files | FileDBLink.cpp FileDBLink.hpp SqliteDBLink.cpp SqliteDBLink.hpp |
| diffstat | 4 files changed, 81 insertions(+), 16 deletions(-) [+] |
line wrap: on
line diff
--- a/FileDBLink.cpp Thu Oct 10 14:14:20 2013 +0200 +++ b/FileDBLink.cpp Thu Oct 10 15:55:30 2013 +0200 @@ -8,28 +8,28 @@ #include <boost/bind.hpp> -void FileDBLink::updateIfModified(const QString& path) +void FileDBLink::updateIfModified(const QString& path, bool lazy) { QFileInfo fileinfo(path); FileDBLink::DBStatus status = existsWithMtime(path, fileinfo.lastModified()); switch (status) { case FileDBLink::NONE: { - addFile(fileinfo); + addFile(fileinfo, lazy); break; } case FileDBLink::MTIME_DIFFERENT: { - updateFile(fileinfo); + updateFile(fileinfo, lazy); } default: { } } } -void FileDBLink::addFile(const QFileInfo& fileinfo) +void FileDBLink::addFile(const QFileInfo& fileinfo, bool lazy) { addFile(fileinfo.absoluteFilePath(), fileinfo.size(), - fileinfo.lastModified()); + fileinfo.lastModified(), lazy); } QByteArray FileDBLink::computeHash(const QString& path, @@ -56,23 +56,44 @@ return hash.result(); } +bool FileDBLink::updateAllWithSize(qint64 size) +{ + const QList<dbinf_ptr_t> others = filesWithSize(size); + if (!others.empty()) { + foreach( const dbinf_ptr_t other, others) { + if (other->checksum().isEmpty()) { + QByteArray ohash = computeHash(other->path()); + updateFile(other->path(), other->size(), other->mtime(), ohash); + } + } + return true; + } + return false; +} void FileDBLink::addFile(const QString& path, qint64 size, - const QDateTime& lastModified) + const QDateTime& lastModified, bool lazy) { - addFile(path, size, lastModified, computeHash(path)); + QByteArray hash; + // std::cout << path.toStdString() << "::" << lazy << std::endl; + if (!lazy || updateAllWithSize(size)) + hash = computeHash(path); + addFile(path, size, lastModified, hash); } -void FileDBLink::updateFile(const QFileInfo& fileinfo) +void FileDBLink::updateFile(const QFileInfo& fileinfo, bool lazy) { updateFile(fileinfo.absoluteFilePath(), fileinfo.size(), - fileinfo.lastModified()); + fileinfo.lastModified(), lazy); } void FileDBLink::updateFile(const QString& path, qint64 
size, - const QDateTime& lastModified) + const QDateTime& lastModified, bool lazy) { - updateFile(path, size, lastModified, computeHash(path)); + QByteArray hash; + if (!lazy || updateAllWithSize(size)) + hash = computeHash(path); + updateFile(path, size, lastModified, hash); } const QList<FileDBLink::dbinf_ptr_t > @@ -172,3 +193,30 @@ #endif return list; } + + +const QList<FileDBLink::dbinf_ptr_t> +FileDBLink::filesWithSize(qint64 size, const QString& prefix) const +{ + QList<dbinf_ptr_t> retVal; + const QList<dbinf_ptr_t> vals = values(prefix); + foreach (const dbinf_ptr_t val, vals) { + QString path = val->path(); + if (val->size() == size) { + retVal << val; + } + } + return retVal; +} + +FileDBLink::dbinf_ptr_t FileDBLink::value(const QString& path) const +{ + const QList<dbinf_ptr_t> vals = values(); + foreach (const dbinf_ptr_t val, vals) { + QString vpath = val->path(); + if (vpath == path) { + return val; + } + } + return dbinf_ptr_t(); +}
--- a/FileDBLink.hpp Thu Oct 10 14:14:20 2013 +0200 +++ b/FileDBLink.hpp Thu Oct 10 15:55:30 2013 +0200 @@ -113,7 +113,7 @@ virtual ~FileDBLink() {} - void updateIfModified(const QString& path); + void updateIfModified(const QString& path, bool lazy = false); virtual void addFile(const QString& path, qint64 size, const QDateTime& dtime, const QByteArray& hash) = 0; @@ -121,13 +121,19 @@ const QStringList& files) = 0; virtual void deleteFileFromDB(const QString& path) = 0; - void addFile(const QString& path, qint64 size, const QDateTime& dtime); - void addFile(const QFileInfo& fileinfo); + virtual const QList<dbinf_ptr_t> filesWithSize(qint64 size, const QString& prefix = QString()) const; + + bool updateAllWithSize(qint64 size); + + virtual dbinf_ptr_t value(const QString& path) const; + + void addFile(const QString& path, qint64 size, const QDateTime& dtime, bool lazy = false); + void addFile(const QFileInfo& fileinfo, bool lazy = false); virtual void updateFile(const QString& path, qint64 size, const QDateTime& dtime, const QByteArray& hash) = 0; - void updateFile(const QString& path, qint64 size, const QDateTime& dtime); - void updateFile(const QFileInfo& fileinfo); + void updateFile(const QString& path, qint64 size, const QDateTime& dtime, bool lazy = false); + void updateFile(const QFileInfo& fileinfo, bool lazy = false); virtual bool exists(const QString& path) = 0; virtual DBStatus existsWithMtime(const QString& path,
--- a/SqliteDBLink.cpp Thu Oct 10 14:14:20 2013 +0200 +++ b/SqliteDBLink.cpp Thu Oct 10 15:55:30 2013 +0200 @@ -197,3 +197,11 @@ deleteFileFromDB(path); } } + +const QList<FileDBLink::dbinf_ptr_t> +SqliteDBLink::filesWithSize(qint64 size, const QString& prefix) const +{ + //This is incredibly inefficient and should be reimplemented + return FileDBLink::filesWithSize(size, prefix); +} +
--- a/SqliteDBLink.hpp Thu Oct 10 14:14:20 2013 +0200 +++ b/SqliteDBLink.hpp Thu Oct 10 15:55:30 2013 +0200 @@ -16,6 +16,9 @@ bool exists(const QString& path); DBStatus existsWithMtime(const QString& path, const QDateTime& mtime); + virtual const QList<dbinf_ptr_t> filesWithSize(qint64 size, const QString& prefix = QString()) const; + + QStringList toStringList(); const QList<dbinf_ptr_t > values(const QString& prefix = QString() ) const; virtual void keepOnlyFromPrefix(const QString& prefix,
