Mercurial > dedupe
changeset 101:6c6f3a5f96ea
Better support for batch operations.
| field | value |
|---|---|
| author | Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no> |
| date | Thu, 13 Feb 2014 15:19:37 +0100 |
| parents | f4ebbfa3ffae |
| children | 5906661c0421 |
| files | FileDBLink.hpp MemoryDBLink.cpp MemoryDBLink.hpp SqliteDBLink.cpp SqliteDBLink.hpp |
| diffstat | 5 files changed, 159 insertions(+), 38 deletions(-) [+] |
line wrap: on
line diff
--- a/FileDBLink.hpp Thu Feb 13 14:55:11 2014 +0100 +++ b/FileDBLink.hpp Thu Feb 13 15:19:37 2014 +0100 @@ -123,8 +123,9 @@ const QDateTime& dtime, const QByteArray& hash, bool lazy) = 0; virtual void keepOnlyFromPrefix(const QString& prefix, - const QStringList& files) = 0; - virtual void deleteFileFromDB(const QString& path) = 0; + const QStringList& files, + bool lazy = false) = 0; + virtual void deleteFileFromDB(const QString& path, bool lazy = false) = 0; virtual const QList<dbinf_ptr_t> filesWithSize(quint64 size, const QString& prefix = QString()) const;
--- a/MemoryDBLink.cpp Thu Feb 13 14:55:11 2014 +0100 +++ b/MemoryDBLink.cpp Thu Feb 13 15:19:37 2014 +0100 @@ -52,14 +52,15 @@ } -void MemoryDBLink::deleteFileFromDB(const QString& path) +void MemoryDBLink::deleteFileFromDB(const QString& path, bool lazy) { entries.remove(path); } void MemoryDBLink::keepOnlyFromPrefix(const QString& prefix, - const QStringList& files) + const QStringList& files, + bool lazy) { QStringList list; foreach(dbinf_ptr_t info, values(prefix)) {
--- a/MemoryDBLink.hpp Thu Feb 13 14:55:11 2014 +0100 +++ b/MemoryDBLink.hpp Thu Feb 13 15:19:37 2014 +0100 @@ -24,8 +24,9 @@ const QList<dbinf_ptr_t > values(const QString& prefix = QString() ) const; virtual void keepOnlyFromPrefix(const QString& prefix, - const QStringList& files); - virtual void deleteFileFromDB(const QString& path); + const QStringList& files, + bool lazy = false); + virtual void deleteFileFromDB(const QString& path, bool lazy = false); private:
--- a/SqliteDBLink.cpp Thu Feb 13 14:55:11 2014 +0100 +++ b/SqliteDBLink.cpp Thu Feb 13 15:19:37 2014 +0100 @@ -1,3 +1,5 @@ + + #include "SqliteDBLink.hpp" #include <QtCore/QStringList> @@ -14,6 +16,8 @@ const QString SqliteDBLink::connectionName("SqliteDBLink"); +#include <QtCore/QDebug> + SqliteDBLink::SqliteDBLink(const QString& dbPath) { @@ -36,6 +40,13 @@ throw SQLException("No database"); } + if (!query.exec(QString("SELECT * FROM human_files;"))) { + query.exec("CREATE VIEW human_files AS SELECT path, size, date(mtime) as mdate, time(mtime) as mtime, hex(checksum) as checksum FROM files;"); + } + if (!query.exec(QString("SELECT * FROM human_files;"))) { + throw SQLException("No view"); + } + preparedSizePrefixQuery = new QSqlQuery(db); preparedSizePrefixQuery->prepare("SELECT * FROM files WHERE path LIKE :prefix AND size = :size"); @@ -52,6 +63,9 @@ "UPDATE files " "SET size=:size, mtime=:mtime, checksum=:checksum " "WHERE path=:path"); + + preparedDeleteQuery = new QSqlQuery(db); + preparedDeleteQuery->prepare("DELETE FROM files WHERE path = :path"); } SqliteDBLink::~SqliteDBLink() @@ -124,7 +138,7 @@ void SqliteDBLink::updateFile(const DBInfo& dbinfo, bool lazy) { if (lazy) { - operations.push_back(Operation(dbinfo, Update)); + operations.push_back(new UpdateOperation(dbinfo)); } else { preparedUpdateQuery->bindValue(":path", dbinfo.path()); @@ -140,7 +154,7 @@ void SqliteDBLink::addFile(const DBInfo& dbinfo, bool lazy) { if (lazy) { - operations.push_back(Operation(dbinfo, Add)); + operations.push_back(new AddOperation(dbinfo)); } else { if (!tryAddFile(dbinfo)) { @@ -199,19 +213,25 @@ return values; } -void SqliteDBLink::deleteFileFromDB(const QString& path) +void SqliteDBLink::deleteFileFromDB(const QString& path, bool lazy) { - QSqlQuery query(db); - query.prepare("DELETE FROM files WHERE path = :path"); - query.bindValue(":path", path); - if (!query.exec()) { - throw SQLException(query); + if (lazy) { + operations.push_back(new DeleteOperation(path)); 
+ } + else { + QSqlQuery query(db); + query.prepare("DELETE FROM files WHERE path = :path"); + query.bindValue(":path", path); + if (!query.exec()) { + throw SQLException(query); + } } } void SqliteDBLink::keepOnlyFromPrefix(const QString& prefix, - const QStringList& files) + const QStringList& files, + bool lazy) { QStringList list; foreach(dbinf_ptr_t info, values(prefix)) { @@ -220,7 +240,7 @@ } } foreach(QString path, list) { - deleteFileFromDB(path); + deleteFileFromDB(path, lazy); } } @@ -265,25 +285,34 @@ QVariantList& checksums, OperationType operation) { - assert(paths.size() == sizes.size()); - assert(paths.size() == mtimes.size()); - assert(paths.size() == checksums.size()); + assert(sizes.size() == 0 || paths.size() == sizes.size()); + assert(mtimes.size() == 0 || paths.size() == mtimes.size()); + assert(checksums.size() == 0 || paths.size() == checksums.size()); QSqlQuery* query; + bool infoQuery; switch (operation) { case Add: query = preparedTryAddQuery; + infoQuery = true; break; case Update: query = preparedUpdateQuery; + infoQuery = true; + break; + case Delete: + query = preparedDeleteQuery; + infoQuery = false; break; case None: assert(paths.size() == 0); return; } query->bindValue("path", paths); - query->bindValue("size", sizes); - query->bindValue("mtime", mtimes); - query->bindValue("checksum", checksums); + if (infoQuery) { + query->bindValue("size", sizes); + query->bindValue("mtime", mtimes); + query->bindValue("checksum", checksums); + } if (!query->execBatch()) throw SQLException(*query); @@ -294,34 +323,64 @@ checksums.clear(); } +const char* SqliteDBLink::typeString(OperationType type) +{ + switch(type) { + case Add: + return "Add"; + case Update: + return "Update"; + case Delete: + return "Delete"; + case None: + return "None"; + } + + return ""; +} + + bool SqliteDBLink::commit(const QString& prefix) { OperationType last = None; QVariantList paths, sizes, mtimes, hashes; - foreach(const Operation& operation, operations) { - if 
(operation.second != last) { + foreach(const Operation* operation, operations) { + if (operation->type() != last) { executeOperation(paths, sizes, mtimes, hashes, last); } - switch (operation.second) { + switch (operation->type()) { case Add: - case Update: - paths.push_back(operation.first.path()); - sizes.push_back(operation.first.size()); - mtimes.push_back(operation.first.mtime()); - hashes.push_back(operation.first.checksum()); + case Update: { + const InfoOperation* iOperation = dynamic_cast<const InfoOperation*>(operation); + paths.push_back(iOperation->info().path()); + sizes.push_back(iOperation->info().size()); + mtimes.push_back(iOperation->info().mtime()); + hashes.push_back(iOperation->info().checksum()); + break; + } + case Delete: { + const DeleteOperation* dOperation = dynamic_cast<const DeleteOperation*>(operation); + paths.push_back(dOperation->path()); + } case None: break; } - last = operation.second; + last = operation->type(); } if (last != None) { + qDebug() << "Execute Operation" << typeString(last); + qDebug() << paths; executeOperation(paths, sizes, mtimes, hashes, last); + qDebug() << "Execute Operation Done"; } QSqlQuery whatToUpdate(db); - QString whatToUpdateQuery = "SELECT path FROM files WHERE checksum is NULL AND size in (SELECT size FROM files WHERE size <> 0 GROUP BY size HAVING count(*) > 1 ORDER BY SIZE) %1 ORDER BY size"; + QString whatToUpdateQuery = + "SELECT path FROM files WHERE checksum is NULL AND path in " + "(SELECT path FROM files WHERE size <> 0 %1 " + "GROUP BY size HAVING count(*) > 1) ORDER BY size"; if (prefix.isEmpty()) { whatToUpdateQuery = whatToUpdateQuery.arg(""); } @@ -332,10 +391,12 @@ if (!prefix.isEmpty()) { whatToUpdate.bindValue("prefix", QString("%1%").arg(prefix)); } - + + qDebug() << "Before whatToUpdate"; if (!whatToUpdate.exec()) { throw SQLException(whatToUpdate); } + qDebug() << "After whatToUpdate"; int pathIndex = whatToUpdate.record().indexOf("path"); QStringList updatePaths; @@ -351,6 +412,7 
@@ "WHERE path=:path"); foreach (const QString& path, updatePaths) { + qDebug() << path; QByteArray ohash = computeHash(path); emit progressUpdate(++n, max); updateChecksum.bindValue("checksum", ohash);
--- a/SqliteDBLink.hpp Thu Feb 13 14:55:11 2014 +0100 +++ b/SqliteDBLink.hpp Thu Feb 13 15:19:37 2014 +0100 @@ -26,13 +26,68 @@ QStringList toStringList(); const QList<dbinf_ptr_t > values(const QString& prefix = QString() ) const; virtual void keepOnlyFromPrefix(const QString& prefix, - const QStringList& files); - virtual void deleteFileFromDB(const QString& path); + const QStringList& files, + bool lazy = false); + virtual void deleteFileFromDB(const QString& path, bool lazy = false); bool commit(const QString& prefix = QString() ); private: - typedef enum {None = 0, Add, Update } OperationType; - typedef QPair<DBInfo, OperationType> Operation; + typedef enum {None = 0, Add, Update, Delete } OperationType; + static const char* typeString(OperationType); + + //typedef QPair<DBInfo, OperationType> Operation; + class Operation { + public: + virtual OperationType type() const = 0; + }; + + class InfoOperation: public Operation { + private: + DBInfo _info; + public: + InfoOperation(const DBInfo&info) { + this->_info=info; + } + const DBInfo& info() const { + return _info; + } + }; + + class UpdateOperation: public InfoOperation { + public: + UpdateOperation(const DBInfo&info) : InfoOperation(info) {} + + OperationType type() const + { + return Update; + } + }; + + class AddOperation: public InfoOperation { + public: + AddOperation(const DBInfo&info) : InfoOperation(info) {} + + OperationType type() const + { + return Add; + } + }; + + class DeleteOperation: public Operation { + private: + QString _path; + public: + DeleteOperation(const QString& path) { + _path=path; + } + OperationType type() const + { + return Delete; + } + const QString& path() const { + return _path; + } + }; void addFile(const DBInfo& info, bool lazy = false); bool tryAddFile(const DBInfo& info); @@ -45,7 +100,8 @@ QSqlQuery* preparedSizeQuery; QSqlQuery* preparedTryAddQuery; QSqlQuery* preparedUpdateQuery; - QList<Operation> operations; + QSqlQuery* preparedDeleteQuery; + QList<Operation*> 
operations; void executeOperation(QVariantList& paths, QVariantList& sizes, QVariantList& mtimes, QVariantList& checksums, OperationType operation);
