Mercurial > dedupe
changeset 2:2833b7f8884a
SQL backend is working.
Need to get more speed on comparison.
| author | Tom Fredrik Blenning Klaussen <bfg@blenning.no> |
|---|---|
| date | Tue, 21 Aug 2012 14:25:33 +0200 |
| parents | aae83c0a771d |
| children | 7a44ba08673d |
| files | CMakeLists.txt CompileTimeConstants.h DataController.cpp DataController.hpp FileDBLink.cpp FileDBLink.hpp SqliteDBLink.cpp SqliteDBLink.hpp TestFramework.cpp TestFramework.hpp TestSqliteDBLink.cpp |
| diffstat | 11 files changed, 415 insertions(+), 37 deletions(-) [+] |
line wrap: on
line diff
--- a/CMakeLists.txt Mon Aug 20 17:32:58 2012 +0200 +++ b/CMakeLists.txt Tue Aug 21 14:25:33 2012 +0200 @@ -12,7 +12,7 @@ ENDIF() -FIND_PACKAGE(Qt4 COMPONENTS QtOpenGL QtXml REQUIRED) +FIND_PACKAGE(Qt4 COMPONENTS QtOpenGL QtXml QtSql REQUIRED) FIND_PACKAGE(Sqlite3 REQUIRED) @@ -20,13 +20,13 @@ ADD_DEFINITIONS(${QT_DEFINITIONS}) -SET(SOURCES +SET(CLASS_SOURCES DataController.cpp EditDistance.cpp IOException.cpp FileDbLink.cpp + SqliteDbLink.cpp MemoryDbLink.cpp - main.cpp ) SET(MOC_HEADERS @@ -36,14 +36,32 @@ # Returns the moc_xxx.cpp files in the foo_MOC_SRCS variable QT4_WRAP_CPP(MOC_SOURCES ${MOC_HEADERS}) +SET(SOURCES + ${CLASS_SOURCES} + ${MOC_SOURCES} + main.cpp +) + +SET(TEST_SOURCES + ${CLASS_SOURCES} + ${MOC_SOURCES} + TestFramework.cpp +) + + + SET(CMAKE_CXX_FLAGS "-g2 -Wall -Werror -fno-inline") ADD_EXECUTABLE(DeDupe ${SOURCES} ${MOC_SOURCES}) TARGET_LINK_LIBRARIES(DeDupe ${QT_LIBRARIES} ${SQLITE3_LIBRARIES}) ENABLE_TESTING() -ADD_EXECUTABLE(TestEditDistance TestEditDistance.cpp EditDistance.cpp) +ADD_EXECUTABLE(TestEditDistance TestEditDistance.cpp ${TEST_SOURCES}) ADD_TEST(TestEditDistance TestEditDistance) +TARGET_LINK_LIBRARIES(TestEditDistance ${QT_LIBRARIES}) -TARGET_LINK_LIBRARIES(TestEditDistance ${QT_LIBRARIES}) +ADD_EXECUTABLE(TestSqliteDBLink TestSqliteDBLink.cpp ${TEST_SOURCES}) +ADD_TEST(TestSqliteDBLink TestSqliteDBLink) +TARGET_LINK_LIBRARIES(TestSqliteDBLink ${QT_LIBRARIES}) + #ADD_PRECOMPILED_HEADER(TestEditDistance TestFramework.hpp)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CompileTimeConstants.h Tue Aug 21 14:25:33 2012 +0200 @@ -0,0 +1,8 @@ +#ifndef COMPILETIMECONSTANTS_H +#define COMPILETIMECONSTANTS_H + +#ifndef DB_DEFAULT_LOCATION +#define DB_DEFAULT_LOCATION "~/.DeDupe.sqlite" +#endif + +#endif //COMPILETIMECONSTANTS_H
--- a/DataController.cpp Mon Aug 20 17:32:58 2012 +0200 +++ b/DataController.cpp Tue Aug 21 14:25:33 2012 +0200 @@ -1,6 +1,9 @@ +#include "CompileTimeConstants.h" + #include "DataController.hpp" #include "MemoryDBLink.hpp" +#include "SqliteDBLink.hpp" #include "PermissionException.hpp" #include "DataController.hpp" @@ -14,6 +17,7 @@ #include <QtCore/QDateTime> #include <QtGui/QMainWindow> +#include <QtGui/QDesktopServices> #include <QtGui/QTreeWidget> #include <QtGui/QHeaderView> #include <QtGui/QMenuBar> @@ -25,36 +29,66 @@ #include "EditDistance.hpp" -#include <cassert> -void findFiles(const QDir& dir, FileDBLink& dblink) +void DataController::findFiles(const QDir& dir, QStringList& list) { - /* - QProgressDialog progressDialog(this); - progressDialog.setCancelButtonText(tr("&Cancel")); - progressDialog.setRange(0, files.size()); - progressDialog.setWindowTitle(tr("Find Files")); - */ - foreach(QString filename, dir.entryList(QDir::NoDotAndDotDot | QDir::Dirs)) { filename = dir.absoluteFilePath(filename); - findFiles(QDir(filename), dblink); + findFiles(QDir(filename), list); } foreach(QString filename, dir.entryList(QDir::Files)) { - filename = dir.absoluteFilePath(filename); + list << dir.absoluteFilePath(filename); + } +} + +QStringList DataController::findFiles(const QDir& dir) +{ + QStringList list; + findFiles(dir, list); + return list; +} + +void DataController::findFiles(const QDir& dir, FileDBLink& dblink) +{ + QStringList list = findFiles(dir); + + QProgressBar bar; + + QDateTime last = QDateTime::currentDateTime(); + + bar.resize(200,25); + bar.setValue(0); + bar.setMinimum(0); + bar.setMaximum(list.size()); + bar.show(); + + connect(this, SIGNAL(populateProgress(int)), &bar, SLOT(setValue(int))); + + int n = 0; + foreach(QString filename, findFiles(dir)) { try { dblink.updateIfModified(filename); } catch (const PermissionException& e) { - qDebug()<<e.toString(); + qDebug() << e.toString(); } catch (Exception& e) { - qDebug()<<e.toString(); + qDebug() 
<< e.toString(); exit(1); } + + emit populateProgress(++n); + + QDateTime now = QDateTime::currentDateTime(); + if (last.msecsTo(now) > 500) { + QCoreApplication::processEvents(); + last = now; + } } + } + QTreeWidgetItem* DataController::createItem(const FileDBLink::DBInfo& info) { QTreeWidgetItem* item = new QTreeWidgetItem(); @@ -86,7 +120,7 @@ { tw->clear(); - const QList<QSharedPointer<FileDBLink::DBInfo> > elems = dblink->sortOn(FileDBLink::EDIT, true); + const QList<QSharedPointer<FileDBLink::DBInfo> > elems = dblink->values(dir.path()); QProgressBar bar; @@ -216,6 +250,10 @@ } +void DataController::setDir(const QDir& dir) +{ + this->dir = dir.absolutePath(); +} DataController::DataController() : showFullPath(false) { @@ -224,9 +262,16 @@ populateDelay->setInterval(500); connect(populateDelay, SIGNAL(timeout()), this, SLOT(populate())); - dblink = new MemoryDBLink(); + QString dbpath = DB_DEFAULT_LOCATION; + + dbpath.replace(QRegExp("^~/"), + QString("%1%2").arg(QDesktopServices::storageLocation(QDesktopServices::HomeLocation)).arg(QDir::separator())); - findFiles(QDir("."), *dblink); + dblink = new SqliteDBLink(dbpath); + + setDir(QDir(".")); + + findFiles(dir, *dblink); mw = new QMainWindow(); QMenuBar* mb = new QMenuBar(); @@ -254,6 +299,7 @@ checksumFilter = filterBar->addAction("Checksum"); checksumFilter->setCheckable(true); + checksumFilter->setChecked(true); connect(checksumFilter, SIGNAL(toggled(bool)), this, SLOT(delayPopulate())); QWidget* widget = new QWidget(); @@ -265,7 +311,7 @@ layout->addWidget(editCutoffSpin); editCutoffSpin->setMinimum(0); editCutoffSpin->setMaximum(100); - editCutoffSpin->setValue(70); + editCutoffSpin->setValue(0); editCutoffSpin->setSingleStep(10); editCutoffSpin->setSuffix("%"); connect(editCutoffSpin, SIGNAL(valueChanged(int)), this, SLOT(delayPopulate()));
--- a/DataController.hpp Mon Aug 20 17:32:58 2012 +0200 +++ b/DataController.hpp Tue Aug 21 14:25:33 2012 +0200 @@ -2,6 +2,7 @@ #define DATACONTROLLER_HPP #include <QtCore/QObject> +#include <QtCore/QDir> #include "FileDBLink.hpp" @@ -20,6 +21,12 @@ DataController(); ~DataController(); + void findFiles(const QDir& dir, FileDBLink& dblink); + static void findFiles(const QDir& dir, QStringList& list); + static QStringList findFiles(const QDir& dir); + + void setDir(const QDir& dir); + public slots: //void cellClicked(int row, int column); void cellDoubleClicked(int row, int column); @@ -52,6 +59,8 @@ QSpinBox* editCutoffSpin; QTimer* populateDelay; + + QDir dir; }; #endif //DATACONTROLLER_HPP
--- a/FileDBLink.cpp Mon Aug 20 17:32:58 2012 +0200 +++ b/FileDBLink.cpp Tue Aug 21 14:25:33 2012 +0200 @@ -17,8 +17,18 @@ void FileDBLink::updateIfModified(const QString& path) { QFileInfo fileinfo(path); - if (!exists(path)) { + FileDBLink::DBStatus status = existsWithMtime(path, fileinfo.lastModified()); + + switch (status) { + case FileDBLink::NONE: { addFile(fileinfo); + break; + } + case FileDBLink::MTIME_DIFFERENT: { + updateFile(fileinfo); + } + default: { + } } } @@ -45,13 +55,39 @@ } } - addFile(path, size, lastModified, hash); + addFile(path, size, lastModified, hash.result()); } -const QList<QSharedPointer<FileDBLink::DBInfo> > FileDBLink::sortOn(SORTORDER order, bool extended) +void FileDBLink::updateFile(const QFileInfo& fileinfo) +{ + updateFile(fileinfo.absoluteFilePath(), fileinfo.size(), fileinfo.lastModified()); +} + +void FileDBLink::updateFile(const QString& path, qint64 size, const QDateTime& lastModified) { - QList<QSharedPointer<DBInfo> > list = (extended) ? computedValues() : computedValues(); + QCryptographicHash hash( QCryptographicHash::Sha1 ); + QFile file(path); + if ( file.open( QIODevice::ReadOnly ) ) { + hash.addData( file.readAll() ); + } + else { + QString errorMsg = path + ": " + file.errorString(); + qDebug()<<file.error(); + switch (file.error()) { + case QFile::PermissionsError: + throw PermissionException(errorMsg); + default: + throw IOException(errorMsg); + } + } + + updateFile(path, size, lastModified, hash.result()); +} + +const QList<QSharedPointer<FileDBLink::DBInfo> > FileDBLink::sortOn(const QString& prefix, SORTORDER order, bool extended) +{ + QList<QSharedPointer<DBInfo> > list = (extended) ? 
computedValues(prefix) : values(prefix); switch (order) { case PATH: @@ -122,10 +158,10 @@ return QSharedPointer<DBInfo>(new ExtendedDBInfo(*info, other, minDist)); } -const QList<QSharedPointer<FileDBLink::DBInfo> > FileDBLink::computedValues() const +const QList<QSharedPointer<FileDBLink::DBInfo> > FileDBLink::computedValues(const QString& prefix) const { QList<QSharedPointer<DBInfo> > list; - QList<QSharedPointer<DBInfo> > entries = values(); + QList<QSharedPointer<DBInfo> > entries = values(prefix); #if 1 list = QtConcurrent::blockingMapped(entries, boost::bind( &FileDBLink::computedValue, _1, entries));
--- a/FileDBLink.hpp Mon Aug 20 17:32:58 2012 +0200 +++ b/FileDBLink.hpp Tue Aug 21 14:25:33 2012 +0200 @@ -91,19 +91,29 @@ public: + enum DBStatus { NONE = 0, MTIME_DIFFERENT, SAME}; + virtual ~FileDBLink() {} void updateIfModified(const QString& path); - virtual void addFile(const QString& path, qint64 size, const QDateTime& dtime, const QCryptographicHash& hash) = 0; + + virtual void addFile(const QString& path, qint64 size, const QDateTime& dtime, const QByteArray& hash) = 0; void addFile(const QString& path, qint64 size, const QDateTime& dtime); void addFile(const QFileInfo& fileinfo); + + virtual void updateFile(const QString& path, qint64 size, const QDateTime& dtime, const QByteArray& hash) = 0; + void updateFile(const QString& path, qint64 size, const QDateTime& dtime); + void updateFile(const QFileInfo& fileinfo); + virtual bool exists(const QString& path) = 0; - virtual const QList<QSharedPointer<DBInfo> > values() const = 0; - virtual const QList<QSharedPointer<DBInfo> > computedValues() const; + virtual DBStatus existsWithMtime(const QString& path, const QDateTime& mtime) = 0; + + virtual const QList<QSharedPointer<DBInfo> > values(const QString& prefix = QString() ) const = 0; + virtual const QList<QSharedPointer<DBInfo> > computedValues(const QString& prefix = QString() ) const; enum SORTORDER { PATH, SIZE, MTIME, CHECKSUM, EDIT }; - virtual const QList<QSharedPointer<DBInfo> > sortOn(SORTORDER order, bool extended = false); + virtual const QList<QSharedPointer<DBInfo> > sortOn(const QString& prefix, SORTORDER order, bool extended = false); }; #endif //FILEDBLINK
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SqliteDBLink.cpp Tue Aug 21 14:25:33 2012 +0200 @@ -0,0 +1,160 @@ +#include "SqliteDBLink.hpp" + + +#include <sqlite3.h> + +#include <cassert> + +#include <QtCore/QStringList> +#include <QtCore/QDebug> + +#include <QtSql/QSqlQuery> +#include <QtSql/QSqlError> +#include <QtSql/QSqlRecord> + +SqliteDBLink::SqliteDBLink(const QString& dbPath) +{ + db = QSqlDatabase::addDatabase("QSQLITE"); + db.setDatabaseName(dbPath); + bool ok = db.open(); + assert(ok); + QSqlQuery query; + if (!query.exec(QString("SELECT * FROM files;"))) { + query.exec("CREATE TABLE files(path VARCHAR PRIMARY KEY ASC, size INTEGER, mtime TEXT, checksum TEXT);"); + } + if (!query.exec(QString("SELECT * FROM files;"))) { + qDebug()<<"No database"; + exit(1); + } + +} + +SqliteDBLink::~SqliteDBLink() +{ + db.close(); +} + +bool SqliteDBLink::exists(const QString& path) +{ + QSqlQuery query; + query.prepare("SELECT path FROM files WHERE path = :path;"); + query.bindValue(":path", path); + if (!query.exec()) { + qDebug() << path << "::" << query.lastQuery() << "::" << query.lastError().text(); + } + return query.last(); +} + +FileDBLink::DBStatus SqliteDBLink::existsWithMtime(const QString& path, const QDateTime& mtime) +{ + QSqlQuery query; + query.prepare("SELECT mtime FROM files WHERE path = :path;"); + query.bindValue(":path", path); + if (!query.exec()) { + qDebug() << path << "::" << query.lastQuery() << "::" << query.lastError().text(); + } + if (query.next()) { + int dateIndex = query.record().indexOf("mtime"); + QDateTime mtimeEntry = query.value(dateIndex).toDateTime(); + if (mtimeEntry == mtime) + return SAME; + return MTIME_DIFFERENT; + } + return NONE; +} + +void SqliteDBLink::addFile(const QString& path, qint64 size, const QDateTime& dtime, const QByteArray& hash) +{ + addFile(DBInfo(path, size, dtime, hash)); +} + +void SqliteDBLink::updateFile(const QString& path, qint64 size, const QDateTime& dtime, const QByteArray& hash) +{ + 
updateFile(DBInfo(path, size, dtime, hash)); +} + +bool SqliteDBLink::tryAddFile(const DBInfo& dbinfo) +{ + if (exists(dbinfo.path())) + return false; + QSqlQuery query; + query.prepare("INSERT INTO files (path, size, mtime, checksum) " + "VALUES (:path, :size, :mtime, :checksum)"); + query.bindValue(":path", dbinfo.path()); + query.bindValue(":size", dbinfo.size()); + query.bindValue(":mtime", dbinfo.mtime()); + query.bindValue(":checksum", dbinfo.checksum()); + if (!query.exec()) { + qDebug() << dbinfo.path() << "::" << query.lastQuery() << "::" << query.lastError().text(); + } + return true; +} + +void SqliteDBLink::updateFile(const DBInfo& dbinfo) +{ + QSqlQuery query; + query.prepare("UPDATE files SET size=:size, mtime=:mtime, checksum=:checksum WHERE path=:path"); + query.bindValue(":path", dbinfo.path()); + query.bindValue(":size", dbinfo.size()); + query.bindValue(":mtime", dbinfo.mtime()); + query.bindValue(":checksum", dbinfo.checksum()); + if (!query.exec()) { + qDebug() << query.lastError().text(); + } +} + +void SqliteDBLink::addFile(const DBInfo& dbinfo) +{ + if (!tryAddFile(dbinfo)) { + abort(); //Should throw exception + } +} + + +QStringList SqliteDBLink::toStringList() +{ + abort(); + QStringList list; + /* + foreach(QSharedPointer<DBInfo> info, entries) { + list << info->serialize(); + } + */ + return list; +} + +const QList<QSharedPointer<FileDBLink::DBInfo> > SqliteDBLink::values(const QString& prefix) const +{ + QList<QSharedPointer<FileDBLink::DBInfo> > values; + + QSqlQuery query; + + if (prefix.size() > 0) { + query.prepare("SELECT * FROM files WHERE path LIKE :prefix"); + query.bindValue(":prefix", QString("%1%").arg(prefix)); + } + else { + query.prepare("SELECT * FROM files"); + } + + if (!query.exec()) { + qDebug() << prefix << "::" << query.lastQuery() << "::" << query.lastError().text(); + abort(); + } + + int pathIndex = query.record().indexOf("path"); + int sizeIndex = query.record().indexOf("size"); + int dateIndex = 
query.record().indexOf("mtime"); + int checksumIndex = query.record().indexOf("checksum"); + while (query.next()) { + QString path = query.value(pathIndex).toString(); + qint64 size = query.value(sizeIndex).toInt(); + QDateTime mtime = query.value(dateIndex).toDateTime(); + QByteArray checksum = query.value(checksumIndex).toByteArray(); + + values << QSharedPointer<FileDBLink::DBInfo>(new FileDBLink::DBInfo(path, size, mtime, checksum)); + //qDebug() << path << size << mtime << checksum.toHex(); + } + + return values; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SqliteDBLink.hpp Tue Aug 21 14:25:33 2012 +0200 @@ -0,0 +1,31 @@ +#ifndef SQLITEDBLINK_HPP +#define SQLITEDBLINK_HPP +#include "FileDBLink.hpp" + +#include <QtCore/QMap> +#include <QtCore/QSharedPointer> + +#include <QtSql/QSqlDatabase> + +class SqliteDBLink : public FileDBLink { +public: + SqliteDBLink(const QString& dbpath); + ~SqliteDBLink(); + + virtual void addFile(const QString& path, qint64 size, const QDateTime& dtime, const QByteArray& hash); + virtual void updateFile(const QString& path, qint64 size, const QDateTime& dtime, const QByteArray& hash); + bool exists(const QString& path); + DBStatus existsWithMtime(const QString& path, const QDateTime& mtime); + + QStringList toStringList(); + const QList<QSharedPointer<DBInfo> > values(const QString& prefix = QString() ) const; + +private: + void addFile(const DBInfo& info); + bool tryAddFile(const DBInfo& info); + void updateFile(const DBInfo& dbinfo); + + QSqlDatabase db; +}; + +#endif //MEMORYDBLINK_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/TestFramework.cpp Tue Aug 21 14:25:33 2012 +0200 @@ -0,0 +1,12 @@ +#include <QtCore/QString> +#include <QtCore/QDateTime> + +std::ostream& operator<<(std::ostream& out, const QString& rhs) +{ + return out << rhs.toStdString(); +} + +std::ostream& operator<<(std::ostream& out, const QDateTime& rhs) +{ + return out << rhs.toString(); +}
--- a/TestFramework.hpp Mon Aug 20 17:32:58 2012 +0200 +++ b/TestFramework.hpp Tue Aug 21 14:25:33 2012 +0200 @@ -18,10 +18,9 @@ #endif //Here comes our helperfunctions -#include <QtCore/QString> -inline std::ostream& operator<<(std::ostream& out, const QString& rhs) -{ - return out << rhs.toStdString(); -} +class QString; +std::ostream& operator<<(std::ostream& out, const QString& rhs); +class QDateTime; +std::ostream& operator<<(std::ostream& out, const QDateTime& rhs); #endif //TESTFRAMEWORK_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/TestSqliteDBLink.cpp Tue Aug 21 14:25:33 2012 +0200 @@ -0,0 +1,49 @@ +#include "SqliteDBLink.hpp" +#include "TestFramework.hpp" + +#include <QtCore/QDebug> +#include <QtCore/QTemporaryFile> + +BOOST_AUTO_TEST_CASE( TestSave ) +{ + QDateTime now = QDateTime::currentDateTime(); + now = now.addSecs(-1000); + + QTemporaryFile sqlfile("XXXXXX.sqlite"); + sqlfile.open(); + + SqliteDBLink db(sqlfile.fileName()); + + QTemporaryFile testfile("XXXXXX.test"); + testfile.open(); + + db.addFile(testfile.fileName(), 4, now, "Sau"); + BOOST_REQUIRE(db.existsWithMtime(testfile.fileName(), now)); + { + const QList<QSharedPointer<FileDBLink::DBInfo> > values = db.values(); + BOOST_REQUIRE_EQUAL(values.size(), 1); + } + + now = now.addSecs(1); + + db.updateFile(testfile.fileName(), 4, now, "Jau"); + BOOST_REQUIRE(db.existsWithMtime(testfile.fileName(), now)); + { + const QList<QSharedPointer<FileDBLink::DBInfo> > values = db.values(); + BOOST_REQUIRE_EQUAL(values.size(), 1); + } + + testfile.write(QByteArray("test")); + testfile.close(); + + QFileInfo finf(testfile.fileName()); + + db.updateIfModified(testfile.fileName()); + { + const QList<QSharedPointer<FileDBLink::DBInfo> > values = db.values(); + BOOST_REQUIRE_EQUAL(values.size(), 1); + + QSharedPointer<FileDBLink::DBInfo> file = values[0]; + BOOST_REQUIRE_EQUAL(file->mtime(), finf.lastModified()); + } +}
