Mercurial > dedupe
comparison SqliteDBLink.cpp @ 101:6c6f3a5f96ea
Better support for batch operations.
| author | Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no> |
|---|---|
| date | Thu, 13 Feb 2014 15:19:37 +0100 |
| parents | c7da835ea912 |
| children | 6bc013d5788b |
comparison
equal
deleted
inserted
replaced
| 100:f4ebbfa3ffae | 101:6c6f3a5f96ea |
|---|---|
| 1 | |
| 2 | |
| 1 #include "SqliteDBLink.hpp" | 3 #include "SqliteDBLink.hpp" |
| 2 | 4 |
| 3 #include <QtCore/QStringList> | 5 #include <QtCore/QStringList> |
| 4 #include <QtCore/QVariant> | 6 #include <QtCore/QVariant> |
| 5 | 7 |
| 11 #include "Exception/IOException.hpp" | 13 #include "Exception/IOException.hpp" |
| 12 | 14 |
| 13 #include <cassert> | 15 #include <cassert> |
| 14 | 16 |
| 15 const QString SqliteDBLink::connectionName("SqliteDBLink"); | 17 const QString SqliteDBLink::connectionName("SqliteDBLink"); |
| 18 | |
| 19 #include <QtCore/QDebug> | |
| 16 | 20 |
| 17 | 21 |
| 18 SqliteDBLink::SqliteDBLink(const QString& dbPath) | 22 SqliteDBLink::SqliteDBLink(const QString& dbPath) |
| 19 { | 23 { |
| 20 if (!QSqlDatabase::contains(connectionName)) | 24 if (!QSqlDatabase::contains(connectionName)) |
| 34 } | 38 } |
| 35 if (!query.exec(QString("SELECT * FROM files;"))) { | 39 if (!query.exec(QString("SELECT * FROM files;"))) { |
| 36 throw SQLException("No database"); | 40 throw SQLException("No database"); |
| 37 } | 41 } |
| 38 | 42 |
| 43 if (!query.exec(QString("SELECT * FROM human_files;"))) { | |
| 44 query.exec("CREATE VIEW human_files AS SELECT path, size, date(mtime) as mdate, time(mtime) as mtime, hex(checksum) as checksum FROM files;"); | |
| 45 } | |
| 46 if (!query.exec(QString("SELECT * FROM human_files;"))) { | |
| 47 throw SQLException("No view"); | |
| 48 } | |
| 49 | |
| 39 | 50 |
| 40 preparedSizePrefixQuery = new QSqlQuery(db); | 51 preparedSizePrefixQuery = new QSqlQuery(db); |
| 41 preparedSizePrefixQuery->prepare("SELECT * FROM files WHERE path LIKE :prefix AND size = :size"); | 52 preparedSizePrefixQuery->prepare("SELECT * FROM files WHERE path LIKE :prefix AND size = :size"); |
| 42 | 53 |
| 43 preparedSizeQuery = new QSqlQuery(db); | 54 preparedSizeQuery = new QSqlQuery(db); |
| 50 preparedUpdateQuery = new QSqlQuery(db); | 61 preparedUpdateQuery = new QSqlQuery(db); |
| 51 preparedUpdateQuery->prepare( | 62 preparedUpdateQuery->prepare( |
| 52 "UPDATE files " | 63 "UPDATE files " |
| 53 "SET size=:size, mtime=:mtime, checksum=:checksum " | 64 "SET size=:size, mtime=:mtime, checksum=:checksum " |
| 54 "WHERE path=:path"); | 65 "WHERE path=:path"); |
| 66 | |
| 67 preparedDeleteQuery = new QSqlQuery(db); | |
| 68 preparedDeleteQuery->prepare("DELETE FROM files WHERE path = :path"); | |
| 55 } | 69 } |
| 56 | 70 |
| 57 SqliteDBLink::~SqliteDBLink() | 71 SqliteDBLink::~SqliteDBLink() |
| 58 { | 72 { |
| 59 delete preparedSizePrefixQuery; | 73 delete preparedSizePrefixQuery; |
| 122 } | 136 } |
| 123 | 137 |
| 124 void SqliteDBLink::updateFile(const DBInfo& dbinfo, bool lazy) | 138 void SqliteDBLink::updateFile(const DBInfo& dbinfo, bool lazy) |
| 125 { | 139 { |
| 126 if (lazy) { | 140 if (lazy) { |
| 127 operations.push_back(Operation(dbinfo, Update)); | 141 operations.push_back(new UpdateOperation(dbinfo)); |
| 128 } | 142 } |
| 129 else { | 143 else { |
| 130 preparedUpdateQuery->bindValue(":path", dbinfo.path()); | 144 preparedUpdateQuery->bindValue(":path", dbinfo.path()); |
| 131 preparedUpdateQuery->bindValue(":size", dbinfo.size()); | 145 preparedUpdateQuery->bindValue(":size", dbinfo.size()); |
| 132 preparedUpdateQuery->bindValue(":mtime", dbinfo.mtime()); | 146 preparedUpdateQuery->bindValue(":mtime", dbinfo.mtime()); |
| 138 } | 152 } |
| 139 | 153 |
| 140 void SqliteDBLink::addFile(const DBInfo& dbinfo, bool lazy) | 154 void SqliteDBLink::addFile(const DBInfo& dbinfo, bool lazy) |
| 141 { | 155 { |
| 142 if (lazy) { | 156 if (lazy) { |
| 143 operations.push_back(Operation(dbinfo, Add)); | 157 operations.push_back(new AddOperation(dbinfo)); |
| 144 } | 158 } |
| 145 else { | 159 else { |
| 146 if (!tryAddFile(dbinfo)) { | 160 if (!tryAddFile(dbinfo)) { |
| 147 abort(); //Should throw exception | 161 abort(); //Should throw exception |
| 148 } | 162 } |
| 197 } | 211 } |
| 198 | 212 |
| 199 return values; | 213 return values; |
| 200 } | 214 } |
| 201 | 215 |
| 202 void SqliteDBLink::deleteFileFromDB(const QString& path) | 216 void SqliteDBLink::deleteFileFromDB(const QString& path, bool lazy) |
| 203 { | 217 { |
| 204 QSqlQuery query(db); | 218 if (lazy) { |
| 205 query.prepare("DELETE FROM files WHERE path = :path"); | 219 operations.push_back(new DeleteOperation(path)); |
| 206 query.bindValue(":path", path); | 220 } |
| 207 if (!query.exec()) { | 221 else { |
| 208 throw SQLException(query); | 222 QSqlQuery query(db); |
| 223 query.prepare("DELETE FROM files WHERE path = :path"); | |
| 224 query.bindValue(":path", path); | |
| 225 if (!query.exec()) { | |
| 226 throw SQLException(query); | |
| 227 } | |
| 209 } | 228 } |
| 210 } | 229 } |
| 211 | 230 |
| 212 | 231 |
| 213 void SqliteDBLink::keepOnlyFromPrefix(const QString& prefix, | 232 void SqliteDBLink::keepOnlyFromPrefix(const QString& prefix, |
| 214 const QStringList& files) | 233 const QStringList& files, |
| 234 bool lazy) | |
| 215 { | 235 { |
| 216 QStringList list; | 236 QStringList list; |
| 217 foreach(dbinf_ptr_t info, values(prefix)) { | 237 foreach(dbinf_ptr_t info, values(prefix)) { |
| 218 if (!files.contains(info->path())) { | 238 if (!files.contains(info->path())) { |
| 219 list << info->path(); | 239 list << info->path(); |
| 220 } | 240 } |
| 221 } | 241 } |
| 222 foreach(QString path, list) { | 242 foreach(QString path, list) { |
| 223 deleteFileFromDB(path); | 243 deleteFileFromDB(path, lazy); |
| 224 } | 244 } |
| 225 } | 245 } |
| 226 | 246 |
| 227 const QList<FileDBLink::dbinf_ptr_t> | 247 const QList<FileDBLink::dbinf_ptr_t> |
| 228 SqliteDBLink::filesWithSize(quint64 size, const QString& prefix) const | 248 SqliteDBLink::filesWithSize(quint64 size, const QString& prefix) const |
| 263 QVariantList& sizes, | 283 QVariantList& sizes, |
| 264 QVariantList& mtimes, | 284 QVariantList& mtimes, |
| 265 QVariantList& checksums, | 285 QVariantList& checksums, |
| 266 OperationType operation) | 286 OperationType operation) |
| 267 { | 287 { |
| 268 assert(paths.size() == sizes.size()); | 288 assert(sizes.size() == 0 || paths.size() == sizes.size()); |
| 269 assert(paths.size() == mtimes.size()); | 289 assert(mtimes.size() == 0 || paths.size() == mtimes.size()); |
| 270 assert(paths.size() == checksums.size()); | 290 assert(checksums.size() == 0 || paths.size() == checksums.size()); |
| 271 QSqlQuery* query; | 291 QSqlQuery* query; |
| 292 bool infoQuery; | |
| 272 switch (operation) { | 293 switch (operation) { |
| 273 case Add: | 294 case Add: |
| 274 query = preparedTryAddQuery; | 295 query = preparedTryAddQuery; |
| 296 infoQuery = true; | |
| 275 break; | 297 break; |
| 276 case Update: | 298 case Update: |
| 277 query = preparedUpdateQuery; | 299 query = preparedUpdateQuery; |
| 300 infoQuery = true; | |
| 301 break; | |
| 302 case Delete: | |
| 303 query = preparedDeleteQuery; | |
| 304 infoQuery = false; | |
| 278 break; | 305 break; |
| 279 case None: | 306 case None: |
| 280 assert(paths.size() == 0); | 307 assert(paths.size() == 0); |
| 281 return; | 308 return; |
| 282 } | 309 } |
| 283 query->bindValue("path", paths); | 310 query->bindValue("path", paths); |
| 284 query->bindValue("size", sizes); | 311 if (infoQuery) { |
| 285 query->bindValue("mtime", mtimes); | 312 query->bindValue("size", sizes); |
| 286 query->bindValue("checksum", checksums); | 313 query->bindValue("mtime", mtimes); |
| 314 query->bindValue("checksum", checksums); | |
| 315 } | |
| 287 | 316 |
| 288 if (!query->execBatch()) | 317 if (!query->execBatch()) |
| 289 throw SQLException(*query); | 318 throw SQLException(*query); |
| 290 | 319 |
| 291 paths.clear(); | 320 paths.clear(); |
| 292 sizes.clear(); | 321 sizes.clear(); |
| 293 mtimes.clear(); | 322 mtimes.clear(); |
| 294 checksums.clear(); | 323 checksums.clear(); |
| 295 } | 324 } |
| 296 | 325 |
| 326 const char* SqliteDBLink::typeString(OperationType type) | |
| 327 { | |
| 328 switch(type) { | |
| 329 case Add: | |
| 330 return "Add"; | |
| 331 case Update: | |
| 332 return "Update"; | |
| 333 case Delete: | |
| 334 return "Delete"; | |
| 335 case None: | |
| 336 return "None"; | |
| 337 } | |
| 338 | |
| 339 return ""; | |
| 340 } | |
| 341 | |
| 342 | |
| 297 bool SqliteDBLink::commit(const QString& prefix) | 343 bool SqliteDBLink::commit(const QString& prefix) |
| 298 { | 344 { |
| 299 OperationType last = None; | 345 OperationType last = None; |
| 300 QVariantList paths, sizes, mtimes, hashes; | 346 QVariantList paths, sizes, mtimes, hashes; |
| 301 | 347 |
| 302 foreach(const Operation& operation, operations) { | 348 foreach(const Operation* operation, operations) { |
| 303 if (operation.second != last) { | 349 if (operation->type() != last) { |
| 304 executeOperation(paths, sizes, mtimes, hashes, last); | 350 executeOperation(paths, sizes, mtimes, hashes, last); |
| 305 } | 351 } |
| 306 | 352 |
| 307 switch (operation.second) { | 353 switch (operation->type()) { |
| 308 case Add: | 354 case Add: |
| 309 case Update: | 355 case Update: { |
| 310 paths.push_back(operation.first.path()); | 356 const InfoOperation* iOperation = dynamic_cast<const InfoOperation*>(operation); |
| 311 sizes.push_back(operation.first.size()); | 357 paths.push_back(iOperation->info().path()); |
| 312 mtimes.push_back(operation.first.mtime()); | 358 sizes.push_back(iOperation->info().size()); |
| 313 hashes.push_back(operation.first.checksum()); | 359 mtimes.push_back(iOperation->info().mtime()); |
| 360 hashes.push_back(iOperation->info().checksum()); | |
| 361 break; | |
| 362 } | |
| 363 case Delete: { | |
| 364 const DeleteOperation* dOperation = dynamic_cast<const DeleteOperation*>(operation); | |
| 365 paths.push_back(dOperation->path()); | |
| 366 } | |
| 314 case None: | 367 case None: |
| 315 break; | 368 break; |
| 316 } | 369 } |
| 317 last = operation.second; | 370 last = operation->type(); |
| 318 } | 371 } |
| 319 if (last != None) { | 372 if (last != None) { |
| 373 qDebug() << "Execute Operation" << typeString(last); | |
| 374 qDebug() << paths; | |
| 320 executeOperation(paths, sizes, mtimes, hashes, last); | 375 executeOperation(paths, sizes, mtimes, hashes, last); |
| 376 qDebug() << "Execute Operation Done"; | |
| 321 } | 377 } |
| 322 | 378 |
| 323 QSqlQuery whatToUpdate(db); | 379 QSqlQuery whatToUpdate(db); |
| 324 QString whatToUpdateQuery = "SELECT path FROM files WHERE checksum is NULL AND size in (SELECT size FROM files WHERE size <> 0 GROUP BY size HAVING count(*) > 1 ORDER BY SIZE) %1 ORDER BY size"; | 380 QString whatToUpdateQuery = |
| 381 "SELECT path FROM files WHERE checksum is NULL AND path in " | |
| 382 "(SELECT path FROM files WHERE size <> 0 %1 " | |
| 383 "GROUP BY size HAVING count(*) > 1) ORDER BY size"; | |
| 325 if (prefix.isEmpty()) { | 384 if (prefix.isEmpty()) { |
| 326 whatToUpdateQuery = whatToUpdateQuery.arg(""); | 385 whatToUpdateQuery = whatToUpdateQuery.arg(""); |
| 327 } | 386 } |
| 328 else { | 387 else { |
| 329 whatToUpdateQuery = whatToUpdateQuery.arg("AND path LIKE :prefix"); | 388 whatToUpdateQuery = whatToUpdateQuery.arg("AND path LIKE :prefix"); |
| 330 } | 389 } |
| 331 whatToUpdate.prepare(whatToUpdateQuery); | 390 whatToUpdate.prepare(whatToUpdateQuery); |
| 332 if (!prefix.isEmpty()) { | 391 if (!prefix.isEmpty()) { |
| 333 whatToUpdate.bindValue("prefix", QString("%1%").arg(prefix)); | 392 whatToUpdate.bindValue("prefix", QString("%1%").arg(prefix)); |
| 334 } | 393 } |
| 335 | 394 |
| 395 qDebug() << "Before whatToUpdate"; | |
| 336 if (!whatToUpdate.exec()) { | 396 if (!whatToUpdate.exec()) { |
| 337 throw SQLException(whatToUpdate); | 397 throw SQLException(whatToUpdate); |
| 338 } | 398 } |
| 399 qDebug() << "After whatToUpdate"; | |
| 339 | 400 |
| 340 int pathIndex = whatToUpdate.record().indexOf("path"); | 401 int pathIndex = whatToUpdate.record().indexOf("path"); |
| 341 QStringList updatePaths; | 402 QStringList updatePaths; |
| 342 while (whatToUpdate.next()) { | 403 while (whatToUpdate.next()) { |
| 343 updatePaths << whatToUpdate.value(pathIndex).toString(); | 404 updatePaths << whatToUpdate.value(pathIndex).toString(); |
| 349 updateChecksum.prepare("UPDATE files " | 410 updateChecksum.prepare("UPDATE files " |
| 350 "SET checksum=:checksum " | 411 "SET checksum=:checksum " |
| 351 "WHERE path=:path"); | 412 "WHERE path=:path"); |
| 352 | 413 |
| 353 foreach (const QString& path, updatePaths) { | 414 foreach (const QString& path, updatePaths) { |
| 415 qDebug() << path; | |
| 354 QByteArray ohash = computeHash(path); | 416 QByteArray ohash = computeHash(path); |
| 355 emit progressUpdate(++n, max); | 417 emit progressUpdate(++n, max); |
| 356 updateChecksum.bindValue("checksum", ohash); | 418 updateChecksum.bindValue("checksum", ohash); |
| 357 updateChecksum.bindValue("path", path); | 419 updateChecksum.bindValue("path", path); |
| 358 if (!updateChecksum.exec()) | 420 if (!updateChecksum.exec()) |
