changeset 112:3951f6d27219

Add various useful scripts. Refactor out SQL-statements.
author Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no>
date Tue, 03 May 2016 02:20:07 +0200
parents 9d9926a6011f
children 27e628852401
files scripts/STATEMENTS scripts/dircompare.sh scripts/duplicates.sh scripts/duplicatesRemoveCommon.sh scripts/removeWithSignature scripts/statistics.sh
diffstat 6 files changed, 122 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/STATEMENTS	Tue May 03 02:20:07 2016 +0200
@@ -0,0 +1,13 @@
+#!/bin/bash
+# Sourced by the sibling scripts after they set PREFIX (empty, or a "WHERE path LIKE ..." clause). DUPLICATES_STATEMENT: paths matching PREFIX whose checksum occurs at least twice among the rows matching PREFIX.
+DUPLICATES_STATEMENT="SELECT path FROM (SELECT * FROM files WHERE checksum IN (SELECT checksum FROM (SELECT checksum, COUNT(*) as num FROM files ${PREFIX} GROUP BY checksum) WHERE num >= 2)) ${PREFIX}"
+# Earlier one-query form of SELECT_STATEMENT, kept for reference.
+#SELECT_STATEMENT="SELECT hex(checksum),size,path FROM (SELECT * FROM files WHERE checksum IN (SELECT checksum FROM (SELECT checksum, COUNT(*) as num FROM files ${PREFIX} GROUP BY checksum) WHERE num >= 2)) ${PREFIX} ORDER BY size;"
+# One row per duplicate file: hex checksum, size and path, ordered by size.
+SELECT_STATEMENT="SELECT hex(checksum),size,path FROM files WHERE path IN (${DUPLICATES_STATEMENT}) ORDER BY size;"
+# Number of duplicate files and the sum of their sizes.
+DUPLICATES_STATISTICS_STATEMENT="SELECT count(*) as TotalDuplicates,sum(size) as TotalDuplicatesSize FROM files WHERE path IN (${DUPLICATES_STATEMENT})"
+# Number and total size of all rows in files; PREFIX is not applied here.
+TOTAL_STATISTICS_STATEMENT="SELECT count(*) as TotalFiles,sum(size) as TotalSize FROM files"
+# Both aggregates side by side in a single result row.
+STATISTICS_STATEMENT="SELECT * FROM (${DUPLICATES_STATISTICS_STATEMENT}),(${TOTAL_STATISTICS_STATEMENT});"
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/dircompare.sh	Tue May 03 02:20:07 2016 +0200
@@ -0,0 +1,34 @@
+#!/bin/bash
+# Usage: dircompare.sh PREFIX_A PREFIX_B   (exit status 1 if any difference)
+PREFIX=$1
+# Double single quotes so both prefixes are safe inside SQL string literals.
+SQLOTHER=$(printf '%s\n' "$2" | sed -e "s/'/''/g")
+if [ -n "$PREFIX" ]
+then
+  # Stays unset for an empty $1, which leaves the LIKE pattern as '%'.
+  SQLPREFIX=$(printf '%s\n' "$PREFIX" | sed -e "s/'/''/g")
+
+  PREFIX="WHERE path LIKE '${SQLPREFIX}%'"
+fi
+
+. "$(dirname "$0")/STATEMENTS"
+
+error=0
+# Paths under $2 whose checksum is NULL or does not occur under $1.
+RES=$(sqlite3 ~/.DeDupe.sqlite "SELECT path FROM files WHERE path LIKE '${SQLOTHER}%' AND ( checksum NOT IN (SELECT checksum FROM files WHERE path LIKE '${SQLPREFIX}%' AND checksum IS NOT NULL) OR checksum IS NULL) ;")
+if [ -n "${RES}" ]
+then
+    echo "Not in $1"
+    error=1
+    echo "${RES}"
+fi
+# The same check in the other direction: paths under $1 missing under $2.
+RES=$(sqlite3 ~/.DeDupe.sqlite "SELECT path FROM files WHERE path LIKE '${SQLPREFIX}%' AND ( checksum NOT IN (SELECT checksum FROM files WHERE path LIKE '${SQLOTHER}%' AND checksum IS NOT NULL) OR checksum IS NULL) ;")
+if [ -n "${RES}" ]
+then
+    echo "Not in $2"
+    error=1
+    echo "${RES}"
+fi
+
+exit $error
--- a/scripts/duplicates.sh	Tue May 03 02:17:38 2016 +0200
+++ b/scripts/duplicates.sh	Tue May 03 02:20:07 2016 +0200
@@ -1,8 +1,31 @@
 #!/bin/bash
 
+# -s turns on stripping of the prefix from the printed paths; off by default.
+STRIP=false
+BASEDIR=$(dirname "$0")
+
+while getopts 's' val
+do
+  case $val in
+    s) STRIP=true ;;
+    *) exit 1 ;;
+  esac
+done
+# Drop the parsed options so that $1 is the path prefix.
+shift $((OPTIND - 1))
+
+
 PREFIX=$1
 
-STRIP=true
+# Run a relative prefix through canonicalize; an empty prefix is left alone.
+if [[ -n ${PREFIX} && ${PREFIX} != /* ]]
+then
+    PREFIX=$(canonicalize "${PREFIX}")
+fi
+# Drop one trailing "."; unlike ${PREFIX::-1} this needs no bash 4.2 and
+# is a no-op when there is no trailing dot.
+PREFIX=${PREFIX%.}
+
 if [ -n "$PREFIX" ]
 then
   SEDPREFIX=$(echo $PREFIX | sed -e 's/\//\\\//g')
@@ -11,9 +34,11 @@
   PREFIX="WHERE path LIKE '${SQLPREFIX}%'"
 fi
 
+. "${BASEDIR}/STATEMENTS"
+# With -s, sed removes the first "|<prefix>" from each row, i.e. the prefix of the path column.
 if ${STRIP}
 then
-  sqlite3 ~/.DeDupe.sqlite "SELECT hex(checksum),size,path FROM (SELECT * FROM files WHERE checksum IN (SELECT checksum FROM (SELECT checksum, COUNT(*) as num FROM files ${PREFIX} GROUP BY checksum) WHERE num >= 2)) ${PREFIX} ORDER BY size;" | sed "s/|${SEDPREFIX}/|/"
+  sqlite3 ~/.DeDupe.sqlite "${SELECT_STATEMENT}" | sed "s/|${SEDPREFIX}/|/"
 else
-  sqlite3 ~/.DeDupe.sqlite "SELECT hex(checksum),size,path FROM (SELECT * FROM files WHERE checksum IN (SELECT checksum FROM (SELECT checksum, COUNT(*) as num FROM files ${PREFIX} GROUP BY checksum) WHERE num >= 2)) ${PREFIX} ORDER BY size;"
+  sqlite3 ~/.DeDupe.sqlite "${SELECT_STATEMENT}"
 fi
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/duplicatesRemoveCommon.sh	Tue May 03 02:20:07 2016 +0200
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/removeWithSignature	Tue May 03 02:20:07 2016 +0200
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Usage: removeWithSignature [-f] TARGET PATTERN...
+FORCE=false
+while getopts 'f' val
+do
+  case $val in
+    f) FORCE=true ;;
+    *) exit 1 ;;
+  esac
+done
+# Without -f this is a dry run: matching paths are echoed, not removed.
+shift $((OPTIND - 1))
+
+if $FORCE
+then
+  command=(rm --)
+else
+  command=(echo)
+fi
+
+TARGET=$1
+shift
+# NOTE(review): -s makes duplicates.sh strip the TARGET prefix, so rm gets paths without it; presumably meant to be run from inside TARGET - confirm.
+while [ -n "$1" ]
+do
+	"$(dirname "$0")/duplicates.sh" -s "$TARGET" | grep -e "$1" | cut -d\| -f3- | xargs -r -d '\n' -n 1 "${command[@]}"
+	shift
+done
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/statistics.sh	Tue May 03 02:20:07 2016 +0200
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Usage: statistics.sh [PATH_PREFIX]
+PREFIX=$1
+
+# Turn a non-empty prefix into the WHERE clause that STATEMENTS interpolates.
+if [ -n "$PREFIX" ]
+then
+  # Double single quotes so the prefix is safe inside a SQL string literal.
+  SQLPREFIX=$(printf '%s\n' "$PREFIX" | sed -e "s/'/''/g")
+
+  PREFIX="WHERE path LIKE '${SQLPREFIX}%'"
+fi
+
+. "$(dirname "$0")/STATEMENTS"
+
+sqlite3 ~/.DeDupe.sqlite "${STATISTICS_STATEMENT}"