# HG changeset patch
# User Tom Fredrik Blenning Klaussen
# Date 1462234807 -7200
# Node ID 3951f6d27219a2d1ebe0ecddf813950ec24a1cfb
# Parent  9d9926a6011ff7207949fa38df344668eb32fbf6
Add various useful scripts. Refactor out SQL-statements.

diff -r 9d9926a6011f -r 3951f6d27219 scripts/STATEMENTS
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/STATEMENTS	Tue May 03 02:20:07 2016 +0200
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# SQL statements shared by the DeDupe helper scripts. This file is meant to be
+# sourced, not executed: the sourcing script must set PREFIX beforehand to
+# either an empty string or a "WHERE path LIKE '...%'" clause, because PREFIX
+# is expanded at the time this file is sourced.
+
+DUPLICATES_STATEMENT="SELECT path FROM (SELECT * FROM files WHERE checksum IN (SELECT checksum FROM (SELECT checksum, COUNT(*) as num FROM files ${PREFIX} GROUP BY checksum) WHERE num >= 2)) ${PREFIX}"
+
+#SELECT_STATEMENT="SELECT hex(checksum),size,path FROM (SELECT * FROM files WHERE checksum IN (SELECT checksum FROM (SELECT checksum, COUNT(*) as num FROM files ${PREFIX} GROUP BY checksum) WHERE num >= 2)) ${PREFIX} ORDER BY size;"
+
+SELECT_STATEMENT="SELECT hex(checksum),size,path FROM files WHERE path IN (${DUPLICATES_STATEMENT}) ORDER BY size;"
+
+DUPLICATES_STATISTICS_STATEMENT="SELECT count(*) as TotalDuplicates,sum(size) as TotalDuplicatesSize FROM files WHERE path IN (${DUPLICATES_STATEMENT})"
+
+TOTAL_STATISTICS_STATEMENT="SELECT count(*) as TotalFiles,sum(size) as TotalSize FROM files"
+
+STATISTICS_STATEMENT="SELECT * FROM (${DUPLICATES_STATISTICS_STATEMENT}),(${TOTAL_STATISTICS_STATEMENT});"
\ No newline at end of file
diff -r 9d9926a6011f -r 3951f6d27219 scripts/dircompare.sh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/dircompare.sh	Tue May 03 02:20:07 2016 +0200
@@ -0,0 +1,44 @@
+#!/bin/bash
+#
+# Usage: dircompare.sh DIR1 DIR2
+#
+# Compares the files recorded in ~/.DeDupe.sqlite under two path prefixes.
+# Prints every path under one prefix whose checksum is NULL or does not occur
+# under the other prefix. Exits 1 if anything was printed, 0 otherwise.
+
+PREFIX=$1
+
+
+if [ -n "$PREFIX" ]
+then
+    SEDPREFIX=$(echo $PREFIX | sed -e 's/\//\\\//g')
+    SQLPREFIX=$(echo $PREFIX | sed -e "s/'/''/g")
+
+    PREFIX="WHERE path LIKE '${SQLPREFIX}%'"
+fi
+
+. "$(dirname "$0")/STATEMENTS"
+
+# Double any single quote so a path cannot end the SQL string literal early.
+FIRST=$(printf '%s\n' "$1" | sed -e "s/'/''/g")
+SECOND=$(printf '%s\n' "$2" | sed -e "s/'/''/g")
+
+error=0
+
+RES=$(sqlite3 ~/.DeDupe.sqlite "SELECT path FROM files WHERE path LIKE '${SECOND}%' AND ( checksum NOT IN (SELECT checksum FROM files WHERE path LIKE '${FIRST}%' AND checksum IS NOT NULL) OR checksum IS NULL) ;")
+if [ -n "${RES}" ]
+then
+    echo "Not in $1"
+    error=1
+    echo "${RES}"
+fi
+RES=$(sqlite3 ~/.DeDupe.sqlite "SELECT path FROM files WHERE path LIKE '${FIRST}%' AND ( checksum NOT IN (SELECT checksum FROM files WHERE path LIKE '${SECOND}%' AND checksum IS NOT NULL) OR checksum IS NULL) ;")
+if [ -n "${RES}" ]
+then
+    echo "Not in $2"
+    error=1
+    echo "${RES}"
+fi
+
+
+exit $error
diff -r 9d9926a6011f -r 3951f6d27219 scripts/duplicates.sh
--- a/scripts/duplicates.sh	Tue May 03 02:17:38 2016 +0200
+++ b/scripts/duplicates.sh	Tue May 03 02:20:07 2016 +0200
@@ -1,8 +1,31 @@
 #!/bin/bash
 
+STRIP=false
+
+BASEDIR=$(dirname "$0")
+
+while getopts 's' val
+do
+    case $val in
+        s) STRIP=true ;;
+        ?) exit 1;
+    esac
+done
+let nopts=OPTIND-1
+shift ${nopts}
+
+
 PREFIX=$1
-STRIP=true
 
+if [[ ${PREFIX} != /* ]]
+then
+    PREFIX=$(canonicalize ${PREFIX})
+fi
+if [[ ${PREFIX} == *\. ]]
+then
+    PREFIX=${PREFIX::-1}
+fi
+
 if [ -n "$PREFIX" ]
 then
     SEDPREFIX=$(echo $PREFIX | sed -e 's/\//\\\//g')
@@ -11,9 +34,11 @@
     PREFIX="WHERE path LIKE '${SQLPREFIX}%'"
 fi
 
+. "${BASEDIR}/STATEMENTS"
+
 if ${STRIP}
 then
-    sqlite3 ~/.DeDupe.sqlite "SELECT hex(checksum),size,path FROM (SELECT * FROM files WHERE checksum IN (SELECT checksum FROM (SELECT checksum, COUNT(*) as num FROM files ${PREFIX} GROUP BY checksum) WHERE num >= 2)) ${PREFIX} ORDER BY size;" | sed "s/|${SEDPREFIX}/|/"
+    sqlite3 ~/.DeDupe.sqlite "${SELECT_STATEMENT}" | sed "s/|${SEDPREFIX}/|/"
 else
-    sqlite3 ~/.DeDupe.sqlite "SELECT hex(checksum),size,path FROM (SELECT * FROM files WHERE checksum IN (SELECT checksum FROM (SELECT checksum, COUNT(*) as num FROM files ${PREFIX} GROUP BY checksum) WHERE num >= 2)) ${PREFIX} ORDER BY size;"
+    sqlite3 ~/.DeDupe.sqlite "${SELECT_STATEMENT}"
 fi
diff -r 9d9926a6011f -r 3951f6d27219 scripts/duplicatesRemoveCommon.sh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/duplicatesRemoveCommon.sh	Tue May 03 02:20:07 2016 +0200
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+
diff -r 9d9926a6011f -r 3951f6d27219 scripts/removeWithSignature
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/removeWithSignature	Tue May 03 02:20:07 2016 +0200
@@ -0,0 +1,39 @@
+#!/bin/bash
+#
+# Usage: removeWithSignature [-f] TARGET PATTERN...
+#
+# For each PATTERN, takes the "duplicates.sh -s" listing for TARGET, keeps the
+# lines matching PATTERN (grep) and passes the path column of each line to
+# rm. Without -f the paths are only echoed, so the default is a dry run.
+#
+# NOTE(review): duplicates.sh -s strips the TARGET prefix from the listed
+# paths, so rm receives prefix-less paths - confirm the intended working
+# directory before using -f.
+
+FORCE=false
+while getopts 'f' val
+do
+    case $val in
+        f) FORCE=true ;;
+        ?) exit 1;
+    esac
+done
+let nopts=OPTIND-1
+shift ${nopts}
+
+if $FORCE
+then
+    command=rm
+else
+    command=echo
+fi
+
+TARGET=$1
+shift
+
+while [ -n "$1" ]
+do
+    # NOTE(review): the path to duplicates.sh is hard-coded to one checkout.
+    ~/projects/dedupe/scripts/duplicates.sh -s "$TARGET" | grep -- "$1" | cut -d\| -f3- | xargs -d '\n' -n 1 $command
+    shift
+done
diff -r 9d9926a6011f -r 3951f6d27219 scripts/statistics.sh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/statistics.sh	Tue May 03 02:20:07 2016 +0200
@@ -0,0 +1,21 @@
+#!/bin/bash
+#
+# Usage: statistics.sh [PREFIX]
+#
+# Prints the count and total size of duplicate files (limited to paths that
+# start with PREFIX when given) next to the count and total size of all files.
+
+PREFIX=$1
+
+
+if [ -n "$PREFIX" ]
+then
+    SEDPREFIX=$(echo $PREFIX | sed -e 's/\//\\\//g')
+    SQLPREFIX=$(echo $PREFIX | sed -e "s/'/''/g")
+
+    PREFIX="WHERE path LIKE '${SQLPREFIX}%'"
+fi
+
+. "$(dirname "$0")/STATEMENTS"
+
+sqlite3 ~/.DeDupe.sqlite "${STATISTICS_STATEMENT}"