Mercurial > dedupe
changeset 112:3951f6d27219
Add various useful scripts. Refactor out SQL-statements.
| author | Tom Fredrik Blenning Klaussen <bfg@bfgconsult.no> |
|---|---|
| date | Tue, 03 May 2016 02:20:07 +0200 |
| parents | 9d9926a6011f |
| children | 27e628852401 |
| files | scripts/STATEMENTS scripts/dircompare.sh scripts/duplicates.sh scripts/duplicatesRemoveCommon.sh scripts/removeWithSignature scripts/statistics.sh |
| diffstat | 6 files changed, 122 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
#!/bin/bash
#
# scripts/STATEMENTS -- SQL statements shared by the dedupe helper scripts.
#
# This file is meant to be sourced, not executed.  Before sourcing it the
# caller may set PREFIX to an SQL filter clause, e.g.
#
#     PREFIX="WHERE path LIKE '/some/dir%'"
#
# which is spliced verbatim into the duplicate-detection query.  An unset or
# empty PREFIX means "no filter"; `${PREFIX:-}` keeps that case working even
# when the caller runs with `set -u`.
#
# NOTE: PREFIX is inserted as raw SQL, so the caller is responsible for
# escaping any user-supplied text it contains.

# Paths of all files that share their checksum with at least one other file
# (both the grouping and the final selection are restricted by PREFIX).
DUPLICATES_STATEMENT="SELECT path FROM (SELECT * FROM files WHERE checksum IN (SELECT checksum FROM (SELECT checksum, COUNT(*) as num FROM files ${PREFIX:-} GROUP BY checksum) WHERE num >= 2)) ${PREFIX:-}"

# Earlier single-query form of SELECT_STATEMENT, kept for reference:
#SELECT_STATEMENT="SELECT hex(checksum),size,path FROM (SELECT * FROM files WHERE checksum IN (SELECT checksum FROM (SELECT checksum, COUNT(*) as num FROM files ${PREFIX} GROUP BY checksum) WHERE num >= 2)) ${PREFIX} ORDER BY size;"

# Checksum (hex), size and path of every duplicate file, ordered by size.
SELECT_STATEMENT="SELECT hex(checksum),size,path FROM files WHERE path IN (${DUPLICATES_STATEMENT}) ORDER BY size;"

# Number and cumulative size of the duplicate files.
DUPLICATES_STATISTICS_STATEMENT="SELECT count(*) as TotalDuplicates,sum(size) as TotalDuplicatesSize FROM files WHERE path IN (${DUPLICATES_STATEMENT})"

# Number and cumulative size of all files in the database (not restricted by
# PREFIX).
TOTAL_STATISTICS_STATEMENT="SELECT count(*) as TotalFiles,sum(size) as TotalSize FROM files"

# Single-row result combining the duplicate and the total statistics.
STATISTICS_STATEMENT="SELECT * FROM (${DUPLICATES_STATISTICS_STATEMENT}),(${TOTAL_STATISTICS_STATEMENT});"
#!/bin/bash
#
# scripts/dircompare.sh -- compare two directory trees by file content, using
# the checksums recorded in the DeDupe database (~/.DeDupe.sqlite).
#
# Usage: dircompare.sh DIR1 DIR2
#
# DIR1 and DIR2 are path prefixes as stored in the `files` table.  For each
# direction the script lists the files under one prefix whose checksum does
# not occur under the other prefix (files with a NULL checksum are always
# listed, since their content cannot be matched).
#
# Exit status:
#   0  every file has a content match on the other side
#   1  at least one file was reported
#   2  usage error or database query failure
#
# NOTE: the prefixes are used in SQL LIKE patterns, so `%` and `_` inside a
# prefix still act as wildcards.

DATABASE=~/.DeDupe.sqlite

# Escape a string for use inside a single-quoted SQL literal by doubling
# every single quote.
sql_escape() {
  local quote="'"
  printf '%s' "${1//$quote/$quote$quote}"
}

# Report the files under prefix $2 that have no content match under prefix $1.
# Prints "Not in <$1>" followed by one path per line when such files exist.
# Returns 0 if nothing was reported, 1 if files were reported, 2 if the
# database query failed.
report_missing() {
  local reference=$1 candidate=$2
  local reference_sql candidate_sql result

  reference_sql=$(sql_escape "$reference")
  candidate_sql=$(sql_escape "$candidate")

  result=$(sqlite3 "$DATABASE" "SELECT path FROM files WHERE path LIKE '${candidate_sql}%' AND ( checksum NOT IN (SELECT checksum FROM files WHERE path LIKE '${reference_sql}%' AND checksum IS NOT NULL) OR checksum IS NULL) ;") || {
    printf '%s: database query failed\n' "${0##*/}" >&2
    return 2
  }

  if [[ -n $result ]]; then
    printf 'Not in %s\n' "$reference"
    printf '%s\n' "$result"
    return 1
  fi
  return 0
}

# An empty prefix would turn the LIKE pattern into '%' and silently compare
# against the whole database, so both arguments are required.
if (( $# < 2 )) || [[ -z $1 || -z $2 ]]; then
  printf 'Usage: %s DIR1 DIR2\n' "${0##*/}" >&2
  exit 2
fi

error=0

status=0
report_missing "$1" "$2" || status=$?
if (( status > error )); then
  error=$status
fi

status=0
report_missing "$2" "$1" || status=$?
if (( status > error )); then
  error=$status
fi

exit "$error"
--- a/scripts/duplicates.sh Tue May 03 02:17:38 2016 +0200 +++ b/scripts/duplicates.sh Tue May 03 02:20:07 2016 +0200 @@ -1,8 +1,31 @@ #!/bin/bash +STRIP=false + +BASEDIR=$(dirname $0) + +while getopts 's' val +do + case $val in + s) STRIP=true ;; + ?) exit 1; + esac +done +let nopts=OPTIND-1 +shift ${nopts} + + PREFIX=$1 -STRIP=true +if [[ ${PREFIX} != /* ]] +then + PREFIX=$(canonicalize ${PREFIX}) +fi +if [[ ${PREFIX} == *\. ]] +then + PREFIX=${PREFIX::-1} +fi + if [ -n "$PREFIX" ] then SEDPREFIX=$(echo $PREFIX | sed -e 's/\//\\\//g') @@ -11,9 +34,11 @@ PREFIX="WHERE path LIKE '${SQLPREFIX}%'" fi +. ${BASEDIR}/STATEMENTS + if ${STRIP} then - sqlite3 ~/.DeDupe.sqlite "SELECT hex(checksum),size,path FROM (SELECT * FROM files WHERE checksum IN (SELECT checksum FROM (SELECT checksum, COUNT(*) as num FROM files ${PREFIX} GROUP BY checksum) WHERE num >= 2)) ${PREFIX} ORDER BY size;" | sed "s/|${SEDPREFIX}/|/" + sqlite3 ~/.DeDupe.sqlite "${SELECT_STATEMENT}" | sed "s/|${SEDPREFIX}/|/" else - sqlite3 ~/.DeDupe.sqlite "SELECT hex(checksum),size,path FROM (SELECT * FROM files WHERE checksum IN (SELECT checksum FROM (SELECT checksum, COUNT(*) as num FROM files ${PREFIX} GROUP BY checksum) WHERE num >= 2)) ${PREFIX} ORDER BY size;" + sqlite3 ~/.DeDupe.sqlite "${SELECT_STATEMENT}" fi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/duplicatesRemoveCommon.sh Tue May 03 02:20:07 2016 +0200 @@ -0,0 +1,3 @@ +#!/bin/bash + +
#!/bin/bash
#
# scripts/removeWithSignature -- act on duplicate files whose listing line
# matches a signature.
#
# Usage: removeWithSignature [-f] TARGET SIGNATURE...
#
#   -f   really delete the matching files with rm; without -f the matching
#        paths are only printed (dry run).
#
# For every SIGNATURE the output of `duplicates.sh -s TARGET` (lines of the
# form CHECKSUM|SIZE|PATH) is filtered with grep, and the PATH field of each
# matching line is handed to the action, one path per invocation.  SIGNATURE
# is a grep regular expression and is matched against the whole line, i.e.
# against checksum and size as well as the path.
#
# NOTE(review): `duplicates.sh -s` strips the TARGET prefix from the paths it
# prints, so the paths handed to rm are not the paths stored in the database.
# Confirm from which working directory this script is expected to be run.

usage() {
  printf 'Usage: %s [-f] TARGET SIGNATURE...\n' "${0##*/}" >&2
}

force=false
while getopts 'f' opt; do
  case "$opt" in
    f) force=true ;;
    *) usage; exit 1 ;;
  esac
done
shift $((OPTIND - 1))

if (( $# < 1 )); then
  usage
  exit 1
fi

# `--` keeps rm from treating a path that starts with `-` as an option.
if $force; then
  action=(rm --)
else
  action=(echo)
fi

target=$1
shift

# Prefer the duplicates.sh that lives next to this script; fall back to the
# historical hard-coded location when it is not found there.
duplicates="$(dirname -- "$0")/duplicates.sh"
if [[ ! -x $duplicates ]]; then
  duplicates=~/projects/dedupe/scripts/duplicates.sh
fi

for signature in "$@"; do
  # An empty signature would match every line; stop here, as the original
  # argument loop did.
  [[ -n $signature ]] || break

  # -r: do not run the action at all when nothing matched.
  "$duplicates" -s "$target" \
    | grep -- "$signature" \
    | cut -d'|' -f3- \
    | xargs -r -d '\n' -n 1 "${action[@]}"
done
#!/bin/bash
#
# scripts/statistics.sh -- print duplicate and total file statistics from the
# DeDupe database (~/.DeDupe.sqlite).
#
# Usage: statistics.sh [PREFIX]
#
# When PREFIX is given, duplicate detection is restricted to files whose path
# starts with PREFIX.  The output is the single row produced by
# STATISTICS_STATEMENT from the STATEMENTS file located next to this script.
#
# NOTE(review): TOTAL_STATISTICS_STATEMENT in STATEMENTS does not use the
# prefix filter, so the "total" columns always cover the whole database --
# confirm that this is intended.
#
# NOTE: PREFIX ends up in an SQL LIKE pattern, so `%` and `_` inside it still
# act as wildcards.

path_prefix=${1:-}

# PREFIX is read by STATEMENTS; empty means "no filter".
PREFIX=
if [[ -n $path_prefix ]]; then
  # Double every single quote so the prefix is safe inside an SQL string
  # literal.  Parameter expansion is used instead of `echo | sed` so that
  # whitespace and glob characters in the prefix are preserved exactly.
  quote="'"
  PREFIX="WHERE path LIKE '${path_prefix//$quote/$quote$quote}%'"
fi

statements="$(dirname -- "$0")/STATEMENTS"
# shellcheck source=/dev/null
. "$statements" || {
  printf '%s: cannot load %s\n' "${0##*/}" "$statements" >&2
  exit 1
}

sqlite3 ~/.DeDupe.sqlite "${STATISTICS_STATEMENT}"
