#!/bin/sh
#
# Copyright (c) 2008 Peter Kuyarov, All rights reserved [some of it]
# Tue Apr 28 08:30:40 MDT 2009 - 0.9
#       cleaned up code, do checks if output files already exist
#       no more need to manually disable parts of this script
#       output files are left intact for manual removal/reuse
#       added in 'maxdepth' to $findcmd to prevent directory
#       traversal if not required - by default goes 999 dirs deep
# Thu Feb 21 08:20:55 MST 2008 - 0.5
#       basics, more "options" coming soon
#
# script to recursively check for duplicate files in current directory
#
# How it works (three steps, each skipped when its output file already
# exists, so an interrupted or earlier run can be reused):
#   1. run cksum on every regular file below ./      -> $cksumraw
#   2. sort that list bytewise                       -> $cksumsorted
#   3. report neighbouring entries whose checksum and
#      size are both identical                       -> $cksumresults
#
# None of the three files is deleted by this script.
#
# Known limitations:
#   - the lists are line oriented, so a file name that contains a newline
#     cannot be represented correctly
#   - the lists live in the home directory; when the script is started in
#     (or above) that directory the lists themselves are scanned too
#
# scripts@pknet.net
# http://peterk.org/scripts/
#

PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/games:/usr/local/sbin:/usr/local/bin:/usr/X11R6/bin:/usr/local/vpopmail/bin:/root/bin
export PATH

# user configurable variables

# files
cksumraw=~/.tmp.cksumraw
cksumsorted=~/.tmp.cksumsorted
cksumresults=~/.tmp.cksumresults

# Both command strings are expanded WITHOUT quotes further down so that they
# split into a command plus its arguments; they therefore must not contain
# shell quoting characters or arguments with embedded whitespace.
# '-maxdepth' comes before '-type' because GNU find warns when a global
# option follows a test.  The script appends '-exec cksum {} +' to $findcmd.
findcmd="/usr/bin/find ./ -maxdepth 999 -type f"
datecmd="/bin/date +%Y.%m.%d.%H%M.%S"

# end user configurable variables

# Print the current time using $datecmd.
timestamp() {
    # intentionally unquoted, see the note next to the variable
    $datecmd
}

# Read a sorted cksum listing ("<crc> <octets> <pathname>" per line) on stdin
# and write one two-line report to stdout for every entry whose crc AND octet
# count equal those of the first entry of its group.
# Runs in a subshell, so its working variables never leak to the caller.
report_duplicates() (
    prev_crc=
    prev_size=
    prev_file=

    # IFS= and -r keep backslashes and trailing blanks of the pathname;
    # the "|| [ -n ... ]" part also handles a last line without a newline.
    while IFS= read -r line || [ -n "$line" ]; do
        # a usable entry has at least two blanks: crc, size, pathname
        case $line in
            *' '*' '*) ;;
            *) continue ;;
        esac

        crc=${line%% *}
        rest=${line#* }
        size=${rest%% *}
        file=${rest#* }
        [ -n "$crc" ] || continue

        # String comparison: a malformed field can never abort the test,
        # and equal numbers are always printed identically by cksum.
        if [ "$crc" = "$prev_crc" ] && [ "$size" = "$prev_size" ]; then
            # names are passed as arguments, never as part of the format
            printf 'file %s \n has duplicate %s \n' "$prev_file" "$file"
        else
            prev_crc=$crc
            prev_size=$size
            prev_file=$file
        fi
    done
)

# Step 1: 'cksum' each file in directory, unless the list already exists.
populate_raw() {
    if [ -f "$cksumraw" ]; then
        printf '%s\n' "$cksumraw ALREADY exists, NOT repopulating"
        echo " continuing with sorting it"
        echo
        return 0
    fi

    printf '%s\n' "populating $cksumraw - $(timestamp)"
    raw_status=0
    # -exec ... {} + batches the file names like xargs did, but never runs
    # cksum when nothing was found.  $findcmd is split on purpose.
    $findcmd -exec cksum {} + > "$cksumraw" || raw_status=$?

    if [ ! -f "$cksumraw" ]; then
        printf '%s\n' "ERROR: could not create $cksumraw" >&2
        return 1
    fi
    if [ "$raw_status" -ne 0 ]; then
        # best effort, as before: unreadable files do not stop the run
        printf '%s\n' "WARNING: find/cksum exited with status $raw_status, $cksumraw may be incomplete" >&2
    fi
    return 0
}

# Step 2: sort the raw list, unless the sorted list already exists.
populate_sorted() {
    if [ -f "$cksumsorted" ]; then
        printf '%s\n' "$cksumsorted ALREADY exists, NOT repopulating"
        echo " continuing with analyzing it"
        echo
        return 0
    fi

    printf '%s\n' "populating $cksumsorted - $(timestamp)"
    # Bytewise collation guarantees that entries sharing the same
    # "<crc> <octets> " prefix end up next to each other; other locales
    # may ignore blanks while collating and interleave them.
    if ! LC_ALL=C sort "$cksumraw" > "$cksumsorted"; then
        printf '%s\n' "ERROR: sorting $cksumraw failed" >&2
        # do not leave a broken list behind for the next run to reuse
        rm -f -- "$cksumsorted"
        return 1
    fi
    return 0
}

# Step 3: go through the sorted list to check for dups, unless the result
# file already exists.
# cksum writes: checksum CRC, total number of octets, the filename
populate_results() {
    if [ -f "$cksumresults" ]; then
        printf '%s\n' "$cksumresults ALREADY exists, NOT repopulating"
        echo
        return 0
    fi

    printf '%s\n' "populating $cksumresults - $(timestamp)"
    # the result file starts with an empty line, as in earlier versions
    if ! echo > "$cksumresults"; then
        printf '%s\n' "ERROR: could not create $cksumresults" >&2
        return 1
    fi
    # show every finding on the terminal and append it to the result file
    report_duplicates < "$cksumsorted" | tee -a "$cksumresults"
    printf '%s\n' "done populating $cksumresults - $(timestamp)"
    return 0
}

# Run the three steps in order and print the closing summary.
# Returns non-zero as soon as one step cannot produce its output file.
main() {
    echo

    populate_raw || return 1
    populate_sorted || return 1
    populate_results || return 1

    echo 'temp files NOT deleted:'
    printf ' %s\n' "$cksumraw"
    printf ' %s\n' "$cksumsorted"
    printf '%s\n' "Output list of duplicates saved in $cksumresults"
}

main "$@"