#!/bin/bash
## Unix-diff with enhanced options;  by Eugene Reimer 1998-02;
##
## SYNOPSIS:  dif [OPTION]... OLDFILENAME NEWFILENAME
## Runs "diff -U0" on the two files, by default with -s (report identical files)
## and -b -B (ignore changes in amount of whitespace and blank lines).  Each file
## can first be passed through sed and/or another filter command, and the
## unified-diff headers can be trimmed from the output (--terse).
## EXIT STATUS:  0 once the comparison has been run, whatever its outcome (the
## original script ended with "rm -f ...; exit");  1 on a usage error.

AT="$*"          ##remember cmdline args for msgs
SAME=(-s)        ##default for report-identical option; emptied by +s
WHITE=(-b -B)    ##default for whitespace-matters option; emptied by -m
OPTIONS=()       ##GNU-diff options passed through; an array so operands containing spaces stay one word
SEDCMD=          ##operand of --sed; initialised so a value inherited from the environment is never used
FILTER=          ##operand of --cmd; initialised for the same reason
TERSE=0          ##terseness-level: number of -T options seen
OPX=             ##result of getopnd

##=====new improved arg-parsing:
##single-quoted so that the "@@" below keeps its quotes in the help text
USAGE='USAGE: dif [OPTION]... OLDFILENAME NEWFILENAME
where OPTION can be one of:
  --sed=SEDCMD (-S=SEDCMD)    --special-case of --cmd=FILTER;
  --cmd=FILTER (-c=FILTER)    --filter applied to each file prior to comparison;
  --ignore-case (-i)          --GNU-diff option to be case-blind;
  --whitespace-matters (-m)   --by default dif uses -b and -B GNU-diff options; this option suppresses those;
  --ignore-all-space (-w)     --GNU-diff option to ignore all whitespace
  --terse (-T)                --for -U0 output without the "@@" lines;  two of these also suppresses the heading;
  ++report-identical (+s)     --inverse of the GNU-diff option; default is to report when the 2 files are identical
  any option recognized by GNU-diff, except -c -T;'

##usage_error MSG: print MSG and the usage text on stderr, then exit 1
usage_error () {
  printf '%s\n' "$1" "$USAGE" >&2
  exit 1
}

##getopnd: fetch the operand of the option held in $ARG; result in $OPX.
##  The operand is the text after the first "=" when $ARG contains one, else the
##  next command-line word, in which case the implicit index J is advanced past it.
getopnd () {
  if [[ $ARG == *=* ]]; then
    OPX=${ARG#*=}
  elif (( J < N )); then
    OPX=${ARGS[J]}
    J=$((J + 1))
  else
    usage_error "dif $AT - option $ARG requires an operand;"
  fi
}

##prefilter INFILE OUTFILE: write INFILE to OUTFILE after applying the sed-filter and/or FILTER
prefilter () {
  local infile=$1 outfile=$2
  if [[ -n $SEDCMD && -n $FILTER ]]; then
    # shellcheck disable=SC2086  ##FILTER is deliberately word-split into a command and its arguments
    sed "$SEDCMD" <"$infile" | $FILTER >"$outfile"
  elif [[ -n $SEDCMD ]]; then
    sed "$SEDCMD" <"$infile" >"$outfile"
  else
    # shellcheck disable=SC2086
    $FILTER <"$infile" >"$outfile"
  fi
}

##terse_filter: copy unified-diff text from stdin to stdout without the
##  "--- FILE" / "+++ FILE" heading lines and without the "@@" hunk-header lines.
##  The line counts in each hunk header are used to tell hunk content from
##  headings, so that a changed line whose own text begins with "--" or "++"
##  (it shows up as "--- ..." or "+++ ...") is kept.
terse_filter () {
  awk '
    /^@@ / {
      n = split($2, oldspec, ","); old = (n > 1) ? oldspec[2] + 0 : 1    # $2 is -START[,COUNT]
      n = split($3, newspec, ","); new = (n > 1) ? newspec[2] + 0 : 1    # $3 is +START[,COUNT]
      next
    }
    old > 0 || new > 0 {                      # inside a hunk: always hunk content
      c = substr($0, 1, 1)
      if (c == "-") old--
      else if (c == "+") new--
      else if (c == " " || c == "") { old--; new-- }
      print
      next
    }
    /^(---|\+\+\+) / { next }                 # outside a hunk: file heading
    { print }
  '
}

##Option words are recognised by a leading "-" or "+";  the first word without
##one ends the options, so an operand for a passed-through GNU-diff option must
##be attached to its option word (a separate-word operand is supported only for
##-S, -c and -I).
ARGS=("$@")
N=${#ARGS[@]}
J=0                                           ##index into ARGS of the next unread word
while (( J < N )); do
  ARG=${ARGS[J]}
  if [[ $ARG != [-+]* ]]; then break; fi      ##leave on encountering non-option
  J=$((J + 1))
  if [[ $ARG == -- ]]; then break; fi         ##leave on encountering "--" allowing subsequent non-option to start with dash|plus
  case "$ARG" in
    -h*|--help)       printf '%s\n' "$USAGE"; exit 0 ;;
    -S*|--sed*)       getopnd; SEDCMD=$OPX ;;            ##sed-filter
    -c*|--cmd*)       getopnd; FILTER=$OPX ;;            ##other filter
    -m*|--white*)     WHITE=() ;;                        ##suppress the use of -b and -B as GNU-diff options
    +s*|++report-i*)  SAME=() ;;                         ##turn off the report-identical option
    -T*|--terse)      TERSE=$((TERSE + 1)) ;;            ##increment terseness-level TERSE
    -I*|--ignore-m*)  getopnd; OPTIONS+=(-I "$OPX") ;;   ##GNU-diff -I|--ignore-matching-lines=regexp
    *)                OPTIONS+=("$ARG") ;;               ##assume anything else is a GNU-diff option
  esac
done
FILES=("${ARGS[@]:J}")                        ##whatever follows the options
if (( ${#FILES[@]} != 2 )); then              ##errmsg if OLDFILENAME NEWFILENAME missing, or too many non-options
  usage_error "dif $AT - 2 non-options required;"
fi
##=====endof arg-parsing

FOLD=${FILES[0]}; FNEW=${FILES[1]}

##private scratch directory, removed on every exit path
TMPD=$(mktemp -d "${TMPDIR:-/tmp}/dif.XXXXXX") || { printf 'dif: cannot create temporary directory\n' >&2; exit 1; }
trap 'rm -rf -- "$TMPD"' EXIT

if [[ -n $SEDCMD || -n $FILTER ]]; then       ##apply the filter(s) to each file before comparison...
  prefilter "$FOLD" "$TMPD/tmp-Old"; FOLD=$TMPD/tmp-Old
  prefilter "$FNEW" "$TMPD/tmp-New"; FNEW=$TMPD/tmp-New
fi

##run GNU-diff with options;  its output is held in a file so that the heading
##can be decided from what diff really reported under the options in effect
DIFFOUT=$TMPD/diff-output
diff -U0 "${SAME[@]}" "${WHITE[@]}" "${OPTIONS[@]}" -- "$FOLD" "$FNEW" >"$DIFFOUT"
STATUS=$?

##supply one-line heading, if TERSE<2 and (report-identical is on, or diff had something to say, or diff failed)
if (( TERSE < 2 )) && { (( ${#SAME[@]} > 0 )) || [[ -s $DIFFOUT ]] || (( STATUS > 1 )); }; then
  printf '==dif %s\n' "$AT"
fi

if (( TERSE > 0 )); then                      ##output-filtering for TERSE>=1
  terse_filter <"$DIFFOUT"
else
  cat -- "$DIFFOUT"
fi
exit 0

##=====NOTES;  everything below follows the final exit and is never executed
## 2010-05-06: my diff-variants are getting out of hand, especially since many more combinations are needed;
## ==NEEDED: make be a cmd with options;  name it since need name to be distinct from Unix-diff;
##
## 2010-05-07: renamed diff-->dif:
## ==looked at output from: egrep $'(^|[^-\\./a-zA-Z])diff[ \t]' *
##     cd /etc/sbin
##     mv diff dif
##     egrep $'(^|[^-\\./a-zA-Z])diff[ \t]' * &>/tmp/dif-renaming-01
##     egrep -l $'(^|[^-\\./a-zA-Z])diff[ \t]' * &>/tmp/dif-renaming-02
##     chgsed --tmp --verbose 's!\(^\|[^-\./a-zA-Z]\)diff\([ \t]\)!\1dif\2!g' $(cat /tmp/dif-renaming-02) &>/tmp/dif-renaming-03
##     chgsed --tmp --verbose 's!/usr/bin/diff!diff!g' $(grep -l '/usr/bin/diff' *) &>/tmp/dif-renaming-04
## --er programs affected: tesseract-training-from-source diff-by-charset chg weblinkcheck.cron webcommentcheck
##     chg needed manual revision, went with dif-->difterse for PROMPT-output (difterse is inline-function);
##     diff-by-charset <==NEEDS RENAME==??==
## ==renaming the other diff-XX scripts (to dif-XX) will be much easier, but isn't essential;
##
## 2011-01: have also renamed & published difsed;
##
## 2011-01-11: NEW OPTIONS:
##     --sed=SED-CMDLINE     <--special-case of --cmd=FILTER
##     --cmd=FILTER          <--filter applied to each file prior to comparison
##     --ignore-case (-i)    <--NOT NEEDED; GNU-diff has exact same option
##     --ignore-whitespace   <--2/3 levels of ignore-whitespace; GNU-diff: --ignore-space-change (-b) --ignore-all-space (-w) --ignore-blank-lines (-B);
##                              using -b -B by default, need whitespace-matters option to avoid those; ignore-whitespace == GNU
ignore-all-space
    --terse               <--2 levels of terseness: with or without a one-line heading;
                             terse-output is -U0 without its 2-line heading & without the "@@" lines;
                             one-line heading is an echoing of cmdline options + filenames
    --sort                <--NOT NEEDED; --cmd=sort does that
    also support any option of GNU-diff; ergo --ignore-case and --ignore-whitespace not needed since diff has such options; mine ignores space-changes by default;

OBSOLETE CMDLINE:
    diff -bBs -U0 "$@" ##Unix-diff with my usual options (SEE ALSO: NOTES-dif-REWRITE-201011)

OBSOLETE CMDS:
    diffsed -- rename to difsed==!!== becomes: dif --sed
    diffterse -- becomes: dif -t
    diff-strict -- becomes: dif -m
    diff-ignoring-whitespace -- becomes: dif -w
    diffsed-ignoring-case -- becomes: difsed -i
    diffsed-ignoring-whitespace -- becomes: difsed -w
    diffsedsort -- becomes: difsed --cmd=sort
    diffsort -- becomes: dif --cmd=sort
    finishing: (1) rename existing dif-variant cmds to XX-OLD; (2) replace each with one-liner shown above; (3) provide both diffsed & difsed; ==DONE;
    NOTE: the above are not "obsolete" but rather have become trivial one-liners; any published ones will be given a "dif" (one eff) name;

DIF-VARIANTS THAT REMAIN:
    diff-by-charset -- this one is fundamentally different... (the two-eff name seems appropriate)
    diff-redline -- may add --redline option??
    diff-ls -- may add --ls option?? may also offer a SIZE-ONLY variant??
    diff+ diff- -- variants of terse that may be needed again??

ALTERNATE CODE FOR FOLD FNEW FILTERING (simpler, yet more chance of quoting grief?):
    if [[ $SEDCMD && $FILTER ]];then FIL="sed '$SEDCMD' |$FILTER" ##sed-filter plus FILTER
    elif [[ $SEDCMD ]];then FIL="sed '$SEDCMD'" ##sed-filter
    elif [[ $FILTER ]];then FIL="$FILTER" ##FILTER
    fi
    if [[ $FIL ]];then ##apply filter FIL to each file before comparison...
        cat $FOLD |$FILTER >tmp-Old$$; FOLD=tmp-Old$$
        cat $FNEW |$FILTER >tmp-New$$; FNEW=tmp-New$$
    fi

OUTFIL is troublesome WRT quoting??
    a space in pattern, after right-paren, leads to: egrep: ': No such file or directory==??==
    the OUTFIL filtering NOT working as intended due to bash doing incomplete parsing after such substitution; removing the nested quotes solves the problem!!

2011-01-13: webput: several uses of dif are getting the USAGE msg:
    in genByCaption-ER: dif -I 'This page was generated mechanically on .*' /tmp/$OUT $OUT <--note the quoted and separate-word operand;
    in genOrchidsBySpecies-ER: dif -I 'This page was generated mechanically on .*' /tmp/$OUT $OUT <--note the quoted and separate-word operand;
    ==added parsing-code for -I*|--ignore-m* -- to handle separate-word operand and quoting on -I operand;
    ==NOT WORKING; get: diff: extra operand 'generated'
    ==genByCaption-ER, genOrchidsBySpecies-ER: dif -I 'This page was.*' --> difsed 's|This page was.*||' <--so webput works, tho -I still broken...
    ==consider: use eval on the diff-cmdline in order to get complete bash-parsing??

2011-01-19: for the dif-cmds in the genXX scripts (from webput), want my dif to say nothing for identical files in those cases;
    also want default to be that it reports when 2 files are the same; ergo need the inverse of -s option;
    using: +s as the inverse of -s; not possible with getopt...
    genByCaption-ER, genOrchidsBySpecies-ER: add +s option on dif-cmdlines to suppress output if files identical;
    revise difsed +s-->dif +s --sed so +s works;
    revised arg-parsing: -s --> -S; -t --> -T;
    added to help-output: any GNU-option except ...