#!/bin/bash
## uniquify a list of email-addresses  -- by Eugene Reimer 2007-Oct;
##
## usage:  uniquify LISTFILE
##
## LISTFILE holds one entry per line, either "Name <address>", "<address>",
## or a bare "address".  Entries sharing the same (lowercased) address are
## combined, for example:
##     Gord Thomson <gmt@example.com>
##     Marge Thomson <gmt@example.com>    <--combine to:  Gord Thomson or Marge Thomson <gmt@example.com>
##
##     Lynwood Cundall <lc@example.com>
##     Lynwood Cundall <lc@example.com>   <--combine to:  Lynwood Cundall <lc@example.com>
##
## LISTFILE is rewritten in place; the previous contents are kept as
## LISTFILE~.  Nothing is touched when the result is identical to the input.
##
## Note that it uniquifies by lowercased email-address, and leaves the list in
## that order; to aid in the elimination of duplicates that have arisen due to
## someone having changed email-addresses (where the problem is that you may
## end up having both the old and the new in your list), you'll probably want
## to do a case-blind sort by Name (by the entire line) as an aid in spotting
## such.  That sort of uniquifying is left as a semi-manual process because a
## mechanical method would be so easily foiled by minor variations in the
## spelling of "Name", and also because there may well be cases where one
## really wants the same person to receive two copies, at say both home and
## work email-addresses.
##
## Requires GNU sed (for the \L case-conversion in the replacement text).
##
## testcase:  /books/ELIST-Regina-PCAP-PCESC-MailingList

## A failing sed/sort must not go unnoticed: otherwise an empty result could
## silently replace the list.
set -o pipefail

## Path of the scratch file currently in use (empty when none); removed on exit
## so that an interrupted run leaves nothing behind.
uniquify_tmp=''
trap '[[ -n $uniquify_tmp ]] && rm -f -- "$uniquify_tmp"' EXIT

## Print a one-line usage message on stderr.
usage() {
  printf 'usage: %s LISTFILE\n' "${0##*/}" >&2
}

#######################################
# Filter: standardize every entry to "Name <address>" with the address
# lowercased, and sort the entries by address.
# Arguments: none
# Inputs:    raw list on stdin
# Outputs:   standardized list, sorted by address, on stdout
# Returns:   non-zero if sed or sort fails
#######################################
normalize_addresses() {
  local -a standardize=(
    -e '/^[[:space:]]*$/d'                 # blank lines carry no address
    -e 's|^ *||'                           # remove leading spaces
    -e 's|[ ,]*$||'                        # remove trailing space|comma
    -e 's|^<\([^<>]*\)>$|\1 <\1>|'         # "<addr>" alone -> "addr <addr>"
    -e 's|^\([^<]*\)$|\1 <\1>|'            # bare "addr"    -> "addr <addr>"
    -e 's|\(<[^>]*>\)|\L\1|'               # pointy-bracketed part to lowercase
    -e 's|mb\.sympatico\.ca|mts.net|g'     # KLUDGE for items from exported-addressbook
    -e 's| |~|g'                           # space->tilde, so that the name is ONE sort field
    -e $'s|<|\t<|'                         # lt->TAB+lt, so that the address is sort field 2
  )
  # LC_ALL=C gives a byte-wise, locale-independent (reproducible) order.
  # Entries with equal addresses end up adjacent, ordered by the whole line.
  sed "${standardize[@]}" \
    | LC_ALL=C sort -k2,2 \
    | sed -e 's|~| |g' -e $'s|\t<|<|'      # undo the sort-kludges
}

#######################################
# Filter: merge adjacent entries that share an address into a single entry
# whose name is "Name1 or Name2 ...".
# Arguments: none
# Inputs:    "Name <address>" lines on stdin, equal addresses adjacent
# Outputs:   one line per distinct address on stdout;
#            a message on stderr for every pair of lines combined
#######################################
combine_duplicates() {
  local line name addr
  local prev_name='' prev_addr=''

  while IFS= read -r line; do
    name=${line%%<*}                       # split into 2 parts
    addr="<${line#*<}"

    # A name that is really an address keeps only its local part.
    if [[ $name == *@* ]]; then
      name=${name%%@*}
    fi
    name=${name//[_.]/ }                   # troublesome characters to spaces
    name=${name//[,+\'()]/}                # other troublesome characters removed
    while [[ $name == *'  '* ]]; do        # collapse runs of spaces
      name=${name//'  '/ }
    done
    name=${name%"${name##*[! ]}"}          # remove trailing spaces

    # The right-hand sides are quoted so that they are compared literally
    # rather than being interpreted as glob patterns.
    if [[ $addr == "$prev_addr" ]]; then
      if [[ " or $prev_name or " != *" or $name or "* ]]; then
        name="$prev_name or $name"
      else
        name=$prev_name                    # name already present; do not repeat it
      fi
      printf 'combining 2 lines for %s to get:%s\n' "$addr" "$name" >&2
    elif [[ -n $prev_addr ]]; then
      printf '%s %s\n' "$prev_name" "$prev_addr"
    fi
    prev_name=$name
    prev_addr=$addr
  done

  # The last entry is still pending when the input ends.
  if [[ -n $prev_addr ]]; then
    printf '%s %s\n' "$prev_name" "$prev_addr"
  fi
}

#######################################
# Uniquify the list file given as $1, in place.
# Globals:   uniquify_tmp (written)
# Arguments: $1 - path of the list file
# Outputs:   status messages (and wc counts before & after) on stdout
# Returns:   0 on success, 1 on failure, 2 on usage error
#######################################
main() {
  local list=${1-}
  if [[ -z $list ]]; then
    usage
    return 2
  fi
  if [[ ! -f $list || ! -r $list ]]; then
    printf '%s: cannot read %s\n' "${0##*/}" "$list" >&2
    return 1
  fi

  uniquify_tmp=$(mktemp "${TMPDIR:-/tmp}/uniquify.XXXXXX") || return 1

  local status=0
  if ! normalize_addresses <"$list" | combine_duplicates >"$uniquify_tmp"; then
    printf '%s: processing %s failed; file left untouched\n' "${0##*/}" "$list" >&2
    status=1
  elif cmp -s -- "$list" "$uniquify_tmp"; then
    printf '%s unchanged\n' "$list"
  elif mv -f -- "$list" "$list~" && cat -- "$uniquify_tmp" >"$list"; then
    # The new file is created with cat rather than moved into place so that
    # it gets normal umask-based permissions, not mktemp's private 0600 mode.
    printf 'previous %s saved as %s\n' "$list" "$list~"
    wc -- "$list~" "$list"                 # show nbr lines before & after
  else
    status=1
  fi

  rm -f -- "$uniquify_tmp"
  uniquify_tmp=''
  return "$status"
}

main "$@"