#!/bin/bash
## generateElistFromHISTORY+MBRS (was pruneElist) -- generates an up-to-date Elist to be used to sendto -- by Eugene Reimer 2005-March
## PREREQ: fixPeggyMBRS subsetMBRStoSMALL arr tolower dif -- from http://ereimer.net/programs/ertools.zip
##
## creates the file Elist-$DATE,
## adding email-ids from most recent MBRS list,
## adding email-ids from ADDED|JOIN|NEWyy lines of Elist-HISTORY (where yy is current year),
## removing those from BOUNCED|REMOVED|DROP lines of Elist-HISTORY;
## also displays differences from the previously generated Elist, and keeps a copy thereof as Elist-Changes-$DATE.
##
## USAGE:
## generateElistFromHISTORY+MBRS -- file Elist-$DATE is created; messages to stdout
##
## NOTE: is invoked by sendtoElist, if needed.
##set -v
##-- Today's date in the forms needed below.
##-- DATE is the YYYYMMDD suffix of the output file; YY/MM/PP are 2-digit strings (PP = YY-1);
##-- yy/mm/pp are the same values prefixed with "10#" so that (( )) reads e.g. "08" as decimal, not octal.
DATE=$(date +%Y%m%d)
YY=$(date +%y)
yy="10#$YY"
MM=$(date +%m)
mm="10#$MM"
P=$((yy-1))
PP=$(printf '%02d' "$P")
pp="10#$PP"
##-- When rerun on the same day, discard today's Elist first so the compare-to-previous step works
if [ -e Elist-$DATE ]; then
  /bin/rm -fv Elist-$DATE
fi
##-- Extract email-ids from current MBRS-db;
##== depending on what Peggy sends, the possibilities are ugly! (may even need to construct a BIG by merging SMALLs, then extract what's needed??)
##== USUAL-case: receive a BIG early in the year (Feb); then periodic SMALLs later in the year;
##== HANDLE the FOLLOWING CASES: (nothing but BIGs early; either BIGs or SMALLs later)
## (1) EARLY in year AND newest MBRS-db is a SMALL from year YY-1: handled
## (2) EARLY in year AND newest MBRS-db is a SMALL from year YY: SHOULD NEVER HAPPEN -- would be forced to re-use an old MBRS-db??
## (3) EARLY in year AND newest MBRS-db is a BIG from year YY: handled via subsetMBRStoSMALL selecting BOTH BadStanding + GoodStanding mbrs
## (4) EARLY in year AND newest MBRS-db is a BIG from year YY-1: handled via subsetMBRStoSMALL selecting GoodStanding mbrs (provided BIG is from late in YY-1)
## (5) LATE in year AND newest MBRS-db is a SMALL from year YY: handled
## (6) LATE in year AND newest MBRS-db is a BIG from year YY: handled via subsetMBRStoSMALL selecting GoodStanding mbrs (or Both GS+BS if BIG from early in YY)
## (7) LATE in year AND newest MBRS-db is from year YY-1: SHOULD NEVER HAPPEN
## (8) newest MBRS-db is from year YY-2: SHOULD NEVER HAPPEN
##-- Locate the MBRS-db to use, make sure its HTML and SMALL versions exist, and sanity-check its date against today.
##-- MDATE is whatever follows "MBRS" in the filename (up to the first "."); MYY and MMM are cut from offsets 2 and 4
##-- of it, i.e. names are assumed to look like MBRSyyyymmdd[-BIG].xls.
##-- Every failure path exits with status 1 so the caller can detect it; messages still go to stdout.
CURXLS=
for f in MBRS*xls; do CURXLS=$f; done ##glob results are sorted, so the last match is kept as the newest XLS MBRS-file
[ -e "$CURXLS" ] || { echo "No MBRS*xls file found"; exit 1; } ##an unmatched glob leaves the literal pattern in CURXLS
CURHTM=${CURXLS%xls}htm ##HTML version: swap the trailing "xls" only
CURSMA=${CURHTM/-BIG/} ##SMALL version: same name without "-BIG" (identical to CURHTM when the db is already a SMALL)
MDATE=${CURSMA%%.*}; MDATE=${MDATE/MBRS/} ##get MDATE, from the MBRS-db being used
MYY=${MDATE:2:2}; Myy=10#$MYY ##"10#" forces decimal in (( )), see yy/mm
MMM=${MDATE:4:2}; Mmm=10#$MMM
[ -e "$CURHTM" ] || { echo "No HTML version of $CURXLS; running fixPeggyMBRS:"; fixPeggyMBRS "$CURXLS"; } ##convert XLS-to-HTML if needed
[ -e "$CURHTM" ] || { echo "fixPeggyMBRS failed!"; exit 1; } ##ensure the HTML version exists
[ -e "$CURSMA" ] || { echo "Running subsetMBRStoSMALL"; subsetMBRStoSMALL "$CURHTM" "$CURSMA"; } ##07mar: Subset-to-SMALL if needed
[ -e "$CURSMA" ] || { echo "subsetMBRStoSMALL failed!"; exit 1; } ##ensure the SMALL version exists
echo "YY=$YY,$yy MM=$MM,$mm MYY=$MYY,$Myy MMM=$MMM,$Mmm" ##==DEBUG==
if ((mm<=5 && Myy==yy)) && [[ $CURXLS != *BIG* ]]; then ##detect SHOULD-NEVER-HAPPEN case-2
  echo "Early in year MBRS-db MUST BE BIG"; exit 1
fi
if ((mm>=6 && Myy!=yy)); then ##detect SHOULD-NEVER-HAPPEN case-7
  echo "Late in year MBRS-db MUST BE from current year"; exit 1
fi
if ((Myy!=yy && Myy!=pp)); then ##detect SHOULD-NEVER-HAPPEN case-8
  echo "MBRS-db from year $MYY is TOO OLD"; exit 1
fi
if ((mm<=5 && Myy==pp && Mmm<=5)) && [[ $CURXLS == *BIG* ]]; then ##detect PROVIDED-THAT for case-4 (07oct)
  echo "MBRS-db from year $MYY month $MMM is TOO OLD"; exit 1
fi
##-- Build tmpElist-1: the sorted, de-duplicated email-ids taken from the SMALL MBRS-db ($CURSMA).
##-- The result is also saved as Elist-extractionsMBRS$MDATE; when that file already exists it is copied back
##-- instead of re-running the extraction.
##-- NOTE(review): in this copy the sed scripts on the "field kludges" and "remove" lines look damaged (the first
##--   contains a raw newline, the second has an empty search pattern) -- presumably markup text was lost somewhere;
##--   confirm against a pristine copy before relying on this pipeline.
if [ -e Elist-extractionsMBRS$MDATE ]; then
cp -fbv Elist-extractionsMBRS$MDATE tmpElist-1 ##only Elist-HISTORY has changed... (Beware when rerunning after fixing)
echo "re-using previously extracted MBR-email-ids" ##DEBUG
else
cat $CURSMA |
sed 's| |~|g; s|
| | |g' | ##field kludges
arr 11 | ##col-11 is Email-id
grep '@' | ##exclude junk (non-email-id)
sed 's|||' | ##remove |
tolower | ##lowercase
sed 's|,|~|g; s|~or~|~|g; s|~~~*|~|g' | ##some members have 2 email-ids, separated by space, comma, or the word "or"
tr '~' '\n' | ##tilde to newline
grep '@' | ##redo junk-removal, after line-splitting
sort -u >tmpElist-1 ##remove duplicates, write to tmpElist-1
cp -fbv tmpElist-1 Elist-extractionsMBRS$MDATE ##may scrap since keeping Elist-Changes suffices; Only Needed to prevent redoing; see above??
fi
##-- Extract email-ids from ADDED|JOIN|NEWyy lines of Elist-HISTORY
## addKeywordPattern MONTH YY PP [MBRSONLY] -- print the "|"-separated keyword alternation that selects add-lines:
##   "ADDED|JOIN|" is included unless MBRSONLY is non-empty (2009-04-21 MBRSONLY variant; global var set by sendto...);
##   NEW$YY plus NEW$PP when MONTH<=5 (Jan..May, ==UNTESTED==); only NEW$YY when MONTH>=6.
##   MONTH is evaluated inside (( )), so it may carry the "10#" prefix.
addKeywordPattern() {
  local month=$1 cur=$2 prev=$3 pat="ADDED|JOIN|"
  if [[ -n ${4-} ]]; then pat=; fi
  if ((month<=5)); then pat+="NEW$cur|NEW$prev"; fi
  if ((month>=6)); then pat+="NEW$cur"; fi
  printf '%s\n' "$pat"
}
PAT=$(addKeywordPattern "$mm" "$YY" "$PP" "$MBRSONLY")
##-- ids = first whitespace-delimited field of each HISTORY line that does not start with "-" and mentions a keyword;
##-- grep -E replaces the obsolescent egrep
grep -E "^[^-].*($PAT)" Elist-HISTORY |sed $'s|[ \t].*||' |tolower >tmpElist-2 ##ids from ADDED|JOIN|NEWyy lines, to tmpElist-2
##-- Merge the two tmpElist files, into tmpElist-3:
cat tmpElist-1 tmpElist-2 |sort -u >tmpElist-3 ##produce tmpElist-3, the merged new Elist in one-per-line form
##-- Prune using (BOUNCED|REMOVED|DROP) lines from Elist-HISTORY, producing tmpElist-new
## removalLinesFor ID FILE -- print the lines of FILE whose leading field (everything before the first space or tab)
##   equals ID, compared case-insensitively as a literal string; status 0 if at least one line was printed.
##   This replaces grep -i "^$ID", which read the id as a regex and also matched any longer id that merely starts
##   with it (so ann@x.ca was pruned because of a removal line for ann@x.cable.com).
removalLinesFor() {
  ## the id is passed via ARGV (then blanked so awk does not open it as a file) to avoid -v escape processing
  awk 'BEGIN { id = tolower(ARGV[1]); ARGV[1] = "" }
       { key = $0; sub(/[ \t].*/, "", key) }
       tolower(key) == id { print; hit = 1 }
       END { exit !hit }' "$1" "$2"
}
## pruneMergedElist -- read tmpElist-3 and write every id without a removal line to tmpElist-new.
##   Reads:  tmpElist-1 (ids from MBRS), tmpElist-2 (ids from HISTORY), tmpElist-3 (merged), Elist-HISTORY, $MDATE, $DATE
##   Writes: tmpREMOVALS (the BOUNCED|REMOVED|DROP lines), tmpLINE (scratch), tmpElist-new,
##           Elist-extractionsMBRS$MDATE-$DATE-PrunedMsgs (pruned ids that came from MBRS; ==for sending to Peggy==)
##   Stdout: one "pruned:" DEBUG line per pruned id, showing whether it came from MBRS or HISTORY
pruneMergedElist() {
  local msgs="Elist-extractionsMBRS$MDATE-$DATE-PrunedMsgs" id from cause
  >tmpElist-new
  >"$msgs"
  grep -E "^[^-].*(BOUNCED|REMOVED|DROP)" Elist-HISTORY >tmpREMOVALS ##tmpREMOVALS has just the BOUNCED|REMOVED|DROP lines
  while IFS= read -r id; do
    [[ -n $id ]] || continue ##an empty line is not an id; never copy it into the result
    if removalLinesFor "$id" tmpREMOVALS >tmpLINE; then ##id to be pruned
      ## whole-line literal match, so an id is attributed only to a list that really contains it
      if grep -qxF -e "$id" tmpElist-1; then from=MBRS
      elif grep -qxF -e "$id" tmpElist-2; then from=HISTORY
      else from="?"
      fi
      cause=$(sed $'s|[^ \t]*[ \t]*||; s|<-- *||' tmpLINE) ##drop the id field; the "<-- " marker is optional
      printf "pruned:%-32s from:%-7s cause:%s\n" "$id" "$from" "$cause"
      if [[ $from == MBRS ]]; then
        printf "pruned: %-32s %s\n" "$id" "$cause" >>"$msgs"
      fi
    else
      printf '%s\n' "$id" >>tmpElist-new ##output one non-pruned id
    fi
  done <tmpElist-3
}
pruneMergedElist
echo "-----COUNTS before & after Pruning-----"; wc tmpElist{-3,-new} ##DEBUG counting
echo "-----file for Peggy: Elist-extractionsMBRS$MDATE-$DATE-PrunedMsgs-----" ##msg
##-- Locate the Elist written by an earlier run and flatten it to one bare id per line, for comparison.
##-- (on first use after the ToBeRemoved emailing, would get neater compare-output if first removed those TBR ids from prv--??--)
for f in Elist-2*
do
  PrvElist=$f ##glob order: the last name wins, taken as the most-recent Elist (was Elist.bak)
done
##-- Flattening: join all lines with "~", strip pointy-brackets, turn comma separators into "~",
##-- squeeze runs of "~", then split on "~" again.
cat $PrvElist |
  tr '\n' '~' |
  sed -e 's|[<>]||g' -e 's|, *|~|g' -e 's|~~~*|~|g' |
  tr '~' '\n' >tmpElist-prv
##-- Compare previous- to new-Elist (both one-per-line); "@@" lines are left out of the dated record
dif tmpElist-prv tmpElist-new | grep -v '^@@' >Elist-Changes-$DATE
echo "-----Changes WRT previous Elist-----"
cat Elist-Changes-$DATE ##DEBUG show Elist-Changes
##-- Produce "ToBeRemoved" list, if needed: ##new 2007-Oct
##-- NOTE(review): the three lines that follow appear truncated in this copy -- their (( )) tests and echo strings are
##--   incomplete, and no "if ...; then" opener is visible for the closing "fi" of this section. Presumably text
##--   between a "<" and a ">" was lost; restore these lines from a pristine copy before running.
##-- The remainder creates Elist-AboutToBeRemoved-20$MYY and appends every id that Elist-Changes-$DATE marks as
##--   removed (lines starting with a single "-"), except ids matched at the start of a line in tmpREMOVALS.
##-- NOTE(review): "^$REPLY" is applied as a regex prefix match, so an id is also skipped when it is only a prefix
##--   of (or a regex match for) a different id in tmpREMOVALS.
if ((mm>=6)) && ((Myy=====-----";fi
if ((mm>=6)) && ((Myy=6 && Myy=====-----";fi
## ((mm>=6 && Myy=6)) && ! [ -e Elist-AboutToBeRemoved-20$MYY ]; then ##time to create a ToBeRemoved list
>Elist-AboutToBeRemoved-20$MYY ##2009-10: so it really gets created even if empty
grep '^-[^-]' Elist-Changes-$DATE |sed 's|^-||' |while read; do
if grep -iq "^$REPLY" tmpREMOVALS; then ##id to be ignored (due to BOUNCED|REMOVED|DROP)
echo "removed id $REPLY not added to AboutToBeRemoved list due to BOUNCED|REMOVED|DROP" ##msg (beware of yanking, bash has no empty block)
else echo "$REPLY" >>Elist-AboutToBeRemoved-20$MYY ##add one id onto the ToBeRemoved list
fi
done
echo "-----==========-----" ##msg
fi
##-- Store the pruned list, still one id per line, under today's name and stop here.
cp -f tmpElist-new "Elist-$DATE"
printf '%s\n' "-----have written One-Per-Line to Elist-$DATE"
##============SKIP the CONVERT-TO-FIVE-per-line step============
exit
##-- Convert Elist to FIVE-per-line format -- this is based on last part of <>
##==NOW OBSOLETE; was nice for old emailing method;
##==NOW OBSOLETE; new emailing-script prefers one-per-line, no need for pointy-brackets (may enhance to use PersonName ...)
##-- NOTE: this section is never reached -- the unconditional exit just before it ends the script.
##-- Counters: k = ids on the current output line (flushed at 5); K = ids since the last blank separator line;
##--   N = running total (only incremented here). prv holds the previous bracketed id.
TEXTEDIT=jed ##==CUSTOMIZE AS NEEDED==
cat tmpElist-new | sed 's|^.*$|<&>|' | { ##read tmpElist-new, adding pointy-brackets
line=; prv=","; ((k=0)); ((K=0)); ((N=0))
while read; do
## ${prv:1:1} and ${REPLY:1:1} are the character right after the leading "<", i.e. each id's first character;
## a change of that character also ends the current line.
## NOTE(review): prv starts as ",", so on the first pass ${prv:1:1} is empty and, being unquoted, leaves the
##   [ ] test with a missing operand -- presumably it just evaluates false; confirm.
if ((k==5)) || [ ${prv:1:1} != ${REPLY:1:1} ]; then echo "$line"; line=; ((k=0)); fi ##produce line of output
## blank separator line when the group exceeds 95 ids, or when the first character moves from before "k" to after "j"
if ((K>95)) || [[ ${prv:1:1} < k && ${REPLY:1:1} > j ]]; then echo ""; ((K=0)); fi ##break after 'J's to get groups<=100, will outgrow someday...
line="$line$REPLY, " ##add comma+space (pointy brackets added by sed-cmd above)
((++k)); ((++K)); ((++N))
prv="$REPLY"
done
if ((k>0)); then echo "$line"; fi ##produce the last partial line of output
} >Elist-$DATE ##output goes to Elist-$DATE
echo -n "-----have written FivePerLine to Elist-$DATE; enter $TEXTEDIT?"; read; $TEXTEDIT Elist-$DATE ##leave user in $TEXTEDIT - can use cut-and-paste...
===========
CHANGE-LOG:
===========
2006-June+Nov: rewrote this script yet again; it was originally called pruneElist, when it only did pruning based on BOUNCE etc, then it was briefly
called updateElistFromHISTORY, but things were getting overly complex!! Found simplicity by going to an Elist-generated-when-needed approach,
that allows this simple script to replace BOTH of the much more complex scripts: updateElistFromHISTORY AND updateElistFromMBRS.
NOTE: when generating Elist in Jan..May, do MBRS-Additions for GS-mbrs from preceding AND current year;
but in Jun..Dec (after sending out "To Be Removed" notices), only use GS-mbrs from current year => Peggy's SMALL flavour suffices.
2007-Mar: ==see notes in <> -- nicer to do Subset-to-SMALL in fixPeggyMBRS ==> would then remove code to call it from here==
2007-Oct: produce the AboutToBeRemoved list, each year in June or thereabouts;
use the Elist-comparison /removals/ PRUNED by removing all DROP|BOUNCED|REMOVED email-ids (Elist-HISTORY)
2008-Feb: to get around YY and MYY being "08" -- the OCTAL problem; (why have MM and MMM never been a problem?? comparisons vs subtraction??)
introduced yy,mm,Myy,Mmm vars having a leading "10#" ==MAY NEED TESTING / DEBUGGING==??==
==similar fixup in subsetMBRStoSMALL
==got spurious "tis time to create a ToBeRemoved list" msg; unyanked some DEBUG stmts...for next time
|