#!/bin/bash
## count number of Memberships in a Peggy db; by Eugene Reimer 2006-Sep;
## her "Members-db" is really a mailing-list where one entry describes 2, 3, or more members living at one mailing-address;
## to get number of members, need to count as 2 any row having an "&" or the word "and" in FirstName column;
##
## checking for threesomes, foursomes, etc -- here are old examples:
## eg: FirstName:Lori & Ron/Helen LastName:Drebnisky/ Vogt <-- couple + mother-in-law (weird use of slash??)
## eg: FirstName:Don, Barret, Lorressa, Sheena LastName:Batenchuk
## eg: FirstName:Terry, Jennifer & Vicki LastName:Ewacha
## eg: FirstName:Terry, Richard, Michael LastName:Zdan & Debra Lane <-- NEED TO COUNT ampersands in LastName also--!!--
## eg: FirstName:Patty, Ed & 4 kids LastName:Lotecki <-- do not expect this style after Junior-mbrships...
## algorithm:
## count the number of AMPERSAND|COMMA|SPACEandSPACE occurrences, in FirstName+LastName, and add that to NBRLINES;
## -- AVOID COUNTING SLASH because of: Froehlich/!Prairie!Originals Orchid!Society!c/o!Robert!Kato
## == COULD COUNT SLASH in FirstName ONLY--??--
##
## Not essential, but to also identify Group memberships: look for "c/o" in LastName column??
## (06nov: fails for Friends of Assiniboine Park Conservatory)
## (BIG-db uses "Group" somewhere, in Hobbies?)
## USAGE:
##   countMBRS MBRS.htm
##
## Reads the HTML export named by the first argument, prints any rows that
## deserve a manual look ("quirks"), then prints the number of mailing labels
## (rows) and the estimated number of members (rows + name separators).
##
## ENVIRONMENT:
##   COUNTMBRS_KEEP_TMP  if non-empty, the intermediate file of normalized
##                       names is kept (its path is reported on stderr) for DEBUG.

set -u

## Path of the intermediate file holding one normalized "FirstName LastName"
## row per line; global so the EXIT trap can still see it.
names_file=''

## usage -- print the calling convention on stderr.
## Returns 2 instead of exiting, so that as the last command of the script it
## still yields exit status 2 while leaving the file loadable for testing.
usage() {
  printf 'usage: %s MBRS.htm\n' "${0##*/}" >&2
  return 2
}

## die MESSAGE... -- report a fatal error on stderr and exit 1.
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

## extract_name_columns FILE -- write just FirstName+LastName of each row to stdout.
## Depends on the external helper `arr` (not part of this file); it is invoked
## as `arr 2 3` and is presumed to select columns 2 and 3 -- confirm.
extract_name_columns() {
  ## NOTE(review): the pattern used to read 'HTML>|>ID|'. The trailing empty
  ## alternative matches every line, so -v discarded ALL input. A third
  ## alternative was probably lost in transit; only the two visible ones are kept.
  ## NOTE(review): in the sed below, the second expression can never match once
  ## the first has turned every space into "!". It presumably named an HTML
  ## entity or tag originally; kept verbatim until the original is confirmed.
  grep -Eiv 'HTML>|>ID' -- "$1" |
    sed 's| |!|g; s| ||g' |
    arr 2 3
}

## normalize_names -- filter (stdin -> stdout) that standardizes name separators:
##   * "&amp;" and the word "and" (written "!and!" at this stage) become "&";
##   * any other HTML entity becomes "?" so the hi-char quirk report shows it;
##   * a run of separators such as ",!&" collapses to one "&" (2011-01-10);
##   * "& Family" is removed and so does not count as a member.
## Entities are handled BEFORE bare ampersands get padded with "!": in the old
## order every "&" was already followed by "!", so the entity rule never fired
## and e.g. "Ren&eacute;e" was counted as two members.
## NOTE(review): treating "&amp;" as the encoded ampersand is an inference from
## the input being HTML -- confirm against a real export.
## Requires GNU sed (\n in the replacement, \+ in the pattern).
normalize_names() {
  ## A newline cannot occur inside sed's pattern space, so it serves as a safe
  ## placeholder for "&amp;" while the remaining entities are turned into "?".
  sed -e 's|&amp;|\n|g' \
      -e 's|&[a-z#][a-z0-9]*;|?|g' \
      -e 's|\n|\&|g' \
      -e 's|!*&!*|!\&!|g' \
      -e 's|!and!|!\&!|g' |
    sed 's|[&,]\+!*[&,]\+|\&|g' |
    sed 's|&!*Family||g'
}

## raw_entity_lines FILE -- print the lines of the raw input that contain an
## HTML entity; exit status is grep's (non-zero when there are none).
## NOTE(review): both substitutions were identity transforms as found
## ("&" -> "&", " " -> " "); "&amp;" and "&nbsp;" are presumed to be the
## entities meant to be exempted -- confirm.
raw_entity_lines() {
  sed 's|&amp;|\&|g; s|&nbsp;| |g' -- "$1" | grep '&[a-z#][a-z0-9]*;'
}

## quirks HEADING COMMAND [ARG...] -- run COMMAND once; if it succeeds, print
## HEADING, the command's output and a blank line. Prints nothing otherwise,
## which suppresses the heading when there are zero examples.
quirks() {
  local heading=$1 found
  shift
  if found=$("$@"); then
    printf '%s\n%s\n\n' "$heading" "$found"
  fi
}

## main FILE -- produce the quirk reports and the two counts for FILE.
main() {
  local input=$1
  local nbr_lines nbr_total

  [[ -r "$input" ]] || die "cannot read input file: $input"
  command -v arr >/dev/null || die "required helper 'arr' not found"

  names_file=$(mktemp "${TMPDIR:-/tmp}/countMBRS.XXXXXX") || die "mktemp failed"
  if [[ -z "${COUNTMBRS_KEEP_TMP:-}" ]]; then
    trap 'rm -f -- "$names_file"' EXIT
  else
    printf 'keeping normalized names in %s\n' "$names_file" >&2
  fi

  extract_name_columns "$input" | normalize_names >"$names_file"

  ##quirks "Lines with 2 or more member names:" grep '[&,]' "$names_file"  ##--review the 2-or-more rows (normally yanked)
  quirks "Lines with 3 or more member names:" grep '[&,].*[&,]' "$names_file"
  quirks "Lines with digit or slash or hi-char in name:" grep '[0-9/?]' "$names_file"
  quirks "Lines with hi-char anywhere:" raw_entity_lines "$input"

  ## One label per row; one member per row plus one per remaining "&" or ",".
  nbr_lines=$(( $(wc -l <"$names_file") ))
  nbr_total=$(( $(tr '&,' '\n\n' <"$names_file" | wc -l) ))

  echo "Nbr-of-Mailing-labels=NBRLINES=$nbr_lines"
  echo "Nbr-of-Members(estimated)=NBR-LINES+AMPERSANDS+COMMAS+ANDs=$nbr_total"
}

if (( $# >= 1 )); then
  main "$1"
else
  usage
fi

## HISTORY (this used to be free text after an `exit`; now kept as comments):
##
## 2010-01-23: added the quirks function for neater output; obsolete lines:
##   echo "Lines with 3 or more member names:"; grep '[&,].*[&,]' tmpXXB
##   echo "Lines with digit or slash or hi-char in name:"; grep '[0-9/?]' tmpXXB
##   echo "Lines with hi-char anywhere:"; cat $I |sed 's|&|\&|g; s| | |g' |grep '&[a-z#][a-z0-9]*;'
##
## 2011-01-10: compared this Membership-count with Peggy's, hers being 161 at year-end, this one being 164;
##   one family was being miscounted and the correct total is 163; the troublesome one:
##     Will,!Jose,!Aidan,!&!Emmett! Milne!   <--leads to 5; ought to be 4;
##   fixed by revising sequence of [&,] to be just one; now get 163 as wanted;