#!/bin/bash
## made this to identify possible submissions for book-project,  then decided to put up as "Orchid Photos" on my website;  Eugene Reimer  2004-feb;
##
## relies on  genByCaption-ER (Part1)  to construct /tmp/er-caption2.htm  -- which consists of lines like:
##	02jun15  Williams  <a href=AG01f400.jpg>Moccasin</a><br>
##	02jun15  Williams  <a href=AG08f400.jpg>Moccasin</a><br>
## extracts lines from that file which refer to orchids, by either common-name or latin-name,
##	producing an orchids-only subset page.
##
## PREREQ:  dif -- from http://ereimer.net/programs/general-purpose-scripts.htm

## Work from the website directory: OUT below is a relative path, so every
## later read/write of the generated page depends on this cd succeeding.
cd /er/website || exit 9
INP=/tmp/er-caption2.htm	## caption list constructed by genByCaption-ER (Part1)
TMP=/tmp/tmp-caption2.htm	## filtered, date-sorted copy;  searched once per species
OUT=pixOrchids.htm		## the generated page (relative to the directory above)

## Without the caption list every species search comes back empty, and the
## script would go on to replace the page with a headings-only version.
if [ ! -r "$INP" ]; then
  echo "${0##*/}: cannot read $INP -- run genByCaption-ER first" >&2
  exit 8
fi

## Filter, then sort by the date that starts each line (yymmmdd, eg 02jun15):
##   grep -v  drops captions containing "??" and file-names carrying one of the
##            _jn/_da/_aa/_zz/_bk markers followed by B, C or a digit
##            (NOTE(review): meaning of those markers is not stated in this file);
##   sort     -f folds case, -s keeps input order among equal dates;
##            keys are chars 1-2 (year), chars 3-5 as month-name (M), chars 6-7 (day).
## grep -E replaces egrep, which GNU grep now flags as obsolescent.
grep -E -v '\?\?|_(jn|da|aa|zz|bk)[BC0-9]' "$INP" \
  | sort -fs -k 1.1,1.2 -k 1.3Mf,1.5 -k 1.6,1.7 >"$TMP"

## Start the page afresh:  title, colours, and the main heading.
echo "<html><title>ER Orchid Photos</title><body bgcolor=#f5f0e6 link=#007fbf vlink=#005d8c><big><b>Orchid photos by Species</b></big><br><br>" >"$OUT"

## Lead-in for the next species heading.  Only the very first heading is
## written with plain indentation;  every later one starts with <p>, which the
## table-izing pass further down rewrites to </table><p> -- ie it also closes
## the previous species' table, and the first species has none to close.
section_lead='   '

#######################################
## Append one species section to the page:  a heading line, then every
## caption line that mentions the species.
## Globals:    TMP (read)  -- filtered caption list to search
##             OUT (read)  -- page being built;  appended to
##             section_lead (read, then set to '<p>')
## Arguments:  $1 - Latin name   (shown in italics)
##             $2 - common name
##             $3 - extended regex, matched case-insensitively against $TMP
## Outputs:    nothing on stdout;  appends to $OUT
## Returns:    0 always in practice (status of the final assignment);
##             a species without photos simply gets a bare heading
#######################################
orchid_section() {
  local latin=$1 common=$2 pattern=$3
  printf '%s<b><i>%s</i>&nbsp; %s</b><br>\n' "$section_lead" "$latin" "$common" >>"$OUT"
  ## grep -E rather than egrep:  GNU grep warns that egrep is obsolescent
  grep -E -i -- "$pattern" "$TMP" >>"$OUT"
  section_lead='<p>'
}

##             Latin name                  Common name                        Search pattern
orchid_section 'Amerorchis rotundifolia'   'Small round-leaved orchid'        'rotundifolia|Small[- ]*round'
orchid_section 'Arethusa bulbosa'          "Dragon's-mouth"                   'Arethusa|Dragons'
orchid_section 'Calopogon tuberosus'       'Grass-pink'                       'Calopogon|Grass[- ]*pink'
orchid_section 'Calypso bulbosa'           'Fairy-slipper'                    'Calypso'  ##07may: Fairy not needed...FairyBells
orchid_section 'Corallorhiza maculata'     'Spotted coralroot'                'maculata|Spotted *coralroot'
orchid_section 'Corallorhiza striata'      'Striped coralroot'                'striata|Striped *coralroot'
orchid_section 'Corallorhiza trifida'      'Early coralroot'                  'trifida|Early *coralroot'
orchid_section 'Cypripedium acaule'        "Moccasin-flower lady's-slipper"   'acaule|Moccasin|Pink *lady'
orchid_section 'Cypripedium arietinum'     "Ram's-head lady's-slipper"        'arietinum|Ramshead'
orchid_section 'Cypripedium candidum'      "Small white lady's-slipper"       'candidum|Small[- ]*white'
orchid_section 'Cypripedium montanum'      "Mountain lady's-slipper"          'montanum'  ##07nov: added, for an 07may photo
orchid_section 'Cypripedium parviflorum'   "Yellow lady's-slipper"            'Cypripedium.parviflorum|Yellow *lady'  ##08sep: SCRAP calceolus
orchid_section 'Cypripedium passerinum'    "Sparrow-egg lady's-slipper"       'passerinum|Sparrow-egg'
orchid_section 'Cypripedium reginae'       "Showy lady's-slipper"             'reginae|Showy *lady'
orchid_section 'Cypripedium x andrewsii'   'hybrid: Smallwhite x Yellow'      'Ladyslipper.hybrid'
orchid_section 'Dactylorhiza viridis'      'Long-bracted frog-orchid'         'Dactylorhiza|virid(e|is)|Long-bracted'  ##08sep: was Coeloglossum viride
orchid_section 'Epipactis helleborine'     'Broad-leaved Helleborine'         'helleborine'  ##08sep: new
orchid_section 'Goodyera repens'           'Lesser rattlesnake-orchid'        'Goodyera.repens|(Lesser|Dwarf) rattle'
orchid_section 'Goodyera tesselata'        'Tessellated rattlesnake-orchid'   'tesselata|Tessel+ated'
orchid_section 'Liparis loeselii'          "Loesel's fen-twayblade"           'loeselii|Loesel'
orchid_section 'Listera borealis'          'Northern twayblade'               'Listera.borealis|Northern *tway'
##orchid_section 'Listera auriculata'      'Auricled twayblade'               'auriculata|Auricled'
orchid_section 'Listera cordata'           'Heart-leaved twayblade'           'cordata|Heart-leaved *tway'
orchid_section 'Malaxis brachypoda'        "White adder's-mouth"              'brachypoda|monophyllos|White *adders'  ##08sep: was M.monophyllos
orchid_section 'Malaxis paludosa'          "Bog adder's-mouth"                'paludosa|Bog *adders'
orchid_section 'Malaxis unifolia'          "Green adder's-mouth"              'unifolia|Green *adders'
orchid_section 'Platanthera aquilonis'     'Northern green bog-orchid'        'aquilonis|Northern[- ]*green'
orchid_section 'Platanthera dilatata'      'White bog-orchid'                 'dilatata|White *bog'  ##08sep: SCRAP LeafyWhite
orchid_section 'Platanthera hookeri'       "Hooker's rein-orchid"             'hookeri|Hookers'
orchid_section 'Platanthera huronensis'    'Tall green bog-orchid'            'huronensis|Tall[- ]*green'
orchid_section 'Platanthera lacera'        'Ragged fringed-orchid'            'Platanthera.lacera|Ragged *fringed'
orchid_section 'Platanthera obtusata'      'Blunt-leaf rein-orchid'           'obtusata|Blunt.*orch'  ##2010-01: .*orch
orchid_section 'Platanthera orbiculata'    'Round-leaved rein-orchid'         'orbiculata|Round-leaved *rein'  ##08sep: SCRAP LargeRound
orchid_section 'Platanthera praeclara'     'Western prairie fringed-orchid'   'praeclara|Western[- ]*prairie'
orchid_section 'Platanthera psycodes'      'Small purple fringed-orchid'      'psycodes|Purple *fringed'
orchid_section 'Pogonia ophioglossoides'   'Rose pogonia'                     'ophioglossoides|pogonia'
##orchid_section 'Spiranthes cernua'       "Nodding ladies'-tresses"          'cernua|Nodding *ladies'
orchid_section 'Spiranthes lacera'         "Slender ladies'-tresses"          'Spiranthes.lacera|Slender *ladies'
orchid_section 'Spiranthes magnicamporum'  "Great-plains ladies'-tresses"     'magnicamporum|Great[- ]*plains *ladie'
orchid_section 'Spiranthes romanzoffiana'  "Hooded ladies'-tresses"           'romanzoffiana|Hooded *ladies'

## A final bare <p>:  after the table-izing pass it becomes the </table> that
## closes the last species' table.
echo "<p>" >>"$OUT"

## Turn each species section into an HTML table by editing $OUT with chgsed.
## NOTE(review): chgsed is not defined in this file;  it is presumably a
## change-the-file-via-sed wrapper, with -n -q being its own options -- confirm.
## Earlier form of the edit, kept for reference:
##gsed -n -q  's|</b><br>|</b><br><table cellpadding=0>|; s|<p>|</table><p>|; s|^0|<tr><td>0|; s| &nbsp;|<td> \&nbsp;|g' $OUT
tableize='s|</b><br>|</b><br><table>|'		## a heading line also opens that species' table
tableize+='; s|<p>|</table><p>|'		## each <p> first closes the table before it
tableize+='; s|^[0-9]|<tr><td>&|'		## lines starting with a digit become table rows;  2010-07: 0-->[0-9]
tableize+='; s| &nbsp; |\&nbsp; <td>|g'		## " &nbsp; " becomes a cell boundary;  2008-09-22:spacing
chgsed -n -q "$tableize" "$OUT"

## The footer goes on after the edit above, so its text is never rewritten.
printf '%s\n' "<br><small>This page was generated mechanically on $(date +%Y%b%d)</small></body></html>" >>"$OUT"

## Compare the fresh page against the copy remembered from the previous run.
## dif (see PREREQ at top) SHOWs the changes;  +s means no output if same;
## the --sed script blanks the generated-on line so that the DATE alone never
## counts as a difference.
prev_copy=/tmp/$OUT
if ! dif +s --sed='s|This page was generated mechanically.*||' "$prev_copy" "$OUT"; then
  ## different:  report sizes, keep one backup generation, remember the new page
  wc /tmp/pixOrchids-20080922 "$prev_copy" "$OUT"	##counts are nice when a great many lines changed  (the -20080922 is TEMP)
  mv -f "$prev_copy" "$prev_copy~"
  cp -pf "$OUT" "$prev_copy"
else
  ## identical, except for generated-DATE => keep the previous version
  cp -pf "$prev_copy" "$OUT"
fi

##{ difsed -i 's|[-:~_ ]||g; s|&nbsp;||g; s|<td>||g' /tmp/pixOrchids-20080922  $OUT;  wc /tmp/pixOrchids-20080922 $OUT; }|m	##DEBUG against 2008-09-22 output


## Stop here:  everything below this line is free-form notes, not shell.
exit
CONSIDER:  having one page per species, each with inline size400 photos
CONSIDER:  also excluding photos labelled as "BAD" or "bad";  presently excluding ones with "??"

2008-July:  fixed false match on  Dracocephalum~parviflorum (AmericanDragonhead)
2008-09-22:  fixed TABLE-columns looking awful wherever an entry needs two lines  (see "revised spacing" chgsed near end)
2008-09-22:  revised  Coeloglossum viride-->Dactylorhiza viridis;  Malaxis monophyllos-->Malaxis brachypoda;
2008-09-22:  revised to handle Old/New captions  (new ones will use space between words;  old use [_~] within Latin-name, nothing within Common-name),
	use -i on all egreps;  spelling Cypripedium, Platanthera, etc in full, then .*-->. between words in Latin-name;  added SPACE* or HYPHEN* in Common;
	above left pixOrchids page unchanged;  but adding Goodyera.repens discovered a bunch of photos not previously included in pixOrchids page!!
2008-09-24:  after running cvt-ER to convert captions to new-style (spaces within majcap and colon after),
	removed space-to-hyphen transformation;  revised SPACE* or HYPHEN* or HYPHEN/SPACE* cases;
	==no longer supporting Old-style ==> could scrap the star==??==  would miss a few references in Subcaption tho...
	found one more photo after cvt-ER conversion:  photographer: Ed Zalusky zooms in on Hooded ladies-tresses  (not sure why it was missed before)
2008-09-25:  fixed false-match on Castilleja hyperborea - this removed 2 spurious lines, one from each of P-aquilonis, P-huronensis;
	fixed false-match on Aster-Goldenrod-hybrid - removed one spurious line;
	several matches on SubCap are dubious--??--
2010-07-06:  fixed photos from 2010 lacking "<tr>";  was of the y2k sort:-)
	fixed Goodyera tesselata false-match on "Hyposizyus tesselatus";  Tessel-->Tessel+ated;
	fixed Listera borealis false matches:  Northern starflower=Trientalis borealis, Blue-bead lily=Clintonia borealis, Twinflower=Linnaea borealis;

CONSIDER:  replacing pixOrchids with 37 canned searches??  constructing it at genByCaption-time but in the pixsearch thumbnail-style??
	==Doris finds pixOrchids useful;  however feel confident she wouldn't object to its having thumbnails though...
