#!/bin/bash ## a photo-uploader relying on Apache-protection/authentication -- Eugene Reimer 2010-12; ## consists of 2 parts: an HTML page with photo-upload-FORM, and a CGI program to receive data + file from that form, and store the file in its rightful place; ## but implemented with only this bash CGI-program; it produces an HTML-page with the uploader-form; ## the HTML-page also contains Javascript for field-level validation of form data; ## ## PREREQ: convert identify -- from Imagemagick ## ## Copyright © 2010 Eugene Reimer; can be modified and/or distributed under the terms of the GPL; see http://www.gnu.org/licenses/gpl.html. fix () { echo "$@" |sed "s|['\"]||g; s|[:=]\+|--|g; s|[()_:/ \t]\+|~|g"; } ##function to eliminate chars that are troublesome in filenames nbr () { echo "$@" |sed -n 's|[^0-9]*\([0-9]\{2,8\}\).*|\1-|p'; } ##function returns the first (2-to-8-digit) numeric-substring, and a hyphen if nonempty MSGS=$(csplit -k --prefix=tmpPart$$. - /^--/ "{100}" 2>&1) ##split stdin at "--" lines into tempfiles named tmpPart$$.NN if [[ -e tmpPart$$.05 ]];then ##==tmpPart$$.05 exists, means we're doing an upload ONM=$(cat tmpPart$$.05 |sed -n 's|.*filename="\([^"]*\).*|\1|p') ##remember user-filename from IMGFILE section for F in tmpPart$$.??;do sed -i '/^--/d; /^Content-/d; /^\r$/d' $F; done ##fixup tmpPart$$.NN tmpfiles, discarding --DLM Content-.* and empty lines F=tmpPart$$.05; head --bytes=-2 $F >$F.rev; mv -f $F.rev $F ##fixup tmpPart$$.05 tmpfile, removing last 2 bytes LOC=$( cat tmpPart$$.01 |tr '\r\n' ' ' |sed 's| \+$||' ) ##remember the text in LOC DATE=$( cat tmpPart$$.02 |tr '\r\n' ' ' |sed 's| \+$||' ) ##remember the text in DATE PHOTOG=$( cat tmpPart$$.03 |tr '\r\n' ' ' |sed 's| \+$||' ) ##remember the text in PHOTOG CAPTION=$(cat tmpPart$$.04 |tr '\r\n' ' ' |sed 's| \+$||' ) ##remember the text in CAPTION OTY=$(file -b -i $F|sed 's|;.*||'); OFS=$(stat -c%s $F) ##get mime-type (eg: image/jpeg) and filesize of image-file OSZ=$(identify -format 
"%wx%h" $F); W=${OSZ%x*}; H=${OSZ#*x} ##get image-size in pixels as W, H if ((W*H>3030000));then convert $F -resize "3000000@" -quality 85 $F.rev ##more than 3.03-megapixels: scale down to 3-megapixel qual:85 JPEG; "@"=>area-units elif [[ $OTY != *jpeg* ]];then convert $F -quality 85 $F.rev ##non-JPEG: convert to same-size qual:85 JPEG elif ((OFS>900000));then convert $F -quality 85 $F.rev ##JPEG at excessively high qual: convert to same-size qual:85 JPEG fi [[ -e $F.rev ]] && [[ $(stat -c%s $F.rev) -gt $OFS ]] && rm -f $F.rev ##discard converted file if bigger than original; note: can mean keeping a non-JPEG [[ -e $F.rev ]] && CVTED=1 ##remember that conversion done, for msg to be shown later [[ -e $F.rev ]] && mv -f $F.rev $F ##converted file replaces $F NTY=$(file -b -i $F|sed 's|;.*||'); NFS=$(stat -c%s $F) ##get post-conversion mime-type and filesize NSZ=$(identify -format "%wx%h" $F) ##get post-conversion image-size [[ $DATE == "" ]] && DATE=$(identify -format "%[EXIF:DateTimeOriginal]]" $F |sed 's| .*||; s|:||g') ##omitted DATE indicates use-EXIF-date; beware: "]" for omitted [[ $DATE != [0-9]* ]] && DATE=$(date -r$F +%Y%m%d) ##with fallback to file-timestamp-date NNM=$(nbr $ONM)$(fix $CAPTION)_$(fix $LOC)_$(fix $DATE)_$(fix $PHOTOG) ##form new-filename from numeric-part-of-user-filename, CAPTION, LOC, DATE, PHOTOG NNM=$NNM.$(echo "${NTY#image/}" |sed 's|jpeg|jpg|; s|tiff|tif|') ##add suffix for image-type derived from post-conversion mime-type else ##==tmpPart$$.05 does not exist: this is initial invocation just to supply the form PHOTOG=$REMOTE_USER ##default PHOTOG from $REMOTE_USER fi UPLOADER="$REMOTE_USER"; [[ $UPLOADER == "" ]] && UPLOADER=$(cat tmpPart$$.06 |tr '\r\n' ' ' |sed 's| \+$||') ##workaround for unreliable $REMOTE_USER var [[ $UPLOADER != "" ]] && SIGNEDIN="

You are signed in as:$UPLOADER" ##signed-in-as msg when $UPLOADER available echo "Content-type: text/html" echo "" cat ../nav1-head.txt echo " Photo Uploader " cat ../nav2-body.txt echo "

Photo Uploader

$SIGNEDIN

Location:
Date (YYYYMMDD):
Photographer:
Caption identifying species and persons if possible:

Image-file:


Instructions:

Enter Date as YYYYMMDD, that is as 8-digits consisting of 4-digit Year, 2-digit Month, 2-digit Day-of-month;  note: if your camera's clock is set correctly and the image contains the camera's EXIF-data then omit the Date here for the best result;  omitting Date may work even if the image lacks EXIF-data as in that case the image-file's timestamp will be used. 

Enter Photographer as zz if name of photographer is unknown;  please do not upload annotated photos, however if you must then so indicate by supplying zz or zzNAME as Photographer where NAME is the name of photographer. 

Please supply a Caption that identifies the species;  when species is identified by more than one name use an equal-sign to separate names;  use a colon to separate species-name(s) from rest-of-caption;  for a people-picture, please identify persons by name if possible (in which case there's no need to identify the species);  for a plant-picture, please indicate whether a Habitat, Clump, Plant, Flower-spike, Flower, Foliage, Fruit/Seed-capsule view. 

Use the Browse... button to select the image-file to be uploaded (some browsers show it as a Choose... button);  uploading the full-sized image produced by your camera is fine, although we'll save it as at most a 3-megapixel JPEG;  almost any image type is supported, JPEG, PNG, TIFF, etc, however if your camera uses a RAW format then you use its software to convert to JPEG before uploading;  if converting to JPEG yourself we recommend a quality-setting that will reduce a typical 3-megapixel photo to roughly 500KB. 

Use the Submit button once you're ready to do the upload. " if F=tmpPart$$.05; [[ -e $F ]];then ##tell user about the actions -- will do them after producing DEBUG-INFO echo "


ACTIONS:" [[ $CVTED ]] && echo "
$OTY size:$OSZ filesize:$OFS converted due to size or type" echo "
$ONM saved as $NNM;  size:$NSZ filesize:$NFS
" fi echo "" echo "" if [[ -e tmpPart$$.05 ]];then mv -f tmpPart$$.05 $NNM ##==rename uploaded image-file OAT=; [[ $CVTED ]] && OAT=" (sz:$OSZ fsz:$OFS)" ##old attributes to be logged if conversion was done echo "$UPLOADER uploaded $ONM$OAT saved as $NNM (sz:$NSZ fsz:$NFS)" >>upload-logfile ##==record in log the upload by $UPLOADER of $NNM fi rm -f tmpPart$$.?? ##==cleanup tmpfiles (YANK for debugging) exit ================================================== NOTES from the entire-file-inhaling debug version: ================================================== POST-INPUT ought to look like: Content-type: multipart/form-data, boundary=DLM --DLM Content-Disposition: form-data; name="LOC" SunsetBlvd --DLM Content-Disposition: form-data; name="DATE" 20060602 --DLM Content-Disposition: form-data; name="PHOTOG" zzANON --DLM Content-Disposition: form-data; name="CAPTION" Hawk~moth --DLM Content-Disposition: form-data; name="IMGFILE"; filename="09719_erCBS160.jpg" Content-Type: image/jpeg ... --DLM-- (where I'm not showing the empty-lines) (I chose file: /pix/20080228-erdotnet-root/ereimer.net/09719_erCBS160.jpg; it is the same as /er/website/20060602/09719_erCBS160.jpg) (SEE: photo-upload/photo-upload-INHALING-DEBUG-version.cgi -- with inhaling method as used n coordinatecvt, before trying csplit) PROBLEM: the 1st line of Post-input is missing (with Local-Apache OR HostExcellence-Apache), and that leaves me not knowing what boundary-string (ie: DLM) to scan for?? failed to find a reason for that line being missing; many sources provide examples WITH that line present -- though most just say "use Perl..." 
klugey solution: get DLM from 1st-line which can be either: "Content-type...boundary=DLM" or "--DLM"; am actually using an even bigger kluge that doesn't even use $DLM, is based on valid input-line never beginning with "--" also am discarding ALL empty-lines; possibly BETTER: expect one after each 2|3-line set of: --DLM Content-Disposition Content-Type PROBLEM: info about checkboxes etc is in $QUERY_STRING for GET; for POST with default enctype that info is first part of stdin-file; however with enctype=multipart/form-data (as required for FILE upload) then each var becomes a message-PART, and in coordinatecvt those come AFTER the file itself==!!== ==> easiest way to work with such is to inhale the entire file (which makes a joke of being forced to use method=POST:) processing such data in one pass would be possible if form were laid out with buttons+checkboxes up front, however I prefer coordinatecvt the way it is; CONSIDER: use csplit to split stdin into separate tempfiles; doesn't need file-chooser being last in the form; 2-pass method, or can we avoid reprocessing image-file?? for photo-uploader want a non-inhaling method; one-pass requires file-chooser last in the form; easiest is to use csplit tempfiles; to avoid reprocessing image-file, need to start after the empty-line that immediately follows "Content-Type: image/...", and stop before "--DLM--" line; doing that with csplit: skipping the empty-line may be tricky?? may also need to discard newlines?? beware: file-uploader uses MS-DOS-style newlines (CR+LF)!! 
==NOTE: file /er/website/20060602/09719_erCBS160.jpg -- SIZE: 3622; ==NOTE: tmpPart$$.05 SIZE:3624 after removing Content-Disposition|Content-Type lines; ==NOTE: the 2 extra chars are the ^M^J that were added at EOF; using head to remove last 2 bytes of tmpPart$$.05; ======================== NOTES on photo-uploader: ======================== ((PLAN-A)) photo-uploader WITH REGISTRATION and login components, that can be used for other purposes; usr-registration: usr-signin: photo-upload: they use forms, with text-fields, checkboxes, and a file-chooser for user-input; not all browsers offer a file-chooser, modern ones do; they support method:POST since it's required with a file-chooser; some may also support method:GET; form-info arrives as stdin for POST, as $QUERY_STRING var for GET; the $REQUEST_METHOD var shows which method is being used; they rely on a file-tree where: a file under cgi-bin cannot be read by anyone not having either Control-Panel or FTP access to our webhosting-account; note that the usr-db must be world-writable but not world-readable; unsafe in shared-server environment?? safer to use mysql-db, see cgi101.com link below; for a robot-defeating quiz: see "captchas"; http://captchas.net/ offers example in PHP ASP Perl Python JSP Ruby, but NOT bash!! haven't found one as easy to use as I was hoping for, although a PHP-interface is fine; but why must the captcha-generating code be on another site?? 
they all do things that way, probably to keep the code secret; according to Wikipedia the major ones have all been broken -- yet presumably Google's will be quickly replaced whenever cracked; reCaptcha from http://www.google.com/recaptcha; KEYS including PRIVATE-KEY are on my system in ~/PRIVATE-KEY-for-recaptcha; HOWTO: http://code.google.com/apis/recaptcha/docs/php.html a signin is remembered via browser-cookie, currently having "session" as its lifetime though longer lifetime is easily achieved if wanted; the security is on the weak side, not good enough for financial transactions etc, with (1) its willingness to send passwords by email, and (2) storing passwords as plain-text so that a hacker getting read-access to the usr-db has gained the ability to impersonate any user; however in my experience many websites requiring user-registration use methods of this sort; Note: a hacker gaining read-access to the PHP-scripts obtains the Private-Key which may enable breaking the captcha?? (enables multiple guesses on same image) overall user-interface: a single webpage contains a form for registration, one for signin, and one for photo-uploading; the 3 forms invoke 3 different CGI-routines, but each of them produces an HTML-page with one form, the one for photo-uploading; note: the 3-way webpage must show whether user is currently signed-in; alternative: only provide uploader-form in the signed-in case? 
((PLAN-B)) A DIFFERENT APPROACH: using Apache's HTTP-Authentication: where asking for userid & password will be done by Apache upon any access to a protected file/directory; ideal for by-invitation-only authorized users where webmaster does user-creation; although online-registration is also possible; SEE: http://www.cgi101.com/class/password/ -- for the basics, and for online user-registration; using in photo-uploader: environment-var REMOTE_USER; the CGI-script needs to be protected for this var to be set; http://httpd.apache.org/docs/1.3/misc/FAQ-F.html#remote-user-var this is bare-bones demo; for real-world use would also provide forms for: viewing photos one has uploaded with provisions for: revising location, date, photographer, caption, and for deleting a photo one has uploaded; making changes to one's account (if there is online registration); RESIZING & RENAMING imagefile: tools available at Host-Excellence include Imagemagick (convert,identify) but not NetPbm; can however install additional Unix tools if unavoidable... arithmetic with bc, without my library-functions, is mildly painful but entirely possible; wrote but didn't even need: SZ=$(echo "scale=20; int(sqrt($W*$H*4/3))+.5)" |bc) ##get size-in-ER-units rounded to integer -- NOT NEEDED can obtain image-size & type using identify, can rescale & convert-to-JPEG-with-specified-Quality using convert; discovered its Area-units, nice!! Added bonus: convert preserves the EXIF-info so don't need to do that separately with jhead etc; convert's -resize uses better algorithm vs -scale?? see also -filter; TESTING: the basics work with local-Apache with Opera, Firefox-2, and ie6; including the UPLOADER workaround for $REMOTE_USER; testcase with large jpeg: /pix/er030216/01293_er.jpg -- sz:1600 qual:85 to scalepix results in filesize:844,998 about triple the usual!! 
jpeg-compression-ratio so erratic there's actually a chance, albeit slight, that my filesize-triggered conversion to qual:85 could make file bigger:-) convert -resize "3000000@" -quality 85: time:5.653s filesize:1,355,293; convert -scale "3000000@" -quality 85: time:4.211s filesize:1,272,896; CAN'T SEE THE DIFFERENCE, but then neither time nor size costs much... LOC, DATE, PHOTOG, CAPTION included in the saved-image-filename: space->tilde, other unsafe-chars either discarded or replaced with hyphen|tilde; CAPN=${CAPN%:}; CAPN=${CAPN//:/-}; CAPN=${CAPN//=/--}; CAPN=${CAPN//\'/} ##--from get-ERphoto-DATE+CAPN+LOCN X=${X//[\'\"]/}; X=${X//[:=]/--}; X=${X//[()_:\/]/-}; ##--using bash can't do unstutter suffix; derive from file-command with -b --mime-t options hopefully user-filename is or contains the sort produced by camera; keep 4|5|6-digit numeric part but nothing else?? also abbreviated uploader-name?? DATE-EXIF: jhead /pix/er030216/01293_er.jpg indicates 2003:02:16 16:02:18 is the desired-timestamp; DATE-EXIF: identify -format "%[EXIF:DateTimeOriginal]]" /pix/er030216/01293_er.jpg gives:2003:02:16 16:02:18.] -- correct but for some spurious junk; LOGFILE: one line per upload with uploader-name & image-filename; workaround for $REMOTE_USER: assume it's reliable on initial-call; NOTES: BODY-tag: my combined navbar+focus onLoad (onLoad='menubarPad();document.F1.LOC.focus()' within BODY-tag) -- gets javascript errmsg; SEE set-focus note in nav1... .cgi is defined as a CGI-ending within top-level .htaccess file (or control-panel); could define .bash as CGI-ending within photo-uploader/.htaccess?? consider exclusion of photo-upload directory, in robots.txt, in sitemap.xml?? ==webput: for photo-upload directory, files on hoster-system override local ones; ie: mirror-upload W/O deletion, plus mirror-download excluding *cgi .ht*; consider: rename this script to .photo-upload.cgi so dotted-names distinguish local-overrides files from hoster-overrides ones?? 
==see notes in .htaccess .htpasswd on how they must be revised upon upload, due to the fully-qualified filename requirement!! .htpasswd may need to be recreated on the webhoster system; if so then see my /etc/sbin/mk-.htpasswd-for-Apache-Photo-Uploader mods needed before running on hoster-system: tmpPart-->tmpPart$$; for local-testing could omit $$ and yank tmpPart-cleanup; DONE; Javascript-DATE-VALIDATION: see ~/TIPS-DEBUGGING-Tale-of-Woe HURRAH, IT IS FINISHED==!!== ...but for the following: navbar: I prefer the big "Photo Uploader" Heading LEFT-JUSTIFIED!! otherwise with-navbar is fine in all 3 browsers; LEFT-JUSTIFICATION DONE; robots.txt, sitemap.xml: the Authorization will be a problem for Googlebot, so will exclude the photo-upload dir; DONE; webput revision: .htaccess: see notes in it on how it must be revised upon upload, due to the fully-qualified filename requirement (for location of .htpasswd) .htpasswd may need to be recreated on the webhoster system; if so then see my /etc/sbin/mk-.htpasswd-for-Apache-Photo-Uploader photo-upload dir: most files are HOSTER-overrides; some are LOCAL-overrides; ie: need mirror-UPLOAD W/O deletion, plus mirror-DOWNLOAD excluding *cgi .ht*; ==DONE; or rather UPLOAD-WITHOUT-deletion including PRE/POST-fixups on .htaccess are done; ==the DOWNLOAD is not done; MENTION TO WILL: Google Webmaster-Tools including sitemap.xml; Note: observed he has not been updating sitemap.xml on NOCI site; uploaded to ereimer.net; tried a testcase; signing-in works; date-validation works; chose /pix/er060602/09719_erCBS.jpg; everything fine except: (1) spurious conversion occurred: ACTIONS: size:856x856 filesize:372202 converted due to size or type 09719_erCBS.jpg saved as 09719-Hawk~moth_Sunset~Blvd_20060602_Eugene.Reimer.; size:856x856 filesize:194434 why would that image be converted?? that image works as expected locally; ==HostExcellence file-command?? 
YUP, theirs says: file: unrecognized option '--mime-type'; fixed OTY by revising file -b --mime-t --> file -b -i and adding |sed 's|;.*||' (2) after using uploader to save the photo (shown above), the HE-control-panel-FileManager shows no such file under photo-upload dir; OK, still had the rename + logging cmdlines yanked; NOW creates W/O-conversion a file named 09719_erCBS.jpg saved as 09719-Hawk~moth_Sunset~Blvd_20060602_Eugene.Reimer. <--fixed suffix; fix NTY same as OTY; NOW WORKS; 09719_erCBS.jpg saved as 09719-Hawk~moth_Sunset~Blvd_20060602_Eugene.Reimer.jpg; size:856x856 filesize:372202 minor flaw: photog-name containing a dot, ought to revise dot-->tilde?? webput: did the upload WITH DELETION?? don't know why; had done webput from correct dir; the revised msg in WEBPUT-LL may shed light on it?? ==http://ereimer.net/programs/WEBPUT-LL gets 403 (Forbidden)!! HE changed rules OR using Apache-authorization causes?? make world-readable; CHECK FOR OTHERS==!!== ==Consider: if $NNM exists add a number for uniqueness?? Consider: preserve AlterationLetters if present in user-filename?? (string of [-A-Z0-9] before last dot) Another testcase: ACTIONS: image/jpeg size:2560x1920 filesize:2299639 converted due to size or type 09720_er.jpg saved as 09720-Hawk~moth_Sunset~Blvd_20060602_Eugene.Reimer.jpg; size:2560x1920 filesize:912670 --made a same-size jpeg when it should've been a 3-megapixel one!! my logic was broken, needed elif... 
After Fixing: ACTIONS: image/jpeg size:2560x1920 filesize:1985074 converted due to size or type 09721_er.jpg saved as 09721-Hawk~moth_Sunset~Blvd_20060602_Eugene.Reimer.jpg; size:1999x1499 filesize:515433 WEBPUT-LL: still broken; conclusion: lftp-mirror is Broken wrt -e (Deletion) option; I knew it was dangerous but not that it was broken; added Download step (via lftp-mirror) before the Upload; added -n option to Download; make backup under /tmp; =========== CHANGE-LOG: =========== 2010-12-17: made a Public-Demo, by creating directory photo: it does NOT use Apache-Authentication; its copy of photo-upload.cgi modified as follows: yanked the rename; yanked the write-to-log; in FORM.action revised pathname-to-script (so it invokes itself); 2010-12-18: to-be-readable copy of photo-upload.cgi that's under programs-dir: name it with .txt ending, WITHOUT any .cgi in its name; because unable to get rid of .cgi as a script-alias, due to bug in Host-Excellence-Control-Panel:-( Update: several hours after repeated attempts to discard .cgi as a script-alias saw it coming back, it finally went away!! =>no longer need .txt rename??