#!/bin/bash
##
## Receives a tif+box pair (from the tesseract-training procedure) and crops
## the region described by the first box.                 Eugene Reimer 2009
##
## Usage:    <this-script> IMAGE.tif
##
##   IMAGE.tif   the page image; its box file must sit next to it under the
##               same name with the extension replaced by ".box".
##               Only the FIRST line of the box file is used.  Its fields are
##               read as:  CHAR XLO YLO XHI YHI [PAGENBR]   (PAGENBR is absent
##               for a simple, single-page tiff and then defaults to 0).
##
## Output:   tmpCharBox-<IMAGE minus extension>-<XLO>-<YLO>-<XHI>-<YHI>.pbm,
##           written relative to the current directory.  Because the image
##           name is embedded in the output name, IMAGE is expected to be a
##           plain file name in the current directory.
##           The pnmcut invocation is also echoed to stdout (DEBUG aid).
##
## Exit:     0 on success, 2 on usage error, 1 on any other failure.
##
## Requires: identify and convert (ImageMagick), pnmcut (netpbm).
##
## History:
##   Wanted this to determine whether the 12-pixel-too-low bug was in
##   tesseract-makebox or in the viewer (tesseractTrainer.py).
##   May go on to make this more general, making one crop for each row, or
##   perhaps making a drawn-upon image instead; better just to use
##   tesseractTrainer :-)
##   TURNS OUT I don't even need to fix tesseractTrainer -- 'tis
##   tesseract-makebox that is broken!

set -euo pipefail

## Scratch directory holding the intermediate PBM; empty until created.
tmpdir=''

## Print an error message to stderr and abort with status 1.
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

## Remove the scratch directory (if any) on every exit path.
cleanup() {
  if [[ -n "$tmpdir" ]]; then
    rm -rf -- "$tmpdir"
  fi
}
trap cleanup EXIT

main() {
  if (( $# < 1 )); then
    printf 'Usage: %s IMAGE.tif\n' "${0##*/}" >&2
    exit 2
  fi

  local img=$1
  ## Swap the extension rather than substituting the first "tif" found
  ## anywhere in the name (which mangled names such as "motif.tif").
  local box="${img%.*}.box"

  local tool
  for tool in identify convert pnmcut; do
    command -v "$tool" >/dev/null || die "required tool not found: $tool"
  done

  [[ -f "$img" && -r "$img" ]] || die "cannot read image: $img"
  [[ -f "$box" && -r "$box" ]] || die "cannot read box file: $box"

  ## Read the first line of the box file; tolerate a missing final newline
  ## and a DOS line ending.
  local line=''
  IFS= read -r line <"$box" || [[ -n "$line" ]] || die "box file is empty: $box"
  line=${line%$'\r'}

  ## Fields: character (unused here), box edges, optional page number.
  ## Missing fields become empty strings; surplus fields are discarded.
  local xlo ylo xhi yhi page
  read -r _ xlo ylo xhi yhi page _ <<<"$line"

  local field
  for field in "$xlo" "$ylo" "$xhi" "$yhi"; do
    [[ "$field" =~ ^[0-9]+$ ]] || die "malformed first line in $box: '$line'"
  done
  page=${page:-0}
  [[ "$page" =~ ^[0-9]+$ ]] || die "malformed page number in $box: '$page'"

  ## Select the single frame the box refers to, so that a multi-page tiff
  ## yields one height and one PBM instead of one per page.
  local frame="${img}[$((10#$page))]"

  local pageht
  pageht=$(identify -format '%h' "$frame") || die "identify failed on $frame"
  [[ "$pageht" =~ ^[0-9]+$ ]] || die "unexpected height '$pageht' for $frame"

  ## Convert to the coordinate system used by the netpbm routines: box
  ## files measure y upward from the bottom edge, pnmcut wants the top row
  ## measured downward from the top edge.  10# forces decimal so a leading
  ## zero is never taken as octal.
  local left=$((10#$xlo))
  local width=$((10#$xhi - 10#$xlo))
  local height=$((10#$yhi - 10#$ylo))
  local top=$((10#$pageht - 10#$yhi))

  (( width > 0 && height > 0 )) \
    || die "box has no area: $xlo $ylo $xhi $yhi"
  (( top >= 0 )) \
    || die "box top ($yhi) lies above the page height ($pageht)"

  local out="tmpCharBox-${img%.*}-${xlo}-${ylo}-${xhi}-${yhi}.pbm"

  printf '%s\n' "PAGEHT:$pageht; pnmcut $left $top $width $height >$out"   ##DEBUG

  ## Intermediate PBM lives in a private scratch directory; the ".pbm"
  ## extension is what tells convert which format to write.
  tmpdir=$(mktemp -d) || die "mktemp failed"
  local pbm="$tmpdir/page.pbm"

  convert "$frame" "$pbm" || die "convert failed on $frame"     ##convert to PBM

  ## Crop using pnmcut; do not leave a truncated output behind on failure.
  if ! pnmcut "$left" "$top" "$width" "$height" "$pbm" >"$out"; then
    rm -f -- "$out"
    die "pnmcut failed; no output written"
  fi
}

main "$@"