#!/bin/bash
##
## Use less to show unprintable chars - Eugene Reimer 2001-Oct
##
## I was happy with LESSCHARSET=iso8859 (aka latin1) until I encountered a
## file with blanks that weren't spaces!!  Also: files containing CR's can be
## confusing!!  So I made this script to provide an unambiguous view for use
## on such troublesome files.
##
## Usage: pass the same arguments you would pass to less; they are forwarded
## unchanged via "$@".
##
## The following table (the source reference is missing from this copy) is a
## start for constructing a custom charset:
##
##   LESSCHARSET   equivalent LESSCHARDEF   (where .==printable  b==binary  c==control)
##   -----------   ----------------------
##   ascii         8bcccbcc18b95.b
##   dos           8bcccbcc12bc5b95.b.
##   ebcdic        5bc6bcc7bcc41b.9b7.9b5.b..8b6.10b6.b9.7b9.8b8.17b3.3b9.7b9.8b8.6b10.b.b.b.
##   iso8859       8bcccbcc18b95.33b.
##   koi8-r        8bcccbcc18b95.b128.
##   latin1        8bcccbcc18b95.33b.
##   next          8bcccbcc18b95.bb125.bb

## Drop any inherited LESSCHARSET; presumably so that it cannot take
## precedence over the custom LESSCHARDEF below -- confirm against less(1).
unset LESSCHARSET

## Earlier definitions, kept for reference:
##export LESSCHARDEF="8bcccbcc18b95.b32bb94.."   ##ER: very slightly modified ISO-8859-1, treating NBSP as binary
##export LESSCHARDEF="32b95.b"                   ##ER: my simpler pattern for ASCII - used until 2007jan

## ER: 2007jan: ASCII or Plautdietsch-chars or Times-symbol are printable.
## Reading the pattern left to right (counts add up to 256):
##   32b  -> 0x00-0x1f binary         95.  -> 0x20-0x7e printable
##   b    -> 0x7f binary              rest -> 0x80-0xff binary, except that
##   c4 d6 d7 dc df e4 f6 fc are printable (see the table in the notes below).
export LESSCHARDEF="32b95.b68b.17b..4b.2b.4b.17b.5b.3b"

## Format for binary chars: "*s" selects standout, the rest is a printf-style
## format, so each binary char is displayed as <hex>.
export LESSBINFMT="*s<%x>"

##echo "list: LESSCHARDEF=$LESSCHARDEF"   ##debug

## Options:
##   use -c for less to redraw from the top (not enabled here).
##   use -d to prevent msgs about "dumb terminal"
##   use -f to prevent the "may be binary" prompt.
##   use -Q to prevent bell-ringing
##   use -U to treat TAB and CR as binary rather than control chars
less -dfQU "$@"

## Finish with the exit status of less; everything below is notes only.
exit

## ---------------------------------------------------------------------------
## NOTES
## ---------------------------------------------------------------------------
##
## 2007jan: want highlighting of chars other than "ASCII plus the following"
##   Auml    c4 = 196
##   Ouml    d6 = 214
##   times   d7 = 215
##   Uuml    dc = 220
##   szlig   df = 223
##   auml    e4 = 228
##   ouml    f6 = 246
##   uuml    fc = 252
##
## 2009-06: while working on OCRing Eldo's Plautdietsch Verbs, had 2nd thoughts
##   about using latin1 for general less usage (eg l,m cmds); try switching to
##   the Microsoft superset of latin1;
##   ==neither cp1252 nor windows-1252 nor windows for LESSCHARSET is legal
##     with my version of less <--appear to be for later versions==??==
##   ==SEE /etc/profile-er; tried using custom charset via LESSCHARDEF -- but
##     hex-80..9f chars are still disappearing:-(
##   ==NOTE: Konsole in Encoding:cp1252 may be the answer--??--
##   ==google for LC_CTYPE OR locale 1252
##   ==for JED: google for site:jedsoft.org charset OR site:jedsoft.org encoding
##   ==SEE my solution described in http://ereimer.net/programs/charsets-cp1252-utf8.htm