#!/bin/bash
## diff-by-charset -- compares a file to itself, using two specified charset/encodings -- Eugene Reimer 2009-08;
## receives 2 named charsets and one or more files; for each file, compares the file converted
## from charset1 to utf8 against the same file converted from charset2 to utf8.
##
## USAGE EXAMPLE:
##   diff-by-charset  windows-1252 windows-1251  /pkg/geneweb/gw/lang/lexicon.txt
##
## EXIT STATUS (worst case over all FILEs):
##   0  every file converts identically under both charsets
##   1  at least one file differs
##   2  usage error, or a conversion/diff failed
##
## Copyright © 2009 Eugene Reimer; can be used, modified, copied, and distributed or sold under the terms of either the LGPL or the GPL (your choice);
## see http://www.gnu.org/licenses for the details of these terms.

if [ $# -lt 3 ]; then
  echo "usage: diff-by-charset CHARSET1 CHARSET2 FILE..." >&2
  exit 2
fi
FROM1=$1; FROM2=$2; shift 2

## Work in a private directory instead of fixed names directly under /tmp: fixed names can be
## pre-created (or symlinked) by another user and collide between concurrent runs.
TMPD=$(mktemp -d "${TMPDIR:-/tmp}/tmpdiff.XXXXXX") || {
  echo "diff-by-charset: cannot create temporary directory" >&2
  exit 2
}
trap 'rm -rf -- "$TMPD"' EXIT    ## clean up on every exit path

## Keep the charset names in the file names so they still show up in diff's header lines.
## A '/' (as in "utf8//IGNORE") cannot be part of a file name, so it is replaced by '_';
## the 1-/2- prefixes keep the two files distinct even when CHARSET1 equals CHARSET2.
OUT1=$TMPD/1-from-${FROM1//\//_}
OUT2=$TMPD/2-from-${FROM2//\//_}

STATUS=0
for F in "$@"; do
  case $F in -*) F=./$F ;; esac    ## keep a leading '-' from being parsed as an iconv option
  if iconv -f "$FROM1" -t utf8 "$F" >"$OUT1" &&
     iconv -f "$FROM2" -t utf8 "$F" >"$OUT2"; then
    diff -bBs -U0 "$OUT1" "$OUT2"
    RC=$?
  else
    ## iconv has already printed its own diagnostic; diffing a truncated conversion would mislead
    echo "diff-by-charset: conversion failed, skipping: $F" >&2
    RC=2
  fi
  if [ "$RC" -gt "$STATUS" ]; then STATUS=$RC; fi
done
exit "$STATUS"