#!/bin/bash
## 2009-09-23: filter to url-decode using sed; by Eugene Reimer;
## written when needed by my weblog-parse-search-strings script, for Apache-log-records;
## (url-decode-OLD is my 1st attempt, using handcrafted sed-s-cmds that didn't handle hi-bit chars)
##
## Usage: reads url-encoded text on stdin, writes the decoded text on stdout;
##        command-line arguments are ignored; exit status is that of sed.
##
## note: hex-80 thru hex-ff are needed to handle latin1 as well as byte-by-byte-encoded utf8 chars;
## note: %00 and %0A are deliberately NOT decoded (NUL cannot be carried in a sed script, and a
##       decoded newline would split a record); they pass through with uppercased hex digits;
## note: needs GNU sed: \U (uppercase) and \n (newline) in the replacement are GNU extensions;
##
## Copyright © 2009 Eugene Reimer; can be used, modified, copied, and distributed or sold under the terms of either the LGPL or the GPL (your choice);
## see http://www.gnu.org/licenses for the details of these terms.

## url_decode: the filter itself; builds one sed -e expression per byte value and runs sed once.
url_decode() {
  local code hex byte
  local -a sed_args=()

  ## expr-01: plus-to-space first, so that a plus produced by decoding %2B stays a plus.
  sed_args+=(-e 's/+/ /g')

  ## expr-02: tag every well-formed escape by replacing its percent-symbol with a newline, and
  ## uppercase its two hex digits (some url-encoders use lowercase hex-digits).
  ## A newline can never occur inside a line that sed has read, so it is a safe marker; the
  ## later expressions match the marker rather than a percent-symbol, hence:
  ##   - a percent-symbol that is not followed by two hex digits is left completely untouched;
  ##   - a byte produced by one substitution can never combine with the following text to form
  ##     a new escape, so nothing gets decoded twice and the order of substitutions is irrelevant.
  sed_args+=(-e 's/%\([0-9A-Fa-f][0-9A-Fa-f]\)/\n\U\1/g')

  ## one substitution per byte value hex-01 thru hex-FF, skipping hex-0A (newline).
  for ((code = 1; code <= 255; code++)); do
    if ((code == 10)); then
      continue
    fi
    printf -v hex '%02X' "$code"
    printf -v byte '%b' "\\x${hex}"
    case "$hex" in
      ## ampersand, slash (the s-command delimiter) and backslash are special in a sed
      ## replacement and have to be backslash-escaped to be taken literally.
      26 | 2F | 5C) byte="\\${byte}" ;;
    esac
    sed_args+=(-e "s/\\n${hex}/${byte}/g")
  done

  ## last: markers still present belong to the undecoded escapes (%00, %0A); restore their percent.
  sed_args+=(-e 's/\n/%/g')

  ## LC_ALL=C: treat input and script strictly as bytes, whatever the caller's locale is.
  LC_ALL=C sed "${sed_args[@]}"
}

url_decode

## History:
## the substitution table used to be written out literally, generated by:
##   for ((j=16#00; j<=16#ff; ++j));do J=$(printf "%02X" $j); if ((j%8==0));then echo "";fi; echo -n "s/%$J/\x$J/g;"; done >tmp
## wanted to handle any hex-digits (except hex-00) in a consistent style, but there are several
## troublesome cases (newline, percent, ampersand, slash, backslash), as noted in the comments above;
## for sed-errormsgs that are meaningful (as to location within string), went to multiple -e operands;
## now do plus-to-space here (was in weblog-parse-search-strings);
## now also handle lowercase hex-digit-characters, by uppercasing any %XX up front;
## because some url-encoders use lowercase hex-digits;