#!/bin/bash
## jsminify -- remove comments and some other whitespace from a javascript file -- (c) Eugene Reimer 2007-August.
##
## I was hoping to use Douglas Crockford's jsmin.  However it only promises to work for files that pass his jslint.
## And since his jslint outlaws several programming practices I'm unwilling to give up (for example, writing a
## trivial for-loop on one line without braces), I wrote this bash script as a simpler alternative to his jsmin.
## Mine can be safely used on any javascript.  No jslint needed!!
## The space-saving is much the same as with his:  my 110KB testcase shrinks to 36.6KB with his, or with mine.
##
## USAGE: jsminify [string]...  --reads from stdin, writes minified javascript to stdout with the cmdline args as the only comments.
##
## LIMITATIONS:
##   Only handles //... comments, not the /*...*/ kind.
##   Requires that the input be in ASCII or UTF-8 or ISO-8859-1 or any charset that has ASCII as a subset.
##   Uses Unix sed, and needs a bit of trickery to handle things not easily parsed with regular-expressions; this will
##   be safe provided your javascript is free from hex-01, hex-02...hex-05 characters (not to worry, it will be).
##
## My first attempt was foiled by sed's behaviour on a command such as:  s|\(...\)*...|\1|
## when that didn't work, the sed solution got so ugly that any reasonable person would've switched to something else.
##
## Approach WRT string-literals:
##   break line at first quote|stroph, and break again at next occurrence of what it starts with (quote|stroph);
##   repeat the preceding N times to handle up to N string-literals per line;
##   (do comment-removal at the same time: at the first quote|stroph|SLASHSLASH...)
##   can now do space-removal, leaving string-literals unchanged;
##   then remove the introduced linebreaks (possible due to having added something to mark those introduced NLs).
##
## Please send bug-reports, comments, or suggestions to ereimer@shaw.ca.
## ---------------------------------------------------------------------------
## Implementation.
##
## The work is a pipeline of line-oriented sed stages.  Control bytes are used
## as temporary placeholders, so the input must not contain hex-01..hex-06:
##   hex-01  stand-in for a newline (tr converts it to/from a real NL)
##   hex-02  backslash+quote   (an escaped " inside a string-literal)
##   hex-03  backslash+stroph  (an escaped ' inside a string-literal)
##   hex-04  slash+slash       (a possible comment start)
##   hex-05  marks a linebreak this script introduced (always as hex05+hex01)
##   hex-06  backslash+backslash (so "C:\\" is not misread as ending in \")
##
## Every sed runs with LC_ALL=C so that bytes which are not valid in the
## current locale (e.g. ISO-8859-1 input under a UTF-8 locale) cannot stop a
## pattern from matching.  All patterns are pure ASCII, so UTF-8 is unaffected.
##
## Four substitutions use "~" rather than "|" as the s-command delimiter
## because their bracket expressions contain a literal "|"; sed implementations
## differ on whether a delimiter inside [...] terminates the regex.
## ---------------------------------------------------------------------------

# Stage 1: normalise each line and hide the character pairs that would confuse
# the later stages.  Reads stdin, writes stdout.
_jsm_prepare() {
  # In order: strip a trailing CR (MS-DOS input), strip leading and trailing
  # blanks, drop comment-only and empty lines, then swap \\ , \" , \' and //
  # for their placeholder bytes.  The \\ swap must come first so the second
  # backslash of an escaped backslash is never paired with a following quote.
  LC_ALL=C sed \
    -e $'s|\r$||' \
    -e $'s|^[ \t][ \t]*||' \
    -e $'s|[ \t][ \t]*$||' \
    -e '/^\/\//d' \
    -e '/^$/d' \
    -e $'s|\\\\\\\\|\x06|g' \
    -e $'s|\\\\"|\x02|g' \
    -e $'s|\\\\\x27|\x03|g' \
    -e $'s|//|\x04|g'
}

# First half of one isolation pass.  On each line, look at whichever of
# quote / stroph / SLASHSLASH comes first:
#   SLASHSLASH -> it starts a comment: cut it off and re-trim trailing blanks;
#   quote or stroph that is not the first char -> put hex05+hex01 before it.
_jsm_mark_open() {
  LC_ALL=C sed \
    -e $'/\x04/{' \
    -e $'s|^\\([^"\x27\x04]*\\)\x04.*$|\\1|' \
    -e $'s|[ \t][ \t]*$||' \
    -e '}' \
    -e $'s|^\\([^"\x27\x04][^"\x27\x04]*\\)"|\\1\x05\x01"|' \
    -e $'s|^\\([^"\x27\x04][^"\x27\x04]*\\)\x27|\\1\x05\x01\x27|'
}

# Second half of one isolation pass.  A line that begins with a complete
# string-literal followed by more text gets hex05+hex01 after the closing
# quote/stroph.  The [^hex05] guard stops an already-marked line being re-marked.
_jsm_mark_close() {
  LC_ALL=C sed \
    -e $'s|^\\("[^"]*"\\)\\([^\x05]\\)|\\1\x05\x01\\2|' \
    -e $'s|^\\(\x27[^\x27]*\x27\\)\\([^\x05]\\)|\\1\x05\x01\\2|'
}

# Run $1 isolation passes.  Each pass moves the first remaining string-literal
# of every line onto a line of its own.  A trailing // comment that follows the
# last literal is only removed by the *next* pass, so N passes fully handle
# lines with up to N-1 literals plus a comment.
_jsm_isolate_strings() {
  local remaining=$1
  if (( remaining <= 0 )); then
    cat
    return
  fi
  _jsm_mark_open \
    | tr $'\x01' '\n' \
    | _jsm_mark_close \
    | tr $'\x01' '\n' \
    | _jsm_isolate_strings "$(( remaining - 1 ))"
}

# Stage 3: squeeze whitespace on every line that is NOT a string-literal
# (after isolation, any line containing a quote or stroph is a literal).
_jsm_squeeze_code() {
  # TAB->SPACE; collapse runs of spaces; drop the space after / before
  # brackets, parens, braces, comma, semicolon and most operators; drop the
  # space around plus|minus unless that would fuse "+ +" into "++"; finally
  # drop the space left at the edge of a line next to a string-literal.
  LC_ALL=C sed \
    -e $'/["\x27]/!{' \
    -e $'s|\t| |g' \
    -e 's|  *| |g' \
    -e 's~\([][(){},;<=>&|!*/%?:]\) ~\1~g' \
    -e 's~ \([][(){},;<=>&|!*/%?:]\)~\1~g' \
    -e 's|\([+-]\) \([^+-]\)|\1\2|g' \
    -e 's|\([^+-]\) \([+-]\)|\1\2|g' \
    -e 's|^ +|+|' \
    -e 's|+ $|+|' \
    -e '}'
}

# Stage 4: turn the whole stream into one line (NL->hex01) so selected
# linebreaks can be deleted, then turn the surviving hex01 back into NL.
_jsm_join_lines() {
  # Removes: the hex05+hex01 breaks added around string-literals; NL after
  # "{" and ";"; NL after an opening bracket/paren, comma or operator; NL
  # before a closing bracket/paren, comma or operator; NL after a single
  # plus/minus (but never after ++/--, nor before another plus/minus).
  tr '\n' $'\x01' \
    | LC_ALL=C sed \
        -e $'s|\x05\x01||g' \
        -e $'s|{\x01|{|g' \
        -e $'s|;\x01|;|g' \
        -e $'s~\\([[(,<=>&|!*/%?:]\\)\x01~\\1~g' \
        -e $'s~\x01\\([]),<=>&|!*/%?:]\\)~\\1~g' \
        -e $'s|\\([^+]+\\)\x01\\([^+]\\)|\\1\\2|g' \
        -e $'s|\\([^-]-\\)\x01\\([^-]\\)|\\1\\2|g' \
    | tr $'\x01' '\n'
}

# Stage 5: put back the character pairs hidden by _jsm_prepare.
_jsm_restore() {
  LC_ALL=C sed \
    -e $'s|\x02|\\\\"|g' \
    -e $'s|\x03|\\\\\x27|g' \
    -e $'s|\x04|//|g' \
    -e $'s|\x06|\\\\\\\\|g'
}

# jsminify [string]...
#   Arguments: each argument is written first, as a "//<argument>" comment line.
#   Input:     javascript on stdin.
#   Output:    minified javascript on stdout.
jsminify() {
  # 1 initial pass + the 10 repeats the original pipeline spelled out by hand.
  local -r passes=11
  local banner

  # printf rather than echo: the text is arbitrary user data.
  for banner in "$@"; do
    printf '//%s\n' "$banner"
  done

  _jsm_prepare \
    | _jsm_isolate_strings "$passes" \
    | _jsm_squeeze_code \
    | _jsm_join_lines \
    | _jsm_restore
}

jsminify "$@"