#!/bin/bash
#
# Minify HTML: strip comments, then collapse the whitespace around tags.
#
# Usage: SCRIPT [ARG...]
#   All arguments are passed unchanged to strip_html_comments.sed, which is
#   looked up in the same directory as this script (presumably they are the
#   input file names - confirm against that helper). Output goes to stdout.
#
# DANGER! Whitespace is collapsed everywhere; nothing is exempted, so
# preformatted content (presumably <pre> - the tag name is missing from the
# original note) loses its formatting. Workarounds:
#   1) use 0xA0 (or unicode characters) for the spaces
#   2) use an explicit line-break element (presumably <br>) or a unicode
#      character for the newlines

# Let a failure in any pipeline stage (e.g. a missing comment stripper) show
# up in the exit status instead of being masked by the last sed.
set -o pipefail

# Directory holding this script. dirname yields "." when $0 contains no
# slash (e.g. "bash script.sh"); stripping the last path component by hand
# would leave the script's own name in that case.
MINIPATH=$(dirname -- "$0")

#######################################
# Collapse the whitespace of an HTML stream.
# Arguments: none
# Inputs:    HTML on stdin
# Outputs:   the document joined into a single line on stdout
# Notes:     \+, \? and \| in the patterns below are GNU sed extensions.
#######################################
collapse_html_whitespace() {
  # newlines (preformatted content is NOT exempted): every LF and CR
  # becomes a space, so the sed stages below see one single line
  tr '\n\r' ' ' |
    # consecutive spaces (preformatted content is NOT exempted)
    sed 's/ \+/ /g' |
    # spaces at the start of lines
    sed 's/^ *//g' |
    # spaces before/after the ">" that closes a tag
    sed 's/ \?> \?/>/g' |
    # spaces before the listed opening tags, unless preceded by a colon
    sed 's/\([^:]\) <\(ul\|li\|h\|title\|td\|tr\|th\|div\|span\|img\)/\1<\2/g'
}

# Emit an empty line ahead of the minified document.
# NOTE(review): the argument may originally have held markup that was lost
# together with the tag names in the comments - confirm.
echo ""

# steps of removal: comments first, then whitespace.
# "$@" keeps every argument intact even if it contains spaces or globs.
"$MINIPATH/strip_html_comments.sed" "$@" | collapse_html_whitespace