Mercurial > repos > bgruening > text_processing
comparison ansi2html.sh @ 0:5314e5d6f040 draft
Imported from capsule None
author | bgruening |
---|---|
date | Thu, 29 Jan 2015 07:53:17 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5314e5d6f040 |
---|---|
1 #!/bin/sh | |
2 | |
3 # Convert ANSI (terminal) colours and attributes to HTML | |
4 | |
5 # Author: | |
6 # http://www.pixelbeat.org/docs/terminal_colours/ | |
7 # Examples: | |
8 # ls -l --color=always | ansi2html.sh > ls.html | |
9 # git show --color | ansi2html.sh > last_change.html | |
10 # Generally one can use the `script` util to capture full terminal output. | |
11 # Changes: | |
12 # V0.1, 24 Apr 2008, Initial release | |
13 # V0.2, 01 Jan 2009, Phil Harnish <philharnish@gmail.com> | |
14 # Support `git diff --color` output by | |
15 # matching ANSI codes that specify only | |
16 # bold or background colour. | |
17 # P@draigBrady.com | |
18 # Support `ls --color` output by stripping | |
19 # redundant leading 0s from ANSI codes. | |
20 # Support `grep --color=always` by stripping | |
21 # unhandled ANSI codes (specifically ^[[K). | |
22 # V0.3, 20 Mar 2009, http://eexpress.blog.ubuntu.org.cn/ | |
23 # Remove cat -v usage which mangled non ascii input. | |
24 # Cleanup regular expressions used. | |
25 # Support other attributes like reverse, ... | |
26 # P@draigBrady.com | |
27 # Correctly nest <span> tags (even across lines). | |
28 # Add a command line option to use a dark background. | |
29 # Strip more terminal control codes. | |
30 # V0.4, 17 Sep 2009, P@draigBrady.com | |
31 # Handle codes with combined attributes and color. | |
32 # Handle isolated <bold> attributes with css. | |
33 # Strip more terminal control codes. | |
34 # V0.12, 12 Jul 2011 | |
35 # http://github.com/pixelb/scripts/commits/master/scripts/ansi2html.sh | |
36 | |
37 if [ "$1" = "--version" ]; then | |
38 echo "0.12" && exit | |
39 fi | |
40 | |
41 if [ "$1" = "--help" ]; then | |
42 echo "This utility converts ANSI codes in data passed to stdin" >&2 | |
43 echo "It has 2 optional parameters:" >&2 | |
44 echo " --bg=dark --palette=linux|solarized|tango|xterm" >&2 | |
45 echo "E.g.: ls -l --color=always | ansi2html.sh --bg=dark > ls.html" >&2 | |
46 exit | |
47 fi | |
48 | |
49 [ "$1" = "--bg=dark" ] && { dark_bg=yes; shift; } | |
50 | |
51 if [ "$1" = "--palette=solarized" ]; then | |
52 # See http://ethanschoonover.com/solarized | |
53 P0=073642; P1=D30102; P2=859900; P3=B58900; | |
54 P4=268BD2; P5=D33682; P6=2AA198; P7=EEE8D5; | |
55 P8=002B36; P9=CB4B16; P10=586E75; P11=657B83; | |
56 P12=839496; P13=6C71C4; P14=93A1A1; P15=FDF6E3; | |
57 shift; | |
58 elif [ "$1" = "--palette=solarized-xterm" ]; then | |
59 # Above mapped onto the xterm 256 color palette | |
60 P0=262626; P1=AF0000; P2=5F8700; P3=AF8700; | |
61 P4=0087FF; P5=AF005F; P6=00AFAF; P7=E4E4E4; | |
62 P8=1C1C1C; P9=D75F00; P10=585858; P11=626262; | |
63 P12=808080; P13=5F5FAF; P14=8A8A8A; P15=FFFFD7; | |
64 shift; | |
65 elif [ "$1" = "--palette=tango" ]; then | |
66 # Gnome default | |
67 P0=000000; P1=CC0000; P2=4E9A06; P3=C4A000; | |
68 P4=3465A4; P5=75507B; P6=06989A; P7=D3D7CF; | |
69 P8=555753; P9=EF2929; P10=8AE234; P11=FCE94F; | |
70 P12=729FCF; P13=AD7FA8; P14=34E2E2; P15=EEEEEC; | |
71 shift; | |
72 elif [ "$1" = "--palette=xterm" ]; then | |
73 P0=000000; P1=CD0000; P2=00CD00; P3=CDCD00; | |
74 P4=0000EE; P5=CD00CD; P6=00CDCD; P7=E5E5E5; | |
75 P8=7F7F7F; P9=FF0000; P10=00FF00; P11=FFFF00; | |
76 P12=5C5CFF; P13=FF00FF; P14=00FFFF; P15=FFFFFF; | |
77 shift; | |
78 else # linux console | |
79 P0=000000; P1=AA0000; P2=00AA00; P3=AA5500; | |
80 P4=0000AA; P5=AA00AA; P6=00AAAA; P7=AAAAAA; | |
81 P8=555555; P9=FF5555; P10=55FF55; P11=FFFF55; | |
82 P12=5555FF; P13=FF55FF; P14=55FFFF; P15=FFFFFF; | |
83 [ "$1" = "--palette=linux" ] && shift | |
84 fi | |
85 | |
86 [ "$1" = "--bg=dark" ] && { dark_bg=yes; shift; } | |
87 | |
88 echo -n "<html> | |
89 <head> | |
90 <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/> | |
91 <style type=\"text/css\"> | |
92 .ef0,.f0 { color: #$P0; } .eb0,.b0 { background-color: #$P0; } | |
93 .ef1,.f1 { color: #$P1; } .eb1,.b1 { background-color: #$P1; } | |
94 .ef2,.f2 { color: #$P2; } .eb2,.b2 { background-color: #$P2; } | |
95 .ef3,.f3 { color: #$P3; } .eb3,.b3 { background-color: #$P3; } | |
96 .ef4,.f4 { color: #$P4; } .eb4,.b4 { background-color: #$P4; } | |
97 .ef5,.f5 { color: #$P5; } .eb5,.b5 { background-color: #$P5; } | |
98 .ef6,.f6 { color: #$P6; } .eb6,.b6 { background-color: #$P6; } | |
99 .ef7,.f7 { color: #$P7; } .eb7,.b7 { background-color: #$P7; } | |
100 .ef8, .f0 > .bold,.bold > .f0 { color: #$P8; font-weight: normal; } | |
101 .ef9, .f1 > .bold,.bold > .f1 { color: #$P9; font-weight: normal; } | |
102 .ef10,.f2 > .bold,.bold > .f2 { color: #$P10; font-weight: normal; } | |
103 .ef11,.f3 > .bold,.bold > .f3 { color: #$P11; font-weight: normal; } | |
104 .ef12,.f4 > .bold,.bold > .f4 { color: #$P12; font-weight: normal; } | |
105 .ef13,.f5 > .bold,.bold > .f5 { color: #$P13; font-weight: normal; } | |
106 .ef14,.f6 > .bold,.bold > .f6 { color: #$P14; font-weight: normal; } | |
107 .ef15,.f7 > .bold,.bold > .f7 { color: #$P15; font-weight: normal; } | |
108 .eb8 { background-color: #$P8; } | |
109 .eb9 { background-color: #$P9; } | |
110 .eb10 { background-color: #$P10; } | |
111 .eb11 { background-color: #$P11; } | |
112 .eb12 { background-color: #$P12; } | |
113 .eb13 { background-color: #$P13; } | |
114 .eb14 { background-color: #$P14; } | |
115 .eb15 { background-color: #$P15; } | |
116 " | |
117 | |
118 # The default xterm 256 colour palette | |
119 for red in $(seq 0 5); do | |
120 for green in $(seq 0 5); do | |
121 for blue in $(seq 0 5); do | |
122 c=$((16 + ($red * 36) + ($green * 6) + $blue)) | |
123 r=$((($red * 40 + 55) * ($red > 0))) | |
124 g=$((($green * 40 + 55) * ($green > 0))) | |
125 b=$((($blue * 40 + 55) * ($blue > 0))) | |
126 printf ".ef%d { color: #%2.2x%2.2x%2.2x; } " $c $r $g $b | |
127 printf ".eb%d { background-color: #%2.2x%2.2x%2.2x; }\n" $c $r $g $b | |
128 done | |
129 done | |
130 done | |
131 for gray in $(seq 0 23); do | |
132 c=$(($gray+232)) | |
133 l=$(($gray*10 + 8)) | |
134 printf ".ef%d { color: #%2.2x%2.2x%2.2x; } " $c $l $l $l | |
135 printf ".eb%d { background-color: #%2.2x%2.2x%2.2x; }\n" $c $l $l $l | |
136 done | |
137 | |
138 echo -n ' | |
139 .f9 { color: '`[ "$dark_bg" ] && echo "#$P7;" || echo "#$P0;"`' } | |
140 .b9 { background-color: #'`[ "$dark_bg" ] && echo $P0 || echo $P15`'; } | |
141 .f9 > .bold,.bold > .f9, body.f9 > pre > .bold { | |
142 /* Bold is heavy black on white, or bright white | |
143 depending on the default background */ | |
144 color: '`[ "$dark_bg" ] && echo "#$P15;" || echo "#$P0;"`' | |
145 font-weight: '`[ "$dark_bg" ] && echo 'normal;' || echo 'bold;'`' | |
146 } | |
147 .reverse { | |
148 /* CSS doesnt support swapping fg and bg colours unfortunately, | |
149 so just hardcode something that will look OK on all backgrounds. */ | |
150 '"color: #$P0; background-color: #$P7;"' | |
151 } | |
152 .underline { text-decoration: underline; } | |
153 .line-through { text-decoration: line-through; } | |
154 .blink { text-decoration: blink; } | |
155 | |
156 </style> | |
157 </head> | |
158 | |
159 <body class="f9 b9"> | |
160 <pre> | |
161 ' | |
162 | |
163 p='\x1b\[' #shortcut to match escape codes | |
164 P="\(^[^°]*\)¡$p" #expression to match prepended codes below | |
165 | |
166 # Handle various xterm control sequences. | |
167 # See /usr/share/doc/xterm-*/ctlseqs.txt | |
168 sed " | |
169 s#\x1b[^\x1b]*\x1b\\\##g # strip anything between \e and ST | |
170 s#\x1b][0-9]*;[^\a]*\a##g # strip any OSC (xterm title etc.) | |
171 | |
172 #handle carriage returns | |
173 s#^.*\r\{1,\}\([^$]\)#\1# | |
174 s#\r\$## # strip trailing \r | |
175 | |
176 # strip other non SGR escape sequences | |
177 s#[\x07]##g | |
178 s#\x1b[]>=\][0-9;]*##g | |
179 s#\x1bP+.\{5\}##g | |
180 s#${p}[0-9;?]*[^0-9;?m]##g | |
181 | |
182 #remove backspace chars and what they're backspacing over | |
183 :rm_bs | |
184 s#[^\x08]\x08##g; t rm_bs | |
185 " | | |
186 | |
187 # Normalize the input before transformation | |
188 sed " | |
189 # escape HTML | |
190 s#\&#\&amp;#g; s#>#\&gt;#g; s#<#\&lt;#g; s#\"#\&quot;#g | |
191 | |
192 # normalize SGR codes a little | |
193 | |
194 # split 256 colors out and mark so that they're not | |
195 # recognised by the following 'split combined' line | |
196 :e | |
197 s#${p}\([0-9;]\{1,\}\);\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m${p}¬\2m#g; t e | |
198 s#${p}\([34]8;5;[0-9]\{1,3\}\)m#${p}¬\1m#g; | |
199 | |
200 :c | |
201 s#${p}\([0-9]\{1,\}\);\([0-9;]\{1,\}\)m#${p}\1m${p}\2m#g; t c # split combined | |
202 s#${p}0\([0-7]\)#${p}\1#g #strip leading 0 | |
203 s#${p}1m\(\(${p}[4579]m\)*\)#\1${p}1m#g #bold last (with clr) | |
204 s#${p}m#${p}0m#g #add leading 0 to norm | |
205 | |
206 # undo any 256 color marking | |
207 s#${p}¬\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m#g; | |
208 | |
209 # map 16 color codes to color + bold | |
210 s#${p}9\([0-7]\)m#${p}3\1m${p}1m#g; | |
211 s#${p}10\([0-7]\)m#${p}4\1m${p}1m#g; | |
212 | |
213 # change 'reset' code to a single char, and prepend a single char to | |
214 # other codes so that we can easily do negative matching, as sed | |
215 # does not support look behind expressions etc. | |
216 s#°#\&deg;#g; s#${p}0m#°#g | |
217 s#¡#\&iexcl;#g; s#${p}[0-9;]*m#¡&#g | |
218 " | | |
219 | |
220 # Convert SGR sequences to HTML | |
221 sed " | |
222 :ansi_to_span # replace ANSI codes with CSS classes | |
223 t ansi_to_span # hack so t commands below only apply to preceding s cmd | |
224 | |
225 /^[^¡]*°/ { b span_end } # replace 'reset code' if no preceding code | |
226 | |
227 # common combinations to minimise html (optional) | |
228 s#${P}3\([0-7]\)m¡${p}4\([0-7]\)m#\1<span class=\"f\2 b\3\">#;t span_count | |
229 s#${P}4\([0-7]\)m¡${p}3\([0-7]\)m#\1<span class=\"f\3 b\2\">#;t span_count | |
230 | |
231 s#${P}1m#\1<span class=\"bold\">#; t span_count | |
232 s#${P}4m#\1<span class=\"underline\">#; t span_count | |
233 s#${P}5m#\1<span class=\"blink\">#; t span_count | |
234 s#${P}7m#\1<span class=\"reverse\">#; t span_count | |
235 s#${P}9m#\1<span class=\"line-through\">#; t span_count | |
236 s#${P}3\([0-9]\)m#\1<span class=\"f\2\">#; t span_count | |
237 s#${P}4\([0-9]\)m#\1<span class=\"b\2\">#; t span_count | |
238 | |
239 s#${P}38;5;\([0-9]\{1,3\}\)m#\1<span class=\"ef\2\">#; t span_count | |
240 s#${P}48;5;\([0-9]\{1,3\}\)m#\1<span class=\"eb\2\">#; t span_count | |
241 | |
242 s#${P}[0-9;]*m#\1#g; t ansi_to_span # strip unhandled codes | |
243 | |
244 b # next line of input | |
245 | |
246 # add a corresponding span end flag | |
247 :span_count | |
248 x; s/^/s/; x | |
249 b ansi_to_span | |
250 | |
251 # replace 'reset code' with correct number of </span> tags | |
252 :span_end | |
253 x | |
254 /^s/ { | |
255 s/^.// | |
256 x | |
257 s#°#</span>°# | |
258 b span_end | |
259 } | |
260 x | |
261 s#°## | |
262 b ansi_to_span | |
263 " | | |
264 | |
265 # Convert alternative character set | |
266 # Note we convert here, as if we do at start we have to worry about avoiding | |
267 # conversion of SGR codes etc., whereas doing here we only have to | |
268 # avoid conversions of stuff between &...; or <...> | |
269 # | |
270 # Note we could use sed to do this based around: | |
271 # sed 'y/abcdefghijklmnopqrstuvwxyz{}`~/▒␉␌␍␊°±␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·/' | |
272 # However that would be very awkward as we need to only conv some input. | |
273 # The basic scheme that we do in the python script below is: | |
274 # 1. enable transliterate once ¡ char seen | |
275 # 2. disable once µ char seen (may be on diff line to ¡) | |
276 # 3. never transliterate between &; or <> chars | |
277 sed " | |
278 # change 'smacs' and 'rmacs' to a single char so that we can easily do | |
279 # negative matching, as sed does not support look behind expressions etc. | |
280 # Note we don't use ° like above as that's part of the alternate charset. | |
281 s#\x1b(0#¡#g; | |
282 s#µ#\&micro;#g; s#\x1b(B#µ#g | |
283 " | | |
284 ( | |
285 python -c " | |
286 # vim:fileencoding=utf8 | |
287 | |
288 import sys | |
289 import locale | |
290 encoding=locale.getpreferredencoding() | |
291 | |
292 old='abcdefghijklmnopqrstuvwxyz{}\`~' | |
293 new='▒␉␌␍␊°±␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·' | |
294 new=unicode(new, 'utf-8') | |
295 table=range(128) | |
296 for o,n in zip(old, new): table[ord(o)]=n | |
297 | |
298 (STANDARD, ALTERNATIVE, HTML_TAG, HTML_ENTITY) = (0, 1, 2, 3) | |
299 | |
300 state = STANDARD | |
301 last_mode = STANDARD | |
302 for c in unicode(sys.stdin.read(), encoding): | |
303 if state == HTML_TAG: | |
304 if c == '>': | |
305 state = last_mode | |
306 elif state == HTML_ENTITY: | |
307 if c == ';': | |
308 state = last_mode | |
309 else: | |
310 if c == '<': | |
311 state = HTML_TAG | |
312 elif c == '&': | |
313 state = HTML_ENTITY | |
314 elif c == u'¡' and state == STANDARD: | |
315 state = ALTERNATIVE | |
316 last_mode = ALTERNATIVE | |
317 continue | |
318 elif c == u'µ' and state == ALTERNATIVE: | |
319 state = STANDARD | |
320 last_mode = STANDARD | |
321 continue | |
322 elif state == ALTERNATIVE: | |
323 c = c.translate(table) | |
324 sys.stdout.write(c.encode(encoding)) | |
325 " 2>/dev/null || | |
326 sed 's/[¡µ]//g' # just strip alternative flag chars | |
327 ) | |
328 | |
329 echo "</pre> | |
330 </body> | |
331 </html>" |