| yiyu.jgl@gmail.com 2007-08-21, 9:57 pm |
| This is an awk script that converts plain text into an HTML file. It uses
the Markdown syntax ( http://daringfireball.net/projects/markdown/syntax
), though it is not 100% compatible. I would be very happy if you
could test it (especially with awk versions other than gawk, mawk
and plan9 awk).
I think it can be done in a better way (look at the links section).
Any suggestions?
Thanks for your time and greetings,
- yiyu || JGL
txt2html.awk :
#txt2html.awk
#© Jesus Galan (yiyus) 2006
#<yiyuDOTjglATgmailDOTcom>
#Usage: awk -f txt2html.awk file.txt > file.html
# Initialise the parser state before the first input line is read.
#   env  - name of the construct that is currently open; "none" means
#          nothing is open
#   text - text collected so far for the paragraph or header being built
BEGIN {
    env = "none";
    text = "";
}
# images
# A line that starts with ![alt text](address) is emitted at once as an
# <img> wrapped in its own paragraph. The title attribute is always empty.
# Any text collected in `text` so far is discarded, and no later rule sees
# this line.
/^!\[.+\] *\(.+\)/ {
    # a[1] holds "![alt text", a[2] holds "address)" plus whatever follows.
    split($0, a, /\] *\(/);
    split(a[1], b, /\[/);
    imgtext = b[2];
    # Everything up to the first ")" is the image address.
    split(a[2], b, /\)/);
    imgaddr = b[1];
    print "<p><img src=\"" imgaddr "\" alt=\"" imgtext "\" title=\"\" /></p>\n";
    text = "";
    next;
}
# links
# Rewrite every inline link [text](address) on the current line as
# <a href="address">text</a>, editing $0 in place so that the rules
# further down see the converted line.
#
# Each pass converts the first link only: the line is split on "](" and
# the pieces that belong to later links are glued back unchanged. The
# loop repeats while the split found more than one separator (na > 2).
# A scratch variable is used for the rebuilt line, so the pending
# paragraph text held in `text` is left untouched.
/\] *\(/ {
    do {
        na = split($0, a, /\] *\(/);
        # a[1] = "prefix[link text"
        split(a[1], b, "[");
        linktext = b[2];
        # a[2] = "address)rest of line up to the next separator"
        nc = split(a[2], c, ")");
        linkaddr = c[1];
        line = b[1] "<a href=\"" linkaddr "\">" linktext "</a>" c[2];
        # Restore any further ")" that were part of the trailing text.
        for (i = 3; i <= nc; i++)
            line = line ")" c[i];
        # Restore the remaining, not yet converted links.
        for (i = 3; i <= na; i++)
            line = line "](" a[i];
        $0 = line;
    } while (na > 2);
}
# code
# Every backtick toggles inline code. When no code span is open the
# backtick becomes <code> and the current environment is saved in pcenv;
# the next backtick becomes </code> and restores that environment.
# Because the state lives in `env`, a span may open on one line and
# close on a later one.
/`/ {
    while ($0 ~ /`/) {
        if (env == "code") {
            sub(/`/, "</code>");
            env = pcenv;
        }
        else {
            sub(/`/, "<code>");
            pcenv = env;
            env = "code";
        }
    }
}
# emph
# Every "**" toggles strong emphasis. When no span is open the marker
# becomes <strong> and the current environment is saved in peenv; the
# next marker becomes </strong> and restores that environment.
# Both branches must consume the "**" they replace, otherwise the loop
# condition would never become false.
/\*\*/ {
    while ($0 ~ /\*\*/) {
        if (env == "strong") {
            sub(/\*\*/, "</strong>");
            env = peenv;
        }
        else {
            sub(/\*\*/, "<strong>");
            peenv = env;
            env = "strong";
        }
    }
}
# setext-style headers (plus h3 with underscores)
# A line made only of "=", "-" or "_" turns the text collected from the
# preceding line(s) into an <h1>, <h2> or <h3> respectively. The
# collected text is then cleared and the underline itself is dropped.
/^=+$/ {
    print "<h1>" text "</h1>\n";
    text = "";
    next;
}
/^-+$/ {
    print "<h2>" text "</h2>\n";
    text = "";
    next;
}
/^_+$/ {
    print "<h3>" text "</h3>\n";
    text = "";
    next;
}
# atx-style headers
# A line starting with one or more "#" becomes <hN>, where N is the
# number of leading "#" characters, capped at 6. Whitespace after the
# marker and an optional run of closing "#" are stripped from the title.
# The header is printed immediately; no later rule sees this line.
/^#/ {
    match($0, /^#+/);
    n = RLENGTH;
    title = substr($0, n + 1);
    sub(/^[ \t]+/, "", title);
    sub(/[ \t]*#*[ \t]*$/, "", title);
    # HTML only defines h1 through h6.
    if (n > 6)
        n = 6;
    print "<h" n ">" title "</h" n ">\n";
    next;
}
# unordered lists
# A line starting with "*", "+" or "-" followed by a space or tab is a
# list item. The <ul> is opened on the first item seen while no other
# environment is open. Any text collected in `text` is discarded.
# "-" is placed last in the bracket expression so that it is taken
# literally rather than as a range operator.
/^[*+-][ \t]/ {
    if (env == "none") {
        env = "ul";
        print "<ul>";
    }
    # Skip the marker and all the whitespace that follows it.
    match($0, /^[*+-][ \t]+/);
    print "<li>" substr($0, RLENGTH + 1) "</li>";
    text = "";
    next;
}
# ordered lists
# A line starting with a number, a literal "." and a space or tab is a
# list item. The <ol> is opened on the first item seen while no other
# environment is open. Lines that merely begin with a digit are left for
# the later rules.
/^[0-9]+\.[ \t]/ {
    if (env == "none") {
        env = "ol";
        print "<ol>";
    }
    # Skip the whole marker, however many digits it has.
    match($0, /^[0-9]+\.[ \t]+/);
    print "<li>" substr($0, RLENGTH + 1) "</li>";
    next;
}
# paragraph
# A blank line (empty, or only spaces and tabs) ends whatever is being
# built. If an environment is open, pending text is printed as it is,
# the environment's closing tag is emitted and the state returns to
# "none". Otherwise pending text is printed as a <p> paragraph.
/^[ \t]*$/ {
    if (env != "none") {
        if (text != "")
            print text;
        print "</" env ">\n";
        env = "none";
    }
    else if (text != "")
        print "<p>" text "</p>\n";
    text = "";
    next;
}
# default
# Any line that reaches this point is ordinary text: append it to the
# pending paragraph. Successive lines are joined with a newline so that
# the last word of one line does not fuse with the first word of the
# next. The leading "" keeps `text` a string even for the first line, so
# a paragraph consisting of "0" still tests as non-empty.
{
    text = (text == "" ? "" : text "\n") $0;
}
# End of input: flush whatever is still pending, exactly as a blank line
# would. If an environment is open, pending text is printed as it is
# and the environment's closing tag is emitted; otherwise pending text
# is printed as a <p> paragraph.
END {
    if (env != "none") {
        if (text != "")
            print text;
        print "</" env ">\n";
        env = "none";
    }
    else if (text != "")
        print "<p>" text "</p>\n";
    text = "";
}
|