diff options
author | Erik Schnetter <schnetter@cct.lsu.edu> | 2009-09-03 16:19:15 -0500 |
---|---|---|
committer | Barry Wardell <barry.wardell@gmail.com> | 2011-12-14 16:42:31 +0000 |
commit | 11c4d98017cbb86d08e15fd1b549180184b58a26 (patch) | |
tree | 2546a154c6f7bc0bec87de7316125ae7d1453569 /Carpet/CarpetWeb/publications/bib2xhtml | |
parent | f520477b1c14e02f1495cfa8d3e09f4e21ab34d0 (diff) |
Import Carpet
Ignore-this: 309b4dd613f4af2b84aa5d6743fdb6b3
Diffstat (limited to 'Carpet/CarpetWeb/publications/bib2xhtml')
-rw-r--r-- | Carpet/CarpetWeb/publications/bib2xhtml | 1254 |
1 files changed, 1254 insertions, 0 deletions
diff --git a/Carpet/CarpetWeb/publications/bib2xhtml b/Carpet/CarpetWeb/publications/bib2xhtml new file mode 100644 index 000000000..1b63bc196 --- /dev/null +++ b/Carpet/CarpetWeb/publications/bib2xhtml @@ -0,0 +1,1254 @@ +#!/opt/local/bin/perl -w +eval 'exec /opt/local/bin/perl -w -S $0 ${1+"$@"}' + if 0; + +$version = '$Id: bib2xhtml 2.26 2007/06/14 14:59:44 dds Exp $'; + +# +# Convert from bibtex to XHTML. +# +# (C) Copyright 1995, 1996 David Hull. +# (David Hull / hull@cs.uiuc.edu / http://www.uiuc.edu/ph/www/dlhull) +# +# (C) Copyright 2002-2007 Diomidis Spinellis +# http://www.spinellis.gr +# +# This program is free software. You can redistribute it and/or modify +# it under the terms of the GNU General Public License. See the +# files README and COPYING for details. +# +# This source code contains UTF-8 characters. You might want to use +# an appropriate editor, if you want to view/modify the LaTeX to Unicode +# substitution commands. +# + +use Getopt::Std; + +require 'ctime.pl'; + +eval {use PDF::API2}; +$have_pdf_api = 1 unless (defined $@ && $@ ne ''); + +# Label styles. +$label_styles{'plain'} = $LABEL_PLAIN = 1; +$label_styles{'ordered'} = $LABEL_ORDERED = 2; +$label_styles{'numbered'} = $LABEL_NUMBERED = 3; +$label_styles{'default'} = $LABEL_DEFAULT = 4; + +$list_start[$LABEL_PLAIN] = 'ul class="bib2xhtml"'; +$list_end[$LABEL_PLAIN] = "/ul"; +$list_start[$LABEL_ORDERED] = 'ol class="bib2xhtml"'; +$list_end[$LABEL_ORDERED] = "/ol"; +$list_start[$LABEL_NUMBERED] = 'dl compact="1" class="bib2xhtml"'; +$list_end[$LABEL_NUMBERED] = "/dl"; +$list_start[$LABEL_DEFAULT] = 'dl class="bib2xhtml"'; +$list_end[$LABEL_DEFAULT] = "/dl"; + +@tmpfiles = (); + +sub usage { + $program = $0; + $program =~ s+^.*/++; + print STDERR <<_EOF_; +usage: $program [-a] [-b bibtex-options] [-c] [-d delim] [-D mappings] + [-e extended-information] [-h heading] [-i] [-k] + [-m macro file] [-r] [-s style] [-t] [-u] [-v] + sourcefile [htmlfile] + + -a Write abstract to htmlfile. + -b bibtex-options + Options to pass to bibtex. + -c Sort chronologically, by year and then by author. + -d delim + Specify bibliography delimiter. + -D mappings + Specify file path to URL mappings. + -e extended-information + Specify the extended metadata information (page count, size, PDF icon) + that will be included in each citation. + -h heading + String to use instead of default title when creating a new htmlfile. + If updating an existing htmlfile, this option is ignored. + -i Use included citations + -k In labeled styles append to the label of each entry its bibtex key. + -m macro file + Specify an additional macro file. + -r Sort in reverse chronological order. + -s style + Control style of bibliography: + (empty, plain, alpha, named, unsort, or unsortlist). + -t Write timestamp to htmlfile. + -u Output a Unicode-coded document. + -v Report the version number. +_EOF_ + exit(1); +} + +# Return the command needed to open a (perhaps compressed) file, +# as well as the type of compression. +sub openCommand { + local($path) = @_; + local($cmd); + local($cmp); + +command: { + ($path =~ m/\.Z$/ && + ($cmd = "uncompress -c $path |", $cmp = "Compressed", last command)); + ($path =~ m/\.g?z$/ && + ($cmd = "gzip -d -c $path |", $cmp = "Gzipped", last command)); + ($cmd = "<$path", $cmp = "", last command); + } + + ($cmd, $cmp); +} + +@paperTypes = ("PostScript", "PDF", "DVI", "DOI"); + +sub PostScriptPageCount { + local($cmd) = @_; + local($pageCount); + + #print "in PostScriptPageCount $cmd\n"; + + open(FILE, $cmd) || (warn "error opening $cmd: $!\n", return undef); + + local($_); + local($/) = "\n"; + +line: + while (<FILE>) { + last line if m/^%%EndComments/; + if (m/^%%Pages:\s*(\d+)/) { + $pageCount = $1 if ($1 > 0); + last line; + } + } + close(FILE); + + $pageCount; +} + +sub PDFPageCount { + return undef unless ($have_pdf_api); + my($file) = @_; + $file =~ s/^\<//; + # print "in PDFPageCount $file\n"; + my($pdf); + eval {$pdf = PDF::API2->open($file)}; + return undef if (defined $@ && $@ ne ''); + return $pdf->pages; +} + +sub DVIPageCount { + local($cmd) = @_; + local($pageCount); + + #print "in DVIPageCount $cmd\n"; + + if ($cmd =~ m/^</) { + # Simple file. + $cmd = "dviselect : $cmd >/dev/null"; + } else { + # Compressed file. + $cmd .= "dviselect : >/dev/null"; + } + + # Look at dviselect's stderr. + open(DVISELECT, "-|") || (open(STDERR, ">&STDOUT"), exec $cmd); + + local($_); + local($/) = "\n"; +line: + while (<DVISELECT>) { + if (m/[Ww]rote (\d+) pages/) { + $pageCount = $1; + last line; + } + } + close(DVISELECT); + + $pageCount; +} + +# Make an intelligent link to a paper file. +sub doPaperLinks { + local($file); + local($url); + local($paper, $ppaper); + local($cstr, $pstr, $sstr); + +papertype: + foreach $paper (@paperTypes) { + + $ppaper = $paper unless defined($notype); # Paper type + $sstr = ""; # size string + $pstr = ""; # pages string + $cstr = ""; # compression type string + + if (($url) = m/\<\!\-\- $paper:[\s\n]+(\S+)[\s\n]+\-\-\>/) { + + # If $url looks like a file (doesn't begin with http://, ftp://, + # etc.), get more info. + if ($paper ne 'DOI' && $url !~ m/^[^\:\/]+\:\//) { + local($file) = $url; + local($path); + local($dir); + foreach $dir (@filedir) { + $path = join('/', $dir, $file); + if ( -f $path) { + if (defined $dirmap{$dir}) { + $url = join('/', $dirmap{$dir}, $file); + } else { + $url = $path; + } + last; + } + } + + if (! -f $path) { + print STDERR "couldn't find $file\n"; + next papertype; + } + + local($opencmd); + local($size); + local($pageCountRoutine); + local($pageCount) = 0; + + ($opencmd, $cstr) = &openCommand($path); + + # Get size. + $size = -s _; + if (! defined $nosize) { + if ($size < 1000) { + $sstr = ", $size bytes"; + } elsif ($size < 1000000) { + $sstr = sprintf ", %.1f kbyte", $size / 1000; + } elsif ($size < 1000000000) { + $sstr = sprintf ", %.1f Mbyte", $size / 1000000; + } else { + $sstr = sprintf ", %.1f Gbyte", $size / 1000000000; + } + } + + # Get page count. + $pageCountRoutine = $paper . "PageCount"; + $pageCount = &$pageCountRoutine($opencmd); + $pstr = ", $pageCount pages" if (defined $pageCount && !defined $nopages); + + # Get compression type. + $cstr = "$cstr " if ($cstr ne ""); + undef $cstr if (defined $nocompression); + } elsif ($paper eq 'DOI' && + (($url =~ m/^doi:(.*)/i) || + ($url =~ m/^http:\/\/[\w.]+\/(.*)/i) || + ($url =~ m/^(.*)$/))) { + # Convert the DOI URL into an HTTP link + $url = "http://dx.doi.org/$1"; + $ppaper = "doi:$1" unless (defined($nodoi)); + } + + $ppaper = $typeicon{$paper} if (defined $typeicon{$paper}); + + #print STDERR "found $paper $file$pstr$sstr\n"; + + if ($nobrackets) { + s/\<\!\-\- $paper:[\s\n]+\S+[\s\n]+\-\-\>/<a href=\"$url\">${cstr}$ppaper<\/a>$pstr$sstr/; + } else { + s/\<\!\-\- $paper:[\s\n]+\S+[\s\n]+\-\-\>/(<a href=\"$url\">${cstr}$ppaper<\/a>$pstr$sstr)/; + } + } + } +} + +# html_encode(string) +# Protect character entities in string. +sub html_encode { + local($_) = @_; + + s/&/&/g; # Must be first. + s/</</g; + s/>/>/g; + s/"/"/g; + + $_; +} + +# Convert $_ into an HTML entity representation +sub html_ent { + # Accents. + s/\\i\b/i/g; # dotless i. + s/\\\'(\001\d+)\{([AEIOUaeiou])\1\}/&$2acute;/g; # acute accent \'{x} + s/\\\'([AEIOUaeiou])/&$1acute;/g; # acute accent \'x + s/\\\`(\001\d+)\{([AEIOUaeiou])\1\}/&$2grave;/g; # grave accent \`{x} + s/\\\`([AEIOUaeiou])/&$1grave;/g; # grave accent \`x + s/\\\"(\001\d+)\{([AEIOUaeiouy])\1\}/&$2uml;/g; # umlaut \"{x} + s/\\\"([AEIOUaeiouy])/&$1uml;/g; # umlaut \"x + s/\\\~(\001\d+)\{([ANOano])\1\}/&$2tilde;/g; # tilde \~{x} + s/\\\~([ANOano])/&$1tilde;/g; # tilde \~x + s/\\\^(\001\d+)\{([AEIOUaeiou])\1\}/&$2circ;/g; # circumflex \^{x} + s/\\\^([AEIOUaeiou])/&$1circ;/g; # circumflex \^x + s/\\c(\001\d+)\{([Cc])\1\}/&$2cedil;/g; # cedilla \c{x} + # The following accents have no HTML equivalent. + # (This list is still not complete.) + s/\\u(\001\d+)\{(.)\1\}/$2/g; # breve accent \u{x} + s/\\v(\001\d+)\{(.)\1\}/$2/g; # hacek accent \v{x} + s/\\([lL])\b/$1/g; # slashed l + s/\\\=(\001\d+)\{(.)\1\}/$2/g; # macron \={x} + s/\\\=(.)/$1/g; # macron accent \=x + s/\\\.(\001\d+)\{(.)\1\}/$2/g; # dot \.{x} + s/\\\.(.)/$1/g; # dot accent \.x + + # Other special characters. + s/\\([Oo])\b\s*/&$1slash;/g; # \[Oo] -> &[Oo]slash; + s/\\AA\b\s*/Å/g; # \AA -> Å + s/\\aa\b\s*/å/g; # \aa -> å + s/\\AE\b\s*/Æ/g; # \AE -> Æ + s/\\ae\b\s*/æ/g; # \ae -> æ + s/\\ss\b\s*/ß/g; # \ss -> ß + s/\\S\b\s*/§/g; # \S -> § + s/\\P\b\s*/¶/g; # \P -> ¶ + s/\\pounds\b\s*/£/g; # \pounds -> £ + s/\?\`/¿/g; # ?` -> ¿ + s/\!\`/¡/g; # !` -> ¡ + + # Other special characters. + # Try to be careful to not change the dashes in HTML comments + # (<!-- comment -->) to –s. + s/\-\-\-/—/g; # --- -> — + s/([^\!])\-\-([^\>])/$1–$2/g; # -- -> – + #s/\-\-\-/\227/g; # --- -> — + #s/([^\!])\-\-([^\>])/$1\226$2/g; # -- -> – + + # Upper and lower case greek + s/\\([aA]lpha)\b/&$1;/g; + s/\\([bB]eta)\b/&$1;/g; + s/\\([gG]amma)\b/&$1;/g; + s/\\([dD]elta)\b/&$1;/g; + s/\\varepsilon\b/ε/g; + s/\\([eE]psilon)\b/&$1;/g; + s/\\([zZ]eta)\b/&$1;/g; + s/\\([eE]ta)\b/&$1;/g; + s/\\([tT]heta)\b/&$1;/g; + s/\\vartheta\b/θ/g; + s/\\([iI]ota)\b/&$1;/g; + s/\\([kK]appa)\b/&$1;/g; + s/\\([lL]ambda)\b/&$1;/g; + s/\\([mM]u)\b/&$1;/g; + s/\\([nN]u)\b/&$1;/g; + s/\\([xX]i)\b/&$1;/g; + s/\\([oO]micron)\b/&$1;/g; + s/\\([pP]i)\b/&$1;/g; + s/\\varpi\b/π/g; + s/\\([rR]ho)\b/&$1;/g; + s/\\varrho\b/ρ/g; + s/\\([sS]igma)\b/&$1;/g; + s/\\varsigma\b/ς/g; + s/\\([tT]au)\b/&$1;/g; + s/\\([uU]psilon)\b/&$1;/g; + s/\\([pP]hi)\b/&$1;/g; + s/\\varphi\b/φ/g; + s/\\([cC]hi)\b/&$1;/g; + s/\\([pP]si)\b/&$1;/g; + s/\\([oO]mega)\b/&$1;/g; +} + +# Convert $_ into a UTF-8 character +sub utf_ent { + # Accents. + s/\\i\b/ı/g; # dotless i. + + # acute accent \'{x} + s/\\\'(\001\d+)\{A\1\}/Á/g; + s/\\\'(\001\d+)\{C\1\}/Ć/g; + s/\\\'(\001\d+)\{E\1\}/É/g; + s/\\\'(\001\d+)\{I\1\}/Í/g; + s/\\\'(\001\d+)\{L\1\}/Ĺ/g; + s/\\\'(\001\d+)\{N\1\}/Ń/g; + s/\\\'(\001\d+)\{O\1\}/Ó/g; + s/\\\'(\001\d+)\{R\1\}/Ŕ/g; + s/\\\'(\001\d+)\{S\1\}/Ś/g; + s/\\\'(\001\d+)\{U\1\}/Ú/g; + s/\\\'(\001\d+)\{Y\1\}/Ý/g; + s/\\\'(\001\d+)\{Z\1\}/Ź/g; + s/\\\'(\001\d+)\{a\1\}/á/g; + s/\\\'(\001\d+)\{c\1\}/ć/g; + s/\\\'(\001\d+)\{e\1\}/é/g; + s/\\\'(\001\d+)\{ı\1\}/í/g; + s/\\\'(\001\d+)\{i\1\}/í/g; + s/\\\'(\001\d+)\{l\1\}/ĺ/g; + s/\\\'(\001\d+)\{n\1\}/ń/g; + s/\\\'(\001\d+)\{o\1\}/ó/g; + s/\\\'(\001\d+)\{r\1\}/ŕ/g; + s/\\\'(\001\d+)\{s\1\}/ś/g; + s/\\\'(\001\d+)\{u\1\}/ú/g; + s/\\\'(\001\d+)\{y\1\}/ý/g; + s/\\\'(\001\d+)\{z\1\}/ź/g; + + # acute accent \'x + s/\\\'A/Á/g; + s/\\\'C/Ć/g; + s/\\\'E/É/g; + s/\\\'I/Í/g; + s/\\\'L/Ĺ/g; + s/\\\'N/Ń/g; + s/\\\'O/Ó/g; + s/\\\'R/Ŕ/g; + s/\\\'S/Ś/g; + s/\\\'U/Ù/g; + s/\\\'Y/Ý/g; + s/\\\'Z/Ź/g; + s/\\\'a/á/g; + s/\\\'c/ć/g; + s/\\\'e/é/g; + s/\\\'i/í/g; + s/\\\'ı/í/g; + s/\\\'l/ĺ/g; + s/\\\'n/ń/g; + s/\\\'o/ó/g; + s/\\\'r/ŕ/g; + s/\\\'s/ś/g; + s/\\\'u/ú/g; + s/\\\'y/ý/g; + s/\\\'z/ź/g; + + # grave accent \`{x} + s/\\\`(\001\d+)\{A\1\}/À/g; + s/\\\`(\001\d+)\{E\1\}/È/g; + s/\\\`(\001\d+)\{I\1\}/Ì/g; + s/\\\`(\001\d+)\{O\1\}/Ò/g; + s/\\\`(\001\d+)\{U\1\}/Ù/g; + s/\\\`(\001\d+)\{a\1\}/à/g; + s/\\\`(\001\d+)\{e\1\}/è/g; + s/\\\`(\001\d+)\{i\1\}/ì/g; + s/\\\`(\001\d+)\{o\1\}/ò/g; + s/\\\`(\001\d+)\{u\1\}/ù/g; + + # grave accent \`x + s/\\\`A/À/g; + s/\\\`E/È/g; + s/\\\`I/Ì/g; + s/\\\`O/Ò/g; + s/\\\`U/Ù/g; + s/\\\`a/à/g; + s/\\\`e/è/g; + s/\\\`i/ì/g; + s/\\\`o/ò/g; + s/\\\`u/ù/g; + + # umlaut \"{x} + s/\\\"(\001\d+)\{A\1\}/Ä/g; + s/\\\"(\001\d+)\{E\1\}/Ë/g; + s/\\\"(\001\d+)\{I\1\}/Ï/g; + s/\\\"(\001\d+)\{O\1\}/Ö/g; + s/\\\"(\001\d+)\{U\1\}/Ü/g; + s/\\\"(\001\d+)\{Y\1\}/Ÿ/g; + s/\\\"(\001\d+)\{a\1\}/ä/g; + s/\\\"(\001\d+)\{e\1\}/ë/g; + s/\\\"(\001\d+)\{i\1\}/ï/g; + s/\\\"(\001\d+)\{o\1\}/ö/g; + s/\\\"(\001\d+)\{u\1\}/ü/g; + s/\\\"(\001\d+)\{y\1\}/ÿ/g; + + # umlaut \"x + s/\\\"A/Ä/g; + s/\\\"E/Ë/g; + s/\\\"I/Ï/g; + s/\\\"O/Ö/g; + s/\\\"U/Ü/g; + s/\\\"Y/Ÿ/g; + s/\\\"a/ä/g; + s/\\\"e/ë/g; + s/\\\"i/ï/g; + s/\\\"o/ö/g; + s/\\\"u/ü/g; + s/\\\"y/ÿ/g; + + # tilde \~{x} + s/\\\~(\001\d+)\{A\1\}/Ã/g; + s/\\\~(\001\d+)\{N\1\}/Ñ/g; + s/\\\~(\001\d+)\{O\1\}/Õ/g; + s/\\\~(\001\d+)\{a\1\}/ã/g; + s/\\\~(\001\d+)\{n\1\}/ñ/g; + s/\\\~(\001\d+)\{o\1\}/õ/g; + + # tilde \~x + s/\\\~A/Ã/g; + s/\\\~N/Ñ/g; + s/\\\~O/Õ/g; + s/\\\~a/ã/g; + s/\\\~n/ñ/g; + s/\\\~O/õ/g; + + # circumflex \^{x} + s/\\\^(\001\d+)\{A\1\}/Â/g; + s/\\\^(\001\d+)\{E\1\}/Ê/g; + s/\\\^(\001\d+)\{G\1\}/Ĝ/g; + s/\\\^(\001\d+)\{H\1\}/Ĥ/g; + s/\\\^(\001\d+)\{I\1\}/Î/g; + s/\\\^(\001\d+)\{J\1\}/Ĵ/g; + s/\\\^(\001\d+)\{O\1\}/Ô/g; + s/\\\^(\001\d+)\{U\1\}/Û/g; + s/\\\^(\001\d+)\{W\1\}/Ŵ/g; + s/\\\^(\001\d+)\{Y\1\}/Ŷ/g; + s/\\\^(\001\d+)\{a\1\}/â/g; + s/\\\^(\001\d+)\{e\1\}/ê/g; + s/\\\^(\001\d+)\{g\1\}/ĝ/g; + s/\\\^(\001\d+)\{h\1\}/ĥ/g; + s/\\\^(\001\d+)\{i\1\}/î/g; + s/\\\^(\001\d+)\{j\1\}/ĵ/g; + s/\\\^(\001\d+)\{o\1\}/ô/g; + s/\\\^(\001\d+)\{u\1\}/û/g; + s/\\\^(\001\d+)\{w\1\}/ŵ/g; + s/\\\^(\001\d+)\{y\1\}/ŷ/g; + + # circumflex \^x + s/\\\^A/Â/g; + s/\\\^E/Ê/g; + s/\\\^G/Ĝ/g; + s/\\\^H/Ĥ/g; + s/\\\^I/Î/g; + s/\\\^J/Ĵ/g; + s/\\\^O/Ô/g; + s/\\\^U/Û/g; + s/\\\^W/Ŵ/g; + s/\\\^Y/Ŷ/g; + s/\\\^a/â/g; + s/\\\^e/ê/g; + s/\\\^g/ĝ/g; + s/\\\^h/ĥ/g; + s/\\\^i/î/g; + s/\\\^J/ĵ/g; + s/\\\^o/ô/g; + s/\\\^u/û/g; + s/\\\^w/ŵ/g; + s/\\\^y/ŷ/g; + + # cedilla \c{x} + s/\\c(\001\d+)\{C\1\}/Ç/g; + s/\\c(\001\d+)\{c\1\}/ç/g; + s/\\c(\001\d+)\{K\1\}/Ķ/g; + s/\\c(\001\d+)\{k\1\}/ķ/g; + s/\\c(\001\d+)\{L\1\}/Ļ/g; + s/\\c(\001\d+)\{l\1\}/ļ/g; + s/\\c(\001\d+)\{N\1\}/Ņ/g; + s/\\c(\001\d+)\{n\1\}/ņ/g; + s/\\c(\001\d+)\{N\1\}/Ŗ/g; + s/\\c(\001\d+)\{n\1\}/ŗ/g; + + # double acute accent \H{x} + s/\\H(\001\d+)\{O\1\}/Ő/g; + s/\\H(\001\d+)\{U\1\}/Ű/g; + s/\\H(\001\d+)\{o\1\}/ő/g; + s/\\H(\001\d+)\{u\1\}/ű/g; + + # breve accent \u{x} + s/\\u(\001\d+)\{A\1\}/Ă/g; + s/\\u(\001\d+)\{E\1\}/Ĕ/g; + s/\\u(\001\d+)\{G\1\}/Ğ/g; + s/\\u(\001\d+)\{I\1\}/Ĭ/g; + s/\\u(\001\d+)\{O\1\}/Ŏ/g; + s/\\u(\001\d+)\{U\1\}/Ŭ/g; + s/\\u(\001\d+)\{a\1\}/ă/g; + s/\\u(\001\d+)\{e\1\}/ĕ/g; + s/\\u(\001\d+)\{g\1\}/ğ/g; + s/\\u(\001\d+)\{i\1\}/ĭ/g; + s/\\u(\001\d+)\{o\1\}/ŏ/g; + s/\\u(\001\d+)\{u\1\}/ŭ/g; + + # hacek/caron? accent \v{x} + s/\\v(\001\d+)\{C\1\}/Č/g; + s/\\v(\001\d+)\{D\1\}/Ď/g; + s/\\v(\001\d+)\{E\1\}/Ě/g; + s/\\v(\001\d+)\{L\1\}/Ľ/g; + s/\\v(\001\d+)\{N\1\}/Ň/g; + s/\\v(\001\d+)\{R\1\}/Ř/g; + s/\\v(\001\d+)\{S\1\}/Š/g; + s/\\v(\001\d+)\{T\1\}/Ť/g; + s/\\v(\001\d+)\{Z\1\}/Ž/g; + s/\\v(\001\d+)\{c\1\}/č/g; + s/\\v(\001\d+)\{d\1\}/ď/g; + s/\\v(\001\d+)\{e\1\}/ě/g; + s/\\v(\001\d+)\{l\1\}/ľ/g; + s/\\v(\001\d+)\{n\1\}/ň/g; + s/\\v(\001\d+)\{r\1\}/ř/g; + s/\\v(\001\d+)\{s\1\}/š/g; + s/\\v(\001\d+)\{t\1\}/ť/g; + s/\\v(\001\d+)\{z\1\}/ž/g; + + # macron \={x} + s/\\\=(\001\d+)\{A\1\}/Ā/g; + s/\\\=(\001\d+)\{E\1\}/Ē/g; + s/\\\=(\001\d+)\{O\1\}/Ō/g; + s/\\\=(\001\d+)\{U\1\}/Ū/g; + s/\\\=(\001\d+)\{a\1\}/ā/g; + s/\\\=(\001\d+)\{e\1\}/ē/g; + s/\\\=(\001\d+)\{o\1\}/ō/g; + s/\\\=(\001\d+)\{u\1\}/ū/g; + + # macron \=x + s/\\\=A/Ā/g; + s/\\\=E/Ē/g; + s/\\\=O/Ō/g; + s/\\\=U/Ū/g; + s/\\\=a/ā/g; + s/\\\=e/ē/g; + s/\\\=o/ō/g; + s/\\\=u/ū/g; + + # dot \.{x} + s/\\\.(\001\d+)\{G\1\}/Ġ/g; + s/\\\.(\001\d+)\{L\1\}/Ŀ/g; + s/\\\.(\001\d+)\{Z\1\}/Ż/g; + s/\\\.(\001\d+)\{g\1\}/ġ/g; + s/\\\.(\001\d+)\{l\1\}/ŀ/g; + s/\\\.(\001\d+)\{z\1\}/ż/g; + + # dot \.x + s/\\\.G/Ġ/g; + s/\\\.L/Ŀ/g; + s/\\\.Z/Ż/g; + s/\\\.g/ġ/g; + s/\\\.l/ŀ/g; + s/\\\.z/ż/g; + + + # slashed l + s/\\l\b/ł/g; + s/\\L\b/Ł/g; + + # krouzek \accent23x or \accent'27 + s/\{\\accent2[37]\s*u\}/ů/g; + s/\\accent2[37]\s*u/ů/g; + + # Other special characters. + s/\\O\b\s*/Ø/g; + s/\\o\b\s*/ø/g; + s/\\AA\b\s*/Å/g; + s/\\aa\b\s*/å/g; + s/\\AE\b\s*/Æ/g; + s/\\ae\b\s*/æ/g; + s/\\OE\b\s*/Œ/g; + s/\\oe\b\s*/œ/g; + s/\\ss\b\s*/ß/g; + s/\\S\b\s*/§/g; + s/\\P\b\s*/¶/g; + s/\\pounds\b\s*/£/g; + s/\?\`/¿/g; + s/\!\`/¡/g; + + # en and em dashes + # Try to be careful to not change the dashes in HTML comments + # (<!-- comment -->) to –s. + s/\-\-\-/—/g; # --- -> — + s/([^\!])\-\-([^\>])/$1–$2/g; # -- -> – + + # Upper case Greek + s/\\Alpha\b/Α/g; + s/\\Beta\b/Β/g; + s/\\Gamma\b/Γ/g; + s/\\Delta\b/Δ/g; + s/\\Epsilon\b/Ε/g; + s/\\Zeta\b/Ζ/g; + s/\\Eta\b/Η/g; + s/\\Theta\b/Θ/g; + s/\\Iota\b/Ι/g; + s/\\Kappa\b/Κ/g; + s/\\Lambda\b/Λ/g; + s/\\Mu\b/Μ/g; + s/\\Nu\b/Ν/g; + s/\\Xi\b/Ξ/g; + s/\\Omicron\b/Ο/g; + s/\\Pi\b/Π/g; + s/\\Rho\b/Ρ/g; + s/\\Sigma\b/Σ/g; + s/\\Tau\b/Τ/g; + s/\\Upsilon\b/Υ/g; + s/\\Phi\b/Φ/g; + s/\\Chi\b/Χ/g; + s/\\Psi\b/Ψ/g; + s/\\Omega\b/Ω/g; + + # Lower case Greek + s/\\alpha\b/α/g; + s/\\beta\b/β/g; + s/\\gamma\b/γ/g; + s/\\delta\b/δ/g; + s/\\varepsilon\b/ε/g; + s/\\epsilon\b/ε/g; + s/\\zeta\b/ζ/g; + s/\\eta\b/η/g; + s/\\theta\b/θ/g; + s/\\vartheta\b/θ/g; + s/\\iota\b/ι/g; + s/\\kappa\b/κ/g; + s/\\lambda\b/λ/g; + s/\\mu\b/μ/g; + s/\\nu\b/ν/g; + s/\\xi\b/ξ/g; + s/\\omicron\b/ο/g; + s/\\pi\b/π/g; + s/\\varpi\b/π/g; + s/\\rho\b/ρ/g; + s/\\varrho\b/ρ/g; + s/\\sigma\b/σ/g; + s/\\varsigma\b/ς/g; + s/\\tau\b/τ/g; + s/\\upsilon\b/υ/g; + s/\\phi\b/φ/g; + s/\\varphi\b/φ/g; + s/\\chi\b/χ/g; + s/\\psi\b/ψ/g; + s/\\omega\b/ω/g; +} + + +# Prevent "identifier used only once" warnings. +$opt_a = $opt_b = $opt_c = $opt_D = $opt_d = $opt_e = $opt_h = $opt_m = $opt_r = undef; +$opt_i = $opt_k = $opt_s = $opt_t = $opt_v = $opt_u = undef; + +$macrofile = ''; + +$command_line = &html_encode(join(' ', $0, @ARGV)); + +getopts("ab:cd:D:e:h:ikm:rs:tuv") || &usage; + +if (defined($opt_v)) { + print "$version\n"; + exit 0; +} + +&usage if (($#ARGV < 0) || ($#ARGV > 1)); + +if ($ARGV[0] =~ m/\.bib$/) { + $bibfile = $ARGV[0]; + $bibfile =~ s/\.bib$//; + $delimiter = $bibfile; +} elsif ($ARGV[0] =~ m/\.aux$/) { + if ($opt_i) { + print STDERR "source file must be a bibliography (.bib) file when run with the -i switch \n"; + &usage; + } + $citefile = $ARGV[0]; + $citefile =~ s/\.aux$//; + $delimiter = $citefile; +} else { + print STDERR "Unknown file extension on $ARGV[0]\n"; + &usage; +} + +$htmlfile = $ARGV[1] if ($#ARGV == 1); + +$delimiter = $opt_d if (defined($opt_d)); +$title = (defined($opt_h) ? $opt_h : "Bibliography generated from $ARGV[0]"); +$macrofile = "$opt_m," if (defined($opt_m)); + +$opt_s = 'empty' if (! defined $opt_s); +style: { + ($opt_s eq 'empty') && + ($bstfile = "html-n", + $label_style = $LABEL_PLAIN, + last style); + ($opt_s eq 'plain') && + ($bstfile = "html-n", + $label_style = $LABEL_NUMBERED, + last style); + ($opt_s eq 'alpha') && + ($bstfile = "html-a", + $label_style = $LABEL_DEFAULT, + last style); + ($opt_s eq 'named') && + ($bstfile = "html-n", + $label_style = $LABEL_DEFAULT, + last style); + ($opt_s eq 'unsort') && + ($bstfile = "html-u", + $label_style = $LABEL_NUMBERED, + last style); + ($opt_s eq 'unsortlist') && + ($bstfile = "html-u", + #$label_style = $LABEL_PLAIN, + $label_style = $LABEL_ORDERED, + last style); + ($opt_s =~ s/\.bst$//) && + ($bstfile = $opt_s, + # label-style will be defined in .bst file. + last style); + print STDERR "Unknown style: $_\n"; + &usage; +} + +if ($bstfile eq "html-u" && ($opt_r || $opt_c)) { + print STDERR "Unsorted styles do not support a sort specification\n"; + exit(1); +} + +$bstfile .= "c" if (defined ($opt_c)); +$bstfile .= "r" if (defined ($opt_r)); +$bstfile .= "a" if (defined ($opt_a)); + +# Extended information is specified as a sequence of +# (PostScript|PDF|DVI|DOI):icon or +# (notype|nosize|nopages|nocompression|nodoi|nobrackets) +# Set correspondingly the associative array %typeicon and the no* variables +# Example usage: +# perl bib2xhtml -e 'nosize,nopages,PDF:<img src="pdficon_small.gif" alt="PDF" border="0" />' example.bib >example.html +undef $nopages; +undef $nosize; +undef $nodoi; +undef $notype; +undef $nocompression; +undef $nobrackets; + +if (defined($opt_e)) { + my(@opts) = split(/,/, $opt_e); + for $opt (@opts) { + if ($opt =~ m/(PostScript|PDF|DVI|DOI):(.*)/) { + $typeicon{$1} = $2; + } elsif ($opt =~ m/(notype|nosize|nopages|nocompression|nodoi|nobrackets)/) { + eval "\$$1 = 1"; + } else { + print STDERR qq{Invalid extended information specification: $opt +This can be a comma-separated list of the following specifications: +PostScript|PDF|DVI|DOI:new-text (e.g. PDF file icon) +notype|nosize|nopages|nocompression|nodoi|nobrackets +}; + exit 1; + } + } +} + +# PostScript and PDF files are assumed to be in same directory +# as the target HTML file. +if (defined($htmlfile) && ($htmlfile =~ m+(^.*)/+)) { + push @filedir, $1; +} else { + push @filedir, "." +} +if (defined $opt_D) { + local($dir, $url); + foreach $dir (split(/\,/, $opt_D)) { + $url = $dir; + if ($dir =~ s/\@(.*)$//) { $url = $1; } + push @filedir, $dir; + $dirmap{$dir} = $url; + } +} + +umask(077); + +open(HTMLFILE, (defined($htmlfile) ? ">$htmlfile$$" : ">&STDOUT")); +if (defined($htmlfile) && open(OHTMLFILE, "$htmlfile")) { + $mode = (stat OHTMLFILE)[2] & 0xfff; + $updating = 1; +} else { + $mode = 0644; + $updating = 0; + + # An existing HTML file does not exist, so output some boilerplate. + if ($opt_u) { + $enc = 'UTF-8'; + } else { + $enc = 'US-ASCII'; + } + print HTMLFILE +qq{<?xml version="1.0" encoding="$enc"?> +<!DOCTYPE html + PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> +<title>$title</title> +<meta http-equiv="Content-type" content="text/html; charset=$enc" />} . q{ +<meta name="Generator" content="$Id: bib2xhtml 2.26 2007/06/14 14:59:44 dds Exp $" /> +</head> +<body> +} +} + +$beginstring = "<!-- BEGIN CITATIONS $delimiter -->"; +$endstring = "<!-- END CITATIONS $delimiter -->"; + +@citations = (); + +if ($opt_i && $updating) { + loop: + while (<OHTMLFILE>) { + print HTMLFILE; + last loop if m/^$beginstring$/; + } + loop: + while (<OHTMLFILE>) { + print HTMLFILE; + last loop if m/^$endstring$/; + push(@citations, $2) if m/^([^\\]*)?(.+\})(.*)?$/; + } + push(@citations, "\\bibdata{$bibfile}"); +} + +# Create an .aux file for bibtex to read. + +$auxfile = "bib$$"; +push(@tmpfiles, "$auxfile.aux"); + +open(AUXFILE, ">$auxfile" . ".aux"); + +print AUXFILE "\\relax\n\\bibstyle{$bstfile}\n"; + +if (defined($citefile)) { + $citefile .= ".aux"; + open(CITEFILE, "<$citefile") || die "error opening $citefile: $!\n"; + while (<CITEFILE>) { + print AUXFILE $_ if (m/^\\(citation|bibdata){/); + } + close(CITEFILE); +} elsif (@citations) { + foreach $citation (@citations) { + print AUXFILE "$citation\n"; + } +} else { + print AUXFILE "\\citation{*}\n\\bibdata{$macrofile$bibfile}\n"; +} + +close(AUXFILE); + + +# run bibtex, redirecting bibtex's output from STDOUT to STDERR. + +push(@tmpfiles, "$auxfile.blg"); +push(@tmpfiles, "$auxfile.bbl"); + +#Flush HTMLFILE to avoid duplicate buffer writes after the fork +select(HTMLFILE); +$| = 1; $| = 0; +select(STDOUT); + +# We attempt to fork in order to redirect bibtex's stdout to stderr. +# This is needed when bib2xhtml is generating its output on the +# standard output. +# The shell redirection syntax used in the system() alternative +# is by no means portable. +eval { fork || (open(STDOUT, ">&STDERR"), + # Handle leakage in Win32 prevents the final rename() + close(HTMLFILE), + close(OHTMLFILE), + exec('bibtex', (split(/\s+/, ($opt_b ? $opt_b : "")), $auxfile))); + wait; }; +# fork is not implemented on some non-Unix platforms. +if ($@) { + # The fork failed (perhaps not implemented on this system). + system("bibtex $opt_b $auxfile 1>&2"); +} + +$beginstring = "<!-- BEGIN BIBLIOGRAPHY $delimiter -->"; +$endstring = "<!-- END BIBLIOGRAPHY $delimiter -->"; + +if ($updating) { +loop: + while (<OHTMLFILE>) { + last loop if m/^$beginstring$/; + print HTMLFILE; + } +loop: + while (<OHTMLFILE>) { + last loop if m/^$endstring$/; + } +} + +print HTMLFILE "$beginstring\n"; +print HTMLFILE <<EOF; +<!-- + DO NOT MODIFY THIS BIBLIOGRAPHY BY HAND! IT IS MAINTAINED AUTOMATICALLY! + YOUR CHANGES WILL BE LOST THE NEXT TIME IT IS UPDATED! +--> +<!-- Generated by: $command_line --> +EOF +# Now we make two passes over the .bbl file. In the first pass, we +# just collect the {cite, label} pairs, which we will use later for +# crossrefs. + +$t = $auxfile . ".bbl"; + +$/ = ""; + +# Make a first pass through the .bbl file, collecting citation/label pairs. +open(BBLFILE, "<$t") || die "error opening $t: $!\n"; +$nentry = 0; +loop: +while (<BBLFILE>) { + # Check for definitions at start of .bbl file. + if (($nentry == 0) && (m/^#/)) { + if ((m/#\s*label-style:\s*(\S+)/) && (! defined $label_style)) { + $label_style = $label_styles{$1}; + if (! defined $label_style) { + print STDERR "label style unknown: \n"; + next loop; + } + } + next loop; + } + $nentry++; + ($bcite, $blabel) = m+<dt><a name=\"([^\"]*)\">\[([^\]]*)\]</a></dt><dd>+; + $blabel = "$nentry" if ($label_style == $LABEL_NUMBERED); + $bibcite{$bcite} = $blabel; +} +close(BBLFILE); + +$label_style = $LABEL_DEFAULT if (! defined $label_style); +$list_start = $list_start[$label_style]; +$list_end = $list_end[$label_style]; + +if (defined($opt_t)) { + print HTMLFILE "$nentry references, last updated " . &ctime(time) . "<p />\n"; +} + +print HTMLFILE "<$list_start>\n\n"; + +#foreach $key (sort (keys(%bibcite))) { +# print "$key : $bibcite{$key}\n"; +#} + +open(BBLFILE, "<$t") || die "error opening $t: $!\n"; +$nentry = 0; +loop: +while (<BBLFILE>) { + # Skip definitions at start of .bbl file. + next loop if (($nentry == 0) && (m/^#/)); + + $nentry++; + + # Protect \{, \}, and \$, and then assign matching {} pairs a unique ID. + s/\\\{/\002/g; + s/\\\}/\003/g; + s/\\\$/\004/g; + { + local ($c, $l, $z) = (0, 0, ()); + s/([\{\}])/join("","\001",($1 eq "\{" ? $z[$l++]=$c++ : $z[--$l]),$1)/ge; + } + + # bibtex sometimes breaks long lines by inserting "%\n". We remove + # that because it might accidently break the line in the middle + # of a URL. We don't need to deal with TeX comments in general + # because bibtex seems to munge them up anyway, so there shouldn't + # be any in the bibliography file. + s/\%\n//g; + + # bibtex's add.period$ knows how to avoid adding extra periods + # when a block already ends in a period. bib2xhtml's modifications + # of bibtex's style files break that. We fix it here. + s/(\.(<\/cite>|<\/a>|\')+)\./$1/g; + + # Adjust beginning of entry based on bibliography style. + if ($label_style == $LABEL_PLAIN || $label_style == $LABEL_ORDERED) { + s:<dt>(<a name=\"[^\"]*\">)\[[^\]]*\](</a>)</dt><dd>:<li>$1$2:; + s:</dd>:</li>:; + + # Attempt to fix up empty <a name=...></a> tag, which some browsers + # don't handle properly (even though it *is* legal HTML). + # First try to combine a <a name=...></a> with a following <A ". + s:(<a name=\"[^\"]*\")></a><a\b:$1: + # If that doesn't work, try to swallow following word. + or s:(<a name=\"[^\"]*\">)</a>([\w]+):$1$2<\/a>:; + } elsif ($label_style == $LABEL_NUMBERED) { + s:(<dt><a name=\"[^\"]*\">\[)[^\]]*(\]</a></dt><dd>):$1$nentry$2:; + } + + # Append the key name, if asked so + if ($opt_k && ($label_style == $LABEL_NUMBERED || $label_style == $LABEL_DEFAULT)) { + # $1 $2 $3 $4 $5 + s:(<dt><a name=\")([^\"]*)(\">\[)([^\]]*)(\]</a></dt><dd>):$1$2$3$4 --- $2$5:; + } + + # Attempt to fix up crossrefs. + while (m/(\\(cite(label)?)(\001\d+)\{([^\001]+)\4\})/) { + $old = $1; + $cmd = $2; + $doxref = defined($3); + $bcite = $5; + if (! defined $bibcite{$bcite}) { + $blabel = " [" . $bcite . "]"; + } elsif ($doxref) { + $blabel = " <a href=\"#$bcite\">[" . $bibcite{$bcite} . "]<\/a>"; + } else { + $blabel = " [" . $bibcite{$bcite} . "]"; + } + $old =~ s/(\W)/\\$1/g; + s/\s*$old/$blabel/g; + } + # In some styles crossrefs become something like + # "In Doe and Roe [Doe and Roe, 1995]." Change this to + # "In [Doe and Roe, 1995]." to remove the redundancy. + s/In (<a href=\"[^\"]*\">)([^\[]+) \[(\2)/In $1\[$2/; + + # Handle the latex2html commands \htmladdnormallink{text}{url} + # and \htmladdnormallinkfoot{text}{url}. + s/\\htmladdnormallink(foot)?(\001\d+)\{([^\001]+)\2\}(\001\d+)\{([^\001]+)\4\}/<a href="$5">$3<\/a>/g; + + s/\&/\005/g; # Protect original & sequences + s/\\?&/&/g; # \& -> & and & -> & + s/\005/&/g; # Restore original & sequences + + if ($opt_u) { + utf_ent(); + } else { + html_ent(); + } + + # Handle \char123 -> &123;. + while (m/\\char([\'\"]?[0-9a-fA-F]+)/) { + $o = $r = $1; + if ($r =~ s/^\'//) { + $r = oct($r); + } elsif ($r =~ s/^\"//) { + $r = hex($r); + } + s/\\char$o\s*/&#$r;/g; + } + + s/{\\etalchar\001(\d+)\{(.)}\001\1\}/$2/g; # {\etalchar{x}} -> x + + s/\\par\b/<p \/>/g; + + # \url{text} -> <a href"text">text</a> + s/\\url(\001\d+)\{([^\001]*)\1\}/<a href="$2">$2<\/a>/g; + + # remove spaces from urls + while (m+<a href="(.*?)">(.*?)</a>+s) { + my $before = $`; + my $url = $1; + my $text = $2; + my $after = $'; #' + $url =~ s/ //g; + $_ = "$before\<a_ href=\"$url\">$text</a>$after"; + } + s/<a_ /<a /g; + + # There's no way to easily handle \rm and \textrm because + # HTML has no tag to convert back to plain text. Since it's very + # difficult to do the right thing, we do the wrong thing, and just + # remove them. + s/(\001\d+)\{\\rm\s+([^\001]*)\1\}/$2/g; # {\rm text} -> text + s/\\textrm(\001\d+)\{([^\001]*)\1\}/$2/g; # \textrm{text} -> text + + # This doesn't create correct HTML, because HTML doesn't allow nested + # character style tags. Oh well. + s/(\001\d+)\{\\em\s+([^\001]*)\1\}/<em>$2<\/em>/g; # {\em text} -> <EM>text</EM> + s/(\001\d+)\{\\it\s+([^\001]*)\1\}/<i>$2<\/i>/g; # {\it text} -> <I>text</I> + s/(\001\d+)\{\\bf\s+([^\001]*)\1\}/<b>$2<\/b>/g; # {\bf text} -> <B>text</B> + s/(\001\d+)\{\\tt\s+([^\001]*)\1\}/<tt>$2<\/tt>/g; # {\tt text} -> <TT>text</TT> + + s/\\emph(\001\d+)\{([^\001]*)\1\}/<em>$2<\/em>/g; # \emph{text} -> <EM>text</EM> + s/\\textit(\001\d+)\{([^\001]*)\1\}/<i>$2<\/i>/g; # \textit{text} -> <I>text</I> + s/\\textbf(\001\d+)\{([^\001]*)\1\}/<b>$2<\/b>/g; # \textbf{text} -> <B>text</B> + s/\\texttt(\001\d+)\{([^\001]*)\1\}/<tt>$2<\/tt>/g;# \textit{text} -> <TT>text</TT> + + s/\\mathrm(\001\d+)\{([^\001]*)\1\}/$2/g; # \mathrm{text} -> text + s/\\mathnormal(\001\d+)\{([^\001]*)\1\}/$2/g; # \mathnormal{text} -> text + s/\\mathsf(\001\d+)\{([^\001]*)\1\}/$2/g; # \mathsf{text} -> text + s/\\mathbf(\001\d+)\{([^\001]*)\1\}/<b>$2<\/b>/g; # \mathbf{text} -> <B>text</B> + s/\\mathcal(\001\d+)\{([^\001]*)\1\}/<i>$2<\/i>/g;# \mathcal{text} -> <I>text</I> + s/\\mathit(\001\d+)\{([^\001]*)\1\}/<i>$2<\/i>/g; # \mathit{text} -> <I>text</I> + s/\\mathtt(\001\d+)\{([^\001]*)\1\}/<tt>$2<\/tt>/g;# \mathtt{text} -> <TT>text</TT> + + # {\boldmath $mathstuff$} -> <B>mathstuff</B> +# s/(\001\d+)\{\s*\\boldmath ?([^A-Za-z\{\}][^\{\}]*)\}/<b>$1<\/b>/g; + + +sub domath { + local($t) = @_; + $t =~ s/\^(\001\d+)\{\\circ\1\}/\&\#176;/g; # ^{\circ}->degree + $t =~ s/\^\\circ/\&\#176;/g; # ^\circ->degree +# $t =~ s/\^(\001\d+)\{([^\001]*)\1\}/<sup>$2<\/sup>/g; # ^{x} + $t =~ s/\^(\001\d+)\{(.*)\1\}/<sup>$2<\/sup>/g; # ^{x} + $t =~ s/\^(\w)/<sup>$1<\/sup>/g; # ^x +# $t =~ s/\_(\001\d+)\{([^\001]*)\1\}/<sub>$2<\/sub>/g; # _{x} + $t =~ s/\_(\001\d+)\{(.*)\1\}/<sub>$2<\/sub>/g; # _{x} + $t =~ s/\_(\w)/<sub>$1<\/sub>/g; # _x + $t; +} + + # Handle superscripts and subscripts in inline math mode. + s/(\$([^\$]+)\$)/&domath($2)/ge; # $ ... $ + s/(\\\((([^\\]|\\[^\(\)])+)\\\))/&domath($2)/ge; # \( ... \) + + # Remove \mbox. + s/\\mbox(\001\d+)\{([^\001]*)\1\}/$2/g; # \mbox{x} + + # Escape tildes protecting URLs + # For some reason /g doesn't work + while (s/(\<a href\=\"[^<]*?)\~/$1\005/g) { ; } + if ($opt_u) { + s/([^\\])~/$1 /g; # ~ non-breaking space -   + s/\\\,/ /g; # \, thin space -   + s/\\ldots\b\s*/…/g; # Horizontal ellipsis + s/\\dots\b\s*/…/g; # Horizontal ellipsis + } else { + s/([^\\])~/$1 /g; # ~ non-breaking space + s/\\\,/ /g; # \, thin space + s/\\ldots\b/…/g; # Horizontal ellipsis + s/\\dots\b/…/g; # Horizontal ellipsis + + } + s/\005/\~/g; # Unescape tildes + s/\\ / /g; # \ (normal space) + s/\\textasciitilde\b\s*/~/g; # \textasciitilde -> ~ + + # Non-alphabetic macros that we keep. + s/\\([\#\&\%\~\_\^\|])/$1/g; + + # Non-alphabetic macros that we remove. + # (discretionary hyphen) + # (italic correction) + s/\\\W//g; + + # Clean up things we don't handle. +# s/\\//g; + + # The format {\Xyz{Abc}} is interpreted by BibTeX as a single letter + # whose text is given by "Abc". If we see this pattern, it is + # likely that discarding the \Xyz will do the right thing. + s/\001(\d+)\{\\[A-Za-z]+\001(\d+)\{([^\001]*)\001\2\}\001\1\}/$3/g; + + # Macro names may be meaningful, so keep them and don't run them together. + s/\\([A-Za-z]+)/ $1 /g; + + # Remove an empty <a href=...></a> tag that bad cross-referencing + # in the BibTeX file may have left us with. + s+In <a href=\"[^\"]*\"></a>++; + + &doPaperLinks; + + # Get rid of { } ids, and put protected { } back. + s/\001\d+[\{\}]//g; + tr/\002\003\004/{}$/; + + print HTMLFILE $_; +} + +close(BBLFILE); + +print HTMLFILE "<$list_end>\n\n$endstring\n"; + +if ($updating) { + while (<OHTMLFILE>) { + print HTMLFILE; + } + close (OHTMLFILE); +} else { + print HTMLFILE "</body></html>\n"; +} + +close(HTMLFILE); + +if (defined ($htmlfile)) { + #$mode &= 0777; + #print "setting $htmlfile$$ to $mode\n"; + #printf("mode = %lo\n", $mode); + + chmod($mode, "$htmlfile$$"); + rename("$htmlfile$$", $htmlfile); +} + +unlink(@tmpfiles); + +exit(0); |