#Blah: Bibtex to LaTeX and HTML
#by Emanuele Viola building on bibtex parser by Gerhard Gossen
#v1.0 Friday, December 23, 2011 09:57
#v1.0 documentation
#Reads an (augmented) .bib file and writes
#blah.html and blah.tex
#
#blah.tex is a sequence of \item entries; within each, lines are separated by \\
#blah.html is HTML with lines separated using <br>
#
#Features:
#Handling authors:
#1) Does not write authors if only author is YOU
#2) Writes list of co-authors starting with "With" if
#YOU are among the authors
#3) Writes full list of authors if YOU are not among them
#
#Handling preliminary versions:
#@ARTICLE entries support extra fields to indicate a
#preliminary version (e.g. a conference paper)
#
#Handling of special notes for html and/or LaTeX
#(e.g. best paper award)
#
#Handling of various HTML links associated to work
#(blahdoc, blahslides, blahvideo, blahcode)
#
#Supports bibtex entries:
#@ARTICLE
#blahprelimbooktitle
#blahprelimyear
#blahprelimpages (currently not printed)
#blahnotel (note for LaTeX)
#blahnoteh (note for HTML)
#blahdoc (paper/write-up/etc file)
#blahslides blahvideo blahcode
#journal, year, number, volume, pages
#(Note on journal: if year is not present, writes "To appear in ...")
#@INPROCEEDINGS
#booktitle
#pages (currently not printed)
#year
#blahdoc blahslides blahvideo blahcode
#@UNPUBLISHED
#@BLAHCLOSEH{}
#@BLAHEND{}
#@BLAHPRINT{
#blahprintl={LaTeX code e.g. \begin{enumerate} \end{enumerate}},
#blahprinth={HTML code e.g. <ol> </ol>},
#}
#
#Blah.pl is written in Strawberry Perl on WinXP
#Blah.pl uses Perl bibtex parser by Gerhard Gossen:
#https://metacpan.org/module/BibTeX::Parser
#(to install you type cpan BibTeX::Parser)
#
#to run: perl parseBib.pl 2>blaherr.txt
#
#the option "2>blaherr.txt" redirects the error output
#to blaherr.txt.
#There are many warnings from Perl parser I do not understand.
#
#Note: Strings such as @string{jacm={Journal of the ACM}}
#are expanded by the parser.
#This happens even for fields in .bib entry I create.
#OUTL is the LaTeX output file handle (blah.tex)
#OUTH is the HTML output file handle (blah.html)
#Both stay bareword handles because the print_* subs in this file write
#to them by name.
#Three-argument open keeps the mode separate from the file name, and $!
#is reported so a failure says why the file could not be created.
open(OUTL, ">", "blah.tex") || die "Can't open OUTL: $!";
open(OUTH, ">", "blah.html") || die "Can't open OUTH: $!";
use BibTeX::Parser;
use IO::File;
#Input .bib file: the first command-line argument if one is given,
#otherwise the original hard-coded location.
my $bibfile = @ARGV ? $ARGV[0] : "C:\\home\\krv\\math\\OmniBib.bib";
#IO::File->new returns undef when the file cannot be opened; stop here
#rather than hand an undefined handle to the parser.
my $fh = IO::File->new($bibfile, "r") || die "Can't open $bibfile: $!";
# Create parser object ...
my $parser = BibTeX::Parser->new($fh);
#Turns a BibTeX author field into the author line to print.
#Argument: the author string (names joined by " and ").
#Returns:
# - the argument unchanged when "Emanuele Viola" does not occur in it
#   (useful for other works by the research group);
# - the empty string when the argument is exactly "Emanuele Viola";
# - otherwise "With " followed by the remaining co-authors,
#   not terminated by any punctuation.
sub author_to_with {
    my ($authors) = @_;
    my $me = "Emanuele Viola";
    return $authors if index($authors, $me) < 0;
    return "" if $authors eq $me;
    #Drop my name together with one adjacent " and " separator.
    #The first pattern covers me being listed last or in the middle,
    #the second covers me being listed first.
    my $coauthors = $authors;
    $coauthors =~ s/ and Emanuele Viola//;
    $coauthors =~ s/Emanuele Viola and //;
    return "With " . $coauthors;
}
#Next two functions are useful in adding new lines
#in LaTeX and HTML. It's done in two different ways.
#In LaTeX at beginning, in HTML at the end.
#Newline LaTeX
#Argument: the text of one output line.
#Returns: "" for "" (nothing to break before); otherwise the text with
#"\\" (a LaTeX line break, two backslashes) added at the beginning.
#Works on a copy of the argument: the elements of @_ alias the caller's
#variables, so assigning to $_[0] would rewrite the caller's string and
#would die on a literal argument.
sub nll {
    my ($text) = @_;
    #An undefined value (missing field) is passed through like "".
    return $text if !defined($text) || $text eq "";
    return "\\\\" . $text;
}
#Newline Html
#Argument: the text of one output line.
#Returns: "" for "" (no line, so no break); otherwise the text with
#"<br>" and a newline added at the end.
#The <br> tag is needed because a bare newline is not rendered as a
#line break in HTML.
#Works on a copy of the argument: the elements of @_ alias the caller's
#variables, so assigning to $_[0] would rewrite the caller's string and
#would die on a literal argument.
sub nlh {
    my ($text) = @_;
    #An undefined value (missing field) is passed through like "".
    return $text if !defined($text) || $text eq "";
    return $text . "<br>\n";
}
#LaTeX to HTML.
#Argument: a string of LaTeX-flavoured text (e.g. a title or venue).
#Returns: a copy with the substitutions below applied; the argument
#itself is not modified.
sub l2h {
    my ($html) = @_;
    #Kills {, } and $ except when they are preceded by \
    #For example \{0,1\} should stay as is.
    #A lookbehind is used instead of capturing the preceding character,
    #so a brace at the very start of the string and the second of two
    #adjacent braces (as in {{Title}}) are removed as well. The braces
    #sit inside a character class because a bare { in a pattern is
    #rejected or warned about by modern perls.
    $html =~ s/(?<!\\)[{}\$]//g;
    #Various accents
    $html =~ s/\\'//g;
    $html =~ s/\\"//g;
    #NOTE(review): this also strips the first two characters of any
    #command starting with \a (e.g. \alpha) - confirm that is intended.
    $html =~ s/\\a//g;
    $html =~ s/\\v //g;
    $html =~ s/\\c //g;
    #A LaTeX tie becomes a plain space
    $html =~ s/~/ /g;
    #Finally kills all \. This way \$ -> $, \{ -> { etc.
    $html =~ s/\\//g;
    return $html;
}
#Prints the title of the current entry to OUTL and OUTH.
#Takes no arguments; reads the file-level $entry.
sub print_title {
    my $title = $entry->field("title");
    #LaTeX: each entry opens with \item, so no nll() line break here.
    print OUTL "\\item " . $title;
    #HTML: the title goes through l2h() and is terminated by nlh().
    #NOTE(review): the empty strings around the title look like
    #placeholders for wrapper markup - confirm what belongs there.
    my $html_title = nlh(l2h($title));
    print OUTH "" . $html_title . "";
}
#Prints the author line of the current entry to OUTL and OUTH.
#Takes no arguments; reads the file-level $entry.
#The line is whatever author_to_with() makes of the "author" field.
sub print_author {
    #Lexical, so the value does not leak into a package global.
    my $author = $entry->field("author");
    #LaTeX: line break in front (nll); HTML: converted by l2h() and
    #terminated by nlh().
    print OUTL nll(author_to_with($author));
    print OUTH nlh(l2h(author_to_with($author)));
}
#Prints the journal line of the current entry to OUTL and OUTH.
#Takes no arguments; reads the file-level $entry.
#Without a year the line is "To appear in <journal>"; otherwise it is
#"<journal>, vol. V, num. N, pp. P, <year>", where the vol./num./pp.
#parts appear only when the matching field is present.
sub print_journal {
    my $name = $entry->field("journal");
    my $year = $entry->field("year");
    #If year is missing then to appear, otherwise full entry
    if (!defined($year) || $year eq "") {
        print OUTL nll("To appear in " . $name);
        print OUTH nlh("To appear in " . l2h($name));
    }
    else {
        #Some journals may miss volume, number, etc.
        #Name of journal and year is always needed here.
        my $details = "";
        for my $part ([ "volume", "vol. " ], [ "number", "num. " ], [ "pages", "pp. " ]) {
            my ($field, $label) = @$part;
            my $value = $entry->field($field);
            next if !defined($value) || $value eq "";
            $details .= $label . $value . ", ";
        }
        $details .= $year;
        #Only the HTML line gets the l2h()-converted name. The LaTeX line
        #keeps the raw name, as in the "To appear" branch, so its LaTeX
        #escapes and ties survive.
        print OUTL nll($name . ", " . $details);
        print OUTH nlh(l2h($name) . ", " . $details);
    }
}
#Prints the preliminary-version line of the current entry, if any.
#Takes no arguments; reads the file-level $entry.
#Nothing is printed when the "blahprelimbooktitle" field is empty.
sub print_blahprelim {
    #Preliminary venue
    my $venue = $entry->field("blahprelimbooktitle");
    if ($venue ne "") {
        my $lead = "Preliminary version in ";
        my $tail = ", " . $entry->field("blahprelimyear");
        #The venue is converted by l2h() for the HTML line only.
        print OUTL nll($lead . $venue . $tail);
        print OUTH nlh($lead . l2h($venue) . $tail);
    }
}
#Prints the venue line "In <booktitle>, <year>" of the current entry
#to OUTL and OUTH.
#Takes no arguments; reads the file-level $entry.
sub print_proceedings {
    my $venue = $entry->field("booktitle");
    my $tail = ", " . $entry->field("year");
    #The venue is converted by l2h() for the HTML line only.
    print OUTL nll("In " . $venue . $tail);
    print OUTH nlh("In " . l2h($venue) . $tail);
}
#Prints the line "Manuscript, <year>" of the current entry to OUTL
#and OUTH.
#Takes no arguments; reads the file-level $entry.
sub print_unpublished {
    my $year = $entry->field("year");
    #Same text for both outputs; only the line-break helper differs.
    print OUTL nll("Manuscript, " . $year);
    print OUTH nlh("Manuscript, " . $year);
}
#Prints the special notes of the current entry (e.g. best paper award).
#Takes no arguments; reads the file-level $entry.
#"blahnotel" goes to OUTL and "blahnoteh" goes to OUTH; each is printed
#as written in the .bib file, and only when it is not empty.
sub print_blahnote {
    my $latex_note = $entry->field("blahnotel");
    my $html_note = $entry->field("blahnoteh");
    #Latex
    if ($latex_note ne "") {
        print OUTL nll($latex_note);
    }
    #html
    if ($html_note ne "") {
        print OUTH nlh($html_note);
    }
}
#Prints the HTML links attached to the current entry to OUTH.
#Takes no arguments; reads the file-level $entry.
#Outputs nothing if the entry has no link field.
#Otherwise outputs one link per non-empty field, each followed by a
#space, and terminates the line with <br>.
sub print_h_links {
    #Field name and visible link text, in output order.
    my @kinds = (
        [ "blahdoc",    "Document" ],
        [ "blahslides", "Slides" ],
        [ "blahvideo",  "Video" ],
        [ "blahcode",   "Code" ],
    );
    my $links = "";
    for my $kind (@kinds) {
        my ($field, $label) = @$kind;
        my $url = $entry->field($field);
        next if !defined($url) || $url eq "";
        #The field value is the link target; the label alone would
        #not be clickable.
        $links .= '<a href="' . $url . '">' . $label . '</a> ';
    }
    return if $links eq "";
    print OUTH $links . "<br>\n";
}
# Main iteration over entries
#$entry is deliberately a file-level variable: the print_* subs read it.
#$html_open records whether OUTH is still open. @BLAHCLOSEH closes it
#early, and closing it a second time would fail and abort the run.
my $html_open = 1;
while ($entry = $parser->next) {
    if (!$entry->parse_ok) {
        warn "Error parsing file: " . $entry->error;
        next;
    }
    my $type = $entry->type;
    #@BLAHEND stops processing; the rest of the .bib file is ignored.
    if ($type eq "BLAHEND") {
        last;
    }
    #@BLAHCLOSEH ends the HTML output; LaTeX output continues.
    #Later writes to OUTH by the print_* subs go to the closed handle
    #and produce no output.
    if ($type eq "BLAHCLOSEH") {
        if ($html_open) {
            close(OUTH) || die "Can't close OUTH";
            $html_open = 0;
        }
        next;
    }
    #@BLAHPRINT copies raw LaTeX / HTML code into the outputs.
    if ($type eq "BLAHPRINT") {
        print OUTL $entry->field("blahprintl");
        print OUTH $entry->field("blahprinth");
        next;
    }
    if ($type eq "ARTICLE") {
        print_title();
        print_author();
        print_journal();
        print_blahprelim();
        print_blahnote();
        print_h_links();
    }
    elsif ($type eq "INPROCEEDINGS") {
        print_title();
        print_author();
        print_proceedings();
        print_blahnote();
        print_h_links();
    }
    elsif ($type eq "UNPUBLISHED") {
        print_title();
        print_author();
        print_unpublished();
        print_blahnote();
        print_h_links();
    }
    #Adds newline to OUTL for readability
    print OUTL "\n";
    #Line break between entries in OUTH
    if ($html_open) {
        print OUTH "<br>\n";
    }
}
print "Finished";
close(OUTL) || die "Can't close OUTL";
#OUTH may already have been closed by @BLAHCLOSEH.
if ($html_open) {
    close(OUTH) || die "Can't close OUTH";
}