#!perl

# Enhance an XML index file conforming to imperatives.xsd by iterating
# over each 'item' element and inserting the verse text into its
# 'versetext' attribute, retrieving the text with the ESV web service.
# See usage() for more details.
#
# NOTE the variable $esvkey needs to be changed to a valid ESV key for
# this to work.

use strict;
use warnings;

use Getopt::Long;
use XML::Twig;
use LWP::Simple;

# Package globals: the $opt_* variables are filled in by GetOptions (which
# stores each option in $main::opt_<name> when no destination is given);
# the others carry state between successive do_item() calls.
our ($opt_usage, $opt_index, $opt_debug,
     $queries, $lastref, $lasttext, $esvkey);

$| = 1;    # don't buffer output

#MAIN
{
    # modify this for your own ESV key: this program won't work without it
    $esvkey = "TEST";

    process_options();

    $queries  = 0;     # number of web service requests issued so far
    $lastref  = "";    # reference looked up for the previous item
    $lasttext = "";    # verse text obtained for $lastref ("" on failure)

    my $indextwig = XML::Twig->new(twig_roots => { 'item' => \&do_item });
    $indextwig->parsefile($opt_index);
    $indextwig->print;

    print STDERR "\nQueries performed: $queries\n";
}

# Twig handler called for each 'item' element in the XML index file.
#
# Params:  $twig - the XML::Twig object doing the parsing (unused)
#          $elt  - the 'item' element being processed
# Effect:  sets the element's 'versetext' attribute to the verse text for
#          its 'ref' attribute, or reports an error on STDERR when no text
#          could be obtained.  Prints one '.' to STDERR per handled item.
sub do_item {
    my ($twig, $elt) = @_;

    # set the verse reference
    my $ref = $elt->att('ref');
    unless (defined $ref && length $ref) {
        # Without a reference there is nothing to look up; leave the cache
        # of the previous lookup untouched.
        print STDERR "Error: item without a 'ref' attribute\n";
        return;
    }

    # strip out any trailing alphabetic index: don't need it for retrieval
    my $fullref = $ref;
    $fullref =~ s/[a-z]$//;

    # simple trick to only look up repeats once (since only a limited
    # number of queries are allowed)
    unless ($fullref eq $lastref) {
        # A failed lookup yields "", so text from the previous reference is
        # never attached to this one by mistake.
        $lasttext = _fetch_verse_text($ref, $fullref);
    }

    # add the verse text as an attribute: if the verse is a repeat,
    # this is from the previous query
    if (defined $lasttext && length $lasttext) {
        $elt->set_att('versetext' => $lasttext);
    }
    else {
        print STDERR "Error: lasttext not set for $ref\n";
    }

    $lastref = $fullref;
    #$twig->purge;    # frees the memory
    print STDERR '.';
    return;
}

# Retrieve the plain text for one reference from the ESV web service.
#
# Params:  $ref     - the reference as written in the index (for messages)
#          $fullref - the reference with any trailing letter removed
# Returns: the text content of the response, or "" when the request failed
#          or the response could not be parsed (an error goes to STDERR).
sub _fetch_verse_text {
    my ($ref, $fullref) = @_;

    # Build the passage parameter: the first '.' becomes a space, the next
    # '.' becomes ':', and the first space becomes '+'.
    my $esvref = $fullref;
    $esvref =~ s/\./ /;
    $esvref =~ s/\./:/;
    $esvref =~ s/ /\+/;

    # use the reference to retrieve the verse content
    my $text = query_passage($esvref);
    unless (defined $text) {
        # LWP::Simple::get returns undef when the request fails
        print STDERR "Error: no response from the ESV web service for $ref\n";
        return "";
    }
    if ($opt_debug) {
        print STDERR ">> for $ref, text is $text\n";
    }

    # Flatten the markup: block elements become spans and line breaks are
    # dropped, so that only the text content matters.
    my $esv = XML::Twig->new(
        twig_handlers => {
            p   => sub { $_->set_gi('span'); },
            div => sub { $_->set_gi('span'); },
            br  => sub { $_->delete; },
        },
        keep_encoding => 1,
    );

    # safe_parse wraps the parse in an eval and returns false on failure;
    # plain parse() would die and abort the whole run on one bad response.
    unless ($esv->safe_parse($text)) {
        print STDERR "Can't parse this:$text\n";
        return "";
    }

    my $esvroot   = $esv->root;
    my $versetext = $esvroot ? $esvroot->text : "";
    $esv->purge;    # flush the temporary text

    return defined $versetext ? $versetext : "";
}

# Issue one passage query against the ESV web service.
#
# Params:  $passage - the passage parameter, already formatted for the URL
# Returns: the response body, or undef if the request failed.
# Effect:  increments the global $queries counter.
sub query_passage {
    my ($passage) = @_;

    my $options = "action=doPassageQuery&include-verse-numbers=false&include-passage-references=false&include-footnotes=false&include-headings=false&include-subheadings=false&include-short-copyright=false";

    $queries++;
    return get("http://www.gnpcb.org/esv/share/get/?key=$esvkey&passage=$passage&$options");
}

# Parse the command line into the $opt_* globals.
#
# Prints the usage text and exits 0 when -usage/-help/-? is given; dies
# with the usage text when the options are invalid or the index file does
# not exist.
sub process_options {
    $opt_usage = 0;
    $opt_debug = 0;
    $opt_index = "";

    # Parse the command line options
    GetOptions("usage|help|?",
               "index=s",
               # unadvertised
               "debug")
        or die usage();

    if ($opt_usage) {
        print usage();
        exit 0;
    }

    unless (-e $opt_index) {
        die "Can't open $opt_index\n" . usage();
    }
    return;
}

# Return the usage message as a string.
sub usage {
    return "usage: textualize.pl -index file.xml

Read an input file in 'index' format, connect to the ESV web service to
download the verse content for each item, merge in additional index file
content and write to stdout.
";
}