DigitalNZ Image Distribution | ||
Source code: image_distribution.pl#! /usr/bin/perl -W# DigitalNZ Image Distribution Grapher (image-distribution.pl) # Version 1.1 by Gordon Paynter, 11 February 2009 # Copyright 2009 National Library of New Zealand # This CGI script outputs a graphical representation of the images # currently stored in DigitalNZ. It requires: Perl (including # LWP::Simple and List::Util modules), the acompanying 1x1.gif file, # and a writable "cache" subdirectory. The coding is pretty simple: # the HTML is formatted with tables (so retro!) and we parse the # DigialNZ search API output with regular expressions (i.e. no XML # parser). # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # See http://www.gnu.org/licenses/ for a copy of the GNU General # Public License. ### Script parameters # The main facet whose distribution will be graphed: my $show_source = 0; my $distribution_field = 'century'; my $query_string = $ENV{'QUERY_STRING'} || ''; if ($query_string =~ /field=(\w+)/) { $distribution_field = $1; } elsif ($query_string eq 'show_source=true') { $show_source = 1; } ### DigitalNZ API setup: my $dnz_api_url = 'http://api.digitalnz.org/records/v1.xml/'; my $dnz_api_key = 'ADD_YOUR_API_KEY_HERE'; ### Output the page: &write_page_start(); if ($show_source) { &write_source_code(); } else { &write_image_distribution($distribution_field); } &write_page_end(); ### Output the beginning of the page: sub write_page_start { print "Content-type: text/html\r\n\r\n"; print "<html><head><title>DigitalNZ Image Distribution</title></head>\n"; print "<body>\n"; print '<table border="0">'; print "<tr><td colspan=\"3\" align=\"center\">\n"; print "<h1>DigitalNZ Image Distribution</h1>\n"; print "</td></tr>\n"; } ## Output the page body for image distribution data: sub write_image_distribution { my ($distribution_field) = @_; # Query 1: Get the distribution data my $intiial_url = "$dnz_api_url?api_key=$dnz_api_key"; $intiial_url .= "&search_text=category:Images"; $intiial_url .= "&num_results=1"; $intiial_url .= "&start=0"; $intiial_url .= "&facet_num_results=1000"; $intiial_url .= "&facets=$distribution_field"; #print "<!-- Initial URL: '$intiial_url' -->\n"; use LWP::Simple; my $content = get $intiial_url; if (!defined($content)) { print "<p>Couldn't get $intiial_url</p>\n"; return; } # Extract the facet value data (none of yer fancy XML parsers here): my $values = $content; $values =~ s/^.*\<values//s; $values =~ s/\<\/values.*//s; my @value_list = split('value', $values); my @names; my %name_to_frequency; my $total_frequency = 0; my $max_frequency = 0; foreach my $v (@value_list) { if ($v =~ /num-results.(\d+)..num-results.*name.(.+)..name/s) { my $num_frequency = $1; my $name = $2; #print "<!-- Value '$name' => $num_frequency -->\n"; push(@names, $name); $name_to_frequency{$name} = $num_frequency; $total_frequency += $num_frequency; if ($num_frequency > $max_frequency) { $max_frequency = $num_frequency; } } } print "<tr><th>$distribution_field</th><th> </th><th align=\"right\">number</th></tr>\n"; foreach my $name (@names) { $freq = $name_to_frequency{$name}; $blocks = int($freq / $max_frequency * 10); print '<tr height="50"><td>' . $name . '</td><td>'; &output_blocks($distribution_field, $name, $freq, $blocks); print '</td><td align="right">' . $freq . "</td></tr>\n"; } } ## Dump the source code to the table: sub write_source_code { open(CODE, 'image-distribution.pl') or die("Unable to open file"); @lines = <CODE>; close(CODE); print "<tr><td colspan=\"3\" align=\"left\">\n"; print "<h2>Source code: image_distribution.pl</h2>\n"; foreach my $line (@lines) { $line =~ s/&/&/g; $line =~ s/</</g; $line =~ s/>/>/g; $line =~ s/my \$dnz_api_key = '.+'/my \$dnz_api_key = 'ADD_YOUR_API_KEY_HERE'/; $line =~ s/ / /g; $line =~ s/$/<br \/>/; print $line; } print "</td></tr>\n"; } ### Output the end of the page: sub write_page_end { print "<tr height=\"50\"><td colspan=\"3\" align=\"center\">\n"; print '<p align="centre">View by '; print '<a href="image-distribution.pl">century</a>, '; print '<a href="image-distribution.pl?field=decade">decade</a>, '; print '<a href="image-distribution.pl?field=content_partner">content_partner</a> or '; print '<a href="image-distribution.pl?field=language">language</a>, or '; print '<a href="http://www.digitalnz.org/">visit DigitalNZ</a>.'; print '</p>'; print '<p align="centre">Copyright 2009 The National Library of New Zealand.<br />'; print '<a href="image-distribution.pl?show_source=true">Licensed under the GNU GPL</a>. '; print '<a href="image-distribution.pl?show_source=true">Get the source</a>.'; print '</p>'; print "</td></tr></table>\n"; print "</body></html>\n"; } ### Output a set of image blocks to illustrate the distribution: sub output_blocks() { my ($field, $name, $freq, $blocks) = @_; if ($blocks > 0) { print "\n"; my @ids = &get_image_urls($field, $name, $blocks); foreach my $id (@ids) { my ($source_url, $thumbnail_url ) = $id =~ /^(.+)\t(.+)$/; printf('<a href="%s"><img src="%s" width="50" height="50" border="0" /></a>', $source_url, $thumbnail_url); } } else { print '<img src="1x1.gif" width="1" height="50" />'; } print "\n"; } ### Grab the image URLs, and cache them for later use if needed: sub get_image_urls() { my ($field, $name, $blocks) = @_; # Cache the image files: my $filename = &get_cache_file_name($field, $name); if (!-e $filename) { &add_image_urls_to_cache($field, $name, $filename); } # Read the image file names from the cache: open(CACHE, $filename) or die("Unable to open file"); my @lines = <CACHE>; close(CACHE); use List::Util 'shuffle'; @lines = shuffle(@lines); splice(@lines, $blocks); return @lines; } ### Calculate name of cache file: sub get_cache_file_name() { my ($field, $name) = @_; my $filename = 'cache/' . $field . '-' . $name . '.txt'; $filename =~ s/[ &]/-/g; return $filename; } ### Populate the cache file: sub add_image_urls_to_cache() { my ($field, $name, $filename) = @_; # Create subsidiary URL: my $url = "$dnz_api_url?api_key=$dnz_api_key"; $url .= "&search_text=category:Images AND $field:\"$name\"&num_results=50"; #print "<!-- Caching: URL for '$filename': $url -->\n"; # Read the content out of the URL and into the file my $content = get $url; die "Couldn't get $url" unless defined $content; open CACHE, '>', "$filename"; # Read the record data out of the search result: $content =~ s/^.*\<results//s; $content =~ s/\<\/results.*//s; my @result_list = split('<result>', $content); foreach my $v (@result_list) { if ($v =~ /.source\-url.(.+).\/source\-url..+.thumbnail\-url.(.+).\/thumbnail\-url./s) { my $source_url = $1; my $thumbnail_url = $2; print CACHE $source_url . "\t" . $thumbnail_url . "\n"; } } } | ||
|
View by century, decade, content_partner or language, or visit DigitalNZ. Copyright 2009 The National Library of New Zealand. |