#!/usr/local/bin/ruby -w # isbn2marc - find MARC records for one or more ISBNs # William Denton # April 2007 - October 2008 # Released under the MIT License. # Copyright (c) 2008 William Denton # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation # files (the "Software"), to deal in the Software without # restriction, including without limitation the rights to use, # copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following # conditions: # # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. # INSTALLATION NOTES # # Requires the ruby-zoom package (which requires YAZ) and the marc gem. # # ruby-zoom: http://ruby-zoom.rubyforge.org/ # See the installation instructions there # # marc gem: http://rubyforge.org/projects/marc/ # $ sudo gem install marc # # NOTE ruby-zoom installs its own marc.rb file that will conflict with # marc! You will need to delete ruby-zoom's marc.rb or rename it # for the marc gem to work. # # On my system it was installed in # /usr/local/lib/ruby/site_ruby/1.8/marc.rb # but you'll have to look for it wherever your system put it. 
# USAGE # # isbn2marc [-q] [-d marcfile] [-x xmlfile] ISBN # # -q Run quietly, don't list servers queried and don't # --quiet dump MARC to STDOUT # By default isbn2marc tells STDERR what it's doing # # -d marcfile Dump MARC record to file # --dump # # -x xmlfile Dump MARCXML record to file # --dumpxml # # -w apikey Query WorldCat using API key # --worldcat # # -l level WorldCat service level (default or full) # --worldcatlevel Default is default # EXAMPLES # # Find a MARC record for the first volume of Casanova's HISTORY OF # MY LIFE and pretty print it: # # $ isbn2marc 0801856620 # # Find a MARC record for Terry Pratchett's THIEF OF TIME and write # a binary MARC file to disk as well as pretty printing it: # # $ isbn2marc -d thief.marc 0552148407 # # Find a MARC record for the first omnibus volume of the University of # Chicago Press's edition of Anthony Powell's A DANCE TO THE MUSIC OF TIME, # but don't show the record on the screen: # # $ isbn2marc -q -d stringham.marc -x templer.xml 0226677141 # # Find MARC records for all ISBNs in a file and save them on disk: # # $ cat isbns.txt # 0439064864 # 0439064872 # 0439136350 # 0439136369 # 0439139600 # 059035342X # 0736646736 # $ isbn2marc -f isbns.txt -d harry-potter.marc # # Try WorldCat first, which will speed things up, and get a full record: # # $isbn2marc --worldcat yourAPIkeyhere --worldcatlevel full 1551922460 # # Find a MARC record but don't display it or save it: # # $ isbn2marc -q 0195024028 # # You'd be a bit silly to run that often. # TO DO # # Proper ISBN validity check. require 'net/http' require 'optparse' require "rexml/document" include REXML require 'rubygems' require 'marc' require 'zoom' # I don't know if it's cool to use global variables for these # options, or for the MARC writer and xmlwriter thingies, but # I am because of the write_record function. Comments welcome. 
# Run-time options.  They are global because write_record (and the main
# loop) read them directly.
$options = {
  :quiet         => false,     # -q: suppress progress output and the record dump on STDOUT
  :file          => nil,       # -f: file of ISBNs, one per line
  :dump          => nil,       # -d: binary MARC output file
  :dumpxml       => nil,       # -x: MARCXML output file
  # Enter your WorldCat API key here.
  # It can also be specified on the command line but that's tiresome.
  # To get a key, see
  #   http://worldcat.org/devnet/wiki/SearchAPIWhoCanUse
  # For WorldCat API details see
  #   http://worldcat.org/devnet/wiki/SearchAPIDetails
  :worldcat      => '',
  :worldcatlevel => 'default'  # Or full
}

OptionParser.new do |opts|
  opts.banner = "Usage: isbn2marc [-q] [-d marcfile] [-x xmlfile] isbn"
  opts.on("-q", "--quiet", "Run silently") { $options[:quiet] = true }
  opts.on("-d", "--dump d", "Dump MARC to file d") do |d|
    $options[:dump] = d
  end
  opts.on("-x", "--dumpxml x", "Dump MARCXML to file x") do |x|
    $options[:dumpxml] = x
  end
  opts.on("-f", "--file f", String, "Read ISBNs from file f (one per line)") do |f|
    $options[:file] = f
  end
  opts.on("-w", "--worldcat apikey", String, "WorldCat API key") do |w|
    $options[:worldcat] = w
  end
  opts.on("-l", "--worldcatlevel level", String, "WorldCat service level (default or full)") do |wl|
    $options[:worldcatlevel] = wl
  end
end.parse!

# Probably won't change, but one never knows.
# The ::ISBN::, ::LEVEL:: and ::KEY:: placeholders are filled in per query by
# the main loop, which refers to this variable by this exact name.
worldcatAPIURL = "http://www.worldcat.org/webservices/catalog/content/isbn/::ISBN::?servicelevel=::LEVEL::&wskey=::KEY::"

# Shape of an ISBN once punctuation has been stripped: nine digits plus a
# check character (ISBN-10), optionally preceded by a 978/979 prefix
# (ISBN-13).  This is a structural check only.
# TODO Proper ISBN validity check (check digit).
ISBN_FORMAT = /\A(?:97[89])?\d{9}[0-9X]\z/

isbns = []

if $options[:file].nil?
  if ARGV.empty?
    STDERR.puts "No ISBN specified"
    exit 1 # Nothing to do is a usage error, so do not report success.
  end
else
  begin
    # File.foreach closes the file itself, even if reading raises part-way.
    File.foreach($options[:file]) do |line|
      # Strip hyphens, spaces and other decoration BEFORE validating, so a
      # hyphenated ISBN is accepted and a line with stray digits is not.
      candidate = line.gsub(/[^0-9X]/, '')
      isbns << candidate if ISBN_FORMAT =~ candidate
    end
  rescue StandardError => e
    # Best effort: report the problem and carry on with any ISBN given on
    # the command line.
    STDERR.puts "ERROR '#{e}'"
  end
end

# An ISBN on the command line is used as given (minus punctuation); it is
# looked up after any ISBNs read from the file.
unless ARGV.empty?
  isbns << ARGV[0].to_s.gsub(/[^0-9X]/, '')
end

if $options[:dump]
  begin
    $writer = MARC::Writer.new($options[:dump])
  rescue StandardError => e
    STDERR.puts "ERROR Cannot write to #{$options[:dump]}: '#{e}'"
    exit 1
  end
end

if $options[:dumpxml]
  begin
    $xmlwriter = MARC::XMLWriter.new($options[:dumpxml])
  rescue StandardError => e
    STDERR.puts "ERROR Cannot write to #{$options[:dumpxml]}: '#{e}'"
    exit 1
  end
end

# Two lists of open Z39.50 servers:
#   http://targettest.indexdata.com/
#   http://staff.library.mun.ca/staff/toolbox/z3950hosts.htm
# Could also look at the list in Terry Reese's MarcEdit:
#   http://oregonstate.edu/~reeset/marcedit/html/downloads.html
#
# Each entry is [host, port, database name].

servers = [
  # Reorder these so that your preferred servers are first

  # North America
  ['theta.library.yorku.ca',         2200,  'unicorn'         ], # York
  ['sirsi.library.utoronto.ca',      2200,  'unicorn'         ], # U Toronto
  ['amicus.collectionscanada.gc.ca', 210,   'NL'              ], # Lib & Archives Canada
  ['z3950.loc.gov',                  7090,  'Voyager'         ], # Library of Congress
  ['aleph.mcgill.ca',                210,   'MUSE'            ], # McGill
  # ['ualapp.library.ualberta.ca',   2200,  'unicorn',        ], # U Alberta
  ['portage.library.ubc.ca',         7090,  'voyager'         ], # UBC
  ['catnyp.nypl.org',                210,   'INNOPAC'         ], # New York Pub Lib
  ['library.mit.edu',                9909,  'mit01pub'        ], # MIT
  ['prodorbis.library.yale.edu',     7090,  'voyager'         ], # Yale
  ['catalog.princeton.edu',          7090,  'voyager'         ], # Princeton
  ['ipac.lib.uchicago.edu',          210,   'usmarc'          ], # Chicago
  ['www.saclibrarycatalog.org',      210,   'INNOPAC'         ], # Sacramento Pub Lib
  ['library.bu.edu',                 210,   'INNOPAC'         ], # Boston U
  ['voyager.wrlc.org',               7090,  'voyager'         ], # Wash Res Lib Consor
  ['catalog.lib.jhu.edu',            210,   'horizon'         ], # Johns Hopkins
  ['z3950.lib.umich.edu',            210,   'miu01_pub'       ], # U Michigan
  ['catalog.library.cornell.edu',    7090,  'voyager'         ], # Cornell

  # UK and Ireland
  ['library.ucc.ie',                 210,   'INNOPAC'         ], # U College Cork
  ['library.ox.ac.uk',               210,   'MAIN*BIBMAST'    ], # Oxford
  ['z3950.nls.uk',                   7290,  'voyager'         ], # Scottish Nat Lib
  ['lib-15.lse.ac.uk',               7090,  'voyager'         ], # LSE
  ['libsys.lib.hull.ac.uk',          210,   'INNOPAC'         ], # Hull

  # Europe (non-English)
  ['sigma.nkp.cz',                   9909,  'NKC'             ], # Nat Lib Czech R
  ['lib.mpib-berlin.mpg.de',         2020,  'opac'            ], # Max Planck Inst
  ['ubsun02.biblio.etc.tu-bs.de',    2020,  'bac'             ], # Bibliotheken Berlins
  ['z3950.kb.dk',                    2100,  'KGL01'           ], # Kongelige Bibliothek
  ['roble.unizar.es',                210,   'INNOPAC'         ], # U Zaragoza
  ['www.helmet.fi',                  210,   'INNOPAC'         ], # Helsinki Lib
  ['carmin.sudoc.abes.fr',           210,   'ABES-Z39-PUBLIC' ], # France
  ['gofor.bibli.ens-cachan.fr',      21210, 'ADVANCE'         ], # French school
  ['gofor.bibli.ens-cachan.fr',      21210, 'MAIN*BIBMAST'    ], # French school
  ['isis.cilea.it',                  2100,  'usmarc'          ], # U Brescia
  ['z3950.bibsys.no',                2100,  'BIBSYS'          ], # Nat Lib Norway
  ['z3950.nb.no',                    2100,  'norbok'          ], # Nat Lib Norway
  ['alpha.bn.org.pl',                210,   'INNOPAC'         ], # Nat Lib Poland
  ['z3950.btj.se',                   210,   'BURK'            ], # Sweden

  # Australia and New Zealand
  ['catalogue.nla.gov.au',           7090,  'voyager'         ], # Nat Lib Australia
  ['nlnzcat.natlib.govt.nz',         7190,  'voyager'         ], # Nat Lib New Zealand

  # Asia
  ['library.cuhk.edu.hk',            210,   'INNOPAC'         ], # Chinese U HK
  ['linc.nus.edu.sg',                210,   'INNOPAC'         ], # Nat U Singapore
  ['nbinet.ncl.edu.tw',              210,   'INNOPAC'         ], # Nat Cent Lib Taiwan

  # Africa
  ['explore.up.ac.za',               210,   'INNOPAC'         ], # U Pretoria
]

# Given an ISBN and some Z39.50 server information, return MARCXML.
# Why MARCXML?  Because (now) the ruby-zoom module can't return a
# marc MARC object.  It can, however, return MARCXML, which marc can
# grok.
#
# isbn - String ISBN to search for (Bib-1 use attribute 7).
# host - String host name of the Z39.50 server.
# port - Integer port of the Z39.50 server.
# db   - String database name on that server.
#
# Returns the MARCXML String of the first hit, or nil when the server has
# no hit or the query raised.  Only errors whose message contains "failed"
# are reported on STDERR; any other error is treated as a quiet miss so the
# caller can simply move on to the next server.
def z3950query(isbn, host, port, db)
  ZOOM::Connection.open(host, port) do |conn|
    conn.database_name = db
    conn.preferred_record_syntax = 'MARC21'
    rset = conn.search("@attr 1=7 #{isbn}")
    first_hit = rset[0] # presumably nil when the result set is empty
    return first_hit.nil? ? nil : first_hit.xml
  end
rescue StandardError => e
  # StandardError, not Exception: Ctrl-C and exit must still stop the script.
  # Match against the message String; handing the exception object itself
  # to Regexp#match raises TypeError on Ruby 1.9 and later.
  STDERR.puts "\nERROR Z39.50 query: '#{e}'" if e.message =~ /failed/
  nil
end

# Given a string of MARCXML, write out the records therein, to
# STDOUT or disk, in the desired format.
# TODO Write out only the *first* one.  (Why?)
# write_record wraps its input in a StringIO; load it explicitly rather
# than relying on another library having pulled it in.
require 'stringio'

# write_record(marcxml)
#
# marcxml - String of MARCXML holding the record(s) to output.
#
# Each record is printed to STDOUT unless $options[:quiet] is set, appended
# to $writer when $options[:dump] is set, and appended to $xmlwriter when
# $options[:dumpxml] is set.  A failure to read or write is reported on
# STDERR and otherwise ignored, so one bad record does not stop the run.
def write_record(marcxml)
  reader = MARC::XMLReader.new(StringIO.new(marcxml))
  reader.each do |record|
    # Both the separating blank line and the record are console output,
    # so quiet mode has to suppress both of them.
    unless $options[:quiet]
      puts
      puts record
    end
    $writer.write(record) if $options[:dump]
    $xmlwriter.write(record) if $options[:dumpxml]
  end
rescue StandardError => e
  STDERR.puts "\nERROR Record not read: '#{e}'"
end

# Ask the WorldCat Search API for one ISBN.
#
# isbn         - String ISBN.
# url_template - String URL containing the ::ISBN::, ::KEY:: and ::LEVEL::
#                placeholders.
# key          - String WorldCat API key.
# level        - String service level.
#
# Returns the response body (a String of MARCXML) on success.  Returns nil,
# after reporting on STDERR, when WorldCat answers with a diagnostic
# message or a non-2xx status, or when the request or the XML parse raises;
# the caller then falls back to the Z39.50 servers.
def worldcat_query(isbn, url_template, key, level)
  # Block form of gsub: the replacement is inserted literally, so a
  # backslash in the key is not treated as a back-reference.
  url = url_template.gsub('::ISBN::') { isbn }
  url = url.gsub('::KEY::') { key }
  url = url.gsub('::LEVEL::') { level }

  response = Net::HTTP.get_response(URI.parse(url))
  body = response.body.to_s

  # Look for a diagnostic first: it is more informative than the bare HTTP
  # status line.
  diagnostic = REXML::Document.new(body).elements['//diagnostics/diagnostic/message']
  if diagnostic
    STDERR.puts "\nERROR from WorldCat: #{diagnostic.text}"
    return nil
  end

  unless response.is_a?(Net::HTTPSuccess)
    STDERR.puts "\nERROR from WorldCat: HTTP #{response.code} #{response.message}"
    return nil
  end

  body
rescue StandardError => e
  STDERR.puts "\nERROR from WorldCat: #{e}"
  nil
end

# Now the real business.  If we can check WorldCat, do so, because
# it's the best source.  If we can't, or if it fails, loop through all
# the servers listed above and ask about the ISBN(s).

begin
  isbns.each do |isbn|
    unless $options[:worldcat].empty?
      STDERR.print "\r#{isbn} WorldCat" + " "*25 unless $options[:quiet]
      worldcat_xml = worldcat_query(isbn, worldcatAPIURL,
                                    $options[:worldcat], $options[:worldcatlevel])
      if worldcat_xml
        write_record(worldcat_xml)
        next
      end
    end

    # Stop at the first server that returns a record for this ISBN.
    servers.each do |host, port, db|
      STDERR.print "\r#{isbn} #{host}" + " "*25 unless $options[:quiet]
      marcxml = z3950query(isbn, host, port, db)
      next if marcxml.nil?
      write_record(marcxml)
      break
    end
  end
ensure
  # Close the dump files even if the run is interrupted, so whatever was
  # written so far is flushed to disk.
  $writer.close if $options[:dump]
  $xmlwriter.close if $options[:dumpxml]
end