xquery version "1.0";
(:
: Module Name: xqOAI
: Module Version: 1.0
: Date: September, 2007
: Copyright: Michael J. Giarlo and Winona Salesky
: Proprietary XQuery Extensions Used: X-Hive/DB
: XQuery Specification: November 2005
: Updated: 4/27/2010, Winona Salesky
: Module Overview: OAI-PMH data provider for MODS and DC and EAD records
:)
(:~
: OAI-PMH data provider for MODS records within an eXist
:
: @author Michael J. Giarlo
: @author Winona Salesky
: @since April, 2010
: @version 1.2
:)
(: declare namespaces for each metadata schema we care about :)
declare namespace rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#";
declare namespace dc="http://purl.org/dc/elements/1.1/";
declare namespace mets = "http://www.loc.gov/METS/";
declare namespace mods = "http://www.loc.gov/mods/v3";
declare namespace xlink = "http://www.w3.org/1999/xlink";
declare namespace dcterms = "http://purl.org/dc/terms/";
declare namespace xslt="http://exist-db.org/xquery/transform";
declare namespace tei = "http://www.tei-c.org/ns/1.0";
declare namespace ead = "urn:isbn:1-931666-22-9";
declare namespace oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/";
declare namespace request="http://exist-db.org/xquery/request";
declare option exist:serialize "method=xml media-type=text/xml omit-xml-declaration=no indent=yes";
(: configurable variables :)
declare variable $base-url := 'http://cdi.uvm.edu/collections/oai.xql';
declare variable $repository-name := 'Center for Digital Initiatives, The University of Vermont ';
declare variable $admin-email := 'cdi@uvm.edu';
declare variable $hits-per-page := 500;
declare variable $earliest-datestamp := '2006-01-01';
declare variable $_docs := collection('/db/mets')/mets:mets;
declare variable $oai-domain := 'cdi.uvm.edu';
declare variable $id-scheme := 'oai';
(: params from OAI-PMH spec :)
declare variable $verb {request:request-parameter('verb', '')};
declare variable $identifier {request:request-parameter('identifier', '')};
declare variable $from {request:request-parameter('from', '')};
declare variable $until {request:request-parameter('until', '')};
declare variable $set {request:request-parameter('set', '')};
declare variable $start {request:request-parameter('resumptionToken', 1) cast as xs:integer};
declare variable $metadataPrefix {request:request-parameter('metadataPrefix', 'oai_dc')};
declare variable $resumptionToken {request:request-parameter('resumptionToken', '')};
(: set to true in argstring for extra debugging information :)
declare variable $verbose {request:request-parameter('verbose', '')};
(:~
: Print datetime of OAI response.
: - Uses substring and concat to get the date in the format OAI wants
:
: @return XML
:)
declare function local:oai-response-date() {
{
concat(substring(current-dateTime() cast as xs:string, 1, 19), 'Z')
}
};
(:~
: Build the OAI request element
:
: @return XML
:)
declare function local:oai-request() {
element request {
if ($verb != '') then attribute verb {$verb} else '',
if ($identifier != '') then attribute identifier {$identifier} else '',
if ($metadataPrefix != '') then attribute metadataPrefix {$metadataPrefix} else '',
if ($from != '') then attribute from {$from} else '',
if ($until != '') then attribute until {$until} else '',
if ($set != '') then attribute set {$set} else '',
if ($resumptionToken != '') then attribute resumptionToken {$resumptionToken} else '',
$base-url
}
};
(:~
: Get resumptionToken
: - this is a stub
: TO-DO: real resumptionTokens, using xquery update to store result sets in the db
:
: @return valid resumptionToken in appropriate format
:)
declare function local:get-cursor-token() {
if ($resumptionToken = '') then
1
else
$resumptionToken cast as xs:integer
};
declare function local:validateParams(){
let $parameters := request:get-parameter-names()
for $param in $parameters
return
if($param = 'verb' or $param = 'identifier' or $param = 'from' or $param = 'until' or $param = 'set' or $param = 'metadataPrefix' or $param = 'resumptionToken' or $param = 'start')
then ''
else Invalid OAI-PMH parameter : {$param}
};
declare function local:errorCheck(){
if (exists($verb) and $verb = 'GetRecord') then
(if(not(exists($identifier))) then identifier is a required argument else '',
if (exists($identifier) and $identifier = '') then identifier is a required argument else '',
if (exists($metadataPrefix) and $metadataPrefix != 'oai_dc') then only oai_dc is supported else '',
if (exists($metadataPrefix) and count($metadataPrefix) gt 1) then Only one metadataPrefix argument acceptable else '')
else if (exists($verb) and $verb = 'ListIdentifiers' or $verb = 'ListRecords') then
(if(exists($resumptionToken) and $resumptionToken != '' and not(matches($resumptionToken, '^\d+$'))) then bad resumptionToken else '',
if(exists($metadataPrefix) and $metadataPrefix != 'oai_dc') then only oai_dc is supported else '',
if (exists($metadataPrefix) and count($metadataPrefix) gt 1) then Only one metadataPrefix argument acceptable else '',
if(exists($from) and $from !='' or exists($until) and $until !='') then
if(local:validate-dates() = 'true') then
if(exists($from) and $from lt $earliest-datestamp) then Earliest date available is {$earliest-datestamp} else ''
else From/until arguments are not valid
else ''
)
else ''
};
(:Begin Error checking, accept only valid paremters:)
declare function local:testParameters(){
let $error :=
if(local:validateParams() != '') then local:validateParams()
else local:errorCheck()
return $error
};
(:~
: Validate from and until params
: - dates are valid only if they match date-pattern and are in same format
: - note that date-pattern also matches an empty string
:
: @return boolean
:)
declare function local:validate-dates() {
let $date-pattern := '^(\d{4}-\d{2}-\d{2}){0,1}$'
let $from-len := string-length($from)
let $until-len := string-length($until)
return
if ($from-len > 0 and $until-len > 0 and $from-len != $until-len) then
'false'
else
if(matches($from, $date-pattern) and matches($until, $date-pattern)) then 'true' else 'false'
};
(:
: Modifies dates extracted from METS records to be OAI compliant
:)
declare function local:modDate(){
let $date := string(mets:metsHdr/@LASTMODDATE)
let $shortDate := substring-before($date,'T')
return if(exists($shortDate) and $shortDate != '') then $shortDate else '2006-01-01'
};
(:
: Build xpath for selecting records based on date range or sets
:)
declare function local:buildPath(){
if(exists($from) and $from != '' and exists($until) and $until != '') then
if(exists($set) and $set !='') then
$_docs[local:modDate() gt $from and local:modDate() lt $until and .//mods:titleInfo[@ID = $set]] |
$_docs[local:modDate() gt $from and local:modDate() lt $until and .//dcterms:isPartOf[@id = $set]]
else
$_docs[local:modDate() gt $from and local:modDate() lt $until]
else if(exists($from) and $from != '' and exists($until) and $until = '') then
if(exists($set) and $set !='') then
$_docs[local:modDate() gt $from and .//mods:titleInfo[@ID = $set]] |
$_docs[local:modDate() gt $from and .//dcterms:isPartOf[@id = $set]]
else
$_docs[local:modDate() gt $from]
else if(exists($from) and $from != '' and not(exists($until))) then
if(exists($set) and $set !='') then
$_docs[local:modDate() gt $from and .//mods:titleInfo[@ID = $set]] |
$_docs[local:modDate() gt $from and .//dcterms:isPartOf[@id = $set]]
else
$_docs[local:modDate() gt $from]
else if(exists($until) and $until != '') then
if(exists($set) and $set !='') then
$_docs[local:modDate() lt $until and .//mods:titleInfo[@ID = $set]] |
$_docs[local:modDate() lt $until and .//dcterms:isPartOf[@id = $set]]
else
$_docs[local:modDate() lt $until]
else
if(exists($set) and $set !='') then
$_docs[.//mods:titleInfo[@ID = $set]] |
$_docs[.//dcterms:isPartOf[@id = $set]]
else
$_docs
};
(:~
: Branch processing based on client-supplied "verb" param
:
: @param $_hits a sequence of XML docs
: @param $_end an integer reflecting the last item in the current page of results
: @param $_count an integer reflecting total hits in the result set
: @return XML if errors, nothing if not
:)
declare function local:oai-response() {
if (exists(local:testParameters()) and local:testParameters() !='') then local:testParameters()
else
if ($verb = 'ListSets') then local:oai-list-sets()
else if ($verb = 'ListRecords') then local:oai-list-records()
else if ($verb = 'ListIdentifiers') then local:oai-list-identifiers()
else if ($verb = 'GetRecord') then local:oai-get-record()
else if ($verb = 'ListMetadataFormats') then local:oai-list-metadata-formats()
else if ($verb = 'Identify') then local:oai-identify()
else Invalid OAI-PMH verb : { $verb }
};
(:~
: Print a metadata record
: - the mods/ead brancher is inelegant -- more abstraction may be helpful here
: TO-DO: find a way to make this easier to extend, e.g., for new metadata formats
:
: @param $_record an XML record
: @return XML
:)
declare function local:oai-metadata($record) {
{local:buildDC($record)}
};
(:~
: Extract OAI identifier from MODS or EAD
: - currently assumes only mods and ead are relevant
: TO-DO: get rid of hard-coding
:
: @param $_record an XML record
: @return a string representing an OAI identifier
:)
declare function local:get-identifier($record) {
let $id := string($record/@OBJID)
let $oaiID := concat('oai:cdi.uvm.edu:',$id)
return $oaiID
};
(:~
: Print the resumptionToken
: TO-DO: fix this up when resumptionToken support is built-in
:
: @param $_end integer, index of last item in current page of results
: @param $_count integer, total number of hits in result set
: @return XML or nothing
:)
declare function local:print-token($_end, $_count) {
if ($_end + 1 < $_count) then
let $token := $_end + 1
return
{ $token }
else ''
};
(:~
: OAI GetRecord verb
:
: @param $_hits a sequence of XML docs
: @return XML corresponding to a single OAI record
:)
declare function local:oai-get-record() {
let $docID := substring-after($identifier,'oai:cdi.uvm.edu:')
let $_hits := $_docs[@OBJID = $docID]
let $record := $_hits
let $date := substring-before($record/mets:metsHdr/@LASTMODDATE,'T')
let $oaiDate := concat(string($date),'Z')
return
if($_hits !='') then
{
(
{$identifier}
{$oaiDate}
{
let $set := $record//dcterms:isPartOf | $record//mods:relatedItem[@type='host']
for $oaiSet in $set
let $setID := $oaiSet/mods:titleInfo/@ID | $oaiSet/@id
let $idString := string($setID)
return
{$idString}
}
,
local:oai-metadata($record) )
}
{
if ($verbose = 'true') then
{ $record }
else ''
}
else
No Records matched your criteria.
};
(:~
: OAI Identify verb
:
: @return XML describing the OAI provider
:)
declare function local:oai-identify() {
{ $repository-name }
{ $base-url }
2.0
{ $admin-email }
{ $earliest-datestamp }
transient
YYYY-MM-DD
deflate
};
(:~
: OAI ListIdentifiers verb
:
: @param $_hits a sequence of XML docs
: @param $_end integer, index of last item in page of results
: @param $_count integer, total number of hits in result set
: @return XML corresponding to a list of OAI identifier records
:)
declare function local:oai-list-identifiers() {
let $_hits := local:buildPath()
let $_count := count($_hits)
let $max := $hits-per-page
let $_end := if ($start + $max - 1 < $_count) then
$start + $max - 1
else
$_count
return
if($_count eq 0) then No Records matched your criteria.
else
{
for $i in $start to $_end
let $record := $_hits[$i]
let $date := substring-before($record/mets:metsHdr/@LASTMODDATE,'T')
let $oaiDate := concat(string($date),'Z')
let $status := $record/mets:metsHdr/@RECORDSTATUS
return
(
{local:get-identifier($record)}
{$oaiDate}
{
let $set := $record//dcterms:isPartOf | $record//mods:relatedItem[@type='host']
for $oaiSet in $set
let $setID := $oaiSet/mods:titleInfo/@ID | $oaiSet/@id
let $idString := string($setID)
return
{$idString}
}
)
}
{ local:print-token($_end, $_count)}
};
(:~
: OAI ListMetadataFormats verb
:
: @return XML corresponding to a list of supported metadata formats
:)
declare function local:oai-list-metadata-formats() {
oai_dc
http://www.openarchives.org/OAI/2.0/oai_dc.xsd
http://www.openarchives.org/OAI/2.0/oai_dc/
};
(:~
: OAI ListRecords verb
:
: @param $_hits a sequence of XML docs
: @param $_end integer, index of last item in page of results
: @param $_count integer, total number of hits in result set
: @return XML corresponding to a list of full OAI records
:)
declare function local:oai-list-records() {
let $_hits := local:buildPath()
let $_count := count($_hits)
let $max := $hits-per-page
let $_end := if ($start + $max - 1 < $_count) then
$start + $max - 1
else
$_count
return
if($_count eq 0) then No Records matched your criteria.
else
{
for $i in $start to $_end
let $record := $_hits[$i]
let $date := substring-before($record/mets:metsHdr/@LASTMODDATE,'T')
let $oaiDate := concat(string($date),'Z')
let $status := $record/mets:metsHdr/@RECORDSTATUS
return
({
{local:get-identifier($record)}
{$oaiDate}
{
let $set := $record//dcterms:isPartOf | $record//mods:relatedItem[@type='host']
for $oaiSet in $set
let $setID := $oaiSet/mods:titleInfo/@ID | $oaiSet/@id
let $idString := string($setID)
return
{$idString}
}
,
local:oai-metadata($record)
}
)
}
{local:print-token($_end, $_count) }
};
(:~
: OAI ListSets verb
:
: @param $_hits a sequence of XML docs
: @return XML corresponding to a list of OAI set records
:)
declare function local:oai-list-sets() {
{
for $record in $_docs[@TYPE = 'collection']
let $collectionID := $record/@OBJID
let $title := $record/@LABEL
return
{string($collectionID)}
{string($title)}
}
};
(:Creates DC title tags:)
declare function local:dcTitle($record) {
let $title := $record//dc:title | $record//archdesc/did/unittitle | $record//mods:titleInfo/descendant::*
for $dcTitle in $title
return
{string($dcTitle)}
};
(:Creates the DC Creator/Contributor tags:)
declare function local:dcCreator($record) {
let $creator := $record//dc:creator | $record//dc:contributor | $record//archdesc/did/origination[@label = 'Creator']/descendant::* | $record//mods:name/mods:namePart
for $dcCreator in $creator
return
{string($dcCreator)}
};
(:Creates DC subject tags:)
declare function local:dcSubject($record) {
let $subject := $record//dc:subject | $record//archdesc/controlaccess/subject/descendant::* | $record//mods:subject/descendant::*
for $dcSubject in $subject
return
{string($dcSubject)}
};
(:Creates DC description tags:)
declare function local:dcDescription($record) {
let $description := $record//dc:description | $record//archdesc//abstract/descendant-or-self::* | $record//mods:abstract/descendant-or-self::* | $record//mods:tableOfContents
for $dcDescription in $description
return
{string($dcDescription)}
};
(:Creates DC date tags -- i did not include the dc:date, becuase we use that for creation of digital object:)
declare function local:dcDate($record) {
let $date := $record//dcterms:temporal | $record//publicationstmt//date | $record//archdesc/unitdate | $record//mods:date | $record//mods:dateCreated
for $dcDate in $date
return
{string($dcDate)}
};
declare function local:rights($record) {
let $rights := $record//dc:rights | $record//mods:accessCondition
for $dcRights in $rights
return
{string($dcRights)}
};
(:Creates DC record:)
declare function local:buildDC($record){
{
local:dcTitle($record),
local:dcCreator($record),
local:dcSubject($record),
local:dcDescription($record),
local:dcDate($record),
local:rights($record)
}
};
(: OAI-PMH wrapper for request and response elements :)
{
local:oai-response-date(),
local:oai-request(),
local:oai-response()
}