Skip to content
Snippets Groups Projects
Commit 3dbf788f authored by Mike Lynch's avatar Mike Lynch
Browse files

Fleshing out the datapub conversion stuff -- passes the old tests

parent 79bc8e3e
No related merge requests found
......@@ -2,4 +2,9 @@
Node.js utilities for building and working with DataCrates.
https://github.com/UTS-eResearch/datacrate
\ No newline at end of file
(Standard here: https://github.com/UTS-eResearch/datacrate)
This started as a library for crosswalking ReDBox 2 data publications
into CATALOG.json files as part of the publication workflow. It should
turn into a general-purpose toolkit which makes it easy to build
things like Provisioner bots and publication pipelines.
\ No newline at end of file
......@@ -26,6 +26,48 @@ const path = require('path');
const DEFAULTS = path.join(__dirname, "../defaults");
const CONTEXT = path.join(DEFAULTS, 'context.json');
// Suffixes of the datapub fields which hold related works: each one is
// looked up on the data publication as 'related_' + suffix.
const RELATED = [ 'publications', 'websites', 'metadata', 'data' ];

// The schema.org @type given to the items built from each related_* field.
// Type names are case-sensitive: the schema.org type is 'Dataset', not
// 'DataSet'.
const RELATED_TYPES = {
  'publications': 'ScholarlyArticle',
  'websites': 'WebSite',
  'metadata': 'Dataset', // schema.org hasn't got a dedicated type
  'data': 'Dataset'
};

// Datapub fields which hold lists of funders.
const FUNDING_FIELDS = [ 'foaf:fundedBy_foaf:Agent', 'foaf:fundedBy_vivo:Grant' ];

// Datapub fields which hold lists of subject codes.
const SUBJECT_FIELDS = [ 'dc:subject_anzsrc:for', 'dc:subject_anzsrc:seo' ];

// Prefixes used to build a blank-node @id for each subject, keyed by the
// subject field it came from.
// these will do for now
const SUBJECT_IRI_PREFIX = {
  'dc:subject_anzsrc:for': '_:FOR/',
  'dc:subject_anzsrc:seo': '_:SEO/'
};
// dataset -> about -> subjects
// dataset -> spatialCoverage -> place
// dataset -> temporalCoverage -> time
// dataset -> funder -> organisation
// dataset -> license -> licence (CreativeWork or URL)
// dataset -> citation -> related publications, websites, articles, datasets

// The keys here are schema.org properties on the root dataset
// and the values are functions which take the data publication
// as an argument and return null or an array of property values.
// The keys are written out verbatim as property names, so they must use
// the exact schema.org spelling ('temporalCoverage', 'license').
const DATASET_PROPERTIES = {
  'about': make_subjects,
  'spatialCoverage': make_spatial,
  'temporalCoverage': make_temporal,
  'funder': make_funders,
  'license': make_licence,
  'citation': make_related_works
};
/* datapub2catalog(options)
Convert metadata from a redbox data publication and
......@@ -88,7 +130,7 @@ async function datapub2catalog(options) {
throw Error("Organization must have an id and a name");
}
// assuming that all creators are affiliated to this one organisation
// assumes that all creators are affiliated to this one organisation
var people = make_creators(datapub, organisation);
......@@ -104,19 +146,29 @@ async function datapub2catalog(options) {
const graph = _.flatten([
dataset,
organisation,
make_funding(datapub),
make_about(datapub),
make_spatial(datapub),
make_temporal(datapub),
organisation,
people,
make_licence(datapub),
make_citation(datapub),
files,
make_related(datapub),
history
]);
// These are functions which try to get various DataCrate
// properties from the datapub: if they return anything,
// the results get pushed onto the graph and added to
// the dataset.
//
// A maker may return nothing, a flat array of items, or an array of
// per-field arrays which can contain gaps, so the result is flattened and
// stripped of falsy entries before it is used. A property is only set on
// the dataset when at least one item survives.
_.forEach(DATASET_PROPERTIES, ( make_items, property ) => {
  const items = make_items(datapub);
  if( items ) {
    const eitems = _.flattenDeep(items).filter((i) => i);
    if( eitems.length > 0 ) {
      // the dataset only refers to the items by @id: the items
      // themselves live at the top level of the graph
      dataset[property] = eitems.map((i) => {
        return { '@id': i['@id'] };
      });
      // graph has already been flattened, so add the items one by one
      // rather than as a nested array
      graph.push(...eitems);
    }
  }
});
return {
'@context': context,
......@@ -130,7 +182,7 @@ function dates_default(values) {
const now = (new Date()).toISOString();
[ 'dateCreated', 'datePublished' ].map((d) => {
if( values && values[d] ) {
dates[d] = ( typeof(values[d]) == 'object' ) ? values[d].toISOString() : values[d];
dates[d] = ( typeof(values[d]) === 'object' ) ? values[d].toISOString() : values[d];
} else {
dates[d] = now;
}
......@@ -281,36 +333,117 @@ function make_creators(datapub, organisation) {
return nc;
}
// make related-publication etc links

// For each entry in RELATED this reads the datapub field 'related_<entry>'
// and turns every record in it into a linked-data item typed from
// RELATED_TYPES.
//
// This returns an array of arrays of items (one inner array per entry in
// RELATED, in the same order), which will be flattened when the graph is
// populated. A field which is missing from the datapub, or is not an
// array, contributes an empty inner array.

// note that these are very simple - just URI, title and description

// FIXME this assumes that there's a URI which can be used as the
// @id

function make_related_works(datapub) {
  return RELATED.map((thing) => {
    const related = datapub['related_' + thing];
    if( !Array.isArray(related) ) {
      return [];
    }
    return related.map((item) => {
      const ld_item = {
        '@id': item['related_url'],
        '@type': RELATED_TYPES[thing],
        'identifier': item['related_url'],
        'name': item['related_title']
      };
      // description is optional
      if( item['related_notes'] ) {
        ld_item['description'] = item['related_notes'];
      }
      return ld_item;
    });
  });
}
// Both grants (as projects) and funding bodies are represented
// in DataCrates as Organizations with the relationship 'funded'

// Returns an array of arrays of Organization items: one inner array per
// entry in FUNDING_FIELDS, in the same order. A field which is missing
// from the datapub, or is not an array, contributes an empty inner array.

// TODO - can we do chaining as per the spec?

// TODO - bring the project in from the RDMP?

function make_funders(datapub) {
  return FUNDING_FIELDS.map((field) => {
    const funders = datapub[field];
    if( !Array.isArray(funders) ) {
      return [];
    }
    return funders.map((forg) => {
      return {
        '@id': forg['dc_identifier'],
        'identifier': forg['dc_identifier'],
        '@type': 'Organization',
        'name': forg['dc_title']
      };
    });
  });
}

// Earlier placeholder for funding items: always returns undefined.
function make_funding(datapub) {
  return undefined;
}
// Earlier placeholder for subject items: always returns undefined.
function make_about(datapub) {
  return undefined;
}

// Builds the subject items for the dataset's 'about' property.
//
// Returns an array of arrays of items: one inner array per entry in
// SUBJECT_FIELDS, in the same order. Each item's @id is the field's prefix
// from SUBJECT_IRI_PREFIX followed by the subject value. A field which is
// missing from the datapub, or is not an array, contributes an empty inner
// array (the same convention as make_funders) rather than an undefined gap.
//
// NOTE(review): this assumes each subject value is a string which can be
// appended to the prefix - confirm against a real record.
function make_subjects(datapub) {
  return SUBJECT_FIELDS.map((field) => {
    const subjects = datapub[field];
    if( !Array.isArray(subjects) ) {
      return [];
    }
    return subjects.map((subj) => {
      const id = SUBJECT_IRI_PREFIX[field] + subj;
      return {
        '@id': id,
        'identifier': id,
        'name': subj
      };
    });
  });
}
// spatialCoverage

// Builds one item per entry in datapub['geolocations']. Items get
// sequential blank-node ids ('_:spatial/1', '_:spatial/2', ...) and carry
// the geolocation value as their description. Returns an empty array when
// the datapub has no geolocations.

function make_spatial(datapub) {
  let items = [];
  if( Array.isArray(datapub['geolocations']) ) {
    items = datapub['geolocations'].map((gl, index) => {
      // ids are numbered from 1
      const id = '_:spatial/' + String(index + 1);
      return {
        '@id': id,
        'identifier': id,
        'description': gl
      };
    });
  }
  if( datapub['geospatial'] ) {
    // FIXME - need to see what this looks like in a real record
  }
  return items;
}
// temporalCoverage

// Returns an array holding a single item with the blank-node id
// '_:temporal/1' when the datapub has a startDate or an endDate, and an
// empty array otherwise.
//
// TODO - the item does not yet carry the dates themselves

function make_temporal(datapub) {
  const items = [];
  if( datapub['startDate'] || datapub['endDate'] ) {
    const id = '_:temporal/1';
    items.push({
      '@id': id,
      'identifier': id
    });
  }
  return items;
}
// Placeholder for building licence items from the datapub: not
// implemented yet, so it ignores its argument and always returns undefined.
function make_licence(datapub) {
return undefined;
}
// Placeholder for building citation items from the datapub: not
// implemented, so it ignores its argument and always returns undefined.
// NOTE(review): make_related_works is what DATASET_PROPERTIES maps to
// 'citation' - check whether this stub is still needed.
function make_citation(datapub) {
return undefined;
}
// Placeholder for building related-work items from the datapub: not
// implemented, so it ignores its argument and always returns undefined.
// NOTE(review): looks superseded by make_related_works - confirm before
// removing.
function make_related(datapub) {
return undefined;
}
module.exports = {
'datapub2catalog': datapub2catalog
};
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment