/* This is part of datacrate, a node.js library for working with DataCrates. Copyright (C) 2018 University of Technology Sydney This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ const CONTEXT = './defaults/context.json'; const fs = require('fs-extra'); const _ = require('lodash'); /* datapub2catalog(options) Convert metadata from a redbox data publication and its data record to a JSON-LD datacrate catalogue options = { id: redbox oid, datapub: redbox data publication record, org: { @id: organisation identifier, name: organisation name }, owner: email of the data pub's record, approver: email of the librarian who approved publication } The owner and approver are used to build two Actions against the object - creation and publication. TODO; add contentSize, encodingFormat and fileFormat from siegfried to the dataLocations array Remember to keep the connection to the data record */ async function datapub2catalog(options) { const id = options['id']; const datapub = options['datapub']; const org = options['organisation']; const owner = options['owner']; const approver = options['approver']; const context = await fs.readJson(CONTEXT); const catalog = { '@context': context }; const organisation = { '@id': org['@id'], '@type': 'Organization', 'identifier': org['@id'], 'name': org['name'] }; // assuming that all creators are affiliated to this one organisation var people = make_creators(datapub, organisation); const files = make_files(datapub); const dataset = make_dataset(id, datapub, organisation, files); const [ history, more_people ] = make_history(dataset, people, owner, approver); if( more_people.length > 0 ) { people = people.concat(more_people); } const graph = _.flatten([ dataset, organisation, make_funding(datapub), make_about(datapub), make_spatial(datapub), make_temporal(datapub), people, make_licence(datapub), make_citation(datapub), files, make_related(datapub), history ]); return { '@context': context, '@graph': graph.filter((e) => e) }; } function link_id(item) { return { '@id': item['@id'] }; } // TODO: isBasedOn the data record function make_dataset(id, datapub, organisation, files) { return { '@id': id, 'type': 'Dataset', 'name': datapub['title'], 'description': datapub['description'], 'Publisher': link_id(organisation), 'hasPart': files.map(link_id), } } function find_by_email(people, email) { const match = people.filter((x) => x['email'] == email); if( match.length ) { return match[0]; } else { return undefined; } } function make_history(dataset, people, owner, approver) { var owner_item = find_by_email(people, owner); var approver_item = find_by_email(people, approver); const new_people = [] if( !owner_item ) { owner_item = { '@id': owner, 'email': owner }; new_people.push(owner_item) } if( !approver_item ) { approver_item = { '@id': approver, 'email': approver }; new_people.push(approver_item) } // TODO: descriptions for these and maybe a link to the // data record for the CreateAction const history = [ { '@id': dataset['@id'] + '_h0', '@type': 'CreateAction', 'name': 'Create', 'object': link_id(dataset), 'agent': link_id(owner_item), 'startTime': 'yyyy-mm-dd' }, { '@id': dataset['@id'] + '_h1', '@type': 'UpdateAction', 'name': 'Publish', 'object': link_id(dataset), 'agent': link_id(approver_item), 'startTime': 'yyyy-mm-dd' } ]; return [ history, new_people ]; } // files(datapub) // crosswalk dataLocations to an array of File items. // The dataLocations are expected to have contentSize, // encodingFormat and fileFormat already set function make_files(datapub) { if( !datapub['dataLocations'] ) { return []; } const files = datapub['dataLocations'].map((dl) => { if( dl['type'] == 'attachment' ) { return { '@id': dl['name'], 'name': dl['name'], '@type': 'File', 'contentSize': dl['contentSize'], 'encodingFormat': dl['encodingFormat'], 'fileFormat': dl['fileFormat'] } } else { // todo: URLs and physical locations return undefined; } }); return files.filter((x) => x); } // creators(datapub) // // returns the array of Person items from the creators // field of the data publication // TODO: affiliations function make_creators(datapub, organisation) { if( !datapub['creators'] ) { throw Error("A DataCrate has to have at least one creator"); } const creators = datapub['creators'].map((p) => { const id = p['orcid'] || p['email'] || p['text_full_name']; if( id ) { return { '@id': id, 'identifier': id, 'name': p['text_full_name'], 'familyName': p['family_name'], 'givenName': p['given_name'], 'email': p['email'], 'affiliation': link_id(organisation) } } else { // warn for unidentifiable creators return undefined; } }) console.log("make_creators " + typeof(creators)); const nc = creators.filter((x) => x) console.log("make_creators " + typeof(nc)); return nc; } function make_funding(datapub) { return undefined; } function make_about(datapub) { return undefined; } function make_spatial(datapub) { return undefined; } function make_temporal(datapub) { return undefined; } function make_licence(datapub) { return undefined; } function make_citation(datapub) { return undefined; } function make_related(datapub) { return undefined; } module.exports = { 'datapub2catalog': datapub2catalog };