/*
This is part of datacrate, a node.js library for working with
DataCrates. Copyright (C) 2018 University of Technology Sydney

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
const fs = require('fs-extra');
const _ = require('lodash');
// `path` is needed by the two path.join() calls below; without this
// require the module fails at load time with a ReferenceError.
const path = require('path');

// Directory "../defaults", resolved relative to this file's directory.
const DEFAULTS = path.join(__dirname, "../defaults");

// The context.json file inside DEFAULTS; datapub2catalog reads it and
// uses its contents as the '@context' of the catalogue.
const CONTEXT = path.join(DEFAULTS, 'context.json');
/* datapub2catalog(options)

   Convert metadata from a redbox data publication and
   its data record to a JSON-LD datacrate catalogue.

   options = {
     id: redbox oid,
     datapub: redbox data publication record,
     organisation: {
       @id: organisation identifier,
       name: organisation name
     },
     owner: email of the data pub's record,
     approver: email of the librarian who approved publication,
     dates: {
       dateCreated: Date object or string,
       datePublished: Date object or string
     }
   }

   If there are no dates, or either dateCreated or datePublished is
   undefined, now() is used as the default.

   The owner and approver are used to build two Actions against
   the object - creation and publication.

   Returns a promise for { '@context': ..., '@graph': [ ... ] }.

   Throws an Error if the organisation has no id or no name.

   TODO: add contentSize, encodingFormat and fileFormat from
   siegfried to the dataLocations array

   Remember to keep the connection to the data record
*/
async function datapub2catalog(options) {
  const id = options['id'];
  const datapub = options['datapub'];
  // Default to an empty object so that a missing organisation is reported
  // by the explicit check below rather than as a TypeError.
  const org = options['organisation'] || {};
  const owner = options['owner'];
  const approver = options['approver'];
  const dates = dates_default(options['dates']);

  // NOTE(review): the documented key is '@id'; a plain 'id' is accepted as
  // a fallback -- confirm which one callers actually pass.
  const organisation = {
    '@id': org['@id'] || org['id'],
    '@type': 'Organization',
    'name': org['name']
  };

  // Validate before touching the filesystem so bad input fails fast.
  if (!(organisation['@id'] && organisation['name'])) {
    throw new Error("Organization must have an id and a name");
  }

  const context = await fs.readJson(CONTEXT);

  // assuming that all creators are affiliated to this one organisation
  const creators = make_creators(datapub, organisation);
  const files = make_files(datapub);
  const dataset = make_dataset(id, datapub, organisation, dates, files);

  // make_history also returns the people (owner / approver) it could not
  // find among the creators; they have to be added to the graph as well.
  const [ history, more_people ] = make_history(
    dataset, creators, owner, approver, dates
  );
  const people = creators.concat(more_people);

  // NOTE(review): graph membership and order reconstructed from the values
  // computed above -- confirm against the intended catalogue layout.
  const graph = _.flatten([
    dataset,
    organisation,
    people,
    files,
    history
  ]);

  return {
    '@context': context,
    // drop any empty (falsy) entries
    '@graph': graph.filter((e) => e)
  };
}
// dates_default(values)
//
// Normalise the optional dates option into an object which always has
// both 'dateCreated' and 'datePublished'.
//
// - values may be undefined/null, or an object with either key
// - a value whose typeof is 'object' (e.g. a Date) is converted with
//   toISOString(); any other truthy value is passed through unchanged
// - a missing or falsy value defaults to the current time as an ISO
//   string; both defaults share the same timestamp
function dates_default(values) {
  const now = (new Date()).toISOString();
  const dates = {};
  for (const key of [ 'dateCreated', 'datePublished' ]) {
    const value = values ? values[key] : undefined;
    if (!value) {
      dates[key] = now;
    } else if (typeof value === 'object') {
      dates[key] = value.toISOString();
    } else {
      dates[key] = value;
    }
  }
  return dates;
}
// link_id(item)
//
// Returns a new object holding only the '@id' of item, for use as a
// reference to that item from another item.
function link_id(item) {
  return { '@id': item['@id'] };
}
// make_dataset(id, datapub, organisation, dates, files)
//
// Builds the Dataset item for the catalogue.
//
// - id: used as the dataset's '@id'
// - datapub: the data publication; 'title' and 'description' are read
// - organisation: item with an '@id', referenced as the 'Publisher'
// - dates: object with 'dateCreated' and 'datePublished'
// - files: array of items with an '@id', referenced from 'hasPart'
//
// NOTE(review): '@type' and 'hasPart' were added so the dataset is typed
// like the other items and so the files argument is actually used --
// confirm the property names against the DataCrate context.
//
// TODO: isBasedOn the data record
function make_dataset(id, datapub, organisation, dates, files) {
  return {
    '@id': id,
    '@type': 'Dataset',
    'name': datapub['title'],
    'description': datapub['description'],
    'dateCreated': dates['dateCreated'],
    'datePublished': dates['datePublished'],
    'Publisher': link_id(organisation),
    'hasPart': (files || []).map((f) => link_id(f))
  };
}
// find_by_email(people, email)
//
// Returns the first item in people whose 'email' equals email, or
// undefined if there is no such item.
function find_by_email(people, email) {
  return people.find((person) => person['email'] === email);
}
// make_history(dataset, people, owner, approver, dates)
//
// Builds the two Actions recorded against the dataset: a CreateAction
// whose agent is the owner, and an UpdateAction ('Publish') whose agent
// is the approver.
//
// - dataset: item with an '@id'
// - people: array of items already in the catalogue, matched by 'email'
// - owner, approver: email addresses
// - dates: object with 'dateCreated' and 'datePublished'
//
// Returns [ history, new_people ]: the two Action items, and the items
// created here for an owner / approver not found in people, so that the
// caller can add them to the graph.
function make_history(dataset, people, owner, approver, dates) {
  const new_people = [];

  // Look the agent up by email; if it is not known yet, create a minimal
  // item for it and remember it so the agent reference does not dangle.
  // new_people is searched too, so that an owner who is also the approver
  // is only created once.
  const agent_for = (email) => {
    let item = find_by_email(people.concat(new_people), email);
    if (!item) {
      item = {
        '@id': email,
        '@type': 'Person',
        'email': email
      };
      new_people.push(item);
    }
    return item;
  };

  const owner_item = agent_for(owner);
  const approver_item = agent_for(approver);

  // TODO: descriptions for these and maybe a link to the
  // data record for the CreateAction
  const history = [
    {
      '@id': dataset['@id'] + '_history1',
      '@type': 'CreateAction',
      'name': 'Create',
      'endTime': dates['dateCreated'],
      'object': link_id(dataset),
      'agent': link_id(owner_item)
    },
    {
      '@id': dataset['@id'] + '_history2',
      '@type': 'UpdateAction',
      'name': 'Publish',
      'endTime': dates['datePublished'],
      'object': link_id(dataset),
      'agent': link_id(approver_item)
    }
  ];

  return [ history, new_people ];
}
// make_files(datapub)
//
// Crosswalk dataLocations to an array of File items.
//
// The dataLocations are expected to have contentSize,
// encodingFormat and fileFormat already set.
//
// This assumes that the directory is flat, ie all of the files
// are in the root: the location's name is used as the File's '@id'.
//
// Only locations whose type is 'attachment' produce a File. Returns an
// empty array when the data publication has no dataLocations.
function make_files(datapub) {
  const locations = datapub['dataLocations'];
  if (!locations) {
    return [];
  }
  // todo: URLs and physical locations
  return locations
    .filter((dl) => dl['type'] === 'attachment')
    .map((dl) => ({
      '@id': dl['name'],
      'name': dl['name'],
      '@type': 'File',
      'contentSize': dl['contentSize'],
      'encodingFormat': dl['encodingFormat'],
      'fileFormat': dl['fileFormat']
    }));
}
// make_creators(datapub, organisation)
//
// Returns the array of Person items built from the creators
// field of the data publication.
//
// - a creator is identified by its orcid, else its email, else its
//   text_full_name; a creator with none of these is left out
// - every creator is given organisation as its affiliation
//
// Throws an Error if the data publication has no creators. An empty
// creators array is rejected too, since a truthiness check alone would
// let it through.
//
// TODO: affiliations
function make_creators(datapub, organisation) {
  const creators = datapub['creators'];
  if (!creators || creators.length === 0) {
    throw new Error("A DataCrate has to have at least one creator");
  }
  const people = [];
  for (const p of creators) {
    const id = p['orcid'] || p['email'] || p['text_full_name'];
    if (!id) {
      // warn for unidentifiable creators
      continue;
    }
    // NOTE(review): '@id' and '@type' reconstructed -- confirm against
    // the intended Person shape.
    people.push({
      '@id': id,
      '@type': 'Person',
      'identifier': id,
      'name': p['text_full_name'],
      'familyName': p['family_name'],
      'givenName': p['given_name'],
      'email': p['email'],
      'affiliation': link_id(organisation)
    });
  }
  return people;
}
// make_funding(datapub)
//
// Placeholder: not implemented yet, always returns undefined.
// NOTE(review): presumably meant to crosswalk the data publication's
// funding information -- confirm before implementing.
function make_funding(datapub) {
  return undefined;
}
// make_about(datapub)
//
// Placeholder: not implemented yet, always returns undefined.
// NOTE(review): presumably meant to crosswalk the data publication's
// subject matter -- confirm before implementing.
function make_about(datapub) {
  return undefined;
}
// make_spatial(datapub)
//
// Placeholder: not implemented yet, always returns undefined.
// NOTE(review): presumably meant to crosswalk the data publication's
// spatial coverage -- confirm before implementing.
function make_spatial(datapub) {
  return undefined;
}
// make_temporal(datapub)
//
// Placeholder: not implemented yet, always returns undefined.
// NOTE(review): presumably meant to crosswalk the data publication's
// temporal coverage -- confirm before implementing.
function make_temporal(datapub) {
  return undefined;
}
// make_licence(datapub)
//
// Placeholder: not implemented yet, always returns undefined.
// NOTE(review): presumably meant to crosswalk the data publication's
// licence -- confirm before implementing.
function make_licence(datapub) {
  return undefined;
}
// make_citation(datapub)
//
// Placeholder: not implemented yet, always returns undefined.
// NOTE(review): presumably meant to crosswalk the data publication's
// citation -- confirm before implementing.
function make_citation(datapub) {
  return undefined;
}
// make_related(datapub)
//
// Placeholder: not implemented yet, always returns undefined.
// NOTE(review): presumably meant to crosswalk the data publication's
// related works -- confirm before implementing.
function make_related(datapub) {
  return undefined;
}
module.exports = {
'datapub2catalog': datapub2catalog