Skip to content
Snippets Groups Projects
catalog.js 7 KiB
Newer Older
/*
This is part of datacrate, a node.js library for working with 
DataCrates.  Copyright (C) 2018  University of Technology Sydney

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

const fs = require('fs-extra');
const _ = require('lodash');
const path = require('path');
const DEFAULTS = path.join(__dirname, "../defaults");
const CONTEXT = path.join(DEFAULTS, 'context.json');
/*
Convert metadata from a redbox data publication and
its data record to a JSON-LD datacrate catalogue.

options = {
	id: redbox oid,
	datapub: redbox data publication record,
	organisation: {
	  id: organisation identifier,
	  name: organisation name
	},
	owner: email of the owner of the data pub's record,
	approver: email of the librarian who approved publication,
	dates: {
	    dateCreated: Date object or string,
	    datePublished: Date object or string
	}
}

If there are no dates, or either dateCreated or datePublished is 
undefined, now() is used as the default.

The owner and approver are used to build two Actions against
the object - creation and publication.


TODO: add contentSize, encodingFormat and fileFormat from
siegfried to the dataLocations array

Remember to keep the connection to the data record

*/

// datapub2catalog(options)
//
// Builds a catalogue object of the form { '@context': ..., '@graph': [...] }
// from the values in options. The JSON-LD context is read from the CONTEXT
// file with fs.readJson, which is why the function is async.
//
// Throws an Error if the organisation item lacks an '@id' or a 'name'.
//
// NOTE(review): this function appears to have lost lines (see the notes
// below) and does not parse as it stands - restore it from version control
// before relying on it.
async function datapub2catalog(options) {

	const id = options['id'];
	const datapub = options['datapub'];
	// the organisation is read from the 'organisation' option
	const org = options['organisation'];
	const owner = options['owner'];
	const approver = options['approver'];
	// normalise the supplied dates (missing values default to now)
	const dates = dates_default(options['dates'])
	const context = await fs.readJson(CONTEXT);

	// NOTE(review): the opening of the `organisation` object literal seems to
	// be missing here; the two lines below look like its properties, with the
	// identifier taken from org['id']. The closing of the literal (and
	// presumably a 'name' property, given the check below) is missing too.
		'@id': org['id'],
		'identifier': org['id'],
	if( ! (organisation['@id'] && organisation['name'] ) ) {
		throw Error("Organization must have an id and a name");
	}

	// assuming that all creators are affiliated to this one organisation

	// `var` because people is reassigned below when make_history returns
	// extra Person items
	var people = make_creators(datapub, organisation);

	const files = make_files(datapub);
	const dataset = make_dataset(id, datapub, organisation, dates, files);
	// make_history is expected to return a pair: the history items and any
	// people it had to create for the owner / approver
	const [ history, more_people ] = make_history(
		dataset, people, owner, approver, dates
	);

	if( more_people.length > 0 ) {
		people = people.concat(more_people);
	}
	// NOTE(review): the start of the `graph` definition seems to be missing
	// here; the lines below look like elements of the list it is built from.
	// The make_funding / make_about / make_spatial / make_temporal /
	// make_licence / make_citation helpers are not visible in this file.
		make_funding(datapub),
		make_about(datapub),
		make_spatial(datapub),
		make_temporal(datapub),
		people,
		make_licence(datapub),
		make_citation(datapub),
	// falsy entries are dropped from the graph before it is returned
	return {
		'@context': context,
		'@graph': graph.filter((e) => e)
	};
	// NOTE(review): the closing brace of this function is missing.
// dates_default(values)
//
// Returns an object with exactly two keys, dateCreated and datePublished.
// For each key: a truthy object value is converted with toISOString(), any
// other truthy value is passed through unchanged, and a missing / falsy
// value (or a missing values argument) is replaced by the current time as
// an ISO string. Both defaults share the same timestamp.
function dates_default(values) {
	const fallback = new Date().toISOString();
	const result = {};
	for( const field of [ 'dateCreated', 'datePublished' ] ) {
		const supplied = values && values[field];
		if( !supplied ) {
			result[field] = fallback;
		} else if( typeof supplied == 'object' ) {
			// Date objects (or anything else offering toISOString)
			result[field] = supplied.toISOString();
		} else {
			result[field] = supplied;
		}
	}
	return result;
}

// link_id(item)
//
// Returns a new object holding only the '@id' of item, for use as a
// reference to that item from another item.
function link_id(item) {
	const { '@id': target } = item;
	return { '@id': target };
}

// TODO: isBasedOn the data record
// make_dataset(id, datapub, organisation, dates, files)
//
// Returns the Dataset item for the catalogue:
//   id            - used as the item's '@id'
//   datapub       - supplies 'title' (as name) and 'description'
//   organisation  - item referenced (by '@id') as the publisher
//   dates         - supplies 'dateCreated' and 'datePublished'
//   files         - array of items, each referenced (by '@id') in hasPart
function make_dataset(id, datapub, organisation, dates, files) {
	return {
		'@id': id,
		'@type': 'Dataset',
		'path': './',
		'name': datapub['title'],
		'description': datapub['description'],
		'dateCreated': dates['dateCreated'],
		'datePublished': dates['datePublished'],
		// NOTE(review): key is capitalised, unlike the other properties -
		// confirm against the JSON-LD context whether 'publisher' is meant
		'Publisher': link_id(organisation),
		'hasPart': files.map(link_id),
	};
}
// find_by_email(people, email)
//
// Returns the first item in people whose 'email' is exactly email, or
// undefined if there is no such item.
//
// A missing email never matches: without this guard an undefined email
// would match the first person who has no 'email' property at all (for
// example a creator identified only by ORCID).
function find_by_email(people, email) {
	if( !email ) {
		return undefined;
	}
	return people.find((x) => x['email'] === email);
}

// make_history(dataset, people, owner, approver, dates)
//
// Looks up the owner and the approver (both email addresses) among people.
// For either one that is not found, a minimal Person item is created, using
// the email as its '@id', and collected in new_people.
//
// The remainder of the function describes two actions against the dataset,
// a CreateAction and an UpdateAction, whose ids are derived from the
// dataset's '@id'.
//
// NOTE(review): this function appears to have lost lines (see the notes
// below) and does not parse as it stands. The caller destructures the
// result as [ history, more_people ], so it presumably returns the action
// items together with new_people - confirm against version control.
function make_history(dataset, people, owner, approver, dates) {
	var owner_item = find_by_email(people, owner);
	var approver_item = find_by_email(people, approver);
	// people created here because they were not already in `people`
	const new_people = []
	if( !owner_item ) {
		owner_item = { 
			'@id': owner,
			'@type': 'Person',
			'email': owner
		};
		new_people.push(owner_item)
	}
	if( !approver_item ) {
		approver_item = { 
			'@id': approver,
			'@type': 'Person',
			'email': approver
		};
		new_people.push(approver_item)
	}
	// TODO: descriptions for these and maybe a link to the 
	// data record for the CreateAction
	// NOTE(review): the opening of the first action item (and whatever
	// contains it) is missing here.
			'@id': dataset['@id'] + '_history1',
			'@type': 'CreateAction',
			'name': 'Create',
			'description': 'Data record created',
	// NOTE(review): the end of the first action item and the opening of the
	// second are missing here.
			'@id': dataset['@id'] + '_history2',
			'@type': 'UpdateAction',
			'name': 'Publish',
			'endTime': dates['datePublished'],
	// NOTE(review): the rest of the second action item, the return statement
	// and the function's closing brace are missing. owner_item and
	// approver_item are not used in the visible lines.


// make_files(datapub)

// crosswalk dataLocations to an array of File items.
// The dataLocations are expected to have contentSize, 
// encodingFormat and fileFormat already set

// This assumes that the directory is flat ie all of the files
// are in the root

// Returns an empty array when the data publication has no dataLocations.
// Only locations of type 'attachment' produce a File item; every other
// type is currently dropped from the result.

function make_files(datapub) {
	if( !datapub['dataLocations'] ) {
		return [];
	}
	const files = datapub['dataLocations'].map((dl) => {
		if( dl['type'] === 'attachment' ) {
			return {
				// the file name doubles as the item id and its path
				'@id': dl['name'],
				'name': dl['name'],
				'path': dl['name'],
				'@type': 'File',
				'contentSize': dl['contentSize'],
				'encodingFormat': dl['encodingFormat'],
				'fileFormat': dl['fileFormat']
			}
		} else {
			// todo: URLs and physical locations
			return undefined;
		}
	});
	// drop the placeholders left by unsupported location types
	return files.filter((x) => x);
}

// make_creators(datapub, organisation)
//
// returns the array of Person items from the creators
// field of the data publication
//
// Each creator is identified by the first available of its orcid, email
// or full name; creators with none of these are left out of the result.
// Every Person is given a reference to organisation as its affiliation.
//
// Throws an Error if the data publication has no creators field.

// TODO: affiliations

function make_creators(datapub, organisation) {
	if( !datapub['creators'] ) {
		throw Error("A DataCrate has to have at least one creator");
	}
	const creators = datapub['creators'].map((p) => {
		const id = p['orcid'] || p['email'] || p['text_full_name'];
		if( !id ) {
			// warn for unidentifiable creators
			return undefined;
		}
		return {
			'@id': id,
			'@type': 'Person',
			'identifier': id,
			'name': p['text_full_name'],
			'familyName': p['family_name'],
			'givenName': p['given_name'],
			'email': p['email'],
			'affiliation': link_id(organisation)
		};
	});
	// drop the placeholders left by unidentifiable creators
	return creators.filter((x) => x);
}


// Public interface of this module: only the catalogue builder is exported.
module.exports = { datapub2catalog };