/*
This is part of datacrate, a node.js library for working with
DataCrates. Copyright (C) 2018 University of Technology Sydney
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/* TODO
These tests are a bit clunky and only cover two cases:
- a datapublication with decent values in every field
- a datapublication with only the minimum mandatory content, but with
the slightly janky JSON which the redbox-portal frontend produces,
with things like empty objects where the user hasn't selected a funder,
etc
The second set of tests were included so that the publication workflow
produced decent looking DataCrates.
Mature tests would abstract out the code in the before clauses
to a general function which loaded a datapub and returned a catalog object,
and generate lots of datapubs programmatically to fuzz the crosswalk.
*/
const chai = require('chai');
const chaiFiles = require('chai-files');
chai.use(chaiFiles);
const _ = require('lodash');
const assert = chai.assert;
const expect = chai.expect;
const file = chaiFiles.file;
const fs = require('fs-extra');
const catalog = require('../lib/catalog.js');
// fixed organisation used as the publisher of the test datasets
const ORG = {
'name': 'University of Technology Sydney'
};
// @types which the context-coverage test is allowed to skip, because the
// crosswalk emits them even though they aren't in the trimmed @context
// (e.g. 'FeatureCollection' from geospatial coverage)
const ALLOW_BAD_TYPES = { 'FeatureCollection': true };
// defining these here so that the tests know what to
// look up in the results @graph
const IRI_PREFIXES = {
'about': {
'dc:subject_anzsrc:for': '_:FOR/',
'dc:subject_anzsrc:seo': '_:SEO/'
},
'spatialCoverage': '_:spatial/',
'funder': '_:funder/',
'licence': '_:licence/',
'citation': '_:citation/'
};
// ORCID used as the agent of the catalog's CreateAction
const OWNER = 'https://orcid.org/0000-0001-5152-5307';
// username used as the agent of the catalog's UpdateAction
const APPROVER = 'admin';
// @id given to the root dataset item of every generated catalog
const DATASET_ID = 'DATASET_ID';
// utility wrapper for converting a datapub to a catalog
//
// Builds a CATALOG.json object from a datapub's metadata using the fixed
// test constants (ORG, OWNER, APPROVER, IRI_PREFIXES, DATASET_ID).
// NOTE: the original assigned to an undeclared `cj`, creating an implicit
// global; declared with const here.
async function make_catalog(datapub) {
  const cj = await catalog.datapub2catalog({
    'id': DATASET_ID,
    'datapub': datapub,
    'organisation': ORG,
    'owner': OWNER,
    'approver': APPROVER,
    'prefixes': IRI_PREFIXES
  });
  return cj;
}
// get the root of the catalog: the first @graph item whose path is './'
// (undefined when no such item exists)
function get_root(c) {
  for (const entry of c['@graph']) {
    if (entry['path'] === './') {
      return entry;
    }
  }
  return undefined;
}
// get catalog item by id: returns null if the item isn't unique, because
// this should always make the test fail
// (returns undefined when there is no match — callers assert on the result)
// NOTE: the original was missing get_id's closing brace, which accidentally
// nested get_id_prefix inside it and unbalanced the whole file.
function get_id(c, id) {
  const match = c['@graph'].filter((item) => item['@id'] === id);
  if( match.length > 1 ) {
    console.error(`Warning: catalog has ${match.length} items with ID ${id}`);
    return null;
  } else {
    return match[0];
  }
}

// this is for fetching all of the items with the prefix for
// spatialCoverage items, say.
function get_id_prefix(c, prefix) {
  return c['@graph'].filter((item) => item['@id'].startsWith(prefix));
}
// this one returns an array of items with the requested @type
function get_type(c, t) {
  const matches = [];
  for (const entry of c['@graph']) {
    if (entry['@type'] === t) {
      matches.push(entry);
    }
  }
  return matches;
}
// assert that dataset item `ds` has property `prop` containing exactly one
// link object whose @id is item's @id
// NOTE: the original had GitHub blame artifacts ("Mike Lynch / committed")
// pasted into the body; removed.
function assert_link(ds, prop, item) {
  const id = item['@id'];
  expect(ds).to.have.property(prop);
  const links = ds[prop];
  const link = links.filter((l) => l['@id'] === id);
  expect(link).to.be.an('array');
  expect(link).to.have.lengthOf(1);
}
// TODO: it would be great if these could be run against a
// range of different test datapubs which didn't have all of
// the values
describe("Try to load a janky default DataPub with empty fields", () => {
  var mdp, dp, cj, cjds;

  before(async () => {
    mdp = await fs.readJson('./test_data/janky_datapub.json');
    dp = mdp['metadata'];
    cj = await make_catalog(dp);
    cjds = get_root(cj);
    // write the result out so it can be inspected by hand
    await fs.writeJson('./test_data/janky_CATALOG.json', cj, { 'spaces': 4 });
  });

  it("has the basic json-ld properties", async () => {
    expect(cj).to.be.an('object');
    expect(cj).to.have.property('@graph');
    expect(cj).to.have.property('@context');
  });

  // NOTE: this it-block was never closed in the original, which swallowed
  // the following test
  it("has a trimmed context", async () => {
    expect(cj['@context']).not.to.have.property('Bacteria');
  });

  it("has string @id values for every graph item", () => {
    cj['@graph'].forEach((i) => {
      expect(i).to.have.property('@id');
      expect(i['@id']).to.be.a('string');
      expect(i['@id']).to.not.equal("");
    });
  });

  it("has @types more detailed than Thing for every graph item", () => {
    cj['@graph'].forEach((i) => {
      expect(i).to.have.property('@type');
      const type = i['@type'];
      expect(type).to.be.a('string');
      expect(type).to.not.equal("");
      expect(type).to.not.equal('Thing');
    });
  });

  it("has no @types which don't appear in the @context", () => {
    cj['@graph'].forEach((i) => {
      expect(i).to.have.property('@type');
      const type = i['@type'];
      expect(cj['@context']).to.have.property(type);
    });
  });

  it("has no funders", async () => {
    const funderids = cjds['funder'].map((i) => i['@id']);
    expect(funderids).to.be.empty;
  });

  it("has no related works", async () => {
    const citationids = cjds['citation'].map((i) => i['@id']);
    expect(citationids).to.be.empty;
  });
});
describe("Convert a ReDBox 2.0 DataPub to CATALOG.json", () => {
  var mdp, dp, cj, cjds;

  before(async () => {
    mdp = await fs.readJson('./test_data/datapub.json');
    dp = mdp['metadata'];
    cj = await make_catalog(dp);
    cjds = get_root(cj);
    // write the result out so it can be inspected by hand
    await fs.writeJson('./test_data/CATALOG.json', cj, { 'spaces': 4 });
  });

  it("has a trimmed context", () => {
    expect(cj).to.have.property("@context");
    const context = cj['@context'];
    expect(context).to.not.have.property('Bacteria');
  });

  it("has a root dataset", () => {
    expect(cjds).to.be.a('object');
  });

  it("has no empty properties on the root dataset", () => {
    // lodash forOwn invokes the iteratee with (value, key, object): the
    // original named the arguments (prop, values, o) and asserted on the
    // key, which is never empty, so the test was vacuous
    _.forOwn(cjds, (value, key) => {
      expect(value).to.not.be.empty;
    });
  });

  it("has string @id values for every graph item", () => {
    cj['@graph'].forEach((i) => {
      expect(i).to.have.property('@id');
      expect(i['@id']).to.be.a('string');
      expect(i['@id']).to.not.equal("");
    });
  });

  it("has @types more detailed than Thing for every graph item", () => {
    cj['@graph'].forEach((i) => {
      expect(i).to.have.property('@type');
      const type = i['@type'];
      expect(type).to.be.a('string');
      expect(type).to.not.equal("");
      expect(type).to.not.equal('Thing');
    });
  });

  it("has no @types which don't appear in the @context", () => {
    cj['@graph'].forEach((i) => {
      expect(i).to.have.property('@type');
      const type = i['@type'];
      if( ! ALLOW_BAD_TYPES[type] ) {
        expect(cj['@context']).to.have.property(type);
      }
    });
  });

  it("has a dataset item with correct metadata", () => {
    assert(cj, "Got an object");
    assert(cj['@graph'], "Catalog has a @graph");
    const dataset = get_id(cj, DATASET_ID);
    assert(dataset, "Graph has an item with id " + DATASET_ID);
    expect(dataset['name']).to.equal(dp['title']);
    expect(dataset['description']).to.equal(dp['description']);
    // NOTE(review): ORG has no 'id' property, so this asserts that the
    // publisher's @id is undefined — confirm whether ORG should carry an id
    expect(dataset['publisher']['@id']).to.equal(ORG['id']);
    if( dp['finalKeywords'] ) {
      expect(dataset['keywords']).to.equal(dp['finalKeywords'].join(', '));
    }
  });

  it("has a create action with the owner", () => {
    const cas = get_type(cj, 'CreateAction');
    expect(cas).to.have.length(1);
    const ca = cas[0];
    expect(ca['agent']['@id']).to.equal(OWNER);
  });

  it("has an update action with the approver", () => {
    const uas = get_type(cj, 'UpdateAction');
    expect(uas).to.have.length(1);
    const ua = uas[0];
    expect(ua['agent']['@id']).to.equal(APPROVER);
  });

  it("has the payload files", () => {
    const files = get_type(cj, "File");
    const datalocs = dp['dataLocations'];
    expect(files).to.have.length(datalocs.length);
    // File @ids should be exactly the dataLocation names
    const fids = files.map((f) => f['@id']).sort();
    const dlids = datalocs.map((dl) => dl['name']).sort();
    expect(fids).to.eql(dlids);
    files.forEach((f) => assert_link(cjds, 'hasPart', f));
  });

  it("has subjects", () => {
    // FORs and SEOs
    const fields = [ 'dc:subject_anzsrc:for', 'dc:subject_anzsrc:seo'];
    const about = cjds['about'].map((i) => i['@id']);
    _.forEach(fields, (field) => {
      const pref = IRI_PREFIXES['about'][field];
      const gotsubs = get_id_prefix(cj, pref);
      const gotnames = gotsubs.map((f) => f['name']);
      if( dp[field] && dp[field].length > 0 ) {
        // only build the expected list inside the guard: the original
        // called dp[field].map before checking dp[field] existed
        const expectnames = dp[field].map((f) => f['name']);
        expect(gotnames.sort()).to.eql(expectnames.sort());
      } else {
        expect(gotsubs).to.be.empty;
      }
      gotsubs.forEach((s) => assert_link(cjds, 'about', s));
    });
  });

  // funders is different from subjects because we are putting all
  // funders under the same prefix, which might be a mistake
  it("has funders", () => {
    const fields = [ 'foaf:fundedBy_foaf:Agent', 'foaf:fundedBy_vivo:Grant' ];
    const funderids = cjds['funder'].map((i) => i['@id']);
    const pref = IRI_PREFIXES['funder'];
    const expectfunders = [];
    fields.forEach((field) => {
      if( dp[field] && dp[field].length > 0 ) {
        expectfunders.push(...dp[field]);
      }
    });
    const expectnames = expectfunders.map((f) => f['dc_title']);
    const gotfunders = get_id_prefix(cj, pref);
    const gotnames = gotfunders.map((f) => f['name']);
    if( expectfunders.length > 0 ) {
      expect(gotnames.sort()).to.eql(expectnames.sort());
    } else {
      expect(gotfunders).to.be.empty;
    }
    gotfunders.forEach((s) => assert_link(cjds, 'funder', s));
  });

  // geolocations - basic_name / latitude / longitude
  // this test assumes that each geolocation's name is unique in the
  // datapub
  // and that geospatial is always a FeatureCollection
  it("has spatial coverage", () => {
    const spatials = get_id_prefix(cj, IRI_PREFIXES['spatialCoverage']);
    if( dp['geoLocations'] ) {
      dp['geoLocations'].forEach((gl) => {
        const name = gl['basic_name'];
        const matches = spatials.filter((s) => s['name'] === name);
        expect(matches).to.have.lengthOf(1);
        expect(matches[0]['latitude']).to.equal(gl['latitude']);
        expect(matches[0]['longitude']).to.equal(gl['longitude']);
        assert_link(cjds, 'spatialCoverage', matches[0]);
      });
    }
    if( dp['geospatial'] ) {
      const matches = spatials.filter((s) => s['@type'] === 'FeatureCollection');
      expect(matches).to.have.lengthOf(1);
      expect(matches[0]['features']).to.eql(dp['geospatial']['features']);
      assert_link(cjds, 'spatialCoverage', matches[0]);
    }
  });

  it("has temporal coverage", () => {
    // temporalCoverage is "start/end; timePeriod", omitting missing pieces
    var tc = '';
    if( dp['startDate'] ) {
      tc = dp['startDate'];
      if( dp['endDate'] ) {
        tc += '/' + dp['endDate'];
      }
    } else {
      if( dp['endDate'] ) {
        tc = dp['endDate'];
      }
    }
    if( dp['timePeriod'] ) {
      if( tc ) {
        tc += '; ' + dp['timePeriod'];
      } else {
        tc = dp['timePeriod'];
      }
    }
    if( tc ) {
      expect(cjds['temporalCoverage']).to.equal(tc);
    } else {
      expect(cjds).not.to.have.property('temporalCoverage');
    }
  });

  it("has a licence", () => {
    const licenses = cjds['license'];
    if( dp['license_other_url'] || dp['license_identifier'] ) {
      expect(licenses).to.be.an('array');
      const license = licenses[0];
      const id = license['@id'];
      const litem = get_id(cj, id);
      expect(litem).to.be.a('object');
      if( dp['license_other_url'] ) {
        expect(id).to.equal(dp['license_other_url']);
        expect(litem['name']).to.equal(dp['license_notes']);
      } else {
        expect(id).to.equal(dp['license_identifier']);
        expect(litem['name']).to.equal(dp['license_statement']);
        // original had litem['url]'] — a typo'd property name which could
        // never match the crosswalk output
        expect(litem['url']).to.equal(dp['license_statement_url']);
      }
      assert_link(cjds, 'license', litem);
    }
  });

  it("has related works", () => {
    const REL_FIELDS = [ 'publications', 'websites', 'metadata', 'data', 'services' ];
    const REL_TYPES = {
      'publications': 'ScholarlyArticle',
      'websites': 'WebSite',
      'metadata': 'CreativeWork',
      'data': 'Dataset',
      'services': 'CreativeWork'
    };
    REL_FIELDS.forEach((f) => {
      const field = 'related_' + f;
      if( dp[field] ) {
        dp[field].forEach((rw) => {
          const id = rw['related_url'];
          assert(id, "related_X in datapub has a URL");
          const item = get_id(cj, id);
          expect(item).to.be.an('object');
          expect(item['identifier']).to.equal(id);
          expect(item['@type']).to.equal(REL_TYPES[f]);
          expect(item['name']).to.equal(rw['related_title']);
          expect(item['description']).to.equal(rw['related_notes']);
          assert_link(cjds, 'citation', item);
        });
      }
    });
  });
});
describe("Create catalogs with some or none of the data payloads", () => {
  var mdp, dp;

  before(async () => {
    mdp = await fs.readJson('./test_data/datapub.json');
    dp = mdp['metadata'];
  });

  it("can create a catalog with no payload if the metadata-only flag is true", async () => {
    dp['accessRightsToggle'] = true;
    const cj = await make_catalog(dp);
    const root = get_root(cj);
    const parts = root['hasPart'];
    expect(parts).to.be.empty;
    // original queried get_type(cj, '@file'); payload items carry
    // @type 'File', so '@file' made this check vacuous
    const files = get_type(cj, 'File');
    expect(files).to.be.empty;
  });

  it("can create a catalog with one payload file deselected", async () => {
    dp['dataLocations'][0]['selected'] = false;
    dp['accessRightsToggle'] = true;
    const selected = [];
    dp['dataLocations'].forEach((l) => {
      if( l['selected'] ) {
        selected.push(l['name']);
      }
    });
    const cj = await make_catalog(dp);
    const root = get_root(cj);
    const parts = root['hasPart'];
    // deep equality: to.equal on two distinct arrays can never pass
    expect(parts).to.eql(selected.map((f) => ({ '@id': f })));
  });
});