/*
This is part of datacrate, a node.js library for working with
DataCrates. Copyright (C) 2018 University of Technology Sydney
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
const chai = require('chai');
const chaiFiles = require('chai-files');
chai.use(chaiFiles);
const _ = require('lodash');
const assert = chai.assert;
const expect = chai.expect;
const file = chaiFiles.file;
const fs = require('fs-extra');
const catalog = require('../lib/catalog.js');
const ORG = {
'name': 'University of Technology Sydney'
};
// defined here so that the tests know which @id prefixes to look
// up in the resulting @graph
const IRI_PREFIXES = {
'about': {
'dc:subject_anzsrc:for': '_:FOR/',
'dc:subject_anzsrc:seo': '_:SEO/'
},
'spatialCoverage': '_:spatial/',
'funder': '_:funder/',
'licence': '_:licence/',
'citation': '_:citation/'
};
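// illustrative only: with these prefixes, a spatialCoverage item in the
// output @graph is expected to get an @id starting with '_:spatial/' (the
// exact suffix is up to catalog.js); get_id_prefix() below matches on that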
const OWNER = 'https://orcid.org/0000-0001-5152-5307';
const APPROVER = 'admin';
const DATASET_ID = 'DATASET_ID';
// get a catalog item by id: returns null if the id isn't unique, so that
// the calling test will always fail
function get_id(c, id) {
const match = c['@graph'].filter((item) => item['@id'] === id);
if( match.length > 1 ) {
console.error(`Warning: catalog has ${match.length} items with ID ${id}`);
return null;
} else {
return match[0];
}
}
// fetch all of the items whose @id starts with a given prefix - for
// example, all of the spatialCoverage items
function get_id_prefix(c, prefix) {
return c['@graph'].filter((item) => item['@id'].startsWith(prefix));
}
// this one returns an array of items with the requested type
function get_type(c, t) {
return c['@graph'].filter((item) => item['@type'] === t);
}
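// assert that ds[prop] exists and contains exactly one link object
// whose @id matches item's @id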
function assert_link(ds, prop, item) {
const id = item['@id'];
expect(ds).to.have.property(prop);
const links = ds[prop];
const link = links.filter((l) => l['@id'] === id);
expect(link).to.be.an('array');
expect(link).to.have.lengthOf(1);
}
var mdp, dp, cj, cjds;
// TODO: it would be great if these could be run against a
// range of different test datapubs which didn't have all of
// the values
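// A minimal sketch of how that could look, assuming additional fixture
// files with varying completeness exist (the second filename below is
// hypothetical):
//
// ['./test_data/datapub.json', './test_data/sparse_datapub.json'].forEach((fixture) => {
//     describe(`Convert ${fixture} to CATALOG.json`, () => {
//         before(async () => {
//             const md = await fs.readJson(fixture);
//             cj = await catalog.datapub2catalog({
//                 'id': DATASET_ID,
//                 'datapub': md['metadata'],
//                 'organisation': ORG,
//                 'owner': OWNER,
//                 'approver': APPROVER,
//                 'prefixes': IRI_PREFIXES
//             });
//         });
//         // the assertions below would then run once per fixture
//     });
// });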
describe("Try to load a janky DataPub with empty fields", () => {
it("Loads without throwing an error", async () => {
mdp = await fs.readJson('./test_data/janky_datapub.json');
dp = mdp['metadata'];
cj = await catalog.datapub2catalog({
'id': DATASET_ID,
'datapub': dp,
'organisation': ORG,
'owner': OWNER,
'approver': APPROVER,
'prefixes': IRI_PREFIXES
});
expect(cj).to.be.an('object');
expect(cj).to.have.property('@graph');
expect(cj).to.have.property('@context');
});
});
describe("Convert a ReDBox 2.0 DataPub to CATALOG.json", () => {
before(async () => {
mdp = await fs.readJson('./test_data/datapub.json');
dp = mdp['metadata'];
cj = await catalog.datapub2catalog({
'id': DATASET_ID,
'datapub': dp,
'organisation': ORG,
'owner': OWNER,
'approver': APPROVER,
'prefixes': IRI_PREFIXES
});
const roots = cj['@graph'].filter((item) => item['path'] === 'data/');
cjds = roots[0];
await fs.writeJson('./test_data/CATALOG.json', cj, { 'spaces': 4 });
});
it("has a root dataset", () => {
expect(cjds).to.be.a('object');
});
it("has string @id values for every graph item", () => {
cj['@graph'].map((i) => {
expect(i).to.have.property('@id');
expect(i['@id']).to.be.a('string');
})
});
it("has a dataset item with correct metadata", () => {
assert(cj, "Got an object");
assert(cj['@graph'], "Catalog has a @graph");
const dataset = get_id(cj, DATASET_ID);
assert(dataset, "Graph has an item with id " + DATASET_ID);
expect(dataset['name']).to.equal(dp['title']);
expect(dataset['description']).to.equal(dp['description']);
expect(dataset['publisher']['@id']).to.equal(ORG['id']);
if( dp['finalKeywords']) {
expect(dataset['keywords']).to.equal(dp['finalKeywords'].join(', '));
}
});
it("has a create action with the owner", () => {
const cas = get_type(cj, 'CreateAction');
expect(cas).to.have.length(1);
const ca = cas[0];
expect(ca['agent']['@id']).to.equal(OWNER);
});
it("has an update action with the approver", () => {
const uas = get_type(cj, 'UpdateAction');
expect(uas).to.have.length(1);
const ua = uas[0];
expect(ua['agent']['@id']).to.equal(APPROVER);
});
it("has the payload files", () => {
const files = get_type(cj, "File");
const datalocs = dp['dataLocations'];
expect(files).to.have.length(datalocs.length);
const fids = files.map((f) => f['@id']).sort();
const dlids = datalocs.map((dl) => dl['name']).sort();
expect(fids).to.eql(dlids);
files.map((f) => assert_link(cjds, 'hasPart', f));
});
it("has subjects", () => {
// FORs and SEOs
const fields = [ 'dc:subject_anzsrc:for', 'dc:subject_anzsrc:seo'];
const about = cjds['about'].map((i) => i['@id']);
_.forEach(fields, (field) => {
const pref = IRI_PREFIXES['about'][field];
const gotsubs = get_id_prefix(cj, pref);
const gotnames = gotsubs.map((f) => f['name']);
if( dp[field] && dp[field].length > 0 ) {
const expectnames = dp[field].map((f) => f['name']);
expect(gotnames.sort()).to.eql(expectnames.sort());
} else {
expect(gotsubs).to.be.empty;
}
gotsubs.map((s) => assert_link(cjds, 'about', s));
})
});
// funders is different from subjects because we are putting all
// funders under the same prefix, which might be a mistake
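// e.g. entries from both 'foaf:fundedBy_foaf:Agent' and 'foaf:fundedBy_vivo:Grant'
// are expected to appear under '_:funder/', so the test pools both fields
// before comparing names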
it("has funders", () => {
const fields = [ 'foaf:fundedBy_foaf:Agent', 'foaf:fundedBy_vivo:Grant' ];
const funderids = cjds['funder'].map((i) => i['@id']);
const pref = IRI_PREFIXES['funder'];
var expectfunders = [];
fields.map((field) => {
if( dp[field] && dp[field].length > 0 ) {
expectfunders.push(...dp[field]);
}
});
const expectnames = expectfunders.map((f) => f['dc_title']);
const gotfunders = get_id_prefix(cj, pref);
const gotnames = gotfunders.map((f) => f['name']);
if( expectfunders.length > 0 ) {
expect(gotnames.sort()).to.eql(expectnames.sort());
} else {
expect(gotfunders).to.be.empty;
}
gotfunders.map((s) => assert_link(cjds, 'funder', s));
});
// geolocations - basic_name / latitude / longitude
// this test assumes that each geolocation's name is unique in the
// datapub
// and that geospatial is always a FeatureCollection
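// assumed input shape (illustrative values only):
//   geoLocations: [ { basic_name: 'Sydney', latitude: '-33.88', longitude: '151.2' } ]
//   geospatial:   { type: 'FeatureCollection', features: [ /* GeoJSON features */ ] }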
it("has spatial coverage", () => {
const spatials = get_id_prefix(cj, IRI_PREFIXES['spatialCoverage']);
if( dp['geoLocations'] ) {
dp['geoLocations'].map((gl) => {
const name = gl['basic_name'];
const matches = spatials.filter((s) => s['name'] === name );
expect(matches).to.have.lengthOf(1);
expect(matches[0]['latitude']).to.equal(gl['latitude']);
expect(matches[0]['longitude']).to.equal(gl['longitude']);
assert_link(cjds, 'spatialCoverage', matches[0]);
});
}
if( dp['geospatial']) {
const matches = spatials.filter((s) => s['@type'] === 'FeatureCollection' );
expect(matches).to.have.lengthOf(1);
expect(matches[0]['features']).to.eql(dp['geospatial']['features']);
assert_link(cjds, 'spatialCoverage', matches[0]);
}
});
it("has temporal coverage", () => {
var tc = '';
if( dp['startDate'] ) {
tc = dp['startDate'];
if( dp['endDate'] ) {
tc += '/' + dp['endDate'];
}
} else {
if( dp['endDate']) {
tc = dp['endDate'];
}
}
if( dp['timePeriod'] ) {
if( tc ) {
tc += '; ' + dp['timePeriod'];
} else {
tc = dp['timePeriod'];
}
}
if( tc ) {
expect(cjds['temporalCoverage']).to.equal(tc)
} else {
expect(cjds).not.to.have.property('temporalCoverage');
}
});
it("has a licence", () => {
const licenses = cjds['license'];
if( dp['license_other_url'] || dp['license_identifier'] ) {
expect(licenses).to.be.an('array');
const license = licenses[0];
const id = license['@id'];
const litem = get_id(cj, id);
expect(litem).to.be.a('object');
if( dp['license_other_url']) {
expect(id).to.equal(dp['license_other_url']);
expect(litem['name']).to.equal(dp['license_notes']);
} else {
expect(id).to.equal(dp['license_identifier']);
expect(litem['name']).to.equal(dp['license_statement']);
expect(litem['url']).to.equal(dp['license_statement_url']);
}
assert_link(cjds, 'license', litem);
}
});
it("has related works", () => {
const REL_FIELDS = [ 'publications', 'websites', 'metadata', 'data', 'services' ];
const REL_TYPES = {
'publications': 'ScholarlyArticle',
'websites': 'WebSite',
'metadata': 'CreativeWork',
'data': 'DataSet',
'services': 'CreativeWork'
}
REL_FIELDS.map((f) => {
const field = 'related_' + f;
if( dp[field] ) {
dp[field].map((rw) => {
const id = rw['related_url'];
assert(id, "related_X in datapub has a URL");
const item = get_id(cj, id);
expect(item).to.be.an('object');
expect(item['identifier']).to.equal(id);
expect(item['@type']).to.equal(REL_TYPES[f]);
expect(item['name']).to.equal(rw['related_title']);
expect(item['description']).to.equal(rw['related_notes']);
assert_link(cjds, 'citation', item);