/*
This is part of datacrate, a node.js library for working with
DataCrates. Copyright (C) 2018 University of Technology Sydney
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/* TODO
These tests are a bit clunky and only cover two cases:
- a datapublication with decent values in every field
- a datapublication with only the minimum mandatory content, but with
the slightly janky JSON which the redbox-portal frontend produces,
with things like empty objects where the user hasn't selected a funder,
etc
The second set of tests were included so that the publication workflow
produced decent looking DataCrates.
Mature tests would abstract out the code in the before clauses
to a general function which loaded a datapub and returned a catalog object,
and generate lots of datapubs programmatically to fuzz the crosswalk.
*/
const chai = require('chai');
const chaiFiles = require('chai-files');
chai.use(chaiFiles);
const _ = require('lodash');
const assert = chai.assert;
const expect = chai.expect;
const file = chaiFiles.file;
const fs = require('fs-extra');
const catalog = require('../lib/catalog.js');
// fixed organisation used as the publisher of the test datasets
const ORG = {
'name': 'University of Technology Sydney'
};
// @types which the context-coverage test is allowed to skip, because the
// crosswalk emits them even though they aren't in the trimmed @context
// (e.g. 'FeatureCollection' from geospatial coverage)
const ALLOW_BAD_TYPES = { 'FeatureCollection': true };
// defining these here so that the tests know what to
// look up in the results @graph
const IRI_PREFIXES = {
'about': {
'dc:subject_anzsrc:for': '_:FOR/',
'dc:subject_anzsrc:seo': '_:SEO/'
},
'spatialCoverage': '_:spatial/',
'funder': '_:funder/',
'licence': '_:licence/',
'citation': '_:citation/'
};
// ORCID used as the agent of the catalog's CreateAction
const OWNER = 'https://orcid.org/0000-0001-5152-5307';
// username used as the agent of the catalog's UpdateAction
const APPROVER = 'admin';
// @id given to the root dataset item of every generated catalog
const DATASET_ID = 'DATASET_ID';
// utility wrapper for converting a datapub to a catalog
//
// Builds a CATALOG.json object from a datapub's metadata using the fixed
// test constants (ORG, OWNER, APPROVER, IRI_PREFIXES, DATASET_ID).
// NOTE: the original assigned to an undeclared `cj`, creating an implicit
// global; declared with const here.
async function make_catalog(datapub) {
  const cj = await catalog.datapub2catalog({
    'id': DATASET_ID,
    'datapub': datapub,
    'organisation': ORG,
    'owner': OWNER,
    'approver': APPROVER,
    'prefixes': IRI_PREFIXES
  });
  return cj;
}
// get the root of the catalog: the first @graph item whose path is './'
// (undefined when no such item exists)
function get_root(c) {
  for (const entry of c['@graph']) {
    if (entry['path'] === './') {
      return entry;
    }
  }
  return undefined;
}
// get catalog item by id: returns null if the item isn't unique, because
// this should always make the test fail
// (returns undefined when there is no match — callers assert on the result)
// NOTE: the original was missing get_id's closing brace, which accidentally
// nested get_id_prefix inside it and unbalanced the whole file.
function get_id(c, id) {
  const match = c['@graph'].filter((item) => item['@id'] === id);
  if( match.length > 1 ) {
    console.error(`Warning: catalog has ${match.length} items with ID ${id}`);
    return null;
  } else {
    return match[0];
  }
}

// this is for fetching all of the items with the prefix for
// spatialCoverage items, say.
function get_id_prefix(c, prefix) {
  return c['@graph'].filter((item) => item['@id'].startsWith(prefix));
}
// this one returns an array of items with the requested @type
function get_type(c, t) {
  const matches = [];
  for (const entry of c['@graph']) {
    if (entry['@type'] === t) {
      matches.push(entry);
    }
  }
  return matches;
}
// assert that dataset item `ds` has property `prop` containing exactly one
// link object whose @id is item's @id
// NOTE: the original had GitHub blame artifacts ("Mike Lynch / committed")
// pasted into the body; removed.
function assert_link(ds, prop, item) {
  const id = item['@id'];
  expect(ds).to.have.property(prop);
  const links = ds[prop];
  const link = links.filter((l) => l['@id'] === id);
  expect(link).to.be.an('array');
  expect(link).to.have.lengthOf(1);
}
// TODO: it would be great if these could be run against a
// range of different test datapubs which didn't have all of
// the values
describe("Try to load a janky default DataPub with empty fields", () => {
  var mdp, dp, cj, cjds;

  before(async () => {
    mdp = await fs.readJson('./test_data/janky_datapub.json');
    dp = mdp['metadata'];
    cj = await make_catalog(dp);
    cjds = get_root(cj);
    // write the result out so it can be inspected by hand
    await fs.writeJson('./test_data/janky_CATALOG.json', cj, { 'spaces': 4 });
  });

  it("has the basic json-ld properties", async () => {
    expect(cj).to.be.an('object');
    expect(cj).to.have.property('@graph');
    expect(cj).to.have.property('@context');
  });

  // NOTE: this it-block was never closed in the original, which swallowed
  // the following test
  it("has a trimmed context", async () => {
    expect(cj['@context']).not.to.have.property('Bacteria');
  });

  it("has string @id values for every graph item", () => {
    cj['@graph'].forEach((i) => {
      expect(i).to.have.property('@id');
      expect(i['@id']).to.be.a('string');
      expect(i['@id']).to.not.equal("");
    });
  });

  it("has @types more detailed than Thing for every graph item", () => {
    cj['@graph'].forEach((i) => {
      expect(i).to.have.property('@type');
      const type = i['@type'];
      expect(type).to.be.a('string');
      expect(type).to.not.equal("");
      expect(type).to.not.equal('Thing');
    });
  });

  it("has no @types which don't appear in the @context", () => {
    cj['@graph'].forEach((i) => {
      expect(i).to.have.property('@type');
      const type = i['@type'];
      expect(cj['@context']).to.have.property(type);
    });
  });

  it("has no funders", async () => {
    const funderids = cjds['funder'].map((i) => i['@id']);
    expect(funderids).to.be.empty;
  });

  it("has no related works", async () => {
    const citationids = cjds['citation'].map((i) => i['@id']);
    expect(citationids).to.be.empty;
  });
});
describe("Convert a ReDBox 2.0 DataPub to CATALOG.json", () => {
  var mdp, dp, cj, cjds;

  before(async () => {
    mdp = await fs.readJson('./test_data/datapub.json');
    dp = mdp['metadata'];
    cj = await make_catalog(dp);
    cjds = get_root(cj);
    // write the result out so it can be inspected by hand
    await fs.writeJson('./test_data/CATALOG.json', cj, { 'spaces': 4 });
  });

  it("has a trimmed context", () => {
    expect(cj).to.have.property("@context");
    const context = cj['@context'];
    expect(context).to.not.have.property('Bacteria');
  });

  it("has a root dataset", () => {
    expect(cjds).to.be.a('object');
  });

  it("has no empty properties on the root dataset", () => {
    // lodash forOwn invokes the iteratee with (value, key, object): the
    // original named the arguments (prop, values, o) and asserted on the
    // key, which is never empty, so the test was vacuous
    _.forOwn(cjds, (value, key) => {
      expect(value).to.not.be.empty;
    });
  });

  it("has string @id values for every graph item", () => {
    cj['@graph'].forEach((i) => {
      expect(i).to.have.property('@id');
      expect(i['@id']).to.be.a('string');
      expect(i['@id']).to.not.equal("");
    });
  });

  it("has @types more detailed than Thing for every graph item", () => {
    cj['@graph'].forEach((i) => {
      expect(i).to.have.property('@type');
      const type = i['@type'];
      expect(type).to.be.a('string');
      expect(type).to.not.equal("");
      expect(type).to.not.equal('Thing');
    });
  });

  it("has no @types which don't appear in the @context", () => {
    cj['@graph'].forEach((i) => {
      expect(i).to.have.property('@type');
      const type = i['@type'];
      if( ! ALLOW_BAD_TYPES[type] ) {
        expect(cj['@context']).to.have.property(type);
      }
    });
  });

  it("has a dataset item with correct metadata", () => {
    assert(cj, "Got an object");
    assert(cj['@graph'], "Catalog has a @graph");
    const dataset = get_id(cj, DATASET_ID);
    assert(dataset, "Graph has an item with id " + DATASET_ID);
    expect(dataset['name']).to.equal(dp['title']);
    expect(dataset['description']).to.equal(dp['description']);
    // NOTE(review): ORG has no 'id' property, so this asserts that the
    // publisher's @id is undefined — confirm whether ORG should carry an id
    expect(dataset['publisher']['@id']).to.equal(ORG['id']);
    if( dp['finalKeywords'] ) {
      expect(dataset['keywords']).to.equal(dp['finalKeywords'].join(', '));
    }
  });

  it("has a create action with the owner", () => {
    const cas = get_type(cj, 'CreateAction');
    expect(cas).to.have.length(1);
    const ca = cas[0];
    expect(ca['agent']['@id']).to.equal(OWNER);
  });

  it("has an update action with the approver", () => {
    const uas = get_type(cj, 'UpdateAction');
    expect(uas).to.have.length(1);
    const ua = uas[0];
    expect(ua['agent']['@id']).to.equal(APPROVER);
  });

  it("has the payload files", () => {
    const files = get_type(cj, "File");
    const datalocs = dp['dataLocations'];
    expect(files).to.have.length(datalocs.length);
    // File @ids should be exactly the dataLocation names
    const fids = files.map((f) => f['@id']).sort();
    const dlids = datalocs.map((dl) => dl['name']).sort();
    expect(fids).to.eql(dlids);
    files.forEach((f) => assert_link(cjds, 'hasPart', f));
  });

  it("has subjects", () => {
    // FORs and SEOs
    const fields = [ 'dc:subject_anzsrc:for', 'dc:subject_anzsrc:seo'];
    const about = cjds['about'].map((i) => i['@id']);
    _.forEach(fields, (field) => {
      const pref = IRI_PREFIXES['about'][field];
      const gotsubs = get_id_prefix(cj, pref);
      const gotnames = gotsubs.map((f) => f['name']);
      if( dp[field] && dp[field].length > 0 ) {
        // only build the expected list inside the guard: the original
        // called dp[field].map before checking dp[field] existed
        const expectnames = dp[field].map((f) => f['name']);
        expect(gotnames.sort()).to.eql(expectnames.sort());
      } else {
        expect(gotsubs).to.be.empty;
      }
      gotsubs.forEach((s) => assert_link(cjds, 'about', s));
    });
  });

  // funders is different from subjects because we are putting all
  // funders under the same prefix, which might be a mistake
  it("has funders", () => {
    const fields = [ 'foaf:fundedBy_foaf:Agent', 'foaf:fundedBy_vivo:Grant' ];
    const funderids = cjds['funder'].map((i) => i['@id']);
    const pref = IRI_PREFIXES['funder'];
    const expectfunders = [];
    fields.forEach((field) => {
      if( dp[field] && dp[field].length > 0 ) {
        expectfunders.push(...dp[field]);
      }
    });
    const expectnames = expectfunders.map((f) => f['dc_title']);
    const gotfunders = get_id_prefix(cj, pref);
    const gotnames = gotfunders.map((f) => f['name']);
    if( expectfunders.length > 0 ) {
      expect(gotnames.sort()).to.eql(expectnames.sort());
    } else {
      expect(gotfunders).to.be.empty;
    }
    gotfunders.forEach((s) => assert_link(cjds, 'funder', s));
  });

  // geolocations - basic_name / latitude / longitude
  // this test assumes that each geolocation's name is unique in the
  // datapub
  // and that geospatial is always a FeatureCollection
  it("has spatial coverage", () => {
    const spatials = get_id_prefix(cj, IRI_PREFIXES['spatialCoverage']);
    if( dp['geoLocations'] ) {
      dp['geoLocations'].forEach((gl) => {
        const name = gl['basic_name'];
        const matches = spatials.filter((s) => s['name'] === name);
        expect(matches).to.have.lengthOf(1);
        expect(matches[0]['latitude']).to.equal(gl['latitude']);
        expect(matches[0]['longitude']).to.equal(gl['longitude']);
        assert_link(cjds, 'spatialCoverage', matches[0]);
      });
    }
    if( dp['geospatial'] ) {
      const matches = spatials.filter((s) => s['@type'] === 'FeatureCollection');
      expect(matches).to.have.lengthOf(1);
      expect(matches[0]['features']).to.eql(dp['geospatial']['features']);
      assert_link(cjds, 'spatialCoverage', matches[0]);
    }
  });

  it("has temporal coverage", () => {
    // temporalCoverage is "start/end; timePeriod", omitting missing pieces
    var tc = '';
    if( dp['startDate'] ) {
      tc = dp['startDate'];
      if( dp['endDate'] ) {
        tc += '/' + dp['endDate'];
      }
    } else {
      if( dp['endDate'] ) {
        tc = dp['endDate'];
      }
    }
    if( dp['timePeriod'] ) {
      if( tc ) {
        tc += '; ' + dp['timePeriod'];
      } else {
        tc = dp['timePeriod'];
      }
    }
    if( tc ) {
      expect(cjds['temporalCoverage']).to.equal(tc);
    } else {
      expect(cjds).not.to.have.property('temporalCoverage');
    }
  });

  it("has a licence", () => {
    const licenses = cjds['license'];
    if( dp['license_other_url'] || dp['license_identifier'] ) {
      expect(licenses).to.be.an('array');
      const license = licenses[0];
      const id = license['@id'];
      const litem = get_id(cj, id);
      expect(litem).to.be.a('object');
      if( dp['license_other_url'] ) {
        expect(id).to.equal(dp['license_other_url']);
        expect(litem['name']).to.equal(dp['license_notes']);
      } else {
        expect(id).to.equal(dp['license_identifier']);
        expect(litem['name']).to.equal(dp['license_statement']);
        // original had litem['url]'] — a typo'd property name which could
        // never match the crosswalk output
        expect(litem['url']).to.equal(dp['license_statement_url']);
      }
      assert_link(cjds, 'license', litem);
    }
  });

  it("has related works", () => {
    const REL_FIELDS = [ 'publications', 'websites', 'metadata', 'data', 'services' ];
    const REL_TYPES = {
      'publications': 'ScholarlyArticle',
      'websites': 'WebSite',
      'metadata': 'CreativeWork',
      'data': 'Dataset',
      'services': 'CreativeWork'
    };
    REL_FIELDS.forEach((f) => {
      const field = 'related_' + f;
      if( dp[field] ) {
        dp[field].forEach((rw) => {
          const id = rw['related_url'];
          assert(id, "related_X in datapub has a URL");
          const item = get_id(cj, id);
          expect(item).to.be.an('object');
          expect(item['identifier']).to.equal(id);
          expect(item['@type']).to.equal(REL_TYPES[f]);
          expect(item['name']).to.equal(rw['related_title']);
          expect(item['description']).to.equal(rw['related_notes']);
          assert_link(cjds, 'citation', item);
        });
      }
    });
  });
});
describe("Create catalogs with some or none of the data payloads", () => {
  var mdp, dp;

  before(async () => {
    mdp = await fs.readJson('./test_data/datapub.json');
    dp = mdp['metadata'];
  });

  it("can create a catalog with no payload if the metadata-only flag is true", async () => {
    dp['accessRightsToggle'] = true;
    const cj = await make_catalog(dp);
    const root = get_root(cj);
    const parts = root['hasPart'];
    expect(parts).to.be.empty;
    // original queried get_type(cj, '@file'); payload items carry
    // @type 'File', so '@file' made this check vacuous
    const files = get_type(cj, 'File');
    expect(files).to.be.empty;
  });

  it("can create a catalog with one payload file deselected", async () => {
    dp['dataLocations'][0]['selected'] = false;
    dp['accessRightsToggle'] = true;
    const selected = [];
    dp['dataLocations'].forEach((l) => {
      if( l['selected'] ) {
        selected.push(l['name']);
      }
    });
    const cj = await make_catalog(dp);
    const root = get_root(cj);
    const parts = root['hasPart'];
    // deep equality: to.equal on two distinct arrays can never pass
    expect(parts).to.eql(selected.map((f) => ({ '@id': f })));
  });
});