/* This is part of datacrate, a node.js library for working
   with DataCrates.

   Copyright (C) 2018 University of Technology Sydney

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */

/* TODO

These tests are a bit clunky and only cover two cases:

- a datapublication with decent values in every field
- a datapublication with only the minimum mandatory content, but with
  the slightly janky JSON which the redbox-portal frontend produces,
  with things like empty objects where the user hasn't selected a
  funder, etc

The second set of tests was included so that the publication workflow
produces decent-looking DataCrates.

Mature tests would abstract the code in the before clauses into a
general function which loads a datapub and returns a catalog object,
and would generate lots of datapubs programmatically to fuzz the
crosswalk.

*/

const chai = require('chai');
const chaiFiles = require('chai-files');

chai.use(chaiFiles);

const _ = require('lodash');
const assert = chai.assert;
const expect = chai.expect;
const file = chaiFiles.file;
const fs = require('fs-extra');

const catalog = require('../lib/catalog.js');

const ORG = {
    'id': 'https://www.uts.edu.au',
    'name': 'University of Technology Sydney'
};

const ALLOW_BAD_TYPES = {
    'FeatureCollection': true
};

// defining these here so that the tests know what to
// look up in the results @graph

const IRI_PREFIXES = {
    'about': {
        'dc:subject_anzsrc:for': '_:FOR/',
        'dc:subject_anzsrc:seo': '_:SEO/'
    },
    'spatialCoverage': '_:spatial/',
    'funder': '_:funder/',
    'licence': '_:licence/',
    'citation': '_:citation/'
};

const OWNER = 'https://orcid.org/0000-0001-5152-5307';
const APPROVER = 'admin';
const DATASET_ID = 'DATASET_ID';

// utility wrapper for converting a datapub to a catalog

async function make_catalog(datapub) {
    const cj = await catalog.datapub2catalog({
        'id': DATASET_ID,
        'datapub': datapub,
        'organisation': ORG,
        'owner': OWNER,
        'approver': APPROVER,
        'prefixes': IRI_PREFIXES
    });
    return cj;
}

// get the root of the catalog

function get_root(c) {
    const roots = c['@graph'].filter((item) => item['path'] === './');
    return roots[0];
}

// get catalog item by id: returns null if the item isn't unique, because
// this should always make the test fail

function get_id(c, id) {
    const match = c['@graph'].filter((item) => item['@id'] === id);
    if( match.length ) {
        if( match.length > 1 ) {
            console.error(`Warning: catalog has ${match.length} items with ID ${id}`);
            return null;
        } else {
            return match[0];
        }
    } else {
        return null;
    }
}
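// A hedged sketch of the fuzzing harness the TODO above describes:
// generate minimal datapubs programmatically and check that the
// crosswalk always yields a catalog with a root dataset. The helper
// name and the generated field values are hypothetical, and it assumes
// datapub2catalog tolerates such minimal input; only make_catalog and
// get_root come from this file.

async function fuzz_crosswalk(n) {
    for( let i = 0; i < n; i++ ) {
        const datapub = {
            'title': `Fuzzed datapub ${i}`,
            'description': 'generated to fuzz the crosswalk',
            'dataLocations': []
        };
        const cj = await make_catalog(datapub);
        const root = get_root(cj);
        assert(root, `fuzzed datapub ${i} produced a root dataset`);
    }
}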
// this is for fetching all of the items with the prefix for
// spatialCoverage items, say.

function get_id_prefix(c, prefix) {
    return c['@graph'].filter((item) => item['@id'].startsWith(prefix));
}

// this one returns an array of items with the requested type

function get_type(c, t) {
    return c['@graph'].filter((item) => item['@type'] === t);
}

function assert_link(ds, prop, item) {
    const id = item['@id'];
    expect(ds).to.have.property(prop);
    const links = ds[prop];
    const link = links.filter((l) => l['@id'] === id);
    expect(link).to.be.an('array');
    expect(link).to.have.lengthOf(1);
}

// TODO: it would be great if these could be run against a
// range of different test datapubs which didn't have all of
// the values

describe("Try to load a janky default DataPub with empty fields", () => {
    var mdp, dp, cj, cjds;

    before(async () => {
        mdp = await fs.readJson('./test_data/janky_datapub.json');
        dp = mdp['metadata'];
        cj = await make_catalog(dp);
        cjds = get_root(cj);
        await fs.writeJson('./test_data/janky_CATALOG.json', cj, { 'spaces': 4 });
    });

    it("has the basic json-ld properties", async () => {
        expect(cj).to.be.an('object');
        expect(cj).to.have.property('@graph');
        expect(cj).to.have.property('@context');
    });

    it("has a trimmed context", async () => {
        expect(cj['@context']).not.to.have.property('Bacteria');
    });

    it("has string @id values for every graph item", () => {
        cj['@graph'].map((i) => {
            expect(i).to.have.property('@id');
            expect(i['@id']).to.be.a('string');
            expect(i['@id']).to.not.equal("");
        });
    });

    it("has @types more detailed than Thing for every graph item", () => {
        cj['@graph'].map((i) => {
            expect(i).to.have.property('@type');
            const type = i['@type'];
            expect(type).to.be.a('string');
            expect(type).to.not.equal("");
            expect(type).to.not.equal('Thing');
        });
    });

    it("has no @types which don't appear in the @context", () => {
        cj['@graph'].map((i) => {
            expect(i).to.have.property('@type');
            const type = i['@type'];
            expect(cj['@context']).to.have.property(type);
        });
    });

    it("has no funders", async () => {
        const funderids = cjds['funder'].map((i) => i['@id']);
        expect(funderids).to.be.empty;
    });

    it("has no related works", async () => {
        const citationids = cjds['citation'].map((i) => i['@id']);
        expect(citationids).to.be.empty;
    });
});

describe("Convert a ReDBox 2.0 DataPub to CATALOG.json", () => {
    var mdp, dp, cj, cjds;

    before(async () => {
        mdp = await fs.readJson('./test_data/datapub.json');
        dp = mdp['metadata'];
        cj = await make_catalog(dp);
        cjds = get_root(cj);
        await fs.writeJson('./test_data/CATALOG.json', cj, { 'spaces': 4 });
    });

    it("has a trimmed context", () => {
        expect(cj).to.have.property("@context");
        const context = cj['@context'];
        expect(context).to.not.have.property('Bacteria');
    });

    it("has a root dataset", () => {
        expect(cjds).to.be.an('object');
    });

    it("has no empty properties on the root dataset", () => {
        // note: lodash's forOwn iteratee gets (value, key, object)
        _.forOwn(cjds, (value, key) => {
            expect(value).to.not.be.empty;
        });
    });

    it("has string @id values for every graph item", () => {
        cj['@graph'].map((i) => {
            expect(i).to.have.property('@id');
            expect(i['@id']).to.be.a('string');
            expect(i['@id']).to.not.equal("");
        });
    });

    it("has @types more detailed than Thing for every graph item", () => {
        cj['@graph'].map((i) => {
            expect(i).to.have.property('@type');
            const type = i['@type'];
            expect(type).to.be.a('string');
            expect(type).to.not.equal("");
            expect(type).to.not.equal('Thing');
        });
    });
    it("has no @types which don't appear in the @context", () => {
        cj['@graph'].map((i) => {
            expect(i).to.have.property('@type');
            const type = i['@type'];
            if( !ALLOW_BAD_TYPES[type] ) {
                expect(cj['@context']).to.have.property(type);
            }
        });
    });

    it("has a dataset item with correct metadata", () => {
        assert(cj, "Got an object");
        assert(cj['@graph'], "Catalog has a @graph");
        const dataset = get_id(cj, DATASET_ID);
        assert(dataset, "Graph has an item with id " + DATASET_ID);
        expect(dataset['name']).to.equal(dp['title']);
        expect(dataset['description']).to.equal(dp['description']);
        expect(dataset['publisher']['@id']).to.equal(ORG['id']);
        if( dp['finalKeywords'] ) {
            expect(dataset['keywords']).to.equal(dp['finalKeywords'].join(', '));
        }
    });

    it("has a create action with the owner", () => {
        const cas = get_type(cj, 'CreateAction');
        expect(cas).to.have.length(1);
        const ca = cas[0];
        expect(ca['agent']['@id']).to.equal(OWNER);
    });

    it("has an update action with the approver", () => {
        const uas = get_type(cj, 'UpdateAction');
        expect(uas).to.have.length(1);
        const ua = uas[0];
        expect(ua['agent']['@id']).to.equal(APPROVER);
    });

    it("has the payload files", () => {
        const files = get_type(cj, "File");
        const datalocs = dp['dataLocations'];
        expect(files).to.have.length(datalocs.length);
        const fids = files.map((f) => f['@id']).sort();
        const dlids = datalocs.map((dl) => dl['name']).sort();
        expect(fids).to.eql(dlids);
        files.map((f) => assert_link(cjds, 'hasPart', f));
    });

    it("has subjects", () => {
        // FORs and SEOs
        const fields = [ 'dc:subject_anzsrc:for', 'dc:subject_anzsrc:seo' ];
        const about = cjds['about'].map((i) => i['@id']);
        _.forEach(fields, (field) => {
            const pref = IRI_PREFIXES['about'][field];
            const gotsubs = get_id_prefix(cj, pref);
            const gotnames = gotsubs.map((f) => f['name']);
            if( dp[field] && dp[field].length > 0 ) {
                const expectnames = dp[field].map((f) => f['name']);
                expect(gotnames.sort()).to.eql(expectnames.sort());
            } else {
                expect(gotsubs).to.be.empty;
            }
            gotsubs.map((s) => assert_link(cjds, 'about', s));
        });
    });

    // funders is different from subjects because we are putting all
    // funders under the same prefix, which might be a mistake

    it("has funders", () => {
        const fields = [ 'foaf:fundedBy_foaf:Agent', 'foaf:fundedBy_vivo:Grant' ];
        const funderids = cjds['funder'].map((i) => i['@id']);
        const pref = IRI_PREFIXES['funder'];
        var expectfunders = [];
        fields.map((field) => {
            if( dp[field] && dp[field].length > 0 ) {
                expectfunders.push(...dp[field]);
            }
        });
        const expectnames = expectfunders.map((f) => f['dc_title']);
        const gotfunders = get_id_prefix(cj, pref);
        const gotnames = gotfunders.map((f) => f['name']);
        if( expectfunders.length > 0 ) {
            expect(gotnames.sort()).to.eql(expectnames.sort());
        } else {
            expect(gotfunders).to.be.empty;
        }
        gotfunders.map((s) => assert_link(cjds, 'funder', s));
    });

    // geolocations - basic_name / latitude / longitude
    // this test assumes that each geolocation's name is unique in the
    // datapub, and that geospatial is always a FeatureCollection

    it("has spatial coverage", () => {
        const spatials = get_id_prefix(cj, IRI_PREFIXES['spatialCoverage']);
        if( dp['geoLocations'] ) {
            dp['geoLocations'].map((gl) => {
                const name = gl['basic_name'];
                const matches = spatials.filter((s) => s['name'] === name);
                expect(matches).to.have.lengthOf(1);
                expect(matches[0]['latitude']).to.equal(gl['latitude']);
                expect(matches[0]['longitude']).to.equal(gl['longitude']);
                assert_link(cjds, 'spatialCoverage', matches[0]);
            });
        }
        if( dp['geospatial'] ) {
            const matches = spatials.filter((s) => s['@type'] === 'FeatureCollection');
            expect(matches).to.have.lengthOf(1);
            expect(matches[0]['features']).to.eql(dp['geospatial']['features']);
            assert_link(cjds, 'spatialCoverage', matches[0]);
        }
    });
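    // A hedged sketch of the temporalCoverage encoding which the next
    // test reconstructs from its assertions: start and end dates are
    // joined with '/', and a free-text timePeriod is appended after '; '.
    // encode_temporal and its example values are purely illustrative;
    // the real encoding happens inside the crosswalk, not here.
    //
    //   encode_temporal('2018-01-01', '2018-12-31', 'southern summer')
    //     => '2018-01-01/2018-12-31; southern summer'

    function encode_temporal(start, end, period) {
        var tc = start ? start : '';
        if( start && end ) {
            tc += '/' + end;
        } else if( end ) {
            tc = end;
        }
        if( period ) {
            tc = tc ? tc + '; ' + period : period;
        }
        return tc;
    }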
    it("has temporal coverage", () => {
        var tc = '';
        if( dp['startDate'] ) {
            tc = dp['startDate'];
            if( dp['endDate'] ) {
                tc += '/' + dp['endDate'];
            }
        } else {
            if( dp['endDate'] ) {
                tc = dp['endDate'];
            }
        }
        if( dp['timePeriod'] ) {
            if( tc ) {
                tc += '; ' + dp['timePeriod'];
            } else {
                tc = dp['timePeriod'];
            }
        }
        if( tc ) {
            expect(cjds['temporalCoverage']).to.equal(tc);
        } else {
            expect(cjds).not.to.have.property('temporalCoverage');
        }
    });

    it("has a licence", () => {
        const licenses = cjds['license'];
        if( dp['license_other_url'] || dp['license_identifier'] ) {
            expect(licenses).to.be.an('array');
            const license = licenses[0];
            const id = license['@id'];
            const litem = get_id(cj, id);
            expect(litem).to.be.an('object');
            if( dp['license_other_url'] ) {
                expect(id).to.equal(dp['license_other_url']);
                expect(litem['name']).to.equal(dp['license_notes']);
            } else {
                expect(id).to.equal(dp['license_identifier']);
                expect(litem['name']).to.equal(dp['license_statement']);
                expect(litem['url']).to.equal(dp['license_statement_url']);
            }
            assert_link(cjds, 'license', litem);
        }
    });

    it("has related works", () => {
        const REL_FIELDS = [ 'publications', 'websites', 'metadata', 'data', 'services' ];
        const REL_TYPES = {
            'publications': 'ScholarlyArticle',
            'websites': 'WebSite',
            'metadata': 'CreativeWork',
            'data': 'Dataset',
            'services': 'CreativeWork'
        };
        REL_FIELDS.map((f) => {
            const field = 'related_' + f;
            if( dp[field] ) {
                dp[field].map((rw) => {
                    const id = rw['related_url'];
                    assert(id, "related_X in datapub has a URL");
                    const item = get_id(cj, id);
                    expect(item).to.be.an('object');
                    expect(item['identifier']).to.equal(id);
                    expect(item['@type']).to.equal(REL_TYPES[f]);
                    expect(item['name']).to.equal(rw['related_title']);
                    expect(item['description']).to.equal(rw['related_notes']);
                    assert_link(cjds, 'citation', item);
                });
            }
        });
    });
});

describe("Create catalogs with some or none of the data payloads", () => {
    var mdp, dp;

    // do this with beforeEach because we need to reset modifications
    // to the access and selection flags

    beforeEach(async () => {
        mdp = await fs.readJson('./test_data/datapub.json');
        dp = mdp['metadata'];
    });

    it("can create a catalog with no payload if the metadata-only flag is true", async () => {
        dp['accessRightsToggle'] = true;
        const cj = await make_catalog(dp);
        const root = get_root(cj);
        const parts = root['hasPart'];
        expect(parts).to.be.empty;
        const files = get_type(cj, 'File');
        expect(files).to.be.empty;
    });

    it("can create a catalog with one payload file deselected", async () => {
        dp['dataLocations'][0]['selected'] = false;
        var selected = [];
        dp['dataLocations'].forEach((l) => {
            if( l['selected'] ) {
                selected.push(l['name']);
            }
        });
        const cj = await make_catalog(dp);
        await fs.writeJson('./test_data/CATALOG_deselected.json', cj, { 'spaces': 4 });
        await fs.writeJson('./test_data/datapub_deselected.json', dp, { 'spaces': 4 });
        const root = get_root(cj);
        const parts = root['hasPart'];
        expect(parts).to.deep.equal(selected.map((f) => { return { '@id': f }; }));
    });
});
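// A hedged sketch of an additional test, not part of the original suite:
// get_id() above deliberately returns null when an @id is duplicated, so
// the uniqueness assumption it relies on could be checked directly against
// the fixture catalog like this.

describe("Catalog @id uniqueness (sketch)", () => {
    it("has no duplicate @id values in the graph", async () => {
        const mdp = await fs.readJson('./test_data/datapub.json');
        const cj = await make_catalog(mdp['metadata']);
        const ids = cj['@graph'].map((i) => i['@id']);
        expect(ids).to.have.lengthOf(_.uniq(ids).length);
    });
});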