Skip to content
Snippets Groups Projects
Commit 78e512f5 authored by Mike Lynch's avatar Mike Lynch
Browse files

Randomised DataCrates are now working. Fixed a bug in catalog.js - it wasn't

linking creators to the top-level dataset item.
parent a575c776
Branches
Tags
No related merge requests found
......@@ -146,7 +146,7 @@ async function datapub2catalog(options) {
var people = make_creators(datapub, organisation);
const files = make_files(datapub);
const dataset = make_dataset(id, datapub, organisation, dates, files);
const dataset = make_dataset(id, datapub, organisation, dates, files, people);
const [ history, more_people ] = make_history(
dataset, people, owner, approver, dates
);
......@@ -227,7 +227,7 @@ function link_id(item) {
// TODO: isBasedOn the data record
function make_dataset(id, datapub, organisation, dates, files) {
function make_dataset(id, datapub, organisation, dates, files, creators) {
const ds = {
'@id': id,
'@type': 'Dataset',
......@@ -238,6 +238,7 @@ function make_dataset(id, datapub, organisation, dates, files) {
'datePublished': dates['datePublished'],
'publisher': link_id(organisation),
'hasPart': files.map(link_id),
'creator': creators.map(link_id)
};
// see https://schema.org/keywords
if( datapub['finalKeywords'] ) {
......@@ -344,6 +345,9 @@ function make_files(datapub) {
// returns the array of Person items from the creators
// field of the data publication
// Doesn't add the creators to the datapub - the calling
// code is responsible for that.
// TODO: affiliations
function make_creators(datapub, organisation) {
......@@ -383,7 +387,11 @@ function make_creators(datapub, organisation) {
function make_related_works(datapub) {
return RELATED.map((thing) => {
const field = 'related_' + thing;
return datapub[field].filter((f) => f['related_url']).map((item) => {
var values = [];
if( field in datapub ) {
values = datapub[field];
}
return values.filter((f) => f['related_url']).map((item) => {
const ld_item = {
'@id': item['related_url'],
'@type': RELATED_TYPES[thing],
......@@ -529,7 +537,7 @@ function make_temporal(datapub) {
function make_license(datapub, prefix) {
const license = [];
if( datapub['license_other_url'] || datapub['license_notes'] ) {
if( datapub['license_other_url'] );
if( datapub['license_other_url'] ) {
licenses.push({
'@id': datapub['license_other_url'],
'@type': 'CreativeWork',
......@@ -550,7 +558,7 @@ function make_license(datapub, prefix) {
'@type': 'CreativeWork',
'name': datapub['license_identifier'],
'url': datapub['license_identifier']
}];
});
}
return [];
}
......
// Module to generate a bunch of CATALOG.json files which
// have arbitrary but realistic data
// ---- Size limits -------------------------------------------------------
// Each MIN/MAX pair is meant to be fed to randrange(min, max), which never
// returns max itself.

// Number of creators sampled for one data publication.
const N_PEOPLE_MIN = 1;
const N_PEOPLE_MAX = 5;

// Number of keywords sampled for one data publication.
const N_KEYWORD_MIN = 2;
const N_KEYWORD_MAX = 10;

// Number of sentences in one generated description.
const PARA_MIN = 1;
const PARA_MAX = 10;

// Number of words in one generated sentence.
const SENTENCE_MIN = 3;
const SENTENCE_MAX = 30;

// Declared but not referenced by the generator functions in this file.
const NAME_MIN = 3;
const NAME_MAX = 10;
const KEYWORD_MIN = 3;
const KEYWORD_MAX = 12;
const WORD_MIN = 2;
const WORD_MAX = 14;

// ---- Fixed identities --------------------------------------------------

// Organisation handed to the catalog generator for every crate.
const ORGANISATION = {
  'id': 'https://examples.edu',
  'name': 'Examples University'
};

// Domain appended to every generated person's email address.
const EMAIL_DOMAIN = 'examples.edu';

// Owner and approver handed to the catalog generator for every crate.
const OWNER = 'owner@examples.edu';
const APPROVER = 'approver@examples.edu';

// Titles sampled uniformly by randperson; 'Dr' appears four times, so it
// is picked more often than the others.
const HONORIFICS = [
  'Dr',
  'A/Prof',
  'Prof',
  'Dr',
  'Dr',
  'Dr',
  'Mr',
  'Ms'
];
const datacrate = require('./catalog.js');
const _ = require('lodash');
const fs = require('fs-extra');
const randdict = require('random-word');
const path = require('path');
const uuidv4 = require('uuid/v4');
// Returns a pseudo-random integer i such that min <= i < max.
// The upper bound is exclusive: randrange(1, 5) yields 1, 2, 3 or 4.
function randrange(min, max) {
  const span = max - min;
  const offset = Math.floor(Math.random() * span);
  return offset + min;
}
// Builds an array of n values by calling fn once per slot.
// fn is invoked the way Array.prototype.map would invoke it on an array
// of n zeros: fn(0, index, zeros).
function randoms(n, fn) {
  const zeros = new Array(n).fill(0);
  const results = [];
  for (let i = 0; i < zeros.length; i += 1) {
    results.push(fn(0, i, zeros));
  }
  return results;
}
// Returns one random dictionary word with its first character upper-cased,
// for use as a given name or surname.
function randname() {
  const word = randdict();
  return _.upperFirst(word);
}
// Builds one random person record with a fresh UUID, a full name (with and
// without an honorific sampled from HONORIFICS) and an email address of the
// form Given.Surname@EMAIL_DOMAIN.
function randperson() {
  const title = _.sample(HONORIFICS);
  // The surname is drawn before the given name.
  const family = randname();
  const given = randname();
  const fullName = [given, family].join(' ');
  const address = [given, '.', family, '@', EMAIL_DOMAIN].join('');
  const person = {};
  person['dc:identifier'] = uuidv4();
  person['text_full_name'] = fullName;
  person['full_name_honorific'] = [title, fullName].join(' ');
  person['email'] = address;
  return person;
}
// Returns one random dictionary word, unmodified, to serve as a keyword.
function randkeyword() {
  const keyword = randdict();
  return keyword;
}
// Returns a sentence of random dictionary words: space-separated, ending
// with a full stop, first character upper-cased. The word count comes from
// randrange(SENTENCE_MIN, SENTENCE_MAX).
function randsentence() {
  const wordCount = randrange(SENTENCE_MIN, SENTENCE_MAX);
  const words = randoms(wordCount, randdict);
  const sentence = words.join(' ').concat('.');
  return _.upperFirst(sentence);
}
// Returns a block of random text: between PARA_MIN (inclusive) and PARA_MAX
// (exclusive) sentences joined by single spaces, followed by a newline.
// Produces no console output; an earlier debug print of the sentence count
// has been removed.
function randtext() {
  const nsentences = randrange(PARA_MIN, PARA_MAX);
  return randoms(nsentences, randsentence).join(' ') + '\n';
}
// Builds one random data publication record.
//
// keywords - pool of keyword strings to sample from
// people   - pool of person records to sample creators from
//
// Returns an object with keywords, contributor_ci, contributors, creators,
// title and description. The first sampled creator becomes the chief
// investigator (contributor_ci); the remaining creators become contributors.
//
// The chief investigator is a copy of the person record with a `role`
// added. The record in `people` is never modified, so a role does not leak
// into `creators` or into other data publications sharing that person.
//
// Throws an Error if no creator could be sampled (e.g. `people` is empty).
function randdatapub(keywords, people) {
  const k = _.sampleSize(keywords, randrange(N_KEYWORD_MIN, N_KEYWORD_MAX));
  const title = _.startCase(_.camelCase(randsentence()));
  const desc = randtext();
  const creators = _.sampleSize(people, randrange(N_PEOPLE_MIN, N_PEOPLE_MAX));
  if( creators.length === 0 ) {
    throw new Error('randdatapub: no creators available to sample from');
  }
  // Copy before adding the role: the sampled objects are shared with the pool.
  const ci = Object.assign({}, creators[0], { role: 'Chief Investigator' });
  const collabs = creators.slice(1);
  return {
    keywords: k,
    contributor_ci: ci,
    contributors: collabs,
    creators: creators,
    title: title,
    description: desc
  };
}
// Generates n random data publications that draw from one shared pool of
// floor(n / 2) keywords and one shared pool of 2 * n people.
function randdatapubs(n) {
  const keywordPool = randoms(Math.floor(n / 2), randkeyword);
  const peoplePool = randoms(n * 2, randperson);
  const makeOne = () => randdatapub(keywordPool, peoplePool);
  return randoms(n, makeOne);
}
// Writes one crate for the given data publication.
//
// Creates the directory <dest>/<uuid>/, asks datacrate.datapub2catalog for
// the catalog, then writes CATALOG.json (the catalog) and datapub.json (the
// source data publication) into that directory as 2-space indented JSON,
// logging each written path.
async function makedatacrate(dest, datapub) {
  const id = uuidv4();
  const inCrate = (...parts) => path.join(dest, id, ...parts);

  // Serialise a value to the named file inside the crate and report it.
  const writeJson = async (name, value) => {
    const target = inCrate(name);
    await fs.writeFile(target, JSON.stringify(value, null, 2));
    console.log('Wrote ' + target);
  };

  await fs.ensureDir(inCrate());
  const catalog = await datacrate.datapub2catalog({
    id: id,
    datapub: datapub,
    organisation: ORGANISATION,
    owner: OWNER,
    approver: APPROVER
  });
  await writeJson('CATALOG.json', catalog);
  await writeJson('datapub.json', datapub);
}
// Generates n random data publications and writes one crate for each
// under dest. The crates are written in parallel.
//
// The returned promise resolves, and "Done" is logged, only after every
// crate has been written. It rejects if any crate fails, so the caller can
// handle the error.
async function makedatacrates(dest, n) {
  const datapubs = randdatapubs(n);
  // Await the batch: without this the function resolves before any file
  // exists and write failures surface as unhandled rejections.
  await Promise.all(datapubs.map((p) => makedatacrate(dest, p)));
  console.log("Done");
}
// Script entry point: generate five random crates under ./output/.
// Any failure is reported and turned into a non-zero exit status instead
// of being left as an unhandled promise rejection.
makedatacrates('./output/', 5).catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
{
"name": "datacrate",
"version": "1.0.3",
"version": "1.0.4",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
......@@ -1235,6 +1235,15 @@
"resolved": "https://registry.npmjs.org/qs/-/qs-6.5.2.tgz",
"integrity": "sha512-N5ZAX4/LxJmF+7wN74pUD6qAh9/wnvdQcjq9TZjevvXzSUo7bfmw91saqMjzGS2xq91/odN2dW/WOl7qQHNDGA=="
},
"random-word": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/random-word/-/random-word-2.0.0.tgz",
"integrity": "sha1-Je0Y1bpamUCerOtZ2r5lt20v9uk=",
"requires": {
"unique-random-array": "^1.0.0",
"word-list": "^2.0.0"
}
},
"rdf-canonize": {
"version": "0.2.4",
"resolved": "https://registry.npmjs.org/rdf-canonize/-/rdf-canonize-0.2.4.tgz",
......@@ -1527,6 +1536,19 @@
"integrity": "sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==",
"dev": true
},
"unique-random": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/unique-random/-/unique-random-1.0.0.tgz",
"integrity": "sha1-zj4iTIJCzTOg53sNcYDXfmti0MQ="
},
"unique-random-array": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/unique-random-array/-/unique-random-array-1.0.1.tgz",
"integrity": "sha512-z9J/SV8CUIhIRROcHe9YUoAT6XthUJt0oUyLGgobiXJprDP9O9dsErNevvSaAv5BkhwFEVPn6nIEOKeNE6Ck1Q==",
"requires": {
"unique-random": "^1.0.0"
}
},
"universalify": {
"version": "0.1.2",
"resolved": "https://registry.npmjs.org/universalify/-/universalify-0.1.2.tgz",
......@@ -1616,6 +1638,11 @@
"resolved": "https://registry.npmjs.org/which-module/-/which-module-2.0.0.tgz",
"integrity": "sha1-2e8H3Od7mQK4o6j6SzHD4/fm6Ho="
},
"word-list": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/word-list/-/word-list-2.0.0.tgz",
"integrity": "sha1-VN36Sq4fqnF3LPtukgjE2PmZNj4="
},
"wordwrap": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-1.0.0.tgz",
......
......@@ -14,10 +14,12 @@
"license": "GPL-3.0",
"repository": "https://code.research.uts.edu.au/eresearch/datacrate",
"dependencies": {
"calcyte": "^1.0.2",
"fs-extra": "^7.0.0",
"jsonld": "^1.1.0",
"lodash": "^4.17.11",
"calcyte": "^1.0.2"
"random-word": "^2.0.0",
"uuid": "^3.3.2"
},
"devDependencies": {
"chai": "^4.1.2",
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment