// Module to generate a bunch of CATALOG.json files which // have arbitrary but realistic data // // TODO: generate some random text files and add them // as well const ORGANISATION = { 'id': 'https://examples.edu', 'name': 'Examples University' }; const EMAIL_DOMAIN = 'examples.edu'; const OWNER = 'owner@examples.edu'; const APPROVER = 'approver@examples.edu'; const NAME_MIN = 3; const NAME_MAX = 10; const KEYWORD_MIN = 3; const KEYWORD_MAX = 12; const WORD_MIN = 2; const WORD_MAX = 14; const SENTENCE_MIN = 3; const SENTENCE_MAX = 30; const PARA_MIN = 1; const PARA_MAX = 10; const N_KEYWORD_MIN = 2; const N_KEYWORD_MAX = 10; const N_PEOPLE_MIN = 1; const N_PEOPLE_MAX = 5; const HONORIFICS = ['Dr', 'A/Prof', 'Prof', 'Dr', 'Dr', 'Dr', 'Mr', 'Ms']; const datacrate = require('./catalog.js'); const _ = require('lodash'); const fs = require('fs-extra'); const randdict = require('random-word'); const path = require('path'); const uuidv4 = require('uuid/v4'); const ArgumentParser = require('argparse').ArgumentParser; const VOCABULARIES = './vocabularies'; function randrange(min, max) { return Math.floor(Math.random() * (max - min)) + min; } function randoms(n, fn) { return Array(n).fill(0).map(fn); } async function loadsource(file) { const text = await fs.readFile(file); return text.toString().split("\n"); } function randperson(sourcedata) { const honorific = _.sample(HONORIFICS); const surname = _.sample(sourcedata['surnames']); const givenname = _.sample(sourcedata['givennames']); const name = givenname + ' ' + surname; const email = givenname + '.' + surname + '@' + EMAIL_DOMAIN; const id = uuidv4(); return { 'dc:identifier': id, 'text_full_name': name, 'full_name_honorific': honorific + ' ' + name, 'email': email } } function randkeyword() { return randdict(); } function randsentence() { const nwords = randrange(SENTENCE_MIN, SENTENCE_MAX); const s = randoms(nwords, randdict).join(' ') + '.'; return _.upperFirst(s); } function randtext() { const nsentences = randrange(PARA_MIN, PARA_MAX); return randoms(nsentences, randsentence).join(' ') + '\n'; } function randdatapub(keywords, people) { const k = _.sampleSize(keywords, randrange(N_KEYWORD_MIN, N_KEYWORD_MAX)); const title = _.startCase(_.camelCase(randsentence())); const desc = randtext(); const creators = _.clone(_.sampleSize(people, randrange(N_PEOPLE_MIN, N_PEOPLE_MAX))); const contributors = _.clone(creators); const collabs = contributors.splice(1); contributors[0].role = 'Chief Investigator'; return { finalKeywords: k, contributor_ci: contributors[0], contributors: collabs, creators: creators, title: title, description: desc } } module.exports = { loadsourcedata: async function (dir) { const sourcedata = {}; sourcedata['surnames'] = await loadsource(path.join(dir, 'surname.txt')); sourcedata['givennames'] = await loadsource(path.join(dir, 'givenname.txt')); return sourcedata; }, randdatapubs: function (n, sourcedata) { const keywords = randoms(Math.floor(n / 2), randkeyword); const people = randoms(n * 2, () => { return randperson(sourcedata) }); return randoms(n, () => randdatapub(keywords, people)) }, makedir: async function (dest) { const id = uuidv4(); const createDir = await fs.ensureDir(path.join(dest, id)); return id; }, makedatacrate: async function (dest, datapub, id) { const catalog = await datacrate.datapub2catalog({ id: id, datapub: datapub, organisation: ORGANISATION, owner: OWNER, approver: APPROVER }); const catfile = path.join(dest, id, 'CATALOG.json'); await fs.writeFile(catfile, JSON.stringify(catalog, null, 2)); } };