// randomize.js
// Module that generates CATALOG.json files populated with arbitrary but
// realistic-looking data.
//
// TODO: also generate some random text files and add them to the output.

// Organisation record passed as `organisation` to datacrate.datapub2catalog
// by makedatacrate.
const ORGANISATION = {
  'id': 'https://examples.edu',
  'name': 'Examples University'
};

// Domain appended to "<givenname>.<surname>@" when randperson builds an email.
const EMAIL_DOMAIN = 'examples.edu';
// Passed as `owner` and `approver` to datacrate.datapub2catalog.
const OWNER = 'owner@examples.edu';
const APPROVER = 'approver@examples.edu';

// All *_MIN / *_MAX pairs that are used below go through randrange(min, max),
// which returns an integer in [min, max) - i.e. the MAX value itself is never
// produced.
//
// NOTE(review): NAME_*, KEYWORD_* and WORD_* are not referenced anywhere in
// the visible part of this file - confirm whether they are still needed.
const NAME_MIN = 3;
const NAME_MAX = 10;
const KEYWORD_MIN = 3;
const KEYWORD_MAX = 12;
const WORD_MIN = 2;
const WORD_MAX = 14;
// Number of words in a sentence produced by randsentence.
const SENTENCE_MIN = 3;
const SENTENCE_MAX = 30;
// Number of sentences in a text produced by randtext.
const PARA_MIN = 1;
const PARA_MAX = 10;

// Number of keywords sampled for each data publication in randdatapub.
const N_KEYWORD_MIN = 2;
const N_KEYWORD_MAX = 10;

// Number of people sampled as creators for each data publication.
const N_PEOPLE_MIN = 1;
const N_PEOPLE_MAX = 5;

// Pool that randperson samples an honorific from. 'Dr' appears four times in
// the eight entries, so it is drawn more often than the other titles.
const HONORIFICS = ['Dr', 'A/Prof', 'Prof', 'Dr', 'Dr', 'Dr', 'Mr', 'Ms'];

const datacrate = require('./catalog.js');
const _ = require('lodash');
const fs = require('fs-extra');
const randdict = require('random-word');
const path = require('path');
const uuidv4 = require('uuid/v4');
const ArgumentParser = require('argparse').ArgumentParser;

// Relative path of a vocabularies directory.
// NOTE(review): not referenced anywhere in the visible part of this file -
// confirm whether it is still needed.
const VOCABULARIES = './vocabularies';

/**
 * Pick a pseudo-random integer from the half-open interval [min, max).
 *
 * @param {number} min - lowest value that can be returned (inclusive)
 * @param {number} max - upper bound (exclusive; never returned itself)
 * @returns {number} an integer n with min <= n < max
 */
function randrange(min, max) {
  const span = max - min;
  const offset = Math.floor(Math.random() * span);
  return offset + min;
}

/**
 * Build an array of n values by invoking a generator once per slot.
 *
 * The generator is handed straight to Array.prototype.map, so it is called
 * with (0, index, array) rather than with no arguments.
 *
 * @param {number} n - how many values to produce
 * @param {Function} fn - generator invoked once for every slot
 * @returns {Array} the n generated values, in slot order
 */
function randoms(n, fn) {
  const slots = new Array(n);
  slots.fill(0);
  return slots.map(fn);
}


/**
 * Read a text file and return its non-empty lines.
 *
 * Lines are split on either LF or CRLF, surrounding whitespace is trimmed and
 * blank lines are dropped. Without this, a file ending in a newline yields a
 * trailing '' entry, which can then be sampled as a name.
 *
 * @param {string} file - path of the file to read
 * @returns {Promise<string[]>} the trimmed, non-empty lines in file order
 */
async function loadsource(file) {
  const contents = await fs.readFile(file);
  return contents
    .toString()
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter((line) => line.length > 0);
}


/**
 * Assemble a random person record from the loaded name lists.
 *
 * @param {Object} sourcedata - object with `surnames` and `givennames` arrays
 * @returns {Object} a record with the keys 'dc:identifier' (a fresh uuid),
 *   'text_full_name', 'full_name_honorific' and 'email'
 */
function randperson(sourcedata) {
  const title = _.sample(HONORIFICS);
  const family = _.sample(sourcedata.surnames);
  const given = _.sample(sourcedata.givennames);
  const fullName = `${given} ${family}`;
  const address = `${given}.${family}@${EMAIL_DOMAIN}`;

  const person = {};
  person['dc:identifier'] = uuidv4();
  person['text_full_name'] = fullName;
  person['full_name_honorific'] = `${title} ${fullName}`;
  person['email'] = address;
  return person;
}

/**
 * Produce one random keyword by asking the random-word dictionary for a word.
 *
 * @returns {string} whatever randdict() yields
 */
function randkeyword() {
  const word = randdict();
  return word;
}

/**
 * Generate a random sentence: a run of dictionary words separated by single
 * spaces, terminated with a full stop and with its first letter upper-cased.
 *
 * The word count comes from randrange(SENTENCE_MIN, SENTENCE_MAX).
 *
 * @returns {string} the generated sentence
 */
function randsentence() {
  const wordCount = randrange(SENTENCE_MIN, SENTENCE_MAX);
  const words = randoms(wordCount, randdict);
  return _.upperFirst(`${words.join(' ')}.`);
}

/**
 * Generate a block of random text: several random sentences joined by single
 * spaces and followed by a newline.
 *
 * The sentence count comes from randrange(PARA_MIN, PARA_MAX).
 *
 * @returns {string} the generated text, ending in '\n'
 */
function randtext() {
  const sentenceCount = randrange(PARA_MIN, PARA_MAX);
  const sentences = randoms(sentenceCount, randsentence);
  return `${sentences.join(' ')}\n`;
}

/**
 * Build one random data publication from shared pools of keywords and people.
 *
 * The first sampled person becomes the chief investigator. The role is set on
 * a copy of that person: the objects in `people` are shared between every
 * publication generated from the same pool, so writing `role` onto the
 * original would leak it into `creators` and into every other publication
 * that samples the same person.
 *
 * @param {string[]} keywords - pool of keywords to sample from
 * @param {Object[]} people - pool of person records to sample from; must
 *   yield at least one person
 * @returns {Object} an object with `finalKeywords`, `contributor_ci` (copy of
 *   the first creator plus `role`), `contributors` (the remaining creators),
 *   `creators`, `title` and `description`
 * @throws {Error} if no person could be sampled from `people`
 */
function randdatapub(keywords, people) {
  const finalKeywords = _.sampleSize(keywords, randrange(N_KEYWORD_MIN, N_KEYWORD_MAX));
  // The random sentence is passed through lodash camelCase and then startCase
  // to turn it into a title.
  const title = _.startCase(_.camelCase(randsentence()));
  const description = randtext();

  // _.sampleSize returns a new array, so no extra array copy is needed.
  const creators = _.sampleSize(people, randrange(N_PEOPLE_MIN, N_PEOPLE_MAX));
  if (creators.length === 0) {
    throw new Error('randdatapub: people must contain at least one person');
  }

  // Copy before tagging so the shared person object is left untouched.
  const chiefInvestigator = Object.assign({}, creators[0], {
    role: 'Chief Investigator'
  });
  const collaborators = creators.slice(1);

  return {
    finalKeywords: finalKeywords,
    contributor_ci: chiefInvestigator,
    contributors: collaborators,
    creators: creators,
    title: title,
    description: description
  };
}


module.exports = {
  loadsourcedata: async function (dir) {
    const sourcedata = {};
    sourcedata['surnames'] = await loadsource(path.join(dir, 'surname.txt'));
    sourcedata['givennames'] = await loadsource(path.join(dir, 'givenname.txt'));
    return sourcedata;
  },
  randdatapubs: function (n, sourcedata) {
    const keywords = randoms(Math.floor(n / 2), randkeyword);
    const people = randoms(n * 2, () => {
      return randperson(sourcedata)
    });
    return randoms(n, () => randdatapub(keywords, people))
  },
  makedir: async function (dest) {
    const id = uuidv4();
    const createDir = await fs.ensureDir(path.join(dest, id));
    return id;
  },
  makedatacrate: async function (dest, datapub, id) {
    const catalog = await datacrate.datapub2catalog({
      id: id,
      datapub: datapub,
      organisation: ORGANISATION,
      owner: OWNER,
      approver: APPROVER
    });
    const catfile = path.join(dest, id, 'CATALOG.json');
    await fs.writeFile(catfile, JSON.stringify(catalog, null, 2));
  }
};