Skip to content
Snippets Groups Projects
Commit ea6d1c93 authored by Mike Lynch
Browse files

Successfully added all of the ro-crate demos to a solr index

parent 6317e0e1
Branches
No related merge requests found
......@@ -6,7 +6,7 @@ const ROCrate = require('ro-crate').ROCrate;
const fs = require('fs-extra');
const path = require('path');
const OCFLRepository = require('ocfl').Repository;
const uuidv1 = require('uuid/v1');
const argv = yargs['argv'];
......@@ -28,7 +28,7 @@ const catalogFilename = configJson['catalogFilename'] || 'CATALOG.json';
const sourcePath = _.endsWith(configJson['source'], '/') ? configJson['source'] : `${configJson['source']}/`;
const ocflMode = configJson['ocfl'] || false;
const dryRun = configJson['dry-run'] || false;
const dryRun = configJson['dryRun'] || false;
const sleep = ms => new Promise((r, j) => {
console.log('Waiting for ' + ms + ' seconds');
......@@ -165,8 +165,11 @@ async function commitBatches (records) {
batch.reduce((promise, records, index) => {
return promise.then(() => {
if (logLevel >= 4) console.log(`Using: ${Math.round(process.memoryUsage().rss / 1024 / 1024 * 100) / 100} MBs`);
if (logLevel >= 4) {
reportMemUsage();
}
const catalogs = solrObjects(records);
dumpSolrSync(catalogs);
if( dryRun ) {
console.log("Dry-run mode, not committing");
return Promise.resolve();
......@@ -184,13 +187,26 @@ async function commitBatches (records) {
return Promise.resolve();
});
}).catch((e) => {
console.log(e);
console.log("Update failed");
console.log(String(e));
//fs.writeFileSync(path.join('test-data', 'error.log'), e);
})
}, Promise.resolve());
}
/**
 * Log the resident set size (RSS) of the current process, converted from
 * bytes to megabytes and rounded to two decimal places.
 */
function reportMemUsage() {
  const { rss } = process.memoryUsage();
  // scale by 100 before rounding, then back, to keep two decimal places
  const megabytes = Math.round(rss / 1024 / 1024 * 100) / 100;
  console.log(`Using: ${megabytes} MBs`);
}
/**
 * Debug helper: write a set of solr documents to a uniquely named JSON file
 * under the `test-data` directory and log where it went.
 *
 * The write is synchronous; the function stays `async` so callers keep
 * receiving a Promise. It is called without `await`, so a thrown error would
 * surface as an unhandled promise rejection. Failures are therefore logged
 * here instead of propagated, and the output directory is created first so
 * a missing `test-data` directory does not make the dump fail.
 *
 * @param {*} solr - JSON-serialisable solr documents to dump
 * @returns {Promise<void>} always resolves; write errors are logged
 */
async function dumpSolrSync(solr) {
  const dumpDir = 'test-data';
  const uuname = path.join(dumpDir, uuidv1() + '.json');
  try {
    fs.ensureDirSync(dumpDir);
    fs.writeJsonSync(uuname, solr, { spaces: 2 });
    console.log(`Wrote solr docs to ${uuname}`);
  } catch (e) {
    console.log(`Could not write solr docs to ${uuname}: ${String(e)}`);
  }
}
async function main () {
var records = null;
if( ocflMode ) {
......
{
"source": "/Users/mike/working/ocfl-nginx/test_repos/rocrate_ocfl",
"ocfl": 1,
"dry-run": 1,
"dryRun": 0,
"catalogFilename": "ro-crate-metadata.jsonld",
"solrUpdate": "http://localhost:8983/solr/ocflcore/update/json",
"logLevel": 4,
......
......@@ -23,7 +23,8 @@
}
},
"datePublished": {
"facet": true
"facet": true,
"validate": "date"
},
"publisher": {
"resolve": "single",
......
......@@ -123,6 +123,9 @@ class CatalogSolr {
} else {
solr[field] = this.unwrap(value);
}
if( fieldcf['validate'] ) {
solr[field] = this.validate(fieldcf['validate'], solr[field]);
}
// make facets - these can be based on raw or resolved values depending
// on the faceting rule, so pass both in
if( fieldcf['facet'] ) {
......@@ -208,6 +211,19 @@ class CatalogSolr {
return resolved;
}
// primitive validation, just to weed out bad dates at this stage
validate(type, value) {
if( type === 'date' ) {
if( value.match(/^\d\d\d\d-\d\d-\d\d?$/) ) {
return value;
}
console.log(`Date ${value} did not match`);
this.convertError(`Invalid ${type}: ${value}`);
return '';
}
this.convertError(`Unknown validation type ${type}`);
}
// TODO - this should give better context
......
......@@ -23,6 +23,7 @@
"ocfl": "^1.0.3",
"random-word": "^2.0.0",
"ro-crate": "^1.1.0",
"uuid": "^3.3.2",
"yargs": "^13.3.0"
},
"devDependencies": {
......
......@@ -23,7 +23,8 @@
}
},
"datePublished": {
"facet": true
"facet": true,
"validate": "date"
},
"publisher": {
"resolve": "single",
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment