Skip to content
Snippets Groups Projects
Commit ba709a98 authored by PTSEFTON
Browse files

Now handles resolution better, does orgs

parent ede851b4
Branches
No related merge requests found
......@@ -172,21 +172,21 @@ function solrObjects(recs) {
const jsonld = record['jsonld'];
const docs = indexer.createSolrDocument(jsonld);
if (docs) {
if (docs.Dataset) {
docs.Dataset.forEach((dataset) => {
dataset['path'] = record['path'];
dataset['uri_id'] = record['uri_id'];
solrDocs.push(dataset);
console.log(`Dataset URI id ${dataset['uri_id']}`);
});
} else {
console.log(`Warning: no Datasets created for record ${record['path']} ${record['uri_id']}`);
}
if (docs.Person) {
docs.Person.forEach((person) => {
solrDocs.push(person);
});
}
for (let t of Object.keys(docs)){
if (t === "Dataset") {
docs.Dataset.forEach((dataset) => {
dataset['path'] = record['path'];
dataset['uri_id'] = record['uri_id'];
solrDocs.push(dataset);
console.log(`Dataset URI id ${dataset['uri_id']}`);
});
} else {
docs[t].forEach((item) => {
solrDocs.push(item);
});
}
}
}
} catch(e) {
console.log("Error converting ro-crate to solr");
......
......@@ -65,7 +65,7 @@
"Person": {
"@id" : {
"filter" : { "re" : ".*$"}
"filter" : { "re" : "NOMATCH"}
},
"@reverse": { "skip": true },
......@@ -81,7 +81,7 @@
"Organization": {
"@id" : {
"filter" : { "re" : "DOES_NOT_MATCH"}
"filter" : { "re" : "NOMATCH"}
},
"@reverse": { "skip": true },
"contactPoint" : {
......
......@@ -128,7 +128,7 @@ class CatalogSolr {
}
} else {
return umapped;
}
}
};
}
......@@ -136,12 +136,12 @@ class CatalogSolr {
createSolrDocument(jsonld) {
const crate = new ROCrate(jsonld);
this.crate = crate;
this.crate = new ROCrate(jsonld);;
// Keep track of things that are resolved and have index config (ignoring filter)
this.resolvedItemsToIndex = {};
this.resolvedItemsToIndex = [];
this.alreadyIndexed = {};
crate.index();
this.crate.index();
const cfBase = this.config['map_all'] || {};
const cfTypes = this.config['types'];
......@@ -154,7 +154,7 @@ class CatalogSolr {
throw Error("ro-crate to solr config must have a Dataset type");
}
this.root = crate.getRootDataset();
this.root = this.crate.getRootDataset();
if( !this.root ) {
throw Error("Couldn't find ro-crate's root dataset");
}
......@@ -165,7 +165,7 @@ class CatalogSolr {
if( datasetCf['@id'] ) {
const namespace = datasetCf['@id']['name'];
const identifier = crate.getNamedIdentifier(namespace);
const identifier = this.crate.getNamedIdentifier(namespace);
if( identifier ) {
rootItem['@id'] = identifier;
} else {
......@@ -174,46 +174,62 @@ class CatalogSolr {
}
rootItem['license'] = this.mapLicenses(rootItem['license']);
const rootSolr = this.mapItem(cfBase, datasetCf, crate, 'Dataset', rootItem);
const rootSolr = this.mapItem(cfBase, datasetCf, 'Dataset', rootItem);
const solrDocument = { 'Dataset': [ rootSolr ] };
// TODO INHERET LICENCE IF ITEM DOES NOT HAVE ONE
// TODO INHERIT LICENCE IF ITEM DOES NOT HAVE ONE
// |||||||
// VVVVVVV
// loop through each item in the JSON-LD @graph
for ( const item of crate.graph ) {
if( item['@id'] !== rootOrigId ) {
this.indexItems(this.crate.json_ld["@graph"], rootOrigId, cfTypes, cfBase, solrDocument, false);
var additionalItems = _.clone(this.resolvedItemsToIndex)
this.resolvedItemsToIndex = []
while (additionalItems.length > 0) {
// Don't filter things that were resolved
this.indexItems(additionalItems, rootOrigId, cfTypes, cfBase, solrDocument, true);
additionalItems = _.clone(this.resolvedItemsToIndex)
this.resolvedItemsToIndex = []
}
return solrDocument;
}
indexItems(items, rootOrigId, cfTypes, cfBase, solrDocument, auto) {
for (const item of items) {
if (item['@id'] !== rootOrigId) {
var types = this.crate.utils.asArray(item['@type']);
// Look through types in order
for (let type of Object.keys(cfTypes)) {
if( types.includes(type) ) {
if (types.includes(type)) {
// get config for this type of item
const cf = cfTypes[type];
if(this.resolvedItemsToIndex[item["@id"]] || this.filters[type](item) ) {
// Only do ONCE per type
// If auto flag set always index regardless of filter
if (auto || this.filters[type](item)) {
// Only do ONCE per type
types = [type];
item["@type"] = types;
const solr = this.mapItem(cfBase, cf, crate, type, item)
if( !(type in solrDocument) ) {
const solr = this.mapItem(cfBase, cf, type, item);
if (!(solrDocument[type])) {
solrDocument[type] = [];
}
solrDocument[type].push(solr)
if (type === "Organization") {
console.log("Doing an org", item);
console.log(solr);
}
solrDocument[type].push(solr);
}
}
}
}
}
}
return solrDocument;
}
// map the fields in an ro-crate item to a solr document
mapItem(cfBase, cf, crate, type, item) {
mapItem(cfBase, cf, type, item) {
const solr = this.baseSolr(cfBase, item);
_.each(item, ( value, field ) => {
const fieldcf = cf[field];
......@@ -225,7 +241,7 @@ class CatalogSolr {
if( ! fieldcf['skip'] ) {
// resolve lookups
if( fieldcf['resolve'] ) {
solr[field] = this.resolveValues(crate, fieldcf['resolve'], value);
solr[field] = this.resolveValues(fieldcf['resolve'], value);
const vals = this.crate.utils.asArray(solr[field]);
solr[`${field}_id`] = [];
for (let val of vals) {
......@@ -234,16 +250,15 @@ class CatalogSolr {
solr[`${field}_id`].push(value["@id"]);
}
catch (e) {
console.log("ERROR", e.message, val)
//console.log("ERROR", e.message, val)
}
}
} else {
if( fieldcf['multi'] ) {
console.log("Multi", fieldcf)
//
solr[field] = this.unwrap(value, fieldcf.escapedJSON);
} else {
solr[field] = this.unwrap(value);
}
// } else {
// solr[field] = this.unwrap(value);
//}
}
if( fieldcf['validate'] ) {
const type = fieldcf['validate'];
......@@ -253,7 +268,7 @@ class CatalogSolr {
// make facets - these can be based on raw or resolved values depending
// on the faceting rule, so pass both in
if( fieldcf['facet'] ) {
const facet = this.makeFacet(crate, fieldcf['facet'], value, solr[field]);
const facet = this.makeFacet(fieldcf['facet'], value, solr[field]);
const facetField = [ type, field, Array.isArray(facet) ? 'facetmulti' : 'facet'].join('_');
solr[facetField] = facet;
}
......@@ -264,9 +279,7 @@ class CatalogSolr {
return solr;
}
resolveValues(crate, cf, value) {
resolveValues(cf, value) {
if( typeof value !== 'object' ) {
return value;
const error = this.convertError(`Can't resolve '${value}'`);
......@@ -274,35 +287,36 @@ class CatalogSolr {
}
if( cf === 'multi' ) {
if( Array.isArray(value) ) {
return value.map((v) => this.resolveAndFlatten(crate, v));
return value.map((v) => this.resolveAndFlatten(v));
} else {
return [ this.resolveAndFlatten(crate, value) ];
return [ this.resolveAndFlatten(value) ];
}
} else {
if( Array.isArray(value) ) {
return this.resolveAndFlatten(crate, value[0]);
return this.resolveAndFlatten(value[0]);
} else {
return this.resolveAndFlatten(crate, value);
return this.resolveAndFlatten(value);
}
}
}
resolveAndFlatten(crate, value, solr) {
resolveAndFlatten(value, solr) {
if( !('@id' in value ) ) {
return value;
return this.convertError(`no @id found in value ${JSON.stringify(value)}`);
}
const resolved = crate.getItem(value['@id']);
const resolved = this.crate.getItem(value['@id']);
if( !resolved ) {
return this.convertError(`@id ${value['@id']} not found`);
}
const resolvedTypes = this.crate.utils.asArray((resolved["@type"]));
const resolvedTypes = this.crate.utils.asArray(resolved["@type"]);
for (let type of Object.keys(this.config['types'])) {
const cf = this.config['type'];
if (resolvedTypes.includes(type)) {
this.resolvedItemsToIndex[resolved["@id"]] = true;
if (resolvedTypes.includes(type) && !this.alreadyIndexed[resolved["@id"]]) {
this.alreadyIndexed[resolved["@id"]] = true;
this.resolvedItemsToIndex.push(resolved);
}
}
......@@ -311,7 +325,7 @@ class CatalogSolr {
// returns
makeFacet(crate, cf, raw, resolved) {
makeFacet(cf, raw, resolved) {
if( cf['tokenize'] ) {
if( raw ) {
......@@ -323,7 +337,7 @@ class CatalogSolr {
if (cf['fieldName']) {
if( Array.array(raw) ) {
return raw.map((v) => {
const lookup = crate.getItem(v['@id']);
const lookup = this.crate.getItem(v['@id']);
if( lookup ) {
return lookup[cf['field']]
} else {
......
......@@ -21,11 +21,23 @@
"stored": true,
"multiValued": true
},
{
"name": "yearPublished",
"type": "text_en",
"stored": true,
"multiValued": true
},
{
"name": "author_id",
"type": "text_en",
"stored": true,
"multiValued": true
},
{
"name": "affiliation_id",
"type": "text_en",
"stored": true,
"multiValued": true
}
],
"dynamic-field": [
......@@ -83,6 +95,13 @@
"main_search"
]
},
{
"source": "datePublished",
"maxChars": 4,
"dest": [
"yearPublished"
]
},
{
"source": "description",
"dest": [
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment