Commit d095eec8 authored by PTSEFTON
Browse files

Added a new helper module for JSON-LD tasks; added context trimming to get rid of unused terms

parent 5507f5c8
......@@ -28,6 +28,8 @@ const path = require("path");
const html_file_name = "CATALOG.html";
const catalog_file_name = "CATALOG.json";
const defaults = require("./defaults.js");
const JSON_helper = require("./jsonldhelper.js")
const fs = require("fs")
var dirs = undefined;
program
......@@ -130,6 +132,7 @@ if (program.generateHtml) {
} else {
//TODO - there is a lot of repetition here, sort it out
var zipname;
dirs.forEach(function(dir) {
if (shell.test("-d", dir)) {
var c = new Collection();
......@@ -147,6 +150,10 @@ if (program.generateHtml) {
}
c.to_json_ld().then(function() {
var text_citation;
var json_helper = new JSON_helper()
json_helper.init(c.json_ld)
json_helper.trim_context()
fs.writeFileSync(path.join(c.dir, defaults.catalog_json_file_name), JSON.stringify(json_helper.json_ld))
if (program.bag) {
c.generate_bag_info();
c.save_bag_info();
......
......@@ -15,71 +15,160 @@ You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/* Defaults for Calcyte such as names of key files */
/* JSON-LD utility and lookup functions */
var jsonld = require("jsonld");
var defaults = require("./defaults.js")
var URI = require("uri-js")
/**
 * Index a JSON-LD document on the helper object this function is bound to.
 *
 * Populates `this.json_ld`, `this.graph`, `this.item_by_id`,
 * `this.item_by_url` (keyed by each item's "path") and `this.item_by_type`
 * (type name -> array of items).
 *
 * Side effect on the input: every property of every graph item except
 * "@id" and "@reverse" is rewritten as an array. Falsy values are replaced
 * by "" before being wrapped, and items without "@type" get ["Thing"].
 *
 * @param {Object} json - JSON-LD document with an "@graph" array.
 */
const init = function init(json) {
  this.json_ld = json;
  this.item_by_id = {};
  this.item_by_url = {};
  this.item_by_type = {}; // dict of arrays
  // The graph is kept on the instance rather than in an implicit global, so
  // several helpers can coexist and the code also runs in strict mode.
  const graph = this.json_ld["@graph"];
  this.graph = graph;
  for (const item of graph) {
    for (const key of Object.keys(item)) { //TODO: Filter
      if (key === "@id" || key === "@reverse") {
        continue;
      }
      if (!item[key]) {
        item[key] = "";
      }
      if (!Array.isArray(item[key])) {
        item[key] = [item[key]];
      }
    }
    if (item["@id"]) {
      this.item_by_id[item["@id"]] = item;
    }
    if (item["path"]) {
      // "path" is an array by now; it is coerced to a string when used as a key.
      this.item_by_url[item["path"]] = item;
    }
    if (!item["@type"]) {
      item["@type"] = ["Thing"];
    }
    for (const t of item["@type"]) {
      if (!this.item_by_type[t]) {
        this.item_by_type[t] = [];
      }
      this.item_by_type[t].push(item);
    }
  }
  // TODO
};
// Context trimmer
// Context looker-upper
/**
 * Call `this.make_back_links(item)` for every item of the graph indexed by
 * init(), so that inverse links / "@reverse" properties get added.
 */
const add_back_links = function () {
  for (const item of this.graph) {
    this.make_back_links(item);
  }
};
module.exports = function () {
return {
init: init,
add_back_links: add_back_links,
item_by_path: this.item_by_path,
item_by_id: this.item_by_id,
item_by_type: this.item_by_type
item_by_type: this.item_by_type,
get_uri_for_term: function (term) {
if (this.json_ld["@context"][term]) {
if (this.json_ld["@context"][term]["@id"]) {
term = this.json_ld["@context"][term]["@id"]
}
else if (!this.json_ld["@context"][term]["@type"]) {
term = this.json_ld["@context"][term]
}
}
var url = URI.parse(term)
// Looks like a URL
if (url.scheme) {
if (!url.host) {
term = this.get_uri_for_term(url.scheme) + url.path
}
//this.context_keys_used.add(term)
return (term)
}
else {
return null
}
},
trim_context: function () {
var new_context = {}
for (term of this.context_keys_used) {
var uri = this.get_uri_for_term(term)
if (uri) new_context[term] = uri
}
this.json_ld["@context"] = new_context
},
reference_to_item: function (node) {
// Check if node is a reference to something else
// If it is, return the something else
if (node["@id"] && this.item_by_id[node["@id"]]) {
return this.item_by_id[node["@id"]]
}
else {
return null
}
},
value_as_array: function (value) {
if (!Array.isArray(value)) {
return [value];
console.log("Making array", key, item[key])
} else {
return value;
}
},
init: function init(json) {
this.json_ld = json;
this.item_by_id = {};
this.item_by_url = {};
this.item_by_type = {}; // dict of arrays
this.graph = this.json_ld["@graph"];
this.context_keys_used = new Set()
for (let i = 0; i < this.graph.length; i++) {
var item = this.graph[i];
for (let key of Object.keys(item)) { //TODO: Filter
this.context_keys_used.add(key)
}
if (item["@id"]) {
this.item_by_id[item["@id"]] = item;
}
if (item["path"]) {
this.item_by_url[item["path"]] = item;
}
if (!item["@type"]) {
item["@type"] = ["Thing"];
}
for (let t of this.value_as_array(item["@type"])) {
if (!this.item_by_type[t]) {
this.item_by_type[t] = [];
}
this.item_by_type[t].push(item);
}
}
},
make_back_links: function (item) {
for (let key of Object.keys(item)) {
if (key != "@id" && key != "@reverse") {
for (let part of this.value_as_array(item[key])) {
var target = this.reference_to_item(part);
var back_link = defaults.back_links[key];
if (target && back_link) {
if (!target[back_link]) {
//console.log("Making link", key, back_link, target)
target[back_link] = [{ "@id": item["@id"] }];
}
} else if (
!back_link && target
) {
// We are linking to something
//console.log("Doing a back link", key, target['name'], item['name'])
if (!target["@reverse"]) {
target["@reverse"] = {};
}
if (!target["@reverse"][key]) {
target["@reverse"][key] = [];
}
var got_this_reverse_already = false;
// for (let r of target["@reverse"][key]) {
// //console.log(r, r["@id"], item["@id"])
// if (r["@id"] === item["@id"]) {
// got_this_reverse_already = true
// }
// }
if (!got_this_reverse_already) {
target["@reverse"][key].push({ "@id": item["@id"] });
}
//console.log(JSON.stringify(target, null, 2))
}
}
}
}
},
add_back_links: function () {
// Add @reverse properties if not there
for (let item of this.json_ld["@graph"]) {
this.make_back_links(item);
}
}
}
};
......
......@@ -1388,6 +1388,21 @@
"resolved": "https://registry.npmjs.org/ultron/-/ultron-1.1.1.tgz",
"integrity": "sha512-UIEXBNeYmKptWH6z8ZnqTeS8fV74zG0/eRU9VGkpzz+LIJNs8W/zM/L+7ctCkRrgbNnnR0xxw4bKOr0cW0N0Og=="
},
"uri-js": {
"version": "4.2.2",
"resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.2.2.tgz",
"integrity": "sha512-KY9Frmirql91X2Qgjry0Wd4Y+YTdrdZheS8TFwvkbLWf/G5KNJDCh6pKL5OZctEW4+0Baa5idK2ZQuELRwPznQ==",
"requires": {
"punycode": "2.1.1"
},
"dependencies": {
"punycode": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz",
"integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A=="
}
}
},
"util-deprecate": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
......
......@@ -25,6 +25,7 @@
"node-json2html": "^1.1.1",
"shelljs": "^0.8.1",
"tmp": "0.0.33",
"uri-js": "^4.2.2",
"xlsx": "^0.11.17",
"xmlbuilder": "^9.0.4"
}
......
const fs = require("fs");
const jsonld = require("../jsonldhelper.js")
const assert = require("assert");
// Mocha tests for the JSON-LD helper: context-term resolution, context
// trimming, item indexing and inverse ("back") links.
describe("JSON-LD helper simple tests", function () {
  it("Test context resolving", function (done) {
    // From the spec https://json-ld.org/spec/latest/json-ld/
    let json_content = {
      "@context": {
        "name": "http://schema.org/name",
        "image": {
          "@id": "http://schema.org/image",
          "@type": "@id"
        },
        "homepage": {
          "@id": "http://schema.org/url",
          "@type": "@id"
        }
      },
      "@graph": [
        {
          "name": "Manu Sporny",
          "homepage": "http://manu.sporny.org/",
          "image": "http://manu.sporny.org/images/manu.png"
        }
      ]
    };
    const helper = new jsonld();
    helper.init(json_content);
    // assert(value, message) only checks truthiness, so compare explicitly.
    assert.strictEqual(helper.get_uri_for_term("name"), "http://schema.org/name");
    assert.strictEqual(helper.get_uri_for_term("image"), "http://schema.org/image");
    helper.trim_context();
    assert.strictEqual(Object.keys(helper.json_ld["@context"]).length, 3);

    //from the spec - example 24
    json_content = {
      "@graph": [
        {
          "@id": "http://me.markus-lanthaler.com/",
          "@type": "foaf:Person",
          "foaf:name": "Markus Lanthaler",
          "foaf:homepage": "http://www.markus-lanthaler.com/",
          "picture": "http://twitter.com/account/profile_image/markuslanthaler"
        }],
      "@context": {
        "@version": 1.1,
        "xsd": "http://www.w3.org/2001/XMLSchema#",
        "foaf": "http://xmlns.com/foaf/0.1/",
        "foaf:homepage": { "@type": "@id" },
        "picture": { "@id": "foaf:depiction", "@type": "@id" }
      }
    };
    const helper2 = new jsonld();
    helper2.init(json_content);
    assert.strictEqual(helper2.get_uri_for_term("foaf:name"), "http://xmlns.com/foaf/0.1/name");
    assert.strictEqual(helper2.get_uri_for_term("picture"), "http://xmlns.com/foaf/0.1/depiction");
    assert.strictEqual(helper2.get_uri_for_term("foaf:homepage"), "http://xmlns.com/foaf/0.1/homepage");
    helper2.trim_context();
    assert.strictEqual(Object.keys(helper2.json_ld["@context"]).length, 3);

    // Try with a real CATALOG
    json_content = JSON.parse(fs.readFileSync("test_data/context_trimming/CATALOG.json"));
    const helper3 = new jsonld();
    helper3.init(json_content);
    assert.strictEqual(helper3.get_uri_for_term("Person"), "http://schema.org/Person");
    assert.strictEqual(helper3.get_uri_for_term("Project"), "http://purl.org/cerif/frapo/Project");
    helper3.trim_context();
    assert.strictEqual(Object.keys(helper3.json_ld["@context"]).length, 22);
    done();
  });

  it("Test basic indexing", function (done) {
    const json = {
      "@graph": [
        { "@id": "1", "name": "one", "path": "./nothing", "@type": "Test" }
      ]
    };
    let helper = new jsonld();
    helper.init(json);
    assert.strictEqual(helper.item_by_id["1"]["name"], "one");
    assert.strictEqual(helper.item_by_url["./nothing"]["name"], "one");
    assert.strictEqual(helper.item_by_type["Test"][0]["name"], "one");
    assert.strictEqual(helper.item_by_type["Test"].length, 1);

    const json1 = {
      "@graph": [
        {
          "@id": "1", "name": "one", "path": "./nothing", "@type": "Test",
          "hasPart": [{ "@id": "2" }, { "@id": "3" }, { "@id": "4" }], "creator": { "@id": "2" }
        },
        { "@id": "2", "name": "two", "path": "./something", "@type": "Test" },
        { "@id": "3", "name": "three", "path": "./somethin_else", "@type": "Test1" },
        { "@id": "4", "name": "four", "path": "./nothin", "@type": "Test1" }
      ]
    };
    helper = new jsonld();
    helper.init(json1);
    assert.strictEqual(helper.item_by_id["1"]["name"], "one");
    assert.strictEqual(helper.item_by_url["./something"]["name"], "two");
    assert.strictEqual(helper.item_by_type["Test"].length, 2);
    // Every hasPart reference must resolve to an indexed item.
    for (const part of helper.value_as_array(helper.item_by_id["1"]["hasPart"])) {
      assert.strictEqual(helper.reference_to_item(part)["path"].startsWith("./"), true);
    }

    // Check that inverse links have been put in place
    helper.add_back_links();
    assert.strictEqual(helper.item_by_id["2"]["isPartOf"][0]["@id"], "1");
    assert.strictEqual(helper.item_by_id["3"]["isPartOf"][0]["@id"], "1");
    assert.strictEqual(helper.item_by_id["4"]["isPartOf"][0]["@id"], "1");
    assert.strictEqual(helper.item_by_id["2"]["@reverse"]["creator"][0]["@id"], "1");
    done();
  });
});
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment