Commit 2e325cfb authored by PTSEFTON's avatar PTSEFTON
Browse files

Made some changes to ID handling and then undid them.

parent 566949d2
......@@ -48,10 +48,8 @@ program
"Create a zipped version of the bag - only applies with --bag"
)
.option("-d, --depth", "Maximum depth to recurse into directories")
.option(
"-r, --recurse",
"Recurse into directories - up to " + defaults.max_depth
)
.option("-r, --recurse", "Recurse into directories looking for CATALOG_.xslc files")
.option(
"-u, --url [distro]",
"Distribution URL"
......@@ -111,6 +109,7 @@ console.log("Generating html from exsiting " + catalog_path + " file");
catalog_path = path.join(process.cwd(), catalog_path);
}
var dir = path.dirname(catalog_path)
shell.rm("-rf", path.join(dir, "CATALOG_files"));
var json_helper = new JSON_helper();
json_helper.init(require(catalog_path));
if (program.url){
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -36,6 +36,7 @@ module.exports = function() {
defaults.DataCrate_Specification_Identifier
};
if (this.helper.root_node["contactPoint"] && this.helper.root_node["contactPoint"]["@id"]) {
contact = this.helper.item_by_id[this.helper.root_node["contactPoint"]["@id"]];
map = {
......@@ -80,17 +81,22 @@ module.exports = function() {
shell.exec("bagit updatetagmanifests " + this.target_dir);
},
fix_paths: function fix_paths(catalog_path) {
console.log(shell.test("-f", catalog_path));
if (!path.isAbsolute(catalog_path)) {
catalog_path = path.join("./", catalog_path);
}
var catalog = this.helper.json_ld;
for (let item of catalog["@graph"]) {
if (item["path"]) {
var p = this.helper.value_as_array(item["path"])[0];
var new_p = path.join("./data/", p);
var new_p = path.join("data", p);
item["path"] = [new_p];
if (item["@id"] === p) {
this.helper.update_id(item, new_p)
}
}
this.helper.update_all_ids();
}
fs.writeFileSync(
catalog_path,
......
......@@ -23,6 +23,7 @@ var defaults = require('./defaults');
var XLSX = require('xlsx');
var path = require('path');
const ignore= /^\./;
const querystring = require('querystring')
const Property = require("./property.js");
const Item = require("./item.js");
......@@ -65,13 +66,15 @@ module.exports = function() {
item_json[name_value["Name"]] = [name_value["Value"]];
}
}
item_json["TYPE:"] = "Dataset";
item_json["path"] = collection.rel_path;
if(!item_json["Name"]) {
item_json["Name"] = collection.rel_path;
}
if (!(collection.rel_path === "./")) {
item_json["ID"] = collection.rel_path;
item_json["ID"] = "./" + collection.rel_path;
} else if (!item_json["ID"]) {
item_json["ID"] = collection.rel_path;
item_json["ID"] = "./" + collection.rel_path;
}
collection.collection_metadata.load_json(item_json, collection);
}
......@@ -80,7 +83,6 @@ module.exports = function() {
metadata = XLSX.utils.sheet_to_json(workbook.Sheets[sheet_name]);
for (var i = 0; i < metadata.length; i++) {
item_json = metadata[i];
//console.log("JSON", item_json);
item = new Item();
item.load_json(item_json, collection);
......@@ -109,7 +111,32 @@ module.exports = function() {
item_by_id: this.item_by_id,
item_by_type: this.item_by_type,
same_as: this.same_as,
make_id: function(id){
id = String(id);
if (id.match(/^https?:\/\//i)){
return id;
}
else {
return id;
}
},
add_id: function(item) {
var id = this.make_id(item.id);
this.id_lookup[id] = item;
},
lookup_id: function(id){
var id = this.make_id(id);
if (this.id_lookup[id]) {
return this.make_id(this.id_lookup[id].id);
}
},
lookup_name: function(name){
if (this.name_lookup[name]){
//console.log(name, this.make_id(this.name_lookup[name].id))
return this.make_id(this.name_lookup[name].id)
}
},
get_unique_catalog_name: function get_unique_catalog_name(
dir,
existing_catalogs = []
......@@ -131,7 +158,6 @@ module.exports = function() {
},
index_graph: function index_graph() {
// TODO - REMOVE this - we now have a helper for this stuff
this.item_by_id = {};
this.item_by_path = {};
this.item_by_type = {};
......@@ -155,14 +181,10 @@ module.exports = function() {
? this.item_by_path["./"]
: this.item_by_path["data/"];
},
to_json: function to_json(graph) {
if (!this.collection_metadata) {
this.collection_metadata = new Item();
}
var collection_json = this.collection_metadata.to_json_ld_fragment();
// Need to work out how to do this
/* collection_json["distribution"] = {
......@@ -184,7 +206,7 @@ module.exports = function() {
collection_json["hasPart"] = [];
}
collection_json["hasPart"].push({
"@id": item.id
"@id": this.make_id(item.id)
});
if (!item_json["name"]) {
item_json["name"] = item.id;
......@@ -198,16 +220,16 @@ module.exports = function() {
}
}
//Sub collections
this.children.forEach(function(child) {
for (var child of this.children) {
child.to_json(graph);
if (!collection_json["hasPart"]) {
collection_json["hasPart"] = [];
}
collection_json["hasPart"].push({
"@id": child.collection_metadata.id
"@id": this.make_id(child.collection_metadata.id)
//"@type": "@id"
});
});
}
},
to_json_ld: function to_json_ld() {
......@@ -216,9 +238,9 @@ module.exports = function() {
"@graph": [],
"@context": context
};
this.to_json(json["@graph"]);
fs.writeFileSync("TEST.json", String(json));
json = JSON.parse(JSON.stringify(json));
for (var same of this.same_as) {
......
......@@ -23,7 +23,7 @@ const path = require("path");
const shell = require("shelljs");
const jsonld_helper = require("./jsonldhelper")
const filesize = require("filesize");
const sha1 = require('sha1');
const display_keys = [
"name",
......@@ -189,7 +189,7 @@ module.exports = function () {
var up_link;
var catalog_json_link = "";
var zip_link;
var name = this.root_node["name"];
var name = this.helper.get_name(this.root_node);
var json = "";
if (this.first_page) {
......@@ -262,14 +262,14 @@ module.exports = function () {
*/
var td_ele = "";
if (!part) {
// TODO: Not sure if this ever happens..
} else if (k == "@type") {
td_ele += this.format_header(part);
} else if (k === "name") {
td_ele += ele("b");
td_ele += part;
td_ele += this.helper.get_name(item);
td_ele += close("b");
//td_ele.ele("a", part).att('href', item["@id"]).att('class', 'fa fa-external-link').att('title',item["@id"]);
} else if (
......@@ -303,11 +303,11 @@ module.exports = function () {
td_ele += part;
td_ele += close("a");
} else if (k == "@id") {
if (item["@id"].match(/^https?:\/\//i)) {
if (item["@id"] && item["@id"].match(/^https?:\/\//i)) {
td_ele += ele("a", {
href: item["@id"],
class: "fa fa-external-link",
title: item.name
title: this.helper.get_name(item)
});
td_ele += item["@id"];
......@@ -323,20 +323,26 @@ module.exports = function () {
"name" in this.helper.item_by_id[part["@id"]]
) {
td_ele += this.helper.item_by_id[part["@id"]].name + " : " + this.helper.item_by_id[part["@id"]].value
td_ele += this.helper.get_name(this.helper.item_by_id[part["@id"]]) + " : " + this.helper.item_by_id[part["@id"]].value
}
else if (part["@id"] && this.helper.item_by_id[part["@id"]]) {
var target_name = this.helper.item_by_id[part["@id"]].name
? this.helper.item_by_id[part["@id"]].name
var target_name = this.helper.get_name(this.helper.item_by_id[part["@id"]])
? this.helper.get_name(this.helper.item_by_id[part["@id"]])
: part["@id"];
var href = this.get_href(part["@id"], item["@id"]);
td_ele += ele("a", { href: href });
td_ele += target_name;
td_ele += close("a");
} else {
} else if (part["@id"] && part["@label"]) {
td_ele += `<a href="${part["@id"]}" class="fa fa-external-link">${part["@label"]}<a/>`
}
else if (part["@value"]) {
td_ele += part["@value"];
}else {
td_ele += part;
}
return td_ele;
},
get_up_path: function get_up_path(path) {
......@@ -360,26 +366,12 @@ module.exports = function () {
return defaults.html_file_name;
}
}
var p = "";
p += defaults.html_multi_file_dirs + "/";
p += "pairtree_root/";
id = id.replace(/\^/g, "^5e")
.replace(/ /g, "^20")
.replace(/"/g, "^22")
.replace(/</g, "^3c")
.replace(/\\/g, "^5c")
.replace(/\*/g, "^2a")
.replace(/\=/g, "^3d")
.replace(/\+/g, "^2b")
.replace(/>/g, "^3e")
.replace(/\|/g, "^7c")
.replace(/,/g, "^2c")
.replace(/\?/g, "^3f")
.replace(/\//g, "=")
.replace(/\:/g, "+")
.replace(/\./g,",")
p += id.replace(/(..)/g, '$1/').replace(/([^\/])$/, "$1/")
id = sha1(id)
//arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/src/luhn.c
p += id.replace(/(........)/g, '$1/').replace(/([^\/])$/, "$1/")
p += "index.html"
return p
},
......@@ -454,7 +446,6 @@ module.exports = function () {
dataset_to_html: function dataset_to_html(node) {
// Turns any item into an HTML table
var html = "";
var keys = new Set(Object.keys(node));
if (node["identifier"]) {
......@@ -463,14 +454,13 @@ module.exports = function () {
keys.delete("identifier")
}
}
keys.delete("@id");
keys.delete("filename");
keys.delete("@reverse");
if (keys.has("encodingFormat")) {
keys.delete("fileFormat");
}
if (!this.first_page && node["@label"] && !node["name"]) {
if (!this.first_page && node["@label"] && !this.helper.get_name(node)) {
html += node["@label"];
for (let key of Object.keys(node)) {
node[key] = this.helper.value_as_array(node[key])
......@@ -493,14 +483,17 @@ module.exports = function () {
html += ele("hr");
//keys.delete("@type");
//keys.delete("hasPart");
html += ele("tr");
html += ele("th", { style: "white-space: nowrap; width: 1%;" });
html += "@id";
html += close("th");
html += ele("td");
html += this.format_property(node, "@id", node, false);
html += close("td");
html += close("tr");
if (!node["@id"].startsWith("./")) {
html += ele("tr");
html += ele("th", { style: "white-space: nowrap; width: 1%;" });
html += "@id";
html += close("th");
html += ele("td");
html += this.format_property(node, "@id", node, false);
html += close("td");
html += close("tr");
}
key_set = this.sort_keys(keys);
// Show back-links last
if ("@reverse" in node) {
......@@ -525,7 +518,7 @@ module.exports = function () {
out_path = this.out_path;
} else {
out_path = this.get_html_path(node["@id"]);
cite = node["name"];
cite = this.helper.get_name(node);
}
this.write_html(out_path, html, node);
......@@ -546,7 +539,7 @@ module.exports = function () {
var datasets = [];
var readmes = [];
var html = "";
for (part of ["hasPart", "hasMember"]) {
for (part of ["hasPart", "hasMember", "hasFile"]) {
if (node[part]) {
if (!Array.isArray(node[part])) {
node[part] = [node[part]];
......@@ -649,7 +642,7 @@ module.exports = function () {
this.text_citation = text_citation;
if (!text_citation) {
this.text_citation = root_node["name"];
this.text_citation = this.helper.get_name(root_node);
}
//if (root_node) {
body_el += this.dataset_to_html(root_node, true);
......@@ -672,7 +665,6 @@ module.exports = function () {
} //console.log(type);
}
body_el += close("div");
if (!this.multiple_files_dir) {
this.write_html(this.out_path, body_el, this.helper.json_ld);
}
......
......@@ -39,11 +39,11 @@ module.exports = function() {
this.nested_items = {};
function links_to_id(string, collection) {
var links_to = undefined;
if (collection.id_lookup[string]) {
links_to = collection.id_lookup[string].id;
} else if (collection.name_lookup[string]) {
links_to = collection.name_lookup[string].id;
var links_to = collection.lookup_id(string);
//console.log("Linksto", string, links_to)
if (!links_to) {
links_to = collection.lookup_name(string)
//console.log("looked up", string, links_to);
}
return links_to;
}
......@@ -78,7 +78,9 @@ module.exports = function() {
to_json_ld_fragment: function to_json_ld_fragment() {
//console.log("Keys at start of output", Object.keys(this.properties));
var frag = { "@id": String(this.id) };
var id = this.collection.make_id(this.id);
var frag = { "@id": String(id) };
//console.log("Setting id", this.id, this.name, this.nested_items);
//console.log(" NAMELOOKUPS", this.collection.name_lookup);
......@@ -97,11 +99,12 @@ module.exports = function() {
var link_id = f.links_to
? f.links_to
: links_to_id(f.data[k], this.collection);
//console.log("link_id", link_id, "relational", f.is_relational)
if (f.is_file) {
this.is_file = true;
//types.push("schema:MediaObject");
} else if (f.is_relational && link_id) {
} else if (f.is_relational && link_id && link_id != id) {
//console.log("Looking for relations", f.data[k], this.collection)
//console.log("GOT A LINK", f.name, f.id);
frag[f.name].push({ "@id": link_id }); //, "@type" : "@id"
......@@ -117,9 +120,7 @@ module.exports = function() {
}
}
frag["@type"] = this.types;
if (this.collection.bagged && frag.path) {
frag.path = path.join("data", frag.path);
}
var ident = helper.value_as_array(this.id)
for (let id of ident){
if (id && id.match(/(https?:\/\/)(dx\.)?(doi.org\/10\.)/i)) {
......@@ -163,20 +164,18 @@ module.exports = function() {
} else if (property.is_type) {
this.types = property.data;
}
// Add to lookup table
if (this.id) {
this.collection.id_lookup[this.id] = this;
} else {
if (!this.id) {
this.id = uuidv4();
}
}
//console.log(this.name)
if (this.name) {
//console.log("Adding name", this.name)
this.collection.name_lookup[this.name] = this;
}
// Add name to lookup table too
if (property.nested_item_json.length > 0) {
// Need to make new items later
for (var i = 0; i < property.nested_item_json.length; i++) {
var nested_json = property.nested_item_json[i];
......@@ -225,6 +224,8 @@ module.exports = function() {
name_prop.parse("encodingFormat", file_info.matches[0].format);
this.properties[name_prop.name] = name_prop;
}
this.collection.add_id(this);
//console.log("MY THINGS", this.items)
}
};
......
......@@ -46,7 +46,6 @@ module.exports = function () {
else if (!this.json_ld["@context"][term]["@type"]) {
term = this.json_ld["@context"][term]
}
}
var url = URI.parse(term)
......@@ -62,7 +61,16 @@ module.exports = function () {
}
},
trim_context: function () {
flatten: function(json) {
var promises = jsonld.promises;
var promise = promises.flatten(json, require("../defaults/context.json")); //require("../defaults/context.json")); //,
return promise.then(
(flattened) => {
this.init(flattened);
this.trim_context();
});
},
trim_context: function (){
var new_context = {}
for (let term of this.context_keys_used) {
var uri = this.get_uri_for_term(term)
......@@ -76,7 +84,6 @@ module.exports = function () {
this.json_ld["@context"] = new_context
},
reference_to_item: function (node) {
// Check if node is a reference to something else
// If it is, return the something else
......@@ -116,11 +123,14 @@ module.exports = function () {
this.item_by_id = {};
this.item_by_url = {};
this.item_by_type = {}; // dict of arrays
this.items_by_new_id = {}
this.graph = this.json_ld["@graph"];
if (!this.json_ld["@context"]) {
this.json_ld["@context"] = require(path.join(__dirname, "..", defaults.default_context))
}
this.context_keys_used = new Set()
this.graph = this.json_ld["@graph"];
for (let i = 0; i < this.graph.length; i++) {
var item = this.graph[i];
for (let key of Object.keys(item)) { //TODO: Filter
......@@ -146,10 +156,43 @@ module.exports = function () {
? this.item_by_url["./"]
: this.item_by_url["data/"];
},
update_id: function(item, new_id){
item["@id"] = new_id
},
update_all_ids: function(){
for (let item of this.json_ld["@graph"]) {
for (let key of Object.keys(item)) {
for (let val of this.value_as_array(item[key])) {
if (val["@id"] && this.item_by_id[val["@id"]]){
val["@id"] = this.item_by_id[val["@id"]]["@id"];
}
}
}
}
},
get_name: function get_name(node){
if (!node["name"]) {
return;
}
var name = "";
for (var n of this.value_as_array(node["name"])){
if (n["@value"]) {
name += n["@value"];
} else if (n["@label"]) {
name += n["@label"];
} else {
name += n;
}
return name;
}
},
make_back_links: function (item) {
for (let key of Object.keys(item)) {
if (key != "@id" && key != "@reverse") {
for (let part of this.value_as_array(item[key])) {
var target = this.reference_to_item(part);
var back_link = defaults.back_links[key];
// Dealing with one of the known structural properties
......
......@@ -124,12 +124,12 @@ module.exports = function() {
this.property_URI = get_RDF_for_column(this.name);
if (this.property_URI == "https://schema.org/identifier") {
if (this.property_URI == "http://schema.org/identifier") {
this.is_id = true;
}
if (this.property_URI == "https://schema.org/name") {
if (this.property_URI == "http://schema.org/name") {
this.is_name = true;
}
......
......@@ -21,11 +21,13 @@
"filesize": "^3.6.1",
"gladstone": "^0.2.4",
"html-entities": "^1.2.1",
"iri": "^1.3.0",
"jquery": "^3.3.1",
"jsdom": "^11.6.2",
"json": "^9.0.6",
"jsonld": "^0.4.12",
"node-json2html": "^1.1.1",
"sha1": "^1.1.1",
"shelljs": "^0.8.1",
"tmp": "0.0.33",
"uri-js": "^4.2.2",
......
......@@ -24,7 +24,6 @@ var fs = require("fs");
var path = require("path");
const XLSX = require("xlsx");
const Datacite = require("../lib/datacite.js");
const fixtures = require("./fixtures");
......@@ -90,8 +89,8 @@ describe("Create a CATALOG", function() {
return c.to_json_ld().then(
function() {
//console.log(JSON.stringify(c.json_ld, null, 2));
assert.equal(c.item_by_id["./"]["hasPart"].length, 11);
assert(!c.item_by_id["./1.pdf"]);
assert.equal(c.item_by_url["./"]["hasPart"].length, 11);
assert(!c.item_by_id["1.pdf"]);
assert(c.item_by_id["1.sh"]);
},