Commit 13f1c12d authored by PTSEFTON
Browse files

Adding examples, changing URIs to https

parent 8153460c
......@@ -35,16 +35,12 @@ var dirs = undefined;
program
.version("0.1.0")
.description(
"To run calcyfy on a group of directories pass it a list of directories"
"Generates DataCrate HTML for CATALOG.JSON files. Pass a list of directories. To create Spreadsheet files for entring metadata use -d or -r."
)
.arguments("<directories...>")
.action(function(directories) {
dirs = directories;
})
.option(
"-g, --generate-html",
'Generate HTML from a "' + catalog_file_name + '" in a directory'
)
.option("-b, --bag [bag-dir]", "Create Bagit Bag(s) under [bag-dir])")
.option("-n, --no", "No Citation - only applies ith --bag")
.option(
......@@ -57,13 +53,10 @@ program
"Recurse into directories - up to " + defaults.max_depth
)
.option(
"-m, --multiple",
"Output multiple files instead of a single " + html_file_name
).option(
"-u, --url [distro]",
"Distribution URL"
);
var depth = 1;
var depth = 0;
program.parse(process.argv);
if (program.recurse) {
depth = defaults.max_depth;
......@@ -73,56 +66,110 @@ if (program.depth) {
}
var citer = new Datacite();
if (!program.rawArgs.length || !dirs) program.help();
var multiple = program.multiple;
// TODO: Refactor this by if statement - there is a lot of repetition
if (program.generateHtml) {
console.log("Generating html from exsiting " + catalog_file_name + " file");
if (depth) {
//TODO - there is a lot of repetition here, sort it out
dirs.forEach(function(dir) {
if (shell.test("-d", dir)) {
var c = new Collection();
var output_dir = dir;
c.read(dir, "./", undefined, depth);
c.to_json_ld().then(function() {
var text_citation;
var json_helper = new JSON_helper()
json_helper.init(c.json_ld)
var catalog_path = path.join(c.dir, defaults.catalog_json_file_name)
fs.writeFileSync(catalog_path, JSON.stringify(json_helper.json_ld, null, 2 ));
generateHTML(catalog_path);
});
}
});
}
else {
dirs.forEach(function(dir) {
var catalog_path
if (shell.test("-d", dir)) {
if (!path.isAbsolute(dir)) {
dir = path.join(process.cwd(), dir);
}
var catalog = path.join(dir, catalog_file_name);
catalog_path = path.join(dir, catalog_file_name);
} else if (shell.test("-f", dir)) {
if (!path.isAbsolute(dir)) {
dir = path.join(process.cwd(), dir);
}
var catalog = dir;
dir = path.dirname(catalog);
catalog_path = dir;
}
generateHTML(catalog_path);
})
}
function generateHTML(catalog_path) {
console.log("Generating html from exsiting " + catalog_path + " file");
if (!path.isAbsolute(catalog_path)) {
catalog_path = path.join(process.cwd(), catalog_path);
}
var dir = path.dirname(catalog_path)
var json_helper = new JSON_helper();
json_helper.init(require(catalog_path));
if (program.url){
if (!json_helper.root_node["distribution"]) {
json_helper.root_node["distribution"] = []
}
json_helper.root_node["distribution"].push(
{
"@id": program.url
}
);
json_helper.json_ld["@graph"].push(
{
"@id": program.url,
"contentUrl": program.url,
"@type": "DataDownload",
"encodingFormat": "zip"
});
json_helper.init(json_helper.json_ld);
}
if (program.bag) {
//Bag the HTML
var bagger = new Bag();
var dest = path.join(program.bag, path.basename(dir));
var zipname
shell.rm("-rf", dest);
dir = bagger.bag(dir, program.bag);
dir = bagger.bag(dir, program.bag, json_helper);
bagger.generate_bag_info()
bagger.save_bag_info()
if (!path.isAbsolute(dir)) {
dir = path.join(process.cwd(), dir);
}
catalog = path.join(dir, catalog_file_name);
catalog_path = path.join(dir, catalog_file_name);
if (!program.no) {
shell.mkdir("-p", path.join(dest, "metadata"));
var text_citation = citer.make_citation(
catalog,
catalog_path,
path.join(dir, "metadata", "datacite.xml")
);
}
}
var index_maker = new Index();
json_helper.trim_context()
index_maker.init(
catalog,
json_helper.json_ld,
path.join(dir),
multiple,
true,
path.join(__dirname, "defaults/catalog_template.html")
);
if (program.bag) {
bagger.update();
if (program.zip) {
shell.cd(bagger.target_dir);
zipname = path.join("../", path.basename(bagger.target_dir) + ".zip");
shell.cd(bagger.target_dir);
index_maker.make_index_html(text_citation, "");
......@@ -131,85 +178,5 @@ if (program.generateHtml) {
}
index_maker.make_index_html(text_citation, zipname);
});
} else {
//TODO - there is a lot of repetition here, sort it out
var zipname;
dirs.forEach(function(dir) {
if (shell.test("-d", dir)) {
var c = new Collection();
var output_dir = dir;
c.read(dir, "./", undefined, depth);
if (!path.isAbsolute(output_dir)) {
output_dir = path.join(process.cwd(), output_dir);
}
if (program.bag) {
output_dir = path.join(program.bag, path.basename(dir));
c.bag(output_dir);
console.log("OUTPUT", output_dir)
if (!path.isAbsolute(output_dir)) {
output_dir = path.join(process.cwd(), output_dir);
}
}
c.to_json_ld().then(function() {
var text_citation;
var json_helper = new JSON_helper()
json_helper.init(c.json_ld)
if (program.url){
if (!json_helper.root_node["distribution"]) {
json_helper.root_node["distribution"] = []
}
json_helper.root_node["distribution"].push(
{
"@id": program.url
}
)
json_helper.json_ld["@graph"].push(
{
"@id": program.url,
"contentUrl": program.url,
"@type": "DataDownload",
"encodingFormat": "zip"
}
)
// Reinitialize the helper object as we have added to the JSON
json_helper.init(json_helper.json_ld);
}
json_helper.trim_context()
fs.writeFileSync(path.join(c.dir, defaults.catalog_json_file_name), JSON.stringify(json_helper.json_ld, null, 2 ))
if (program.bag) {
c.generate_bag_info();
c.save_bag_info();
c.update();
if (!program.no) {
shell.mkdir("-p", path.join(c.dir, "metadata"));
text_citation = citer.make_citation(
path.join(c.dir, defaults.catalog_json_file_name),
path.join(c.dir, "metadata", "datacite.xml")
);
}
}
var index_maker = new Index();
index_maker.init(
path.join(output_dir, catalog_file_name),
output_dir,
multiple,
path.join(__dirname, "defaults/catalog_template.html")
);
if (program.zip) {
//TODO: Refactor: This is REPEATING YOURSELF
shell.cd(c.dir);
zipname = path.join("../", path.basename(c.dir) + ".zip");
var zip_cmd = "zip -r " + zipname + " *";
index_maker.make_index_html(text_citation, "");
shell.exec(zip_cmd);
}
index_maker.make_index_html(text_citation, zipname);
});
}
});
}
}
\ No newline at end of file
......@@ -2279,16 +2279,11 @@
"yearsInOperation": "schema:yearsInOperation",
"yield": "schema:yield",
"contact": "schema:accountablePerson",
"File": "schema:MediaObject",
"path": "schema:contentUrl",
"journal": "schema:Periodical",
"Project": "frapo:Project",
"isOutputOf": "frapo:isOutputOf",
"Equipment": "frapo:Equipment",
"interviewee": "bibo:interviewee",
"interviewer": "bibo:interviewer",
"hasFile": "pcdm:hasFile",
"hasMember": "pcdm:hasMember",
"RepositoryCollection": "pcdm:Collection",
......@@ -2301,9 +2296,7 @@
"cc": "http://creativecommons.org/ns#",
"dct": "http://purl.org/dc/terms/",
"foaf": "http://xmlns.com/foaf/0.1/",
"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
"rdfa": "http://www.w3.org/ns/rdfa#",
"rdfs": "http://www.w3.org/2000/01/rdf-schema#",
"schema": "http://schema.org/",
"frapo": "http://purl.org/cerif/frapo/"
"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
"rdfs": "https://www.w3.org/2000/01/rdf-schema#",
"schema": "https://schema.org/"
}
......@@ -19,7 +19,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
const path = require("path");
const shell = require("shelljs");
const jsonld = require("jsonld");
const tmp = require("tmp");
const fs = require("fs");
const defaults = require("./defaults.js");
......@@ -28,65 +27,47 @@ module.exports = function() {
return {
target_dir: this.target_dir,
catalog_path: this.catalog_path,
json_ld: this.json_ld,
json_by_id: this.json_by_id,
json_by_path: this.json_by_path,
json_by_type: this.json_by_path,
generate_bag_info: function generate_bag_info() {
this.index_graph();
this.bag_meta = {
"BagIt-Profile-Identifier": defaults.bagit_profile_identifier,
"BagIt-Profile-Identifier": defaults.BagIt_Profile_Identifier,
"DataCrate-Specification-Identifier":
defaults.datacrate_specification_identifier
defaults.DataCrate_Specification_Identifier
};
if (this.root_node["contact"] && this.root_node["contact"]["@id"]) {
contact = this.json_by_id[this.root_node["contact"]["@id"]];
if (this.helper.root_node["contactPoint"] && this.helper.root_node["contactPoint"]["@id"]) {
contact = this.helper.item_by_id[this.helper.root_node["contactPoint"]["@id"]];
map = {
email: "Contact-Email",
phone: "Contact-Telephone",
name: "Contact-Name"
};
for (var [k, v] of Object.entries(map)) {
if (contact[k]) {
this.bag_meta[v] = String(contact[k]);
}
}
}
if (this.root_node["description"]) {
this.bag_meta["Description"] = this.root_node["description"];
if (this.helper.root_node["publisher"] && this.helper.root_node["publisher"]["@id"]) {
publisher = this.helper.item_by_id[this.helper.root_node["publisher"]["@id"]];
if(publisher["name"]) {
this.bag_meta["SourceOrganization"] = publisher.name
}
}
if (this.helper.root_node["description"]) {
this.bag_meta["External-Description"] = this.helper.root_node["description"];
}
this.bag_meta["Bagging-Date"] = new Date().toISOString();
// Return a hash of BagIt style metadata by looking for it in the JSON-LD structure
},
index_graph: function index_graph() {
//TODO - make this a helper function
this.catalog_path = path.join(this.target_dir, defaults.catalog_json_file_name);
this.json_ld = require(this.catalog_path);
this.json_by_id = {};
this.json_by_path = {};
this.json_by_type = {};
this.graph = this.json_ld["@graph"];
for (let i = 0; i < this.graph.length; i++) {
var item = this.graph[i];
if (item["@id"]) {
this.json_by_id[item["@id"]] = item;
}
if (item["path"]) {
this.json_by_path[item["path"]] = item;
}
if (item["@type"]) {
if (!this.json_by_type[item["@type"]]) {
this.json_by_type[item["@type"]] = [];
}
this.json_by_type[item["@type"]].push(item);
}
}
this.root_node = this.json_by_path["./"]
? this.json_by_path["./"]
: this.json_by_path["data/"];
},
save_bag_info: function save_bag_info() {
var bag_info = "";
......@@ -98,39 +79,36 @@ module.exports = function() {
update: function update_bag_tags() {
shell.exec("bagit updatetagmanifests " + this.target_dir);
},
bag: function bag(source_dir, bag_dir) {
function fix_paths(catalog_path) {
console.log(shell.test("-f", catalog_path));
if (!path.isAbsolute(catalog_path)) {
catalog_path = path.join("./", catalog_path);
fix_paths: function fix_paths(catalog_path) {
console.log(shell.test("-f", catalog_path));
if (!path.isAbsolute(catalog_path)) {
catalog_path = path.join("./", catalog_path);
}
var catalog = this.helper.json_ld;
for (let item of catalog["@graph"]) {
if (item["path"]) {
var p = this.helper.value_as_array(item["path"])[0];
var new_p = path.join("./data/", p);
item["path"] = [new_p];
}
var catalog = require(catalog_path);
for (let item of catalog["@graph"]) {
if (item["path"]) {
if (!Array.isArray(item["path"])) {
item["path"] = [item["path"]];
}
var p = item["path"][0];
var new_p = path.join("./data/", p);
item["path"] = [new_p];
}
fs.writeFileSync(
catalog_path,
JSON.stringify(this.helper.json_ld, null, 2),
function(err) {
if (err) {
return console.log(err, "Error writing in", catalog_path);
}
}
fs.writeFileSync(
catalog_path,
JSON.stringify(catalog, null, 2),
function(err) {
if (err) {
return console.log(err, "Error writing in", catalog_path);
}
}
);
}
);
},
bag: function bag(source_dir, bag_dir, helper) {
// TODO Generate a list of all files
// FOR NOW: delete CATALOG.json and index.html
// Generate bag info later
this.helper = helper;
var tmpobj = tmp.dirSync();
console.log("Tempdir: ", tmpobj.name);
var bag_name = path.basename(source_dir);
var target_dir = path.join(bag_dir, bag_name);
shell.cp(
......@@ -149,11 +127,10 @@ module.exports = function() {
shell.exec("bagit update " + target_dir);
shell.cp(path.join(tmpobj.name, defaults.catalog_json_file_name), target_dir);
shell.cp(
path.join(__dirname, "defaults", defaults.DataCrate_profile_file),
path.join(__dirname, "..", defaults.DataCrate_profile_file),
target_dir
);
fix_paths(path.join(target_dir, defaults.catalog_json_file_name));
this.fix_paths(path.join(target_dir, defaults.catalog_json_file_name));
this.target_dir = target_dir;
return target_dir;
}
......
......@@ -105,7 +105,6 @@ module.exports = function() {
items: this.items,
json_ld: this.json_ld,
existing_catalogs: this.existing_catalogs,
bagged: this.bagged,
root_node: this.root_node,
item_by_path: this.item_by_path,
item_by_id: this.item_by_id,
......@@ -157,57 +156,12 @@ module.exports = function() {
: this.item_by_path["data/"];
},
generate_bag_info: function generate_bag_info() {
//TODO: Remove this and use bag.js which is more generic
this.index_graph();
this.bag_meta = {
"BagIt-Profile-Identifier": defaults.bagit_profile_identifier,
"DataCrate-Specification-Identifier":
defaults.datacrate_specification_identifier
};
if (this.root_node["contact"] && this.root_node["contact"]["@id"]) {
contact = this.item_by_id[this.root_node["contact"]["@id"]];
map = {
email: "Contact-Email",
phone: "Contact-Telephone",
name: "Contact-Name"
};
for (var [k, v] of Object.entries(map)) {
if (contact[k]) {
this.bag_meta[v] = String(contact[k]);
}
}
}
if (this.root_node["description"]) {
this.bag_meta["Description"] = this.root_node["description"];
}
this.bag_meta["Bagging-Date"] = new Date().toISOString();
// Return a hash of BagIt style metadata by looking for it in the JSON-LD structure
},
save_bag_info: function save_bag_info() {
//TODO: Remove this and use bag.js
var bag_info = "";
for (var [k, v] of Object.entries(this.bag_meta)) {
bag_info += k + ": " + v + "\n";
}
fs.writeFileSync(path.join(this.dir, "bag-info.txt"), bag_info);
},
update: function update_bag_tags() {
//TDOD get rid of this - use bag.js
shell.exec("bagit updatetagmanifests " + this.dir);
},
to_json: function to_json(graph) {
if (!this.collection_metadata) {
this.collection_metadata = new Item();
}
if (this.parent) {
this.bagged = this.parent.bagged;
}
var collection_json = this.collection_metadata.to_json_ld_fragment();
// Need to work out how to do this
/* collection_json["distribution"] = {
......@@ -254,24 +208,9 @@ module.exports = function() {
});
});
},
bag: function bag(bag_dir) {
// TODO Generate a list of all files
// FOR NOW: delete DataCrate files
// Generate bag info later
shell.rm("-f", path.join(this.dir, defaults.catalog_json_file_name));
shell.rm("-f", path.join(this.dir, defaults.html_file_name));
shell.exec(
"bagit create --excludebaginfo " +
bag_dir +
" " +
path.join(this.dir, "*")
);
this.bagged = true;
this.dir = bag_dir;
},
to_json_ld: function to_json_ld() {
// Turn the entire collection into a JSON-LD document
json = {
"@graph": [],
"@context": context
......@@ -336,7 +275,6 @@ module.exports = function() {
this.id_lookup = parent.id_lookup;
this.existing_catalogs = parent.existing_catalogs;
this.root_dir = parent.root_dir;
this.bagged = parent.bagged;
//console.log(this.existing_catalogs);
} else {
this.depth = 1;
......@@ -344,7 +282,6 @@ module.exports = function() {
this.id_lookup = {};
this.existing_catalogs = [];
this.root_dir = dir;
this.bagged = false;
}
//console.log("XXXXXX Collecting", dir, "Depth", this.depth);
this.children = [];
......
......@@ -33,8 +33,8 @@ const BagIt_Profile_Identifier =
const DataCrate_Specification_Identifier =
"https://github.com/UTS-eResearch/datacrate/blob/master/spec/0.3/data_crate_specification_v0.3.md";
const DataCrate_version = "0.3";
const DataCrate_profile_file = "defaults/profile-datacrate-v" + DataCrate_version + ".json"
const default_context = "defaults/context.json"
// DataCrate specific terms which have inverses
const back_links = {
......@@ -47,6 +47,7 @@ const back_back_links = new Set(Object.values(back_links))
module.exports = {
html_multi_file_dirs: html_multi_file_dirs,
default_context: default_context,
catalog_root_name: catalog_root_name,
datacite_file_name: datacite_file_name,
catalog_json_file_name: catalog_json_file_name,
......
......@@ -188,8 +188,10 @@ module.exports = function () {
var catalog_json_link = "";
var zip_link;
var name = this.root_node["name"];
var json = JSON.stringify(node, null, 2);
var json = "";
if (this.first_page) {
json = JSON.stringify(this.helper.json_ld, null, 2);
zip_link = this.zip_path
? "<a href='" + this.zip_path + "'>Download a zip file</a>"
: "";
......@@ -405,7 +407,7 @@ module.exports = function () {
rev += close("th");
rev += close("tr");
for (let r of Object.keys(item["@reverse"]).filter(key => !(defaults.back_back_links.has(key)))) {
for (let r of Object.keys(item["@reverse"])) {
rev += ele("tr");
rev += ele("th");
rev += r;
......@@ -535,6 +537,7 @@ module.exports = function () {
},
dataset_children_to_html: function dataset_children_to_html(node) {
// TODO - I think this is now obsolete
var files = [];
var datasets = [];
var readmes = [];
......
......@@ -20,7 +20,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
var jsonld = require("jsonld");
var defaults = require("./defaults.js")
var URI = require("uri-js")
const path = require("path")
......@@ -55,7 +55,6 @@ module.exports = function () {
if (!url.host) {