Commit fd769ec2 authored by PTSEFTON's avatar PTSEFTON
Browse files

Fixing example

parents 62681ed3 7d8fc5a1
This diff is collapsed.
# CalcyteJS
This is a work-in-progress port of the Python-based [Calcyte tool](https://codeine.research.uts.edu.au/eresearch/calcyte).
## Status
This is Beta code.
## About
Calcyte is a toolkit to implement the [DataCrate] specification:
1. Managing metadata for collections of content via automatically generated
spreadsheets, to create CATALOG.json files
2. Generating HTML from DataCrate CATALOG.json files.
3. Packaging data in BagIt format, and optionally zipping it.
Calcyte targets the [Draft DataCrate Packaging format v0.3](https://github.com/UTS-eResearch/datacrate/blob/master/spec/0.3/data_crate_specification_v0.3.md).
## Installation
- Install [node.js](https://nodejs.org/en/)
- Install [BagIt](https://github.com/LibraryOfCongress/bagit-java), e.g. with Homebrew:
`brew install bagit`
- Install Siegfried using the [instructions](https://github.com/richardlehane/siegfried/wiki/Getting-started).
- Get the code:
git clone https://code.research.uts.edu.au/eresearch/CalcyteJS.git
- Link the binary for development use:
npm link
## Usage / instructions
Usage:
```
> ./calcyfy
Usage: calcyfy [options] <directories...>
To run calcyfy on a group of directories pass it a list of directories
Options:
-V, --version output the version number
-g, --generate-html Generate HTML from a "CATALOG.json" in a directory
-b, --bag [bag-dir] Create Bagit Bag(s) under [bag-dir])
-n, --no No Citation - only applies with --bag
-z, --zip Create a zipped version of the bag - only applies with --bag
-d, --depth Maximum depth to recurse into directories
-r, --recurse Recurse into directories - up to 10
-m, --multiple Output multiple files instead of a single CATALOG.html
-h, --help output usage information
```
To run Calcyte on a group of directories, pass it a list of directories.
One directory:
```
calcyfy test_data/Glop_Pot
```
This will create a CATALOG.json file and CATALOG.html file in test_data/Glop_Pot
All the sample directories:
```
calcyfy test_data/*
```
Calcyte will generate:
- a CATALOG\_$dir.xlsx file in each directory (this is for humans to fill in with
metadata about the data)
- A CATALOG.html file summarizing the data using metadata from CATALOG\_$dir.xlsx
- A CATALOG.json file containing JSON-LD metadata derived from the CATALOG\* files plus some basic file-format information.
See the examples in `test_data`.
TODO: Instructions for filling in the CATALOG files.
[datacrate]: https://github.com/UTS-eResearch/datacrate
......@@ -114,7 +114,7 @@ if (program.generateHtml) {
var index_maker = new Index();
index_maker.init(
catalog,
path.join(dir, html_file_name),
path.join(dir),
multiple,
path.join(__dirname, "defaults/catalog_template.html")
);
......@@ -195,7 +195,7 @@ if (program.generateHtml) {
var index_maker = new Index();
index_maker.init(
path.join(output_dir, catalog_file_name),
path.join(output_dir, html_file_name),
path.join(output_dir),
multiple,
path.join(__dirname, "defaults/catalog_template.html")
);
......
// Type definitions for calcyte 0.0.2
// Project: DataCrate
// Definitions by: Mike Lynch
/**
 * Ambient declaration of the `Collection` class.
 *
 * Only member names and types are declared here; the implementation lives in
 * the JavaScript sources.
 * NOTE(review): `Item` is referenced below but is not declared in the visible
 * part of this file - confirm it is declared elsewhere.
 */
declare class Collection {
  constructor();

  // --- Properties ---
  children: Item[]; // check
  rel_path: string;
  items: Item[];
  name_lookup: any;
  id_lookup: any;
  json_ld: Object;
  field_names_by_type: any;
  existing_catalogs: any;
  root_node: any;

  // --- Methods ---
  get_unique_catalog_name(dir: string, existing_catalogs?: string[]): string;
  index_graph(): void;
  generate_bag_info(): void;
  save_bag_info(): void;
  update(): void;
  to_html(): void;
  to_json(graph: Object): void;
  // Declared as a method signature; the previous `bag:(...):void` form is not
  // valid class-member syntax.
  bag(bag_dir: string): void;
  to_json_ld(): Promise<void>;
  read(dir: string, rel_path: string, parent: Collection): void;
}
module.exports = {
Collection: require('./lib/collection.js'),
Index: require('./lib/index_html.js'),
Datacite: require('./lib/datacite.js'),
Bag: require('./lib/bag.js')
}
......@@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
/* Defaults for Calcyte such as names of key files */
const html_multi_file_dirs = ["CATALOG"];
const html_multi_file_dirs = "CATALOG_files";
const catalog_root_name = "CATALOG";
const datacite_file_name = "datacite.xml";
const catalog_json_file_name = `${catalog_root_name}.json`;
......@@ -27,7 +27,7 @@ const max_files_in_dir = "100"; // Don't list files in a directory if there are
// Matches names that are exactly the multi-file HTML directory name, names
// starting with "~" or ".", and names containing the DataCite file name.
// html_multi_file_dirs is interpolated whole (as ignore_dir_regex does):
// indexing it with [0] only works while it is an array, and yields just the
// first character once it is a plain string such as "CATALOG_files".
// NOTE(review): the "." inside datacite_file_name is not escaped, so it
// matches any character - confirm whether that is intended.
const ignore_file_regex = new RegExp(
  `(^${html_multi_file_dirs}$)|(^~)|(^\\.)|(${datacite_file_name})`
);
const ignore_dir_regex = new RegExp(`(^${html_multi_file_dirs[0]}$)|(^\\.)`);
const ignore_dir_regex = new RegExp(`(^${html_multi_file_dirs}$)|(^\\.)`);
const BagIt_Profile_Identifier =
"https://raw.githubusercontent.com/UTS-eResearch/datacrate/master/spec/0.3/profile-datacrate-v0.3.json";
const DataCrate_Specification_Identifier =
......
......@@ -22,8 +22,9 @@ context = require("../defaults/context.json");
const path = require("path");
const shell = require("shelljs");
const jsonld_helper = require("./jsonldhelper")
var helper = new jsonld_helper();
const filesize = require("filesize");
const display_keys = [
"name",
"@type",
......@@ -80,6 +81,7 @@ module.exports = function () {
// K is for Key - ie the property name
// list is the list of values for property k in item
// details: bool - are we doing pagination? If so need to display the "details element"
list = this.helper.value_as_array(list)
var l = list.length;
var html = "";
if (l === 1) {
......@@ -174,13 +176,12 @@ module.exports = function () {
var zip_link;
var name = this.root_node["name"];
var json = JSON.stringify(node, null, 2);
if (this.first_page) {
zip_link = this.zip_path
? "<a href='" + this.zip_path + "'>Download a zip file</a>"
: "";
var catalog_actual_path = path.join(
path.dirname(out_path),
path.dirname(this.out_dir),
defaults.catalog_json_file_name
);
var stats = fs.statSync(catalog_actual_path);
......@@ -193,12 +194,18 @@ module.exports = function () {
up_link = `<a href="" class="active"><button type="button" class="btn btn-default btn-sm"><span class="glyphicon glyphicon-home"></span>&nbsp;${name}</button></a>`;
}
} else if (this.multiple_files_dir) {
var href = this.get_href(this.root_node["@id"]);
var href = this.get_href(this.root_node["@id"], node["@id"]);
up_link = `<a href=${href}><button type="button" class="btn btn-default btn-sm"><span class="glyphicon glyphicon-home"></span>&nbsp;${name}</button></a> `;
}
var time = new Date().toISOString();
if (node["@id"]) {
out_path = path.join(this.out_dir, this.get_html_path(node["@id"]))
} else {
out_path = path.join(this.out_dir, this.get_html_path(this.root_node["@id"]))
}
shell.mkdir("-p", path.dirname(out_path));
fs.writeFileSync(
out_path,
this.template({
......@@ -233,48 +240,6 @@ module.exports = function () {
return keys_in_order;
},
make_back_links: function make_back_links(item) {
for (let key of Object.keys(item)) {
if (key != "@id" && key != "@reverse") {
for (let part of item[key]) {
var target = this.item_by_id[part["@id"]];
var back_link = back_links[key];
if (target && back_link) {
if (!target[back_link]) {
//console.log("Making link", key, back_link, target)
target[back_link] = [{ "@id": item["@id"] }];
}
} else if (
!dont_back_link.has(key) &&
item["name"] &&
target &&
target["name"]
) {
// We are linking to something
//console.log("Doing a back link", key, target['name'], item['name'])
if (!target["@reverse"]) {
target["@reverse"] = {};
}
if (!target["@reverse"][key]) {
target["@reverse"][key] = [];
}
var got_this_reverse_already = false;
// for (let r of target["@reverse"][key]) {
// //console.log(r, r["@id"], item["@id"])
// if (r["@id"] === item["@id"]) {
// got_this_reverse_already = true
// }
// }
if (!got_this_reverse_already) {
target["@reverse"][key].push({ "@id": item["@id"] });
}
//console.log(JSON.stringify(target, null, 2))
}
}
}
}
},
format_property: function format_property(item, k, part) {
/*
TODO: Work out *my* path
......@@ -294,13 +259,13 @@ module.exports = function () {
} else if (
k === "thumbnail" &&
part["@id"] &&
this.item_by_id[part["@id"]]
this.helper.item_by_id[part["@id"]]
) {
td_ele += ele("img", {
src: this.get_file_ref(this.item_by_id[part["@id"]]["path"])
src: this.get_file_ref(this.helper.item_by_id[part["@id"]]["path"], item["@id"])
});
} else if (k === "path") {
td_ele += ele("a", { href: encodeURI(this.get_file_ref(part)) });
td_ele += ele("a", { href: encodeURI(this.get_file_ref(part, item["@id"])) });
td_ele += part
.replace(/\/$/, "")
.split("/")
......@@ -333,24 +298,24 @@ module.exports = function () {
} else {
td_ele += item["@id"];
}
} else if (part["@id"] && this.item_by_id[part["@id"]]) {
} else if (part["@id"] && this.helper.item_by_id[part["@id"]]) {
/*else if (
!item["@name"] &&
k != "hasPart" &&
this.item_by_id[part["@id"]] &&
this.helper.item_by_id[part["@id"]] &&
!(
this.item_by_id[part["@id"]].name ||
this.item_by_id[part["@id"]].description
this.helper.item_by_id[part["@id"]].name ||
this.helper.item_by_id[part["@id"]].description
)
) {
// Embed small bits of info that don't have a name or description
td_ele += this.dataset_to_html(this.item_by_id[part["@id"]]);
td_ele += this.dataset_to_html(this.helper.item_by_id[part["@id"]]);
} */
var target_name = this.item_by_id[part["@id"]].name
? this.item_by_id[part["@id"]].name
var target_name = this.helper.item_by_id[part["@id"]].name
? this.helper.item_by_id[part["@id"]].name
: part["@id"];
var href = this.get_href(part["@id"]);
var href = this.get_href(part["@id"], item["@id"]);
td_ele += ele("a", { href: href });
td_ele += target_name;
td_ele += close("a");
......@@ -362,36 +327,55 @@ module.exports = function () {
get_up_path: function get_up_path(path) {
return "../".repeat(defaults.html_multi_file_dirs.length) + path;
},
get_file_ref: function get_file_ref(path) {
get_file_ref: function get_file_ref(dest_path, from_id) {
if (this.multiple_files_dir && !this.first_page) {
return this.get_up_path(path);
var source_path = this.get_html_path(from_id);
return path.relative(path.dirname(source_path), dest_path);
} else {
return path;
return dest_path;
}
},
get_href: function get_href(id) {
var path;
if (this.item_by_id[id]["path"]) {
path = this.item_by_id[id]["path"];
if (Array.isArray(path)) {
path = path[0];
}
}
if (this.multiple_files_dir) {
var link = "./";
if (path === "./" || path === "data/") {
link = this.get_up_path(defaults.html_file_name);
} else {
if (this.first_page) {
link += defaults.html_multi_file_dirs.join("/") + "/";
}
// TODO: fix this appalling hack and work out a better system of filenames!
link += id.replace(/\//g, "_").replace("?", "%3F") + ".html";
}
return link;
} else {
get_html_path: function get_html_path(id) {
if (this.helper.item_by_id[id] && this.helper.item_by_id[id]["path"]) {
var actual_path = this.helper.value_as_array(this.helper.item_by_id[id]["path"])[0];
if (actual_path === "./" || actual_path === "data/") {
return defaults.html_file_name;
}
}
var p = "";
p += defaults.html_multi_file_dirs + "/";
p += "pairtree_root/";
id = id.replace(/\^/g, "^5e")
.replace(/ /g, "^20")
.replace(/"/g, "^22")
.replace(/</g, "^3c")
.replace(/\\/g, "^5c")
.replace(/\*/g, "^2a")
.replace(/\=/g, "^3d")
.replace(/\+/g, "^2b")
.replace(/>/g, "^3e")
.replace(/\|/g, "^7c")
.replace(/,/g, "^2c")
.replace(/\?/g, "^3f")
.replace(/\//g, "=")
.replace(/\:/g, "+")
.replace(/\./g,",")
p += id.replace(/(..)/g, '$1/').replace(/([^\/])$/, "$1/")
p += "index.html"
return p
},
get_href: function get_href(id, from_id) {
if (!this.multiple_files_dir) {
return "#" + id;
}
var dest_path = this.get_html_path(id);
var source_path = this.get_html_path(from_id);
return path.relative(path.dirname(source_path), dest_path);
},
format_cell: function (item, k) {
......@@ -457,13 +441,11 @@ module.exports = function () {
dataset_to_html: function dataset_to_html(node) {
// Turns any item into an HTML table
console.log("Processing dataset", node["@id"]);
var path = "pairtree" + node["@id"].replace(/(..)/g, "/$1");
console.log(path)
var html = "";
var keys = new Set(Object.keys(node));
if (node["identifier"]) {
node["identifier"] = helper.value_as_array(node["identifier"]).filter(id => id != node["@id"]);
node["identifier"] = this.helper.value_as_array(node["identifier"]).filter(id => id != node["@id"]);
if (node["identifier"].length === 0) {
keys.delete("identifier")
}
......@@ -483,9 +465,9 @@ module.exports = function () {
key != "@id" &&
key != "@reverse" &&
v["@id"] &&
this.item_by_id[v["@id"]]
this.helper.item_by_id[v["@id"]]
) {
html += " | " + dataset_to_html(this.item_by_id[v["@id"]]);
html += " | " + dataset_to_html(this.helper.item_by_id[v["@id"]]);
}
}
}
......@@ -523,10 +505,7 @@ module.exports = function () {
if (this.first_page) {
out_path = this.out_path;
} else {
out_path = path.join(
this.out_dir,
node["@id"].replace(/\//g, "_") + ".html"
);
out_path = this.get_html_path(node["@id"]);
cite = node["name"];
}
......@@ -553,8 +532,8 @@ module.exports = function () {
node[part] = [node[part]];
}
for (let [key, value] of Object.entries(node[part])) {
if (value["@id"] && this.item_by_id[value["@id"]]) {
var child = this.item_by_id[value["@id"]];
if (value["@id"] && this.helper.item_by_id[value["@id"]]) {
var child = this.helper.item_by_id[value["@id"]];
if (child["@type"]) {
// if (!Array.isArray(child['@type'])) {
// child['@type'] = [child['@type']];
......@@ -586,7 +565,7 @@ module.exports = function () {
html += ele("iframe", {
width: "80%",
height: "90%",
src: this.get_file_ref(readme.path),
src: this.get_file_ref(readme.path, node["@id"]),
border: 1
});
html += close("iframe");
......@@ -615,21 +594,12 @@ module.exports = function () {
this.template = ejs.compile(temp);
}
this.out_path = out_path;
this.out_dir = out_path;
this.first_page = true;
this.multiple_files_dir = multiple_files;
if (multiple_files) {
this.multiple_files_dir = defaults.html_multi_file_dirs.join("/"); // Where to write out the
var out_dir = path.dirname(out_path);
for (let d of defaults.html_multi_file_dirs) {
out_dir += "/" + d;
shell.mkdir("-p", out_dir);
}
shell.rm(out_dir + "/*");
this.out_dir = out_dir;
} else {
this.multiple_files_dir = false;
shell.rm("-rf", path.join(out_path, defaults.html_multi_file_dirs, "*"))
}
this.first_page = true;
// TODO: Use loadjson or somesuch
// Shift loading into the calcyte script
if (!crate_data["@graph"]) {
......@@ -637,49 +607,9 @@ module.exports = function () {
}
//console.log(crate_data);
this.json_ld = crate_data;
this.item_by_id = {};
this.item_by_url = {};
this.item_by_type = {}; // dict of arrays
//console.log("CRATE-data", crate_data)
graph = crate_data["@graph"];
for (let i = 0; i < graph.length; i++) {
var item = graph[i];
for (let key of Object.keys(item)) {
if (key != "@id" && key != "@reverse") {
if (!item[key]) {
item[key] = "";
}
if (!Array.isArray(item[key])) {
item[key] = [item[key]];
//console.log("Making array", key, item[key])
}
}
}
if (item["@id"]) {
this.item_by_id[item["@id"]] = item;
}
if (item["path"]) {
this.item_by_url[item["path"]] = item;
}
if (!item["@type"]) {
item["@type"] = ["Thing"];
}
//console.log("TYPE", item['@type'])
for (let t of item["@type"]) {
//console.log(t)
if (!this.item_by_type[t]) {
this.item_by_type[t] = [];
}
this.item_by_type[t].push(item);
}
}
for (let i = 0; i < graph.length; i++) {
var item = graph[i];
this.make_back_links(item);
}
this.helper = new jsonld_helper();
this.helper.init(crate_data)
this.helper.add_back_links()
// A container for our page
},
make_index_html: function make_index_html(text_citation, zip_path) {
......@@ -688,11 +618,11 @@ module.exports = function () {
this.text_citation = text_citation;
this.first_page = true;
body_el += ele("div");
//console.log("DATA", this.item_by_url);
// Get root of graph
root_node = this.item_by_url["./"]
? this.item_by_url["./"]
: this.item_by_url["data/"];
//console.log("DATA", this.helper.item_by_url);
// Get root of this.helper.graph
root_node = this.helper.item_by_url["./"]
? this.helper.item_by_url["./"]
: this.helper.item_by_url["data/"];
this.root_node = root_node;
......@@ -704,26 +634,26 @@ module.exports = function () {
body_el += this.dataset_to_html(root_node, true);
//}
delete this.item_by_type["Dataset"];
delete this.item_by_type["File"];
delete this.item_by_type["RepositoryCollection"];
delete this.item_by_type["RepositoryObject"];
for (let type of Object.keys(this.item_by_type).sort()) {
delete this.helper.item_by_type["Dataset"];
delete this.helper.item_by_type["File"];
delete this.helper.item_by_type["RepositoryCollection"];
delete this.helper.item_by_type["RepositoryObject"];
for (let type of Object.keys(this.helper.item_by_type).sort()) {
body_el += ele("h1");
body_el += "Contextual info: ";
body_el += ele("span");
body_el += this.format_header(type);
body_el += close("span");
body_el += close("h1");
//this.items_to_html(this.item_by_type[type], body_el);
for (let i of this.item_by_type[type]) {
//this.items_to_html(this.helper.item_by_type[type], body_el);
for (let i of this.helper.item_by_type[type]) {
body_el += this.dataset_to_html(i, true);
} //console.log(type);
}
body_el += close("div");
if (!this.multiple_files_dir) {
this.write_html(this.out_path, body_el, this.json_ld);
this.write_html(this.out_path, body_el, this.helper.json_ld);
}
}
};
......
......@@ -123,10 +123,10 @@ module.exports = function() {
if (id && id.match(/(https?:\/\/)(dx\.)?(doi.org\/10\.)/i)) {
helper.push_value(frag, "identifier", id.replace(/https?:\/\//i, ""))
}
this.json_ld_fragment = frag;
}
this.json_ld_fragment = frag;
//console.log(frag);
return frag;
},
......@@ -152,7 +152,6 @@ module.exports = function() {
var pr = new metadata_property_name();
pr.parse("path", this.id);
this.properties[pr.name] = pr;
this.is_file = value;
} else if (property.is_id) {
//console.log("Got an ID", value);
......