Commit a8028b98 authored by PTSEFTON's avatar PTSEFTON
Browse files

First go at a new dynamic HTML generation system

parent 2e325cfb
......@@ -28,14 +28,14 @@ const shell = require("shelljs");
const program = require("commander");
const defaults = require("./lib/defaults.js");
const html_file_name = defaults.html_file_name;
const catalog_file_name = defaults.catalog_json_file_name;
const metadata_file_name = defaults.metadata_json_file_name;
const JSON_helper = require("./lib/jsonldhelper.js")
const fs = require("fs")
var dirs = undefined;
program
.version("0.1.0")
.description(
"Generates DataCrate HTML for CATALOG.JSON files. Pass a list of directories. To create Spreadsheet files for entring metadata use -d or -r."
"Generates DataCrate HTML for ro-crate-metadata.jsonld files. Pass a list of directories. To create Spreadsheet files for entring metadata use -d or -r."
)
.arguments("<directories...>")
.action(function(directories) {
......@@ -48,7 +48,8 @@ program
"Create a zipped version of the bag - only applies with --bag"
)
.option("-d, --depth", "Maximum depth to recurse into directories")
.option("-r, --recurse", "Recurse into directories looking for CATALOG_.xslc files")
.option("-r, --recurse", "Recurse into directories looking for ro-crate-metadata_.xslx files")
.option("-s, --save", "Save ro-crate-metadata after trimming it (and adding missing context if necessary).")
.option(
"-u, --url [distro]",
......@@ -70,48 +71,55 @@ if (depth) {
//TODO - there is a lot of repetition here, sort it out
dirs.forEach(function(dir) {
if (shell.test("-d", dir)) {
shell.rm("-rf", path.join(dir, "ro-crate-metadata_files"));
var c = new Collection();
var output_dir = dir;
c.read(dir, "./", undefined, depth);
c.to_json_ld().then(function() {
var json_helper = new JSON_helper()
json_helper.init(c.json_ld)
var catalog_path = path.join(c.dir, defaults.catalog_json_file_name)
json_helper.trim_context()
fs.writeFileSync(catalog_path, JSON.stringify(json_helper.json_ld, null, 2 ));
generateHTML(catalog_path);
var metadata_path = path.join(c.dir, defaults.metadata_json_file_name)
fs.writeFileSync(metadata_path, JSON.stringify(json_helper.json_ld, null, 2 ));
generateHTML(metadata_path);
});
}
});
}
else {
dirs.forEach(function(dir) {
var catalog_path
shell.rm("-rf", path.join(dir, "ro-crate-metadata_files"));
var metadata_path
if (shell.test("-d", dir)) {
if (!path.isAbsolute(dir)) {
dir = path.join(process.cwd(), dir);
}
catalog_path = path.join(dir, catalog_file_name);
metadata_path = path.join(dir, metadata_file_name);
} else if (shell.test("-f", dir)) {
if (!path.isAbsolute(dir)) {
dir = path.join(process.cwd(), dir);
}
catalog_path = dir;
metadata_path = dir;
}
generateHTML(catalog_path);
generateHTML(metadata_path);
})
}
function generateHTML(catalog_path) {
function generateHTML(metadata_path) {
console.log("Generating html from exsiting " + catalog_path + " file");
if (!path.isAbsolute(catalog_path)) {
catalog_path = path.join(process.cwd(), catalog_path);
console.log("Generating html from exsiting " + metadata_path + " file");
if (!path.isAbsolute(metadata_path)) {
metadata_path = path.join(process.cwd(), metadata_path);
}
var dir = path.dirname(catalog_path)
shell.rm("-rf", path.join(dir, "CATALOG_files"));
var dir = path.dirname(metadata_path)
shell.rm("-rf", path.join(dir, "ro-crate-metadata_files"));
var json_helper = new JSON_helper();
json_helper.init(require(catalog_path));
json_helper.init(JSON.parse(fs.readFileSync(metadata_path)));
if (program.save) {
fs.writeFileSync(metadata_path, JSON.stringify(json_helper.json_ld, null, 2 ));
}
if (program.url){
if (!json_helper.root_node["distribution"]) {
json_helper.root_node["distribution"] = []
......@@ -128,6 +136,7 @@ console.log("Generating html from exsiting " + catalog_path + " file");
"@type": "DataDownload",
"encodingFormat": "zip"
});
// TODO - GET RID OF THIS ABOMINATION!
json_helper.json_ld["@context"]["DataDownload"] = "https://schema.org/DataDownload"
json_helper.json_ld["@context"]["distribution"] = "https://schema.org/distribution"
json_helper.init(json_helper.json_ld);
......@@ -145,22 +154,20 @@ console.log("Generating html from exsiting " + catalog_path + " file");
if (!path.isAbsolute(dir)) {
dir = path.join(process.cwd(), dir);
}
catalog_path = path.join(dir, catalog_file_name);
metadata_path = path.join(dir, metadata_file_name);
if (!program.no) {
shell.mkdir("-p", path.join(dest, "metadata"));
var text_citation = citer.make_citation(
catalog_path,
metadata_path,
path.join(dir, "metadata", "datacite.xml")
);
}
}
var index_maker = new Index();
json_helper.trim_context()
index_maker.init(
json_helper.json_ld,
path.join(dir),
true,
path.join(__dirname, "defaults/catalog_template.html")
path.join(dir, defaults.html_file_name),
path.join(__dirname, "defaults/metadata_template.html")
);
......@@ -170,6 +177,7 @@ console.log("Generating html from exsiting " + catalog_path + " file");
if (program.zip) {
shell.cd(bagger.target_dir);
zipname = path.join("../", path.basename(bagger.target_dir) + ".zip");
shell.rm("-f", zipname);
shell.cd(bagger.target_dir);
index_maker.make_index_html(text_citation, "");
......
......@@ -2,6 +2,8 @@
<html>
<head>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js"></script>
<style>
table {
......@@ -30,9 +32,12 @@
</style>
<script type="application/ld+json">
<%- json_ld %>
</script>
<script>
meta = <%- json_ld %>
</script>
<link rel="stylesheet"
href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css"
integrity="sha384-BVYiiSIFeK1dGmJRAkycuHAHRg32OmUcww7on3RYdg4Va+PmSTsz/K68vbdEjh4u"
......@@ -42,27 +47,55 @@
<meta charset='utf-8'/>
<script src="http://localhost:8080/lib/render.js"></script>
<script>
/**
 * Show the entity selected by the URL fragment, or the root entity when the
 * URL has no fragment.
 *
 * Reads `location.hash`. When it is non-empty, the leading "#" is removed,
 * the remainder is percent-decoded and the result is passed to `display`.
 * When it is empty, `display` is called with `root["@id"]`.
 *
 * NOTE(review): `display` and `root` are not defined in this template;
 * presumably they are provided by the render.js script included in the
 * page head. Confirm against that script.
 */
function load() {
  var hash = location.hash;
  if (!hash) {
    display(root["@id"]);
    return;
  }

  var fragment = hash.replace(/^#/, "");
  var id;
  try {
    // decodeURIComponent decodes UTF-8 percent sequences; the deprecated
    // unescape() treats each %XX as a Latin-1 code unit and corrupts
    // non-ASCII ids.
    id = decodeURIComponent(fragment);
  } catch (e) {
    // A malformed escape (e.g. a lone "%") raises URIError; use the raw
    // fragment rather than aborting the page load.
    id = fragment;
  }
  display(id);
}
window.onhashchange = function() {
load()
}
$(document).ready(load);
</script>
</head>
<body>
<nav class="navbar navbar-inverse">
<ul class="nav navbar-nav" >
<li >
<%- up_link %></li>
</ul>
<li ><a href="#"><span class="glyphicon glyphicon-home"></span></a></li>
</ul>
</nav>
<div class="container">
<div class="jumbotron">
<h3><%- citation %></h3>
<h4 class="citation"><%- citation %></h4>
<h3 class="name">NAME - TODO</h3>
<h4><%- zip_link %></h4>
</div>
<%- catalog_json_link %>
<%- metadata_json_link %>
<div class="meta">
<table width="100%">
</table>
</div>
<%- html %>
......
......@@ -31,7 +31,7 @@ const uuidv4 = require("uuid/v4");
const shell = require("shelljs");
var fs = require('fs');
//const catalog_template = require("../defaults/catalog_template.html");
//const metadata_template = require("../defaults/metadata_template.html");
const builder = require('xmlbuilder');
const Index = require('./index_html.js');
const Datacite = require('./datacite.js')
......@@ -48,7 +48,7 @@ module.exports = function() {
this.id_lookup = {};
this.json_ld = {};
this.field_names_by_type = {};
this.existing_catalogs = [];
this.existing_metadatas = [];
this.root_node = {};
function get_collection_metadata(workbook, collection) {
......@@ -91,8 +91,8 @@ module.exports = function() {
}
// Sets json["@context"] and calls promises.flatten(json, <context>), returning
// the resulting promise. The `collection` parameter is not used in this body.
// NOTE(review): as shown here the context is assigned and flatten is invoked
// twice (first with `context`, then with `defaults.context`); only the second
// promise is returned and the first is left unhandled. The first pair looks
// like the pre-change lines of this diff hunk - confirm against the real file.
function flattenit(json, collection) {
var promises = jsonld.promises;
json["@context"] = context;
var promise = promises.flatten(json, context); //frame(json, frame);
json["@context"] = defaults.context;
var promise = promises.flatten(json, defaults.context);
return promise;
}
......@@ -105,7 +105,7 @@ module.exports = function() {
name_lookup: this.name_lookup,
items: this.items,
json_ld: this.json_ld,
existing_catalogs: this.existing_catalogs,
existing_metadatas: this.existing_metadatas,
root_node: this.root_node,
item_by_path: this.item_by_path,
item_by_id: this.item_by_id,
......@@ -137,24 +137,24 @@ module.exports = function() {
return this.make_id(this.name_lookup[name].id)
}
},
get_unique_catalog_name: function get_unique_catalog_name(
get_unique_metadata_name: function get_unique_metadata_name(
dir,
existing_catalogs = []
existing_metadatas = []
) {
var index = 0;
dir = path.basename(dir).replace(" ", "_");
var potential_catalog_filename = `${
defaults.catalog_root_name
var potential_metadata_filename = `${
defaults.metadata_root_name
}_${dir}.xlsx`;
while (existing_catalogs.includes(potential_catalog_filename)) {
while (existing_metadatas.includes(potential_metadata_filename)) {
index += 1;
potential_catalog_filename = `${
defaults.catalog_root_name
potential_metadata_filename = `${
defaults.metadata_root_name
}_${dir}_${index}.xlsx`;
//console.log(index, potential_catalog_filename);
//console.log(index, potential_metadata_filename);
}
//console.log(index, potential_catalog_filename)
return potential_catalog_filename;
//console.log(index, potential_metadata_filename)
return potential_metadata_filename;
},
index_graph: function index_graph() {
......@@ -204,6 +204,9 @@ module.exports = function() {
if (shell.test("-e", path.join(this.root_dir, item.id))) {
if (!collection_json["hasPart"]) {
collection_json["hasPart"] = [];
} else if (!Array.isArray(collection_json["hasPart"])) {
collection_json["hasPart"] = [collection_json["hasPart"]]
}
collection_json["hasPart"].push({
"@id": this.make_id(item.id)
......@@ -224,6 +227,8 @@ module.exports = function() {
child.to_json(graph);
if (!collection_json["hasPart"]) {
collection_json["hasPart"] = [];
} else if (!Array.isArray(collection_json["hasPart"])) {
collection_json["hasPart"] = [collection_json["hasPart"]]
}
collection_json["hasPart"].push({
"@id": this.make_id(child.collection_metadata.id)
......@@ -262,7 +267,7 @@ module.exports = function() {
}
}
fs.writeFileSync(
path.join(collection.dir, defaults.catalog_json_file_name),
path.join(collection.dir, defaults.metadata_json_file_name),
JSON.stringify(
{
"@graph": flattenated["@graph"],
......@@ -277,7 +282,7 @@ module.exports = function() {
}
console.log(
"The file was saved!" +
path.join(collection.dir, defaults.catalog_json_file_name)
path.join(collection.dir, defaults.metadata_json_file_name)
);
}
);
......@@ -288,7 +293,7 @@ module.exports = function() {
);
},
read: function read(dir, rel_path = "./", parent = false, max_depth = 1) {
//console.log("existing", parent.existing_catalogs)
//console.log("existing", parent.existing_metadatas)
if (max_depth) {
this.max_depth = max_depth;
} else {
......@@ -300,14 +305,14 @@ module.exports = function() {
this.depth = parent.depth + 1;
this.name_lookup = parent.name_lookup;
this.id_lookup = parent.id_lookup;
this.existing_catalogs = parent.existing_catalogs;
this.existing_metadatas = parent.existing_metadatas;
this.root_dir = parent.root_dir;
//console.log(this.existing_catalogs);
//console.log(this.existing_metadatas);
} else {
this.depth = 1;
this.name_lookup = {};
this.id_lookup = {};
this.existing_catalogs = [];
this.existing_metadatas = [];
this.root_dir = dir;
}
//console.log("XXXXXX Collecting", dir, "Depth", this.depth);
......@@ -327,47 +332,47 @@ module.exports = function() {
if (items) {
//console.log("ITEMS NOW", items);
//TODO - make this a testable function
var catalog_regex = new RegExp(`^${defaults.catalog_root_name}.*xlsx$`);
var catalogs = items.filter(item => catalog_regex.test(item));
this.existing_catalogs = this.existing_catalogs.concat(catalogs);
if (catalogs.length > 1) {
console.log("More than one catalog, using this one: ", catalogs[0]);
var metadata_regex = new RegExp(`^${defaults.metadata_root_name}.*xlsx$`);
var metadatas = items.filter(item => metadata_regex.test(item));
this.existing_metadatas = this.existing_metadatas.concat(metadatas);
if (metadatas.length > 1) {
console.log("More than one metadata, using this one: ", metadatas[0]);
}
var catalog_file_regex = new RegExp(
`^${defaults.catalog_root_name}.*(xlsx|html|json)$`
var metadata_file_regex = new RegExp(
`^${defaults.metadata_root_name}.*(xlsx|html|json)$`
);
items = items.filter(item => !catalog_file_regex.test(item));
items = items.filter(item => !metadata_file_regex.test(item));
items = items.filter(item => !defaults.ignore_file_regex.test(item));
items = items.filter(item => shell.test("-f", path.join(dir, item)));
//console.log("CATALOGS", catalogs)
//console.log("CATALOGS", metadatas)
//TODO - make this configurable
if (catalogs.length === 0) {
//console.log("Making new catalog");
var catalog_file = !parent
? `${defaults.catalog_root_name}.xlsx`
: `${defaults.catalog_root_name}_subdir.xlsx`;
var new_catalog_file = this.get_unique_catalog_name(
if (metadatas.length === 0) {
console.log("Making new metadata");
var metadata_file = !parent
? `${defaults.metadata_root_name}.xlsx`
: `${defaults.metadata_root_name}_subdir.xlsx`;
var new_metadata_file = this.get_unique_metadata_name(
dir,
this.existing_catalogs
this.existing_metadatas
);
this.existing_catalogs.push(new_catalog_file);
//console.log("EXISTING AT THIS POINT", this.existing_catalogs);
catalogs = [new_catalog_file];
this.existing_metadatas.push(new_metadata_file);
console.log("EXISTING AT THIS POINT", this.existing_metadatas);
metadatas = [new_metadata_file];
fs.writeFileSync(
path.join(dir, new_catalog_file),
fs.readFileSync(path.join(defaults.defaults_dir, catalog_file))
path.join(dir, new_metadata_file),
fs.readFileSync(path.join(defaults.defaults_dir, metadata_file))
);
//console.log("New Catalog", new_catalog_file);
//console.log("New Catalog", new_metadata_file);
//COPY IN A NEW CATALOG
//IF ROOT - use default
//ELSE sub catalog
//ELSE sub metadata
}
if (catalogs.length > 0) {
if (metadatas.length > 0) {
if (items.length < defaults.max_files_in_dir) {
try {
//console.log("Running SF")
this.file_info = JSON.parse(shell.exec('sf -nr -json "' + dir + '"', {silent:true}).stdout);
} catch(e) {
console.error("File identification error: " + e);
......@@ -377,9 +382,7 @@ module.exports = function() {
}
this.file_info = JSON.parse(
shell.exec('sf -nr -json "' + dir + '"', { silent: true }).stdout
);
//console.log("FILES", JSON.stringify(this.file_info.files, null, 2));
this.file_info_by_filename = {};
for (var i = 0; i < this.file_info.files.length; i++) {
......@@ -387,9 +390,11 @@ module.exports = function() {
this.file_info_by_filename[f.filename.replace(/.*\//, "")] = f;
}
}
//console.log(dir, catalogs[0]);
catalog_path = path.join(dir, catalogs[0]);
this.workbook = XLSX.readFile(catalog_path); //First one found only
//console.log(dir, metadatas[0]);
metadata_path = path.join(dir, metadatas[0]);
//console.log("Opening spreadsheet", metadata_path)
this.workbook = XLSX.readFile(metadata_path); //First one found only
sheet_names = this.workbook.SheetNames;
for (var i = 0; i < sheet_names.length; i++) {
sheet_name = sheet_names[i];
......@@ -410,6 +415,8 @@ module.exports = function() {
//console.log("SHEET JSON ORIGINAL", sheet_json);
sheet_json.forEach(function(row) {
var f = row["FILE:Filename"];
//console.log("Going through files: ", f)
if (f) {
if (items.includes(f)) {
items = items.filter(function(e) {
......@@ -423,6 +430,7 @@ module.exports = function() {
// items now only contains new files so add them
items.forEach(function(f) {
sheet_json.push({ "FILE:Filename": f });
});
// Iterate over items and add files
......@@ -433,13 +441,13 @@ module.exports = function() {
sheet_json,
(options = { header: header })
);
XLSX.writeFile(this.workbook, catalog_path);
//console.log("Writing file", metadata_path)
XLSX.writeFile(this.workbook, metadata_path);
//console.log("Done writing file, getting metadata");
//console.log(XLSX.utils.sheet_to_json(this.workbook.Sheets['Files']));
get_metadata(this.workbook, this, "Files");
// Find subdirs
// Write back
//console.log("got metadata");
} else if (sheet_name == "@context") {
extra_context = XLSX.utils.sheet_to_json(
this.workbook.Sheets["@context"]
......@@ -456,11 +464,11 @@ module.exports = function() {
}
}
else {
//console.log("getting metaadata", sheet_name)
//console.log("getting metadata", sheet_name)
get_metadata(this.workbook, this, sheet_name);
}
//console.log("COLLECTION METADATA:", this.collection_metadata);
//console.log("COLLECTION METADATA:", this.collection_metadata);
}
}
......@@ -476,13 +484,14 @@ module.exports = function() {
for (var i = 0; i < subdirs.length; i++) {
if (this.depth < this.max_depth) {
var child = new module.exports();
//console.log("making new child:", subdirs[i]);
child.read(
path.join(dir, subdirs[i]),
path.join(this.rel_path, subdirs[i]),
this,
this.max_depth
);
this.existing_catalogs = child.existing_catalogs;
this.existing_metadatas = child.existing_metadatas;
this.children.push(child);
} else {
item = new Item();
......
......@@ -14,18 +14,18 @@ GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/* Defaults for Calcyte such as names of key files */
const html_multi_file_dirs = "CATALOG_files";
const catalog_root_name = "CATALOG";
const ro_crate_name = "ro-crate-metadata";
const html_multi_file_dirs = "ro-crate-metadata_files";
const metadata_root_name = "CATALOG";
const datacite_file_name = "datacite.xml";
const catalog_json_file_name = `${catalog_root_name}.json`;
const html_file_name = `${catalog_root_name}.html`;
const metadata_json_file_name = `${ro_crate_name}.jsonld`;
const html_file_name = "ro-crate-preview.html";
const max_depth = "10"; // Number of dirs to recurse into
const max_files_in_dir = "100"; // Don't list files in a directory if there are more than this
const ignore_file_regex = new RegExp(
`(^${html_multi_file_dirs[0]}$)|(^~)|(^\\.)|(${datacite_file_name})`
`(^~)|(^\\.)|(${datacite_file_name})|^${html_file_name}$|^${metadata_json_file_name}$`
);
const ignore_dir_regex = new RegExp(`(^${html_multi_file_dirs}$)|(^\\.)`);
const BagIt_Profile_Identifier =
......@@ -34,18 +34,42 @@ const DataCrate_Specification_Identifier =
"https://github.com/UTS-eResearch/datacrate/blob/master/spec/1.0/data_crate_specification_v1.0.md";
const DataCrate_version = "1.0";
const DataCrate_profile_file = "defaults/profile-datacrate-v" + DataCrate_version + ".json"
const default_context = "defaults/context.json"
const path = require("path")
const DEFAULTS = path.join(__dirname, '../defaults');
const defaults_dir = DEFAULTS;
const context = require(path.join(DEFAULTS, 'context.json'));
const catalog_template = path.join(DEFAULTS, 'catalog_template.html');
const context = "https://raw.githubusercontent.com/ResearchObject/ro-crate/master/docs/0.2-DRAFT/context.json";
const default_context = context;
const metadata_template = path.join(DEFAULTS, 'metadata_template.html');
// List of property names exported from this module as `display_keys`.
// NOTE(review): presumably the properties (and their order) shown when an
// entity is rendered to HTML - confirm against the code that consumes it.
const display_keys = [
"name",
"familyName",
"givenName",
"@type",
"description",
"funder",
"memberOf",
"isPartOf",
"fileOf",
"thumbnail",
"datePublished",
"creator",
"path",
"encodingFormat",
"contentSize",
"affiliation",
"email",
"@reverse",
];
// DataCrate specific terms which have inverses
const back_links = {
hasFile: "fileOf",
hasPart: "isPartOf",
hasMember: "memberOf"
hasMember: "memberOf",
memberOf: "hasMember"
};
......@@ -55,9 +79,9 @@ const back_back_links = new Set(Object.values(back_links))
module.exports = {
html_multi_file_dirs: html_multi_file_dirs,
default_context: default_context,
catalog_root_name: catalog_root_name,
metadata_root_name: metadata_root_name,
datacite_file_name: datacite_file_name,
catalog_json_file_name: catalog_json_file_name,
metadata_json_file_name: metadata_json_file_name,
html_file_name: html_file_name,
ignore_file_regex: ignore_file_regex,
max_depth: max_depth,
......@@ -71,5 +95,6 @@ module.exports = {
DataCrate_profile_file: DataCrate_profile_file,
context: context,
defaults_dir: defaults_dir,
catalog_template: catalog_template
metadata_template: metadata_template,
display_keys: display_keys
};
This diff is collapsed.
/* This is part of Calcyte a tool for implementing the DataCrate data packaging
spec. Copyright (C) 2018 University of Technology Sydney