Skip to content
Snippets Groups Projects
Commit 27344e6e authored by Moises Sacal
Browse files

fixed memory leak by removing require and promise chains

parent a7ca2574
No related merge requests found
This diff is collapsed.
...@@ -3,6 +3,7 @@ const _ = require('lodash'); ...@@ -3,6 +3,7 @@ const _ = require('lodash');
const yargs = require('yargs'); const yargs = require('yargs');
const CatalogSolr = require('./lib/CatalogSolr'); const CatalogSolr = require('./lib/CatalogSolr');
const fs = require('fs-extra'); const fs = require('fs-extra');
const path = require('path');
const argv = yargs['argv']; const argv = yargs['argv'];
const configPath = argv.config || './config.json'; const configPath = argv.config || './config.json';
...@@ -10,6 +11,7 @@ if (!fs.existsSync(configPath)) { ...@@ -10,6 +11,7 @@ if (!fs.existsSync(configPath)) {
console.error(`Please provide a valid config file path: ${configPath}`); console.error(`Please provide a valid config file path: ${configPath}`);
process.exit(1); process.exit(1);
} }
const configJson = require(configPath); const configJson = require(configPath);
const sourcePath = _.endsWith(configJson['source'], '/') ? configJson['source'] : `${configJson['source']}/`; const sourcePath = _.endsWith(configJson['source'], '/') ? configJson['source'] : `${configJson['source']}/`;
const solrUpdate = configJson['solrUpdate'] || ''; const solrUpdate = configJson['solrUpdate'] || '';
...@@ -18,17 +20,14 @@ const logLevel = configJson['logLevel'] || 4; ...@@ -18,17 +20,14 @@ const logLevel = configJson['logLevel'] || 4;
const waitPeriod = configJson['waitPeriod'] || 0; const waitPeriod = configJson['waitPeriod'] || 0;
const batchNum = configJson['batch'] || 1000; const batchNum = configJson['batch'] || 1000;
const catalog = new CatalogSolr();
catalog.setConfig(fieldConfig);
const sleep = ms => new Promise((r, j) => { const sleep = ms => new Promise((r, j) => {
console.log('Waiting for ' + ms + ' seconds'); console.log('Waiting for ' + ms + ' seconds');
setTimeout(r, ms * 1000); setTimeout(r, ms * 1000);
}); });
function commitDocs(URI) { function commitDocs(solrURL, URI) {
return axios({ return axios({
url: solrUpdate + URI, url: solrURL + URI,
method: 'get', method: 'get',
responseType: 'json', responseType: 'json',
headers: { headers: {
...@@ -37,9 +36,9 @@ function commitDocs(URI) { ...@@ -37,9 +36,9 @@ function commitDocs(URI) {
}); });
} }
function updateDocs(coreObjects) { function updateDocs(solrURL, coreObjects) {
return axios({ return axios({
url: solrUpdate + '/docs', url: solrURL + '/docs',
method: 'post', method: 'post',
data: coreObjects, data: coreObjects,
responseType: 'json', responseType: 'json',
...@@ -69,7 +68,22 @@ function recordsArray(sourcePath) { ...@@ -69,7 +68,22 @@ function recordsArray(sourcePath) {
return records; return records;
} }
function createCatalogSolr(ca) { function entries(basePath, dirs) {
const records = [];
_.each(dirs, (d) => {
const entryPath = path.join(basePath, `${d}/CATALOG.json`);
if (fs.existsSync(entryPath)) {
let entryJson = fs.readFileSync(entryPath).toString();
entryJson = JSON.parse(entryJson);
records.push(entryJson);
entryJson = null;
}
});
return records;
}
function createCatalogSolr(catalog, ca) {
//Peter's idea is to convert everything into an array then it is safer to work to convert //Peter's idea is to convert everything into an array then it is safer to work to convert
const graph = _.each(ca['@graph'], (g) => { const graph = _.each(ca['@graph'], (g) => {
return catalog.ensureObjArray(g); return catalog.ensureObjArray(g);
...@@ -94,18 +108,12 @@ function createCatalogSolr(ca) { ...@@ -94,18 +108,12 @@ function createCatalogSolr(ca) {
return catalogSolr; return catalogSolr;
} }
let records = [];
if (fs.existsSync(sourcePath)) {
records = recordsArray(sourcePath);
} else {
console.error(`Source path doesn't exist: ${sourcePath}`);
process.exit(1);
}
function catalogToArray(recs) { function catalogToArray(recs) {
let catalog = new CatalogSolr();
catalog.setConfig(fieldConfig);
const catalogs = []; const catalogs = [];
recs.forEach((rec) => { recs.forEach((rec) => {
const solrObj = createCatalogSolr(rec); const solrObj = createCatalogSolr(catalog, rec);
if (solrObj) { if (solrObj) {
if (solrObj.Dataset) { if (solrObj.Dataset) {
solrObj.Dataset.forEach((c) => { solrObj.Dataset.forEach((c) => {
...@@ -119,28 +127,76 @@ function catalogToArray(recs) { ...@@ -119,28 +127,76 @@ function catalogToArray(recs) {
} }
} }
}); });
catalog = null;
return catalogs; return catalogs;
} }
const batch = _.chunk(records, batchNum); function batchIt(b) {
b.map(async (p, index) => {
batch.reduce((promise, p, index) => { try {
return promise.then(() => { if (logLevel >= 4) console.log(`Using: ${Math.round(process.memoryUsage().rss / 1024 / 1024 * 100) / 100} MBs`);
const catalogs = catalogToArray(records); records = entries(sourcePath, p);
return updateDocs(catalogs).then(async () => { catalogs = catalogToArray(records);
records = null;
let update = await updateDocs(solrUpdate, catalogs);
catalogs = null;
p = null;
console.log(`batch ${index} of ${batch.length} : Update docs`);
if (waitPeriod) { if (waitPeriod) {
const waited = await sleep(waitPeriod); const waited = await sleep(waitPeriod);
} }
console.log('Update docs'); } catch (e) {
if (index >= batch.length - 1) { console.log(e);
console.log('run commit'); }
return commitDocs('?commit=true&overwrite=true').then(() => { });
return Promise.resolve(); commitDocs(solrUpdate, '?commit=true&overwrite=true').then(() => {
}); console.log('solr commit');
} return Promise.resolve();
return Promise.resolve(); }).catch((err) => {
}); return Promise.reject(err);
}).catch((e) => { });
console.log(e); }
})
}, Promise.resolve()); function reduceIt(b){
\ No newline at end of file b.reduce((promise, p, index) => {
return promise.then(() => {
if (logLevel >= 4) console.log(`Using: ${Math.round(process.memoryUsage().rss / 1024 / 1024 * 100) / 100} MBs`);
const records = entries(sourcePath, p);
const catalogs = catalogToArray(records);
return updateDocs(solrUpdate, catalogs).then(async () => {
if (waitPeriod) {
const waited = await sleep(waitPeriod);
}
console.log(`batch ${index} of ${batch.length} : Update docs`);
if (index >= b.length - 1) {
console.log('run commit');
return commitDocs(solrUpdate,'?commit=true&overwrite=true').then(() => {
return Promise.resolve();
});
}
return Promise.resolve();
});
}).catch((e) => {
console.log(e);
})
}, Promise.resolve());
}
let dirs = null;
if (fs.existsSync(sourcePath)) {
dirs = fs.readdirSync(sourcePath).filter(f => fs.statSync(path.join(sourcePath, f)).isDirectory());
} else {
console.error(`Source path doesn't exist: ${sourcePath}`);
process.exit(1);
}
const batch = _.chunk(dirs, batchNum);
dirs = null;
let records = [];
let catalogs = [];
//batchIt(batch);
reduceIt(batch);
if (logLevel >= 4) console.log(`Using: ${Math.round(process.memoryUsage().rss / 1024 / 1024 * 100) / 100} MBs`);
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
"source": "/Users/moises/source/peppermint/publication/public/", "source": "/Users/moises/source/peppermint/publication/public/",
"ocfl": "/Users/moises/source/peppermint/publication/ocfl/", "ocfl": "/Users/moises/source/peppermint/publication/ocfl/",
"solrUpdate": "http://localhost:8983/solr/dataset/update/json", "solrUpdate": "http://localhost:8983/solr/dataset/update/json",
"logLevel": 3, "logLevel": 4,
"waitPeriod": 1, "waitPeriod": 0,
"fields": "./fields.json", "fields": "./fields.json",
"batch": 1000 "batch": 1000
} }
...@@ -31,8 +31,8 @@ async function createDatacrates(dest, n) { ...@@ -31,8 +31,8 @@ async function createDatacrates(dest, n) {
try { try {
fs.ensureDirSync(datacrateDirPath); fs.ensureDirSync(datacrateDirPath);
createDatacrates(datacrateDirPath, numberOfDatacrates) createDatacrates(datacrateDirPath, numberOfDatacrates)
.then((res) => { .then(() => {
console.log(res); console.log(numberOfDatacrates + ' datacrates generated in ' + datacrateDirPath);
}) })
.catch((err) => { .catch((err) => {
throw new Error(err); throw new Error(err);
......
This diff is collapsed.
...@@ -16,12 +16,14 @@ ...@@ -16,12 +16,14 @@
"license": "GPL-3.0-or-later", "license": "GPL-3.0-or-later",
"dependencies": { "dependencies": {
"axios": "^0.18.0", "axios": "^0.18.0",
"datacrate": "git+https://code.research.uts.edu.au/eresearch/datacrate.git#random_as_library",
"fs-extra": "^8.0.1", "fs-extra": "^8.0.1",
"lodash": "^4.17.11", "lodash": "^4.17.11",
"request": "^2.88.0",
"request-promise": "^4.2.4",
"yargs": "^13.2.4" "yargs": "^13.2.4"
}, },
"devDependencies": { "devDependencies": {
"datacrate": "git+https://code.research.uts.edu.au/eresearch/datacrate.git#random_as_library",
"mocha": "^6.1.4" "mocha": "^6.1.4"
} }
} }
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment