diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 8422d715231aae45c29565a32dc95aaad9b767fd..1872063cc9424da0aaf8adedf34cba8311f3caad 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -2,8 +2,9 @@ <project version="4"> <component name="ChangeListManager"> <list default="true" id="ad75bb9b-12db-4cad-af64-4c68cecdb87b" name="Default Changelist" comment=""> + <change afterPath="$PROJECT_DIR$/README.md" afterDir="false" /> + <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" /> <change beforePath="$PROJECT_DIR$/commit-to-solr.js" beforeDir="false" afterPath="$PROJECT_DIR$/commit-to-solr.js" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/package.json" beforeDir="false" afterPath="$PROJECT_DIR$/package.json" afterDir="false" /> </list> <ignored path="$PROJECT_DIR$/.tmp/" /> <ignored path="$PROJECT_DIR$/temp/" /> @@ -28,17 +29,29 @@ <file pinned="false" current-in-tab="false"> <entry file="file://$PROJECT_DIR$/lib/CatalogSolr.js"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="125"> - <caret line="37" lean-forward="true" selection-start-line="37" selection-end-line="37" /> + <state relative-caret-position="99"> + <caret line="37" selection-start-line="37" selection-end-line="37" /> </state> </provider> </entry> </file> - <file pinned="false" current-in-tab="true"> + <file pinned="false" current-in-tab="false"> <entry file="file://$PROJECT_DIR$/package.json"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="300"> - <caret line="20" column="25" selection-start-line="20" selection-start-column="25" selection-end-line="20" selection-end-column="25" /> + <state relative-caret-position="120"> + <caret line="8" column="28" selection-start-line="8" selection-end-line="9" /> + </state> + </provider> + </entry> + </file> + <file pinned="false" current-in-tab="true"> + <entry file="file://$PROJECT_DIR$/README.md"> + <provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]"> + <state split_layout="SPLIT"> + <first_editor relative-caret-position="260"> + <caret line="43" column="31" lean-forward="true" selection-start-line="43" selection-start-column="31" selection-end-line="43" selection-end-column="31" /> + </first_editor> + <second_editor /> </state> </provider> </entry> @@ -46,7 +59,7 @@ <file pinned="false" current-in-tab="false"> <entry file="file://$PROJECT_DIR$/generate-datacrates.js"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="375"> + <state relative-caret-position="350"> <caret line="25" column="13" selection-start-line="25" selection-start-column="13" selection-end-line="25" selection-end-column="13" /> </state> </provider> @@ -55,8 +68,8 @@ <file pinned="false" current-in-tab="false"> <entry file="file://$PROJECT_DIR$/commit-to-solr.js"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="112"> - <caret line="138" column="8" lean-forward="true" selection-start-line="138" selection-start-column="8" selection-end-line="138" selection-end-column="8" /> + <state relative-caret-position="255"> + <caret line="17" column="20" lean-forward="true" selection-start-line="17" selection-end-line="18" /> </state> </provider> </entry> @@ -90,8 +103,9 @@ <option value="$PROJECT_DIR$/.gitignore" /> <option value="$PROJECT_DIR$/generate-datacrates.js" /> <option value="$PROJECT_DIR$/config.json" /> - <option value="$PROJECT_DIR$/commit-to-solr.js" /> <option value="$PROJECT_DIR$/package.json" /> + <option value="$PROJECT_DIR$/commit-to-solr.js" /> + <option value="$PROJECT_DIR$/README.md" /> </list> </option> </component> @@ -164,7 +178,7 @@ </list> </option> </component> - <component name="RunManager" selected="Node.js.commit-to-solr"> + <component name="RunManager" selected="Node.js.generate-datacrates"> <configuration name="commit-to-solr" type="NodeJSConfigurationType" node-parameters="--expose-gc" path-to-js-file="commit-to-solr.js" working-dir="$PROJECT_DIR$"> <method v="2" /> </configuration> @@ -200,7 +214,7 @@ <option name="presentableId" value="Default" /> <updated>1558421147646</updated> <workItem from="1558421149207" duration="30478000" /> - <workItem from="1558654598476" duration="77898000" /> + <workItem from="1558654598476" duration="79515000" /> </task> <servers /> </component> @@ -237,7 +251,7 @@ </history-entry> </component> <component name="TimeTrackingManager"> - <option name="totallyTimeSpent" value="108376000" /> + <option name="totallyTimeSpent" value="109993000" /> </component> <component name="ToolWindowManager"> <frame x="1441" y="-277" width="1920" height="1177" extended-state="6" /> @@ -327,13 +341,6 @@ </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/config.json"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="135"> - <caret line="9" selection-start-line="9" selection-end-line="9" /> - </state> - </provider> - </entry> <entry file="file://$PROJECT_DIR$/node_modules/lodash/lodash.js"> <provider selected="true" editor-type-id="text-editor"> <state relative-caret-position="489"> @@ -343,29 +350,46 @@ </entry> <entry file="file://$PROJECT_DIR$/generate-datacrates.js"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="375"> + <state relative-caret-position="350"> <caret line="25" column="13" selection-start-line="25" selection-start-column="13" selection-end-line="25" selection-end-column="13" /> </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/lib/CatalogSolr.js"> + <entry file="file://$PROJECT_DIR$/config.json"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="125"> - <caret line="37" lean-forward="true" selection-start-line="37" selection-end-line="37" /> + <state relative-caret-position="135"> + <caret line="9" selection-start-line="9" selection-end-line="9" /> </state> </provider> </entry> - <entry file="file://$PROJECT_DIR$/commit-to-solr.js"> + <entry file="file://$PROJECT_DIR$/lib/CatalogSolr.js"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="112"> - <caret line="138" column="8" lean-forward="true" selection-start-line="138" selection-start-column="8" selection-end-line="138" selection-end-column="8" /> + <state relative-caret-position="99"> + <caret line="37" selection-start-line="37" selection-end-line="37" /> </state> </provider> </entry> <entry file="file://$PROJECT_DIR$/package.json"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="300"> - <caret line="20" column="25" selection-start-line="20" selection-start-column="25" selection-end-line="20" selection-end-column="25" /> + <state relative-caret-position="120"> + <caret line="8" column="28" selection-start-line="8" selection-end-line="9" /> + </state> + </provider> + </entry> + <entry file="file://$PROJECT_DIR$/commit-to-solr.js"> + <provider selected="true" editor-type-id="text-editor"> + <state relative-caret-position="255"> + <caret line="17" column="20" lean-forward="true" selection-start-line="17" selection-end-line="18" /> + </state> + </provider> + </entry> + <entry file="file://$PROJECT_DIR$/README.md"> + <provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]"> + <state split_layout="SPLIT"> + <first_editor relative-caret-position="260"> + <caret line="43" column="31" lean-forward="true" selection-start-line="43" selection-start-column="31" selection-end-line="43" selection-end-column="31" /> + </first_editor> + <second_editor /> </state> </provider> </entry> diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8cae053f29dff91806fad05bbb40c0d645439f7d --- /dev/null +++ b/README.md @@ -0,0 +1,66 @@ +## JSON LD Solr Indexer + +--- +**NOTES:** + + - Currently spec'd to work only with Datacrate JSON-LD. + TODO: + - build schema of solr with config file + - Generalise to use any json-ld + - Make createCatalogSolr as part of the library + - Test with other types of json-ld's +--- + +Example implementation: + +### Generate JSON-LDs + +``` +node generate-datacrates.js -d ./test-data/generated/ -n 100 +``` +or +``` +npm run generate +``` + +### Commit to SOLR + +Modify config.json to suit your needs. + +Use +``` +node ./commit-to-solr.js --config ./config.json +``` +or +``` +npm run commit +``` + +### Detail + +```JavaScript +const fieldConfig = require('./fields.json'); +let catalog = new CatalogSolr(); +catalog.setConfig(fieldConfig); + +const graph = _.each(ca['@graph'], (g) => { +return catalog.ensureObjArray(g); +}); + +const solrObject = {}; +_.each(fieldConfig, (field, name) => { +let graphElement = _.filter(graph, (g) => { + return _.find(g['@type'], (gg) => gg === name) ? g : undefined; +}); +if (graphElement) { + _.each(graphElement, (ge) => { + if (Array.isArray(solrObject[name])) { + solrObject[name].push(catalog.getGraphElement(fieldConfig[name], graph, ge)); + } else { + solrObject[name] = [catalog.getGraphElement(fieldConfig[name], graph, ge)]; + } + }); +} +}); +``` + diff --git a/commit-to-solr.js b/commit-to-solr.js index 8a3b6d5e4a4910c30a51d91e518eaa40f78cacf2..f6d840873fe783bb2d0752ea1689d532e2e45dcb 100644 --- a/commit-to-solr.js +++ b/commit-to-solr.js @@ -68,7 +68,7 @@ function recordsArray(sourcePath) { return records; } -function entries(basePath, dirs) { +function jsonRecords(basePath, dirs) { const records = []; _.each(dirs, (d) => { const entryPath = path.join(basePath, `${d}/CATALOG.json`); @@ -108,7 +108,7 @@ function createCatalogSolr(catalog, ca) { return catalogSolr; } -function catalogToArray(recs) { +function solrObjects(recs) { let catalog = new CatalogSolr(); catalog.setConfig(fieldConfig); const catalogs = []; @@ -147,8 +147,8 @@ dirs = null; batch.reduce((promise, p, index) => { return promise.then(() => { if (logLevel >= 4) console.log(`Using: ${Math.round(process.memoryUsage().rss / 1024 / 1024 * 100) / 100} MBs`); - const records = entries(sourcePath, p); - const catalogs = catalogToArray(records); + const records = jsonRecords(sourcePath, p); + const catalogs = solrObjects(records); return updateDocs(solrUpdate, catalogs).then(async () => { if (waitPeriod) { const waited = await sleep(waitPeriod);