Commit e4ede72: Initial commit (0 parents)

18 files changed, +3040 −0 lines

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
node_modules

README.md

Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
# VA ONLINE MEMORIAL - DATA IMPORT & SYNC

## Dependencies

- [Nodejs](https://nodejs.org/en/)
- [PostgreSQL](https://www.postgresql.org/)
- [eslint](http://eslint.org/)

## Configuration

- Edit the configuration in `config/default.json` and the custom environment variable names in `config/custom-environment-variables.json`.

## Application constants

- Application constants can be configured in `./constants.js`.

## Available tools

- Since the data we need to download and process is huge, it's better (and safer) to use two different tools instead of one single script, so that if something goes wrong during processing, the damage is minimised.

### Download datasets

- Run `npm run download-data` to download all available datasets.
- The datasets will be stored in the configured directory.
- Old data will be replaced.
- This operation does not affect the database.

### Import data from downloaded files

- Run `npm run import-data` to import all data using the downloaded files from the previous step.

## Local Deployment

*Before starting the application, make sure that PostgreSQL is running and that everything is configured correctly in `config/default.json`.*

- Install dependencies: `npm i`
- Run the lint check: `npm run lint`
- Start the scraper: `npm run scrape`. This will run all tools in the following sequence (see the sketch below):

`npm run download-data` => `npm run import-data`

*The application will print progress information and the results in the terminal.*

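Taken together, a typical local run uses just the commands above:

```bash
# install dependencies, lint, then run the full pipeline (download + import)
npm i
npm run lint
npm run scrape
```
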
## Verification

- To verify that the data has been imported, you can use the [pgAdmin](https://www.pgadmin.org/) tool and browse the database (or query it from `psql`, as sketched below).

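If you prefer the command line to pgAdmin, a quick check from `psql` works as well; the connection details below come from the default `config/default.json` and may differ in your setup:

```bash
# list the tables created by the importer
psql -U postgres -d vaonline -c '\dt'
```
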
## Notes

- The total size of all datasets is > 1.5GB, so the operation will take quite some time to finish, depending on your internet connection.
- `max_old_space_size` has been set to *4096MB* so that such huge data files can be parsed/processed without any issues. The app frees the data from memory right after using it to prevent memory/heap leaks.
- The dataset for `FOREIGN ADDRESSES` doesn't have a header row in the CSV file and has a slightly different format (one extra column). The app handles all datasets without any issue.

common/logger.js

Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
'use strict';

/*
 * Copyright (C) 2018 Topcoder Inc., All Rights Reserved.
 */

/**
 * This module contains the winston logger configuration.
 */
const winston = require('winston');
const config = require('config');
const chalk = require('chalk');

const logger = new (winston.Logger)({
  transports: [
    new (winston.transports.Console)({
      level: config.logLevel,
      timestamp: () => new Date().toISOString(),
      // Custom formatter: prints `[timestamp][LEVEL] message`, plus any
      // metadata object on an indented new line.
      formatter(options) {
        const message = options.message || '';

        let meta = '';
        if (options.meta && Object.keys(options.meta).length) {
          meta = '\n\t' + JSON.stringify(options.meta);
        }

        // Colourise the level tag for easier scanning in the terminal.
        let level = options.level.toUpperCase();
        switch (level) {
          case 'INFO':
            level = chalk.cyan(level);
            break;
          case 'WARN':
            level = chalk.yellow(level);
            break;
          case 'ERROR':
            level = chalk.red(level);
            break;
          default:
            break;
        }

        return `[${options.timestamp()}][${level}] ${message} ${meta}`;
      }
    })
  ]
});

module.exports = logger;
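For reference, other modules would consume this logger roughly like this (a minimal sketch; the messages and metadata are illustrative, not from the repo):

```js
const logger = require('./common/logger');

logger.info('Datasets downloaded');
logger.error('Import failed', { dataset: 'NATIONAL', reason: 'connection refused' });
```
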
config/custom-environment-variables.json

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
{
  "dataset_url": "DATASET_URL",
  "logLevel": "LOG_LEVEL",
  "dbConfig": {
    "db_url": "DATABASE_URL"
  },
  "downloadPath": "DOWNLOAD_PATH"
}
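The mapping above follows the `config` (node-config) convention: each value names an environment variable that, when set, overrides the matching key from `config/default.json`. For example (values illustrative):

```bash
export DATABASE_URL="postgres://postgres:secret@localhost:5432/vaonline"
export LOG_LEVEL="debug"
npm run scrape
```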

config/default.json

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
{
  "dataset_url": "https://www.data.va.gov/data.json",
  "logLevel": "info",
  "dbConfig": {
    "db_url": "postgres://postgres:123456@localhost:5432/vaonline"
  },
  "downloadPath": "downloads"
}
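Note that the default `db_url` points at a local database named `vaonline`, which has to exist before the app can connect; one way to create it, assuming a stock local PostgreSQL install with the `postgres` superuser:

```bash
createdb -U postgres vaonline
```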

constants.js

Lines changed: 65 additions & 0 deletions
@@ -0,0 +1,65 @@
'use strict';

/*
 * Copyright (c) 2018 Topcoder, Inc. All rights reserved.
 */

/**
 * Application constants
 */

// The accepted program codes
const acceptedProgramCodes = [
  '029:001'
];

// The accepted keywords
const acceptedKeywords = [
  'burial data'
];

// The accepted file format
const acceptedFormat = 'csv';

// Entry names that should be ignored
const ignoredNames = [
  // Source of cemeteries data
  'VA Cemeteries - Address, Location, Contact Information, Burial Space'
];

// CSV headers
const csvHeaders = [
  'd_first_name',
  'd_mid_name',
  'd_last_name',
  'd_suffix',
  'd_birth_date',
  'd_death_date',
  'section_id',
  'row_num',
  'site_num',
  'cem_name',
  'cem_addr_one',
  'cem_addr_two',
  'city',
  'state',
  'zip',
  'cem_url',
  'cem_phone',
  'relationship',
  'v_first_name',
  'v_mid_name',
  'v_last_name',
  'v_suffix',
  'branch',
  'rank',
  'war'
];

module.exports = {
  acceptedProgramCodes,
  acceptedKeywords,
  acceptedFormat,
  ignoredNames,
  csvHeaders
};
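Because the `FOREIGN ADDRESSES` dataset ships without a header row (see the README notes), `csvHeaders` can be supplied to a CSV parser as explicit column names. A minimal sketch of that idea, assuming the `csv-parse` package; the importer itself is not shown above, so the parser choice and file path here are illustrative:

```js
const fs = require('fs');
const { parse } = require('csv-parse');
const { csvHeaders } = require('./constants');

// Map each headerless row to an object keyed by the shared column names;
// relax_column_count tolerates the extra column mentioned in the README.
fs.createReadStream('downloads/foreign-addresses.csv')
  .pipe(parse({ columns: csvHeaders, relax_column_count: true }))
  .on('data', (record) => console.log(record.d_last_name, record.cem_name));
```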

docs/DeploymentGuide.pdf

173 KB
Binary file not shown.
