Commit e4ede72: Initial commit (0 parents)

18 files changed, +3040 −0 lines

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
node_modules

README.md

Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
# VA ONLINE MEMORIAL - DATA IMPORT & SYNC

## Dependencies

- [Nodejs](https://nodejs.org/en/)
- [PostgreSQL](https://www.postgresql.org/)
- [eslint](http://eslint.org/)

## Configuration

- Edit the configuration in `config/default.json` and the custom environment variable names in `config/custom-environment-variables.json`.

## Application constants

- Application constants can be configured in `./constants.js`.

## Available tools

- Since the data we need to download and process is huge, it's better (and safer) to use two different tools instead of one single script, so that if something goes wrong during processing, the damage is minimised.

### Download datasets

- Run `npm run download-data` to download all available datasets.
- The datasets will be stored in the configured directory.
- Old data will be replaced.
- This operation does not affect the database.

### Import data from downloaded files

- Run `npm run import-data` to import all data using the downloaded files from the previous step.

## Local Deployment

*Before starting the application, make sure that PostgreSQL is running and that everything is configured correctly in `config/default.json`.*

- Install dependencies: `npm i`
- Run the lint check: `npm run lint`
- Start the scraper: `npm run scrape`. This will run all tools in the following sequence (see the sketch below):

`npm run download-data` => `npm run import-data`

*The application will print progress information and the results in the terminal.*

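Taken together, a typical local run uses just the commands above:

```bash
# install dependencies, lint, then run the full pipeline (download + import)
npm i
npm run lint
npm run scrape
```
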
## Verification

- To verify that the data has been imported, you can use the [pgAdmin](https://www.pgadmin.org/) tool and browse the database (or query it from `psql`, as sketched below).

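If you prefer the command line to pgAdmin, a quick check from `psql` works as well; the connection details below come from the default `config/default.json` and may differ in your setup:

```bash
# list the tables created by the importer
psql -U postgres -d vaonline -c '\dt'
```
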
## Notes

- The total size of all datasets is > 1.5GB, so the operation will take quite some time to finish, depending on your internet connection.
- `max_old_space_size` has been set to *4096MB* so that such huge data files can be parsed/processed without any issues. The app frees the data from memory right after using it to prevent memory/heap leaks.
- The dataset for `FOREIGN ADDRESSES` doesn't have a header row in the CSV file and has a slightly different format (one extra column). The app handles all datasets without any issue.

common/logger.js

Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
'use strict';

/*
 * Copyright (C) 2018 Topcoder Inc., All Rights Reserved.
 */

/**
 * This module contains the winston logger configuration.
 */
const winston = require('winston');
const config = require('config');
const chalk = require('chalk');

const logger = new (winston.Logger)({
  transports: [
    new (winston.transports.Console)({
      level: config.logLevel,
      timestamp: () => new Date().toISOString(),
      // Custom formatter: prints `[timestamp][LEVEL] message`, plus any
      // metadata object on an indented new line.
      formatter(options) {
        const message = options.message || '';

        let meta = '';
        if (options.meta && Object.keys(options.meta).length) {
          meta = '\n\t' + JSON.stringify(options.meta);
        }

        // Colourise the level tag for easier scanning in the terminal.
        let level = options.level.toUpperCase();
        switch (level) {
          case 'INFO':
            level = chalk.cyan(level);
            break;
          case 'WARN':
            level = chalk.yellow(level);
            break;
          case 'ERROR':
            level = chalk.red(level);
            break;
          default:
            break;
        }

        return `[${options.timestamp()}][${level}] ${message} ${meta}`;
      }
    })
  ]
});

module.exports = logger;
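For reference, other modules would consume this logger roughly like this (a minimal sketch; the messages and metadata are illustrative, not from the repo):

```js
const logger = require('./common/logger');

logger.info('Datasets downloaded');
logger.error('Import failed', { dataset: 'NATIONAL', reason: 'connection refused' });
```
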
config/custom-environment-variables.json

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
{
  "dataset_url": "DATASET_URL",
  "logLevel": "LOG_LEVEL",
  "dbConfig": {
    "db_url": "DATABASE_URL"
  },
  "downloadPath": "DOWNLOAD_PATH"
}
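The mapping above follows the `config` (node-config) convention: each value names an environment variable that, when set, overrides the matching key from `config/default.json`. For example (values illustrative):

```bash
export DATABASE_URL="postgres://postgres:secret@localhost:5432/vaonline"
export LOG_LEVEL="debug"
npm run scrape
```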

config/default.json

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
{
  "dataset_url": "https://www.data.va.gov/data.json",
  "logLevel": "info",
  "dbConfig": {
    "db_url": "postgres://postgres:123456@localhost:5432/vaonline"
  },
  "downloadPath": "downloads"
}
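Note that the default `db_url` points at a local database named `vaonline`, which has to exist before the app can connect; one way to create it, assuming a stock local PostgreSQL install with the `postgres` superuser:

```bash
createdb -U postgres vaonline
```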

constants.js

Lines changed: 65 additions & 0 deletions
@@ -0,0 +1,65 @@
'use strict';

/*
 * Copyright (c) 2018 Topcoder, Inc. All rights reserved.
 */

/**
 * Application constants
 */

// The accepted program codes
const acceptedProgramCodes = [
  '029:001'
];

// The accepted keywords
const acceptedKeywords = [
  'burial data'
];

// The accepted file format
const acceptedFormat = 'csv';

// Entry names that should be ignored
const ignoredNames = [
  // Source of cemeteries data
  'VA Cemeteries - Address, Location, Contact Information, Burial Space'
];

// CSV headers
const csvHeaders = [
  'd_first_name',
  'd_mid_name',
  'd_last_name',
  'd_suffix',
  'd_birth_date',
  'd_death_date',
  'section_id',
  'row_num',
  'site_num',
  'cem_name',
  'cem_addr_one',
  'cem_addr_two',
  'city',
  'state',
  'zip',
  'cem_url',
  'cem_phone',
  'relationship',
  'v_first_name',
  'v_mid_name',
  'v_last_name',
  'v_suffix',
  'branch',
  'rank',
  'war'
];

module.exports = {
  acceptedProgramCodes,
  acceptedKeywords,
  acceptedFormat,
  ignoredNames,
  csvHeaders
};
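Because the `FOREIGN ADDRESSES` dataset ships without a header row (see the README notes), `csvHeaders` can be supplied to a CSV parser as explicit column names. A minimal sketch of that idea, assuming the `csv-parse` package; the importer itself is not shown above, so the parser choice and file path here are illustrative:

```js
const fs = require('fs');
const { parse } = require('csv-parse');
const { csvHeaders } = require('./constants');

// Map each headerless row to an object keyed by the shared column names;
// relax_column_count tolerates the extra column mentioned in the README.
fs.createReadStream('downloads/foreign-addresses.csv')
  .pipe(parse({ columns: csvHeaders, relax_column_count: true }))
  .on('data', (record) => console.log(record.d_last_name, record.cem_name));
```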

docs/DeploymentGuide.pdf

173 KB
Binary file not shown.
