Skip to content
This repository was archived by the owner on Jun 29, 2022. It is now read-only.

Commit b96df60

Browse files
committed
initial commit
0 parents  commit b96df60

File tree

8 files changed

+300
-0
lines changed

8 files changed

+300
-0
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
node_modules/
2+
config.json

.jshintrc

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
{
2+
"predef": [ ]
3+
, "bitwise": false
4+
, "camelcase": false
5+
, "curly": false
6+
, "eqeqeq": false
7+
, "forin": false
8+
, "immed": false
9+
, "latedef": false
10+
, "newcap": true
11+
, "noarg": true
12+
, "noempty": true
13+
, "nonew": true
14+
, "plusplus": false
15+
, "quotmark": true
16+
, "regexp": false
17+
, "undef": true
18+
, "unused": true
19+
, "strict": false
20+
, "trailing": true
21+
, "maxlen": 120
22+
, "asi": true
23+
, "boss": true
24+
, "debug": true
25+
, "eqnull": true
26+
, "es5": true
27+
, "esnext": true
28+
, "evil": true
29+
, "expr": true
30+
, "funcscope": false
31+
, "globalstrict": false
32+
, "iterator": false
33+
, "lastsemic": true
34+
, "laxbreak": true
35+
, "laxcomma": true
36+
, "loopfunc": true
37+
, "multistr": false
38+
, "onecase": false
39+
, "proto": false
40+
, "regexdash": false
41+
, "scripturl": true
42+
, "smarttabs": false
43+
, "shadow": false
44+
, "sub": true
45+
, "supernew": false
46+
, "validthis": true
47+
, "browser": true
48+
, "couch": false
49+
, "devel": false
50+
, "dojo": false
51+
, "mootools": false
52+
, "node": true
53+
, "nonstandard": true
54+
, "prototypejs": false
55+
, "rhino": false
56+
, "worker": true
57+
, "wsh": false
58+
, "nomen": false
59+
, "onevar": true
60+
, "passfail": false
61+
}

LICENSE

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
Copyright 2013, Rod Vagg (the "Original Author")
2+
All rights reserved.
3+
4+
MIT +no-false-attribs License
5+
6+
Permission is hereby granted, free of charge, to any person
7+
obtaining a copy of this software and associated documentation
8+
files (the "Software"), to deal in the Software without
9+
restriction, including without limitation the rights to use,
10+
copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
copies of the Software, and to permit persons to whom the
12+
Software is furnished to do so, subject to the following
13+
conditions:
14+
15+
The above copyright notice and this permission notice shall be
16+
included in all copies or substantial portions of the Software.
17+
18+
Distributions of all or part of the Software intended to be used
19+
by the recipients as they would use the unmodified Software,
20+
containing modifications that substantially alter, remove, or
21+
disable functionality of the Software, outside of the documented
22+
configuration mechanisms provided by the Software, shall be
23+
modified such that the Original Author's bug reporting email
24+
addresses and urls are either replaced with the contact information
25+
of the parties responsible for the changes, or removed entirely.
26+
27+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
29+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
31+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
32+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
33+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
34+
OTHER DEALINGS IN THE SOFTWARE.
35+
36+
37+
Except where noted, this license applies to any and all software
38+
programs and associated documentation files created by the
39+
Original Author, when distributed with the Software.

fetch-github-data.js

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
const GITHUB_USER_API_URL = 'https://api.github.com/users/{user}'
2+
3+
const request = require('request')
4+
5+
var requestPool = { maxSockets: 20 }
6+
7+
// Look up a single GitHub user through the users API.
//
// githubToken - token sent as `authorization: token <githubToken>`
// user        - GitHub login substituted into GITHUB_USER_API_URL
// callback    - callback(err, body): `err` is a descriptive string when the
//               request itself fails; otherwise `body` is whatever `request`
//               produced for the response (the HTTP status is not inspected
//               here, so API-level errors are left for the caller to spot)
function fetchGithubData (githubToken, user, callback) {

  var opts = {
          // encode the login so it stays one literal path segment; this also
          // keeps any `$` in `user` from being read as a replacement pattern
          url     : GITHUB_USER_API_URL.replace('{user}', encodeURIComponent(user))
        , headers : {
              authorization : 'token ' + githubToken
            , 'user-agent'  : 'npm user data fetcher <https://github.com/polyhack/>'
          }
        , json    : true
        , pool    : requestPool
      }

    , handle = function (err, response, body) {
        if (err)
          return callback('Error requesting user data from GitHub for ' + user + ': ' + err)

        callback(null, body)
      }

  request(opts, handle)
}
28+
29+
module.exports = fetchGithubData

filter-aussies.js

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// if your GitHub location field matches this then we'll guess you're Aussie;
// the short codes (au/aus/aust, state abbreviations) are delimited with \b so
// they also match at the very start or end of the field, e.g. "AU" or "Bendigo, VIC"
const AU_LOCATION_REGEX =
        /\bau(s|st)?\b|australia|straya|hobart|sydney|melbourne|brisbane|perth|darwin|adelaide|canberra|\b(nsw|vic|qld|new south wales|victoria|queensland|western australia|northern territory|south australia|tasmania)\b/i
      // secondary guess, does your blog URL end with .au? (".au" must close the
      // string or be followed by a port, path, query or fragment)
    , AU_BLOG_REGEX = /\.au(?:[\/?#:]|$)/i
5+
6+
// Guess whether a GitHub user record looks Australian: first by testing its
// `location` against AU_LOCATION_REGEX, then by testing its `blog` against
// AU_BLOG_REGEX. Missing records and empty fields count as "not Aussie".
function isAussie (user) {
  var matchesLocation
    , matchesBlog

  if (!user)
    return false

  matchesLocation = !!user.location && AU_LOCATION_REGEX.test(user.location)
  // the blog is only consulted when the location gave no hit
  matchesBlog = !matchesLocation && !!user.blog && AU_BLOG_REGEX.test(user.blog)

  return matchesLocation || matchesBlog
}
12+
13+
// Return a new array holding only the entries of `users` that isAussie() accepts.
function filterAussies (users) {
  return users.filter(function (user) {
    return isAussie(user)
  })
}
16+
17+
module.exports = filterAussies

index.js

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
const GITHUB_CALLS_PER_HOUR = 4500
2+
3+
const fs = require('fs')
4+
, async = require('async')
5+
, rateLimit = require('function-rate-limit')
6+
, loadNpmData = require('./load-npm-data')
7+
, filterAussies = require('./filter-aussies')
8+
, fetchGithubData = rateLimit(
9+
GITHUB_CALLS_PER_HOUR
10+
, 1000 * 60 * 60
11+
, require('./fetch-github-data')
12+
)
13+
, config = require('./config')
14+
15+
/* ./config.json needs to look like this:
16+
{
17+
"allPackagesOutput" : "/path/to/allpackages.json"
18+
, "repositoriesOutput" : "/path/to/repositories.json"
19+
, "githubOutput" : "/path/to/githubusers.json"
, "aussieOutput" : "/path/to/aussies.json"
20+
, "githubAuthToken" : "yourgithubauthtoken"
21+
}
22+
23+
where githubAuthToken can be obtained with something like:
24+
curl -i -u <your_username> -d '{"scopes": ["repo"]}' https://api.github.com/authorizations
25+
(not that it needs "repo" scope)
26+
*/
27+
28+
// Serialise `data` as 2-space-indented JSON and write it to `location`.
// Fire-and-forget: nothing is returned and no callback is taken; a failure is
// reported with console.error, a success with console.log.
function write (location, data) {
  fs.writeFile(
      location
    , JSON.stringify(data, null, 2)
    , function (err) {
        // stop here on failure so we never claim a file was written when it wasn't
        if (err)
          return console.error(err)
        console.log('Wrote', location)
      }
  )
}
39+
40+
// Collect the distinct `githubUser` values found in `repositories`, in the
// order in which each one first appears.
function githubUsers (repositories) {
  var users = []
      // prototype-less object used as a set: one keyed lookup per repository
      // instead of rescanning `users` with indexOf every time.
      // NOTE(review): keys are compared as strings; assumes githubUser is
      // always a string, which holds for values captured from a regex match
    , seen  = Object.create(null)

  repositories.forEach(function (repo) {
    var user = repo.githubUser

    if (seen[user])
      return

    seen[user] = true
    users.push(user)
  })

  return users
}
50+
51+
// Fetch the GitHub data for every login in `users` via fetchGithubData and
// pass the collected results to `callback` through async.map.
//
// A lookup that fails, returns no body, or returns a body carrying a
// `message` is logged and completed without a value rather than with an
// error, so one bad login does not abort the whole batch.
function fetchUsers (users, callback) {
  async.map(
      users
    , function (user, done) {
        fetchGithubData(config.githubAuthToken, user, function (err, data) {
          // `user` is only the login we asked for; the response is in `data`
          if (err || !data) {
            console.error(err || 'No user data for ' + user)
            return done()
          }

          // presumably GitHub's error payload ({ message: ... }), e.g. for an
          // unknown login -- verify against the API docs
          if (data.message) {
            console.error('GitHub:', data.message)
            return done()
          }

          done(null, data)
        })
      }
    , callback
  )
}
72+
73+
// Main pipeline: load the npm listing, dump the package and repository lists,
// then fetch the GitHub data for every distinct repository owner and dump both
// the full set and the subset that filterAussies keeps. Any error is printed
// and ends the run at that stage.
loadNpmData(function (npmErr, npmData) {
  var logins

  if (npmErr) {
    console.error(npmErr)
    return
  }

  write(config.allPackagesOutput, npmData.allPackages)
  write(config.repositoriesOutput, npmData.repositories)

  logins = githubUsers(npmData.repositories)

  fetchUsers(logins, function (fetchErr, githubData) {
    if (fetchErr) {
      console.error(fetchErr)
      return
    }

    write(config.githubOutput, githubData)
    write(config.aussieOutput, filterAussies(githubData))
  })
})

load-npm-data.js

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// matches a GitHub repository URL (git://, http(s):// or git@github.com:user/repo
// forms); capture 1 is the user/organisation, capture 2 the repository name with
// a trailing ".git" stripped -- dots inside the name (e.g. socket.io) are kept
const GITHUB_REPO_REGEX = /github\.com[:\/]([.\-\w]+)\/([^\/\s?#]+?)(?:\.git)?(?:[\/\s?#]|$)/
3+
4+
const npm = require('npm')
5+
6+
// Try to read a GitHub user/repo pair out of a package's `repository` entry.
//
// npmPackage - package name, copied into the result as `npmPackage`
// repo       - the package's repository entry; only a string `url` is examined
//
// Returns { githubUser, githubRepo, npmPackage } when `repo.url` matches
// GITHUB_REPO_REGEX, otherwise a falsy value (the falsy `repo` itself, false
// for a non-string url, or null when the url does not match).
function matchGitHubRepo (npmPackage, repo) {
  var found

  if (!repo)
    return repo

  if (typeof repo.url != 'string')
    return false

  found = GITHUB_REPO_REGEX.exec(repo.url)
  if (!found)
    return found

  return {
      githubUser : found[1]
    , githubRepo : found[2]
    , npmPackage : npmPackage
  }
}
17+
18+
// load the list of all npm libs with 'repo' pointing to GitHub
//
// callback(err, data) where, on success, data is
//   { repositories : one matchGitHubRepo() result per package whose repository
//                    matched GitHub
//   , allPackages  : one { name, maintainers, githubUser, githubRepo, description }
//                    record per package (githubUser/githubRepo are null when the
//                    repository did not match) }
function loadNpmData (callback) {
  var repositories = []
    , allPackages  = []

  npm.load(function (err) {
    if (err) return callback(err)

    npm.registry.get('/-/all', function (err, data) {
      if (err) return callback(err)

      Object.keys(data).forEach(function (k) {
        var pkg = data[k]
          , repo

        // the listing is expected to carry non-package bookkeeping keys as well
        // (e.g. a numeric `_updated` stamp -- TODO confirm against the registry);
        // skip anything that is not a package object instead of recording a
        // nameless package or crashing on null
        if (!pkg || typeof pkg != 'object')
          return

        repo = matchGitHubRepo(pkg.name, pkg.repository)
        if (repo)
          repositories.push(repo)

        allPackages.push({
            name        : pkg.name
          , maintainers : (pkg.maintainers || []).map(function (m) { return m.name })
          , githubUser  : repo ? repo.githubUser : null
          , githubRepo  : repo ? repo.githubRepo : null
          , description : pkg.description
        })
      })

      callback(null, { repositories: repositories, allPackages: allPackages })
    })
  })
}
47+
48+
module.exports = loadNpmData

package.json

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"name": "npm-github-data-collector",
3+
"version": "0.0.0",
4+
"description": "",
5+
"main": "index.js",
6+
"repository": "",
7+
"author": "",
8+
"license": "MIT",
9+
"dependencies": {
10+
"npm": "~1.2.18",
11+
"async": "~0.2.7",
12+
"request": "~2.20.0",
13+
"function-rate-limit": "0.0.1"
14+
},
15+
"private": true
16+
}

0 commit comments

Comments
 (0)