2
2
Reads the contributions.yaml file and updates each entry by fetching its 'source' URL.
3
3
"""
4
4
import argparse
5
- from datetime import datetime
5
+ from datetime import datetime , UTC
6
6
import pathlib
7
7
from ruamel .yaml import YAML
8
+ from multiprocessing import Pool
8
9
9
10
from parse_and_validate_properties_txt import read_properties_txt , parse_text , validate_existing
10
11
11
12
12
13
def update_contribution (contribution , props ):
13
- datetime_today = datetime .utcnow ( ).strftime ('%Y-%m-%dT%H:%M:%S%z' )
14
+ datetime_today = datetime .now ( UTC ).strftime ('%Y-%m-%dT%H:%M:%S%z' )
14
15
contribution ['lastUpdated' ] = datetime_today
15
16
if 'previousVersions' not in contribution :
16
17
contribution ['previousVersions' ] = []
@@ -29,6 +30,7 @@ def update_contribution(contribution, props):
29
30
30
31
if 'download' not in contribution :
31
32
contribution ['download' ] = contribution ['source' ][:contribution ['source' ].rfind ('.' )] + '.zip'
33
+
32
34
33
35
def log_broken (contribution , msg ):
34
36
if contribution ['status' ] == 'VALID' :
@@ -37,8 +39,10 @@ def log_broken(contribution, msg):
37
39
contribution ['log' ] = []
38
40
contribution ['log' ].append (msg )
39
41
40
- def process_contribution (contribution ):
41
- date_today = datetime .utcnow ().strftime ('%Y-%m-%d' )
42
+ def process_contribution (item ):
43
+ index , contribution = item
44
+
45
+ date_today = datetime .now (UTC ).strftime ('%Y-%m-%d' )
42
46
this_version = '0'
43
47
44
48
if contribution ['status' ] != 'DEPRECATED' :
@@ -51,16 +55,16 @@ def process_contribution(contribution):
51
55
properties_raw = read_properties_txt (contribution ['source' ])
52
56
except FileNotFoundError as e :
53
57
log_broken (contribution , f'file not found, { e } , { date_today } ' )
54
- return
58
+ return index , contribution
55
59
except Exception :
56
60
log_broken (contribution , f'url timeout, { date_today } ' )
57
- return
61
+ return index , contribution
58
62
59
63
try :
60
64
props = validate_existing (parse_text (properties_raw ))
61
65
except Exception :
62
66
log_broken (contribution , f'invalid file, { date_today } ' )
63
- return
67
+ return index , contribution
64
68
65
69
# Some library files have a lastUpdated field. This also exists in the database, but is defined
66
70
# by our scripts, so remove this field.
@@ -71,6 +75,7 @@ def process_contribution(contribution):
71
75
if props ['version' ] != this_version :
72
76
# update from online
73
77
update_contribution (contribution , props )
78
+ return index , contribution
74
79
75
80
76
81
if __name__ == "__main__" :
@@ -92,14 +97,22 @@ def process_contribution(contribution):
92
97
contributions_list = data ['contributions' ]
93
98
94
99
if index == 'all' :
95
- # update all contributions
96
- for contribution in contributions_list :
97
- process_contribution (contribution )
100
+ total = len (contributions_list )
101
+ completed = 0
102
+ print (f"Starting processing of { total } contributions..." )
103
+
104
+ with Pool (processes = 256 ) as pool :
105
+ for index , contribution in pool .imap_unordered (process_contribution , enumerate (contributions_list )):
106
+ contributions_list [index ] = contribution
107
+ completed += 1
108
+ print (f"Progress: { completed } /{ total } ({ (completed / total * 100 ):.1f} %)" )
109
+
110
+ print ("All processing complete" )
98
111
else :
99
112
# update only the contribution whose id == index
100
113
contribution = next ((x for x in contributions_list if x ['id' ] == int (index )), None )
101
114
print (contribution )
102
- process_contribution (contribution )
115
+ process_contribution (( index , contribution ) )
103
116
print (contribution )
104
117
105
118
# write all contributions to database file
0 commit comments