Skip to content

Commit bef5839

Browse files
authored
Tweaks to datetime optimization (#988)
* Tweaks to datetime optimization * explicit days/weeks for time pairs * and more validation * logging
1 parent c638cc4 commit bef5839

File tree

5 files changed

+93
-95
lines changed

5 files changed

+93
-95
lines changed

src/server/_query.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from ._exceptions import DatabaseErrorException
2323
from ._validate import DateRange, extract_strings
2424
from ._params import GeoPair, SourceSignalPair, TimePair
25-
from .utils import dates_to_ranges
25+
from .utils import time_values_to_ranges, days_to_ranges, weeks_to_ranges
2626

2727

2828
def date_string(value: int) -> str:
@@ -90,7 +90,7 @@ def filter_dates(
9090
param_key: str,
9191
params: Dict[str, Any],
9292
):
93-
ranges = dates_to_ranges(values)
93+
ranges = time_values_to_ranges(values)
9494
return filter_values(field, ranges, param_key, params, date_string)
9595

9696

@@ -187,7 +187,7 @@ def filter_pair(pair: TimePair, i) -> str:
187187
params[type_param] = pair.time_type
188188
if isinstance(pair.time_values, bool) and pair.time_values:
189189
return f"{type_field} = :{type_param}"
190-
ranges = dates_to_ranges(pair.time_values)
190+
ranges = weeks_to_ranges(pair.time_values) if pair.is_week else days_to_ranges(pair.time_values)
191191
return f"({type_field} = :{type_param} AND {filter_integers(time_field, cast(Sequence[Union[int, Tuple[int,int]]], ranges), type_param, params)})"
192192

193193
parts = [filter_pair(p, i) for i, p in enumerate(values)]

src/server/utils/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
from .dates import shift_time_value, date_to_time_value, time_value_to_iso, time_value_to_date, days_in_range, weeks_in_range, shift_week_value, week_to_time_value, week_value_to_week, guess_time_value_is_day, dates_to_ranges
1+
from .dates import shift_time_value, date_to_time_value, time_value_to_iso, time_value_to_date, days_in_range, weeks_in_range, shift_week_value, week_to_time_value, week_value_to_week, guess_time_value_is_day, time_values_to_ranges, days_to_ranges, weeks_to_ranges

src/server/utils/dates.py

Lines changed: 51 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
from typing import (
2+
Callable,
23
Optional,
34
Sequence,
45
Tuple,
56
Union
67
)
78
from datetime import date, timedelta
89
from epiweeks import Week, Year
9-
10+
import logging
1011

1112
def time_value_to_date(value: int) -> date:
1213
year, month, day = value // 10000, (value % 10000) // 100, value % 100
@@ -26,7 +27,7 @@ def week_value_to_week(value: int) -> Week:
2627

2728
def guess_time_value_is_day(value: int) -> bool:
2829
# YYYYMMDD type and not YYYYMM
29-
return len(str(value)) > 6
30+
return len(str(value)) == 8
3031

3132
def guess_time_value_is_week(value: int) -> bool:
3233
# YYYYWW type and not YYYYMMDD
@@ -77,7 +78,7 @@ def weeks_in_range(week_range: Tuple[int, int]) -> int:
7778
acc += year.totalweeks()
7879
return acc + 1 # same week should lead to 1 week that will be queried
7980

80-
def dates_to_ranges(values: Optional[Sequence[Union[Tuple[int, int], int]]]) -> Optional[Sequence[Union[Tuple[int, int], int]]]:
81+
def time_values_to_ranges(values: Optional[Sequence[Union[Tuple[int, int], int]]]) -> Optional[Sequence[Union[Tuple[int, int], int]]]:
8182
"""
8283
Converts a mixed list of dates and date ranges to an optimized list where dates are merged into ranges where possible.
8384
e.g. [20200101, 20200102, (20200101, 20200104), 20200106] -> [(20200101, 20200104), 20200106]
@@ -87,84 +88,55 @@ def dates_to_ranges(values: Optional[Sequence[Union[Tuple[int, int], int]]]) ->
8788
return values
8889

8990
# determine whether the list is of days (YYYYMMDD) or weeks (YYYYWW) based on first element
90-
try:
91-
if (isinstance(values[0], tuple) and guess_time_value_is_day(values[0][0]))\
92-
or (isinstance(values[0], int) and guess_time_value_is_day(values[0])):
93-
return days_to_ranges(values)
94-
elif (isinstance(values[0], tuple) and guess_time_value_is_week(values[0][0]))\
95-
or (isinstance(values[0], int) and guess_time_value_is_week(values[0])):
96-
return weeks_to_ranges(values)
97-
else:
98-
return values
99-
except:
91+
first_element = values[0][0] if isinstance(values[0], tuple) else values[0]
92+
if guess_time_value_is_day(first_element):
93+
return days_to_ranges(values)
94+
elif guess_time_value_is_week(first_element):
95+
return weeks_to_ranges(values)
96+
else:
10097
return values
10198

10299
def days_to_ranges(values: Sequence[Union[Tuple[int, int], int]]) -> Sequence[Union[Tuple[int, int], int]]:
103-
intervals = []
104-
105-
# populate list of intervals based on original values
106-
for v in values:
107-
if isinstance(v, int):
108-
# 20200101 -> [20200101, 20200101]
109-
intervals.append([time_value_to_date(v), time_value_to_date(v)])
110-
else: # tuple
111-
# (20200101, 20200102) -> [20200101, 20200102]
112-
intervals.append([time_value_to_date(v[0]), time_value_to_date(v[1])])
113-
114-
intervals.sort(key=lambda x: x[0])
115-
116-
# merge overlapping intervals https://leetcode.com/problems/merge-intervals/
117-
merged = []
118-
for interval in intervals:
119-
# no overlap, append the interval
120-
# caveat: we subtract 1 from interval[0] so that contiguous intervals are considered overlapping. i.e. [1, 1], [2, 2] -> [1, 2]
121-
if not merged or merged[-1][1] < interval[0] - timedelta(days=1):
122-
merged.append(interval)
123-
# overlap, merge the current and previous intervals
124-
else:
125-
merged[-1][1] = max(merged[-1][1], interval[1])
126-
127-
# convert intervals from dates back to integers
128-
ranges = []
129-
for m in merged:
130-
if m[0] == m[1]:
131-
ranges.append(date_to_time_value(m[0]))
132-
else:
133-
ranges.append((date_to_time_value(m[0]), date_to_time_value(m[1])))
134-
135-
return ranges
100+
return _to_ranges(values, time_value_to_date, date_to_time_value, timedelta(days=1))
136101

137102
def weeks_to_ranges(values: Sequence[Union[Tuple[int, int], int]]) -> Sequence[Union[Tuple[int, int], int]]:
138-
intervals = []
139-
140-
# populate list of intervals based on original values
141-
for v in values:
142-
if isinstance(v, int):
143-
# 202001 -> [202001, 202001]
144-
intervals.append([week_value_to_week(v), week_value_to_week(v)])
145-
else: # tuple
146-
# (202001, 202002) -> [202001, 202002]
147-
intervals.append([week_value_to_week(v[0]), week_value_to_week(v[1])])
148-
149-
intervals.sort(key=lambda x: x[0])
150-
151-
# merge overlapping intervals https://leetcode.com/problems/merge-intervals/
152-
merged = []
153-
for interval in intervals:
154-
# no overlap, append the interval
155-
# caveat: we subtract 1 from interval[0] so that contiguous intervals are considered overlapping. i.e. [1, 1], [2, 2] -> [1, 2]
156-
if not merged or merged[-1][1] < interval[0] - 1:
157-
merged.append(interval)
158-
# overlap, merge the current and previous intervals
159-
else:
160-
merged[-1][1] = max(merged[-1][1], interval[1])
161-
162-
# convert intervals from weeks back to integers
163-
ranges = []
164-
for m in merged:
165-
if m[0] == m[1]:
166-
ranges.append(week_to_time_value(m[0]))
167-
else:
168-
ranges.append((week_to_time_value(m[0]), week_to_time_value(m[1])))
169-
170-
return ranges
103+
return _to_ranges(values, week_value_to_week, week_to_time_value, 1)
104+
105+
def _to_ranges(values: Sequence[Union[Tuple[int, int], int]], value_to_date: Callable, date_to_value: Callable, time_unit: Union[int, timedelta]) -> Sequence[Union[Tuple[int, int], int]]:
    """
    Merge a mixed list of single time values and (start, end) ranges into the
    smallest equivalent list of values/ranges.

    Overlapping AND directly adjacent intervals are joined, duplicates are dropped,
    and the result is sorted by start.
    e.g. [1, 2, (1, 4), 6] -> [(1, 4), 6]

    :param values: integer time values and/or (start, end) tuples of integer time values
    :param value_to_date: converts an integer time value into an orderable time object
    :param date_to_value: inverse of `value_to_date` (time object -> integer time value)
    :param time_unit: the distance between two adjacent time objects; it is subtracted
        from an interval's start to detect contiguity, so it must be a valid right-hand
        operand for `-` on whatever `value_to_date` returns
    :return: the merged list; a collapsed interval whose start equals its end is emitted
        as a single integer. If anything goes wrong while converting or merging (e.g. a
        value that is not a valid date), the problem is logged and the ORIGINAL `values`
        is returned untouched (best-effort optimization, never a hard failure).
    """
    try:
        # populate list of [start, end] intervals based on original date/week values
        intervals = []
        for v in values:
            if isinstance(v, int):
                # 20200101 -> [20200101, 20200101]
                point = value_to_date(v)
                intervals.append([point, point])
            else:  # tuple
                # (20200101, 20200102) -> [20200101, 20200102]
                intervals.append([value_to_date(v[0]), value_to_date(v[1])])

        # sorting by (start, end) lets a single left-to-right pass do the merging
        intervals.sort()

        # merge overlapping intervals https://leetcode.com/problems/merge-intervals/
        merged = []
        for interval in intervals:
            # no overlap, append the interval
            # caveat: we subtract one time unit from interval[0] so that contiguous
            # intervals are considered overlapping. i.e. [1, 1], [2, 2] -> [1, 2]
            if not merged or merged[-1][1] < interval[0] - time_unit:
                merged.append(interval)
            # overlap, merge the current and previous intervals
            else:
                merged[-1][1] = max(merged[-1][1], interval[1])

        # convert intervals from dates/weeks back to integers
        return [
            date_to_value(start) if start == end else (date_to_value(start), date_to_value(end))
            for start, end in merged
        ]
    except Exception as e:
        # NOTE: the stdlib logging functions reject arbitrary keyword arguments such as
        # `input=` / `exception=` (TypeError once INFO is enabled), which would defeat this
        # fallback; pass the details as lazy %-style arguments instead.
        logging.info('bad input to date ranges: input=%s, exception=%s', values, e)
        return values

tests/server/test_query.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,3 +278,24 @@ def test_filter_time_pairs(self):
278278
"((t = :p_0t AND (v BETWEEN :p_0t_0 AND :p_0t_0_2)))",
279279
)
280280
self.assertEqual(params, {"p_0t": "day", "p_0t_0": 20201201, "p_0t_0_2": 20201203})
281+
with self.subTest("dedupe"):
282+
params = {}
283+
self.assertEqual(
284+
filter_time_pairs("t", "v", [TimePair("day", [20200101, 20200101, (20200101, 20200101), 20200101])], "p", params),
285+
"((t = :p_0t AND (v = :p_0t_0)))",
286+
)
287+
self.assertEqual(params, {"p_0t": "day", "p_0t_0": 20200101})
288+
with self.subTest("merge single range"):
289+
params = {}
290+
self.assertEqual(
291+
filter_time_pairs("t", "v", [TimePair("day", [20200101, 20200102, (20200101, 20200104)])], "p", params),
292+
"((t = :p_0t AND (v BETWEEN :p_0t_0 AND :p_0t_0_2)))",
293+
)
294+
self.assertEqual(params, {"p_0t": "day", "p_0t_0": 20200101, "p_0t_0_2": 20200104})
295+
with self.subTest("merge ranges and singles"):
296+
params = {}
297+
self.assertEqual(
298+
filter_time_pairs("t", "v", [TimePair("day", [20200101, 20200103, (20200105, 20200107)])], "p", params),
299+
"((t = :p_0t AND (v = :p_0t_0 OR v = :p_0t_1 OR v BETWEEN :p_0t_2 AND :p_0t_2_2)))",
300+
)
301+
self.assertEqual(params, {"p_0t": "day", "p_0t_0": 20200101, "p_0t_1": 20200103, 'p_0t_2': 20200105, 'p_0t_2_2': 20200107})

tests/server/utils/test_dates.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from datetime import date
33
from epiweeks import Week
44

5-
from delphi.epidata.server.utils.dates import time_value_to_date, date_to_time_value, shift_time_value, time_value_to_iso, days_in_range, weeks_in_range, week_to_time_value, week_value_to_week, dates_to_ranges
5+
from delphi.epidata.server.utils.dates import time_value_to_date, date_to_time_value, shift_time_value, time_value_to_iso, days_in_range, weeks_in_range, week_to_time_value, week_value_to_week, time_values_to_ranges
66

77

88
class UnitTests(unittest.TestCase):
@@ -41,16 +41,21 @@ def test_week_to_time_value(self):
4141
self.assertEqual(week_to_time_value(Week(2021, 1)), 202101)
4242
self.assertEqual(week_to_time_value(Week(2020, 42)), 202042)
4343

44-
def test_dates_to_ranges(self):
45-
self.assertEqual(dates_to_ranges(None), None)
46-
self.assertEqual(dates_to_ranges([]), [])
44+
def test_time_values_to_ranges(self):
45+
self.assertEqual(time_values_to_ranges(None), None)
46+
self.assertEqual(time_values_to_ranges([]), [])
4747
# days
48-
self.assertEqual(dates_to_ranges([20200101]), [20200101])
49-
self.assertEqual(dates_to_ranges([(20200101, 20200105)]), [(20200101, 20200105)])
50-
self.assertEqual(dates_to_ranges([20211231, (20211230, 20220102), 20220102]), [(20211230, 20220102)])
51-
self.assertEqual(dates_to_ranges([20200101, 20200102, (20200101, 20200104), 20200106]), [(20200101, 20200104), 20200106])
48+
self.assertEqual(time_values_to_ranges([20200101]), [20200101])
49+
self.assertEqual(time_values_to_ranges([(20200101, 20200105)]), [(20200101, 20200105)])
50+
self.assertEqual(time_values_to_ranges([20211231, (20211230, 20220102), 20220102]), [(20211230, 20220102)])
51+
self.assertEqual(time_values_to_ranges([20200101, 20200102, (20200101, 20200104), 20200106]), [(20200101, 20200104), 20200106])
5252
# weeks
53-
self.assertEqual(dates_to_ranges([202001]), [202001])
54-
self.assertEqual(dates_to_ranges([(202001, 202005)]), [(202001, 202005)])
55-
self.assertEqual(dates_to_ranges([202051, (202050, 202102), 202101]), [(202050, 202102)])
56-
self.assertEqual(dates_to_ranges([202050, 202051, (202050, 202101), 202103]), [(202050, 202101), 202103])
53+
self.assertEqual(time_values_to_ranges([202001]), [202001])
54+
self.assertEqual(time_values_to_ranges([(202001, 202005)]), [(202001, 202005)])
55+
self.assertEqual(time_values_to_ranges([202051, (202050, 202102), 202101]), [(202050, 202102)])
56+
self.assertEqual(time_values_to_ranges([202050, 202051, (202050, 202101), 202103]), [(202050, 202101), 202103])
57+
# non-contiguous integers that represent actually contiguous time objects should join to become a range:
58+
self.assertEqual(time_values_to_ranges([20200228, 20200301]), [20200228, 20200301]) # this is NOT a range because 2020 was a leap year
59+
self.assertEqual(time_values_to_ranges([20210228, 20210301]), [(20210228, 20210301)]) # this becomes a range because these dates are indeed consecutive
60+
# individual weeks become a range (2020 is a rare year with 53 weeks)
61+
self.assertEqual(time_values_to_ranges([202051, 202052, 202053, 202101, 202102]), [(202051, 202102)])

0 commit comments

Comments
 (0)