1
1
import json
2
2
import logging
3
+ from collections import Counter
3
4
from copy import deepcopy
4
5
from datetime import datetime , timedelta
5
6
@@ -70,7 +71,9 @@ def __init__(self, db_name='metric'):
70
71
71
72
def create_database (self ):
72
73
""" creates connection to elasticsearch """
73
- connections .create_connection (hosts = [f"{ TIMESERIES_DB ['HOST' ]} :{ TIMESERIES_DB ['PORT' ]} " ])
74
+ connections .create_connection (
75
+ hosts = [f"{ TIMESERIES_DB ['HOST' ]} :{ TIMESERIES_DB ['PORT' ]} " ]
76
+ )
74
77
db = self .get_db
75
78
# Skip if support for Index Lifecycle Management is disabled or no privileges
76
79
self .ilm_enabled = db .ilm .start ()['acknowledged' ]
@@ -122,15 +125,16 @@ def create_or_alter_retention_policy(self, name, duration=None):
122
125
ilm .put_lifecycle (policy = name , body = policy )
123
126
124
127
def query(self, query, precision=None):
    """
    Execute the raw Elasticsearch query dict and return the response
    as a plain dict.

    The internal ``summary`` flag is stripped from the query before
    execution because it is not part of the Elasticsearch DSL.
    ``precision`` is currently unused — kept for interface
    compatibility with other timeseries backends.
    """
    # pop with a default replaces the membership-test-then-pop pattern
    # (single lookup, same behavior)
    query.pop('summary', None)
    return Search(using=self.get_db).from_dict(query).execute().to_dict()
127
131
128
132
def write(self, name, values, **kwargs):
    """
    Append a single point holding ``values`` to the metric identified
    by ``name`` and the optional ``tags`` / ``retention_policy``
    keyword arguments.

    ``timestamp`` defaults to the current time when not supplied.
    """
    retention_policy = kwargs.get('retention_policy')
    tags = kwargs.get('tags')
    timestamp = kwargs.get('timestamp')
    # find_metric creates the metric document when missing (add=True)
    # and returns both its id and the index it lives in
    metric_id, index = find_metric(self.get_db, name, tags, retention_policy, add=True)
    document = MetricIndex.get(metric_id, index=index, using=self.get_db)
    document.points.append(
        Point(fields=values, time=timestamp or datetime.now())
    )
    document.save()
@@ -140,10 +144,11 @@ def read(self, key, fields, tags, limit=1, order='-time', **kwargs):
140
144
time_format = kwargs .get ('time_format' )
141
145
# TODO: It will be of the form 'now() - <int>s'
142
146
# since = kwargs.get('since')
143
- metric_id = find_metric (self .get_db , key , tags )
144
- if not metric_id :
147
+ try :
148
+ metric_id , index = find_metric (self .get_db , key , tags )
149
+ except TypeError :
145
150
return []
146
- metric_index = MetricIndex .get (index = key , id = metric_id , using = self .get_db )
151
+ metric_index = MetricIndex .get (index = index , id = metric_id , using = self .get_db )
147
152
if order == 'time' :
148
153
points = list (metric_index .points [0 :limit ])
149
154
elif order == '-time' :
@@ -195,26 +200,31 @@ def _format_time(self, obj, time_format=None):
195
200
196
201
def get_list_query(self, query, precision='s'):
    """
    Run ``query`` and return its time-bucketed aggregation results as a
    list of formatted, gap-filled points.

    Returns an empty list when the response does not contain the
    expected ``GroupByTime -> set_range -> time -> buckets`` structure.
    """
    response = self.query(query, precision)
    # NOTE: the try also covers _format/_fill_points, so a KeyError
    # raised there yields [] as well (same as the original behavior)
    try:
        buckets = response['aggregations']['GroupByTime']['set_range']['time'][
            'buckets'
        ]
        return self._fill_points(
            query, [self._format(bucket) for bucket in buckets]
        )
    except KeyError:
        return []
203
213
204
214
def _fill_points (self , query , points ):
205
- _range = next (
206
- (item for item in query ['query' ]['bool' ]['must' ] if item .get ('range' )), None
207
- )
215
+ _range = query ['aggs' ]['GroupByTime' ]['nested' ]['aggs' ]['set_range' ]
208
216
if not _range or not points :
209
217
return points
210
- days = int (_range ['range' ]['points.time' ]['from' ][4 :- 3 ])
218
+ days = int (_range ['filter' ]['range' ]['points.time' ]['from' ][4 :- 3 ])
219
+ interval = _range ['aggs' ]['time' ]['date_histogram' ]['fixed_interval' ]
220
+ # Check if summary query
221
+ if f'{ days } d' == interval :
222
+ return points
223
+ interval_dict = {'10m' : 600 , '20m' : 1200 , '1h' : 3600 , '24h' : 86400 }
224
+ interval = interval_dict [interval ]
211
225
start_time = datetime .now ()
212
226
end_time = start_time - timedelta (days = days ) # include today
213
227
dummy_point = deepcopy (points [0 ])
214
- if len (points ) > 2 :
215
- interval = points [0 ]['time' ] - points [1 ]['time' ]
216
- else :
217
- interval = 600
218
228
start_ts = points [0 ]['time' ] + interval
219
229
end_ts = points [- 1 ]['time' ] - interval
220
230
for field in dummy_point .keys ():
@@ -223,7 +233,7 @@ def _fill_points(self, query, points):
223
233
dummy_point ['time' ] = start_ts
224
234
points .insert (0 , deepcopy (dummy_point ))
225
235
start_ts += interval
226
- # TODO: This needs to be fixed and shouldn't be required since intervals are set
236
+ # TODO: Why is this required since intervals are set?
227
237
while points [- 1 ]['time' ] < end_time .timestamp ():
228
238
points .pop (- 1 )
229
239
while end_ts > end_time .timestamp ():
@@ -238,8 +248,8 @@ def delete_metric_data(self, key=None, tags=None):
238
248
deletes all metrics if neither provided
239
249
"""
240
250
if key and tags :
241
- metric_id = find_metric (self .get_db , key , tags )
242
- self .get_db .delete (index = key , id = metric_id )
251
+ metric_id , index = find_metric (self .get_db , key , tags )
252
+ self .get_db .delete (index = index , id = metric_id )
243
253
elif key :
244
254
self .get_db .indices .delete (index = key , ignore = [400 , 404 ])
245
255
else :
@@ -252,14 +262,19 @@ def validate_query(self, query):
252
262
query = json .loads (query )
253
263
# Elasticsearch currently supports validation of only query section,
254
264
# aggs, size, _source etc. are not supported
255
- valid_check = self .get_db .indices .validate_query (body = {'query' : query ['query' ]}, explain = True )
265
+ valid_check = self .get_db .indices .validate_query (
266
+ body = {'query' : query ['query' ]}, explain = True
267
+ )
256
268
# Show a helpful message for failure
257
269
if not valid_check ['valid' ]:
258
- raise ValidationError (valid_check ['explanations ' ])
270
+ raise ValidationError (valid_check ['error ' ])
259
271
return self ._is_aggregate (query )
260
272
273
+ # TODO: This is not covering everything
261
274
def _is_aggregate (self , q ):
262
- agg_dict = q ['aggs' ]['GroupByTime' ]['aggs' ].values ()
275
+ agg_dict = q ['aggs' ]['GroupByTime' ]['nested' ]['aggs' ]['set_range' ]['aggs' ][
276
+ 'time'
277
+ ]['aggs' ]['nest' ]['nested' ]['aggs' ].values ()
263
278
agg = []
264
279
for item in agg_dict :
265
280
agg .append (next (iter (item )))
@@ -276,53 +291,88 @@ def get_query(
276
291
query = None ,
277
292
timezone = settings .TIME_ZONE ,
278
293
):
279
- query ['key' ] = params .pop ('key' )
280
294
query = json .dumps (query )
281
295
for k , v in params .items ():
282
296
query = query .replace ('{' + k + '}' , v )
283
297
query = self ._group_by (query , time , chart_type , group_map , strip = summary )
284
298
query = json .loads (query )
285
- if summary :
286
- _range = next (
287
- (item for item in query ['query' ]['bool' ]['must' ] if item .get ('range' )),
288
- None ,
299
+ set_range = query ['aggs' ]['GroupByTime' ]['nested' ]['aggs' ]['set_range' ]['aggs' ]['time' ]
300
+ if fields :
301
+ aggregate_dict = set_range ['aggs' ]['nest' ]['nested' ]['aggs' ]
302
+ agg = deepcopy (aggregate_dict ).popitem ()[1 ].popitem ()[0 ]
303
+ aggregate_dict .update (
304
+ {
305
+ f'{ field } ' : {agg : {'field' : f'points.fields.{ field } ' }}
306
+ for field in fields
307
+ }
289
308
)
290
- if _range :
291
- query ['query' ]['bool' ]['must' ].remove (_range )
292
- query ['aggs' ]['GroupByTime' ]['date_histogram' ]['time_zone' ] = timezone
309
+ try :
310
+ set_range ['date_histogram' ]['time_zone' ] = timezone
311
+ except KeyError :
312
+ pass
293
313
return query
294
314
295
315
def _group_by (self , query , time , chart_type , group_map , strip = False ):
296
- if not self .validate_query (query ):
297
- return query
316
+ query = query .replace ('1d/d' , f'{ time } /d' )
298
317
if not strip and not chart_type == 'histogram' :
299
318
value = group_map [time ]
300
- query = query .replace ('1d/d' , f'{ time } /d' )
301
319
query = query .replace ('10m' , value )
302
320
if strip :
303
321
query = query .replace ('10m' , time )
304
322
return query
305
323
306
- # TODO:
307
324
def _get_top_fields(
    self,
    params,
    chart_type,
    group_map,
    number,
    time,
    query=None,
    timezone=settings.TIME_ZONE,
    get_fields=True,
    **kwargs,
):
    """
    Return the top ``number`` fields of the index named by
    ``params['key']``, ranked by their aggregated value over ``time``.

    Returns a list of field names when ``get_fields`` is ``True``
    (default), otherwise a single-element list of points containing
    ``time`` plus the top fields and their values.
    """
    # Discover all field names from the index mapping
    # (points.fields.* properties). Iterating the dict directly yields
    # its keys — no need to unpack unused values from .items().
    mapping = self.get_db.indices.get_mapping(index=params['key'])
    fields = list(
        list(mapping.values())[0]['mappings']['properties']['points'][
            'properties'
        ]['fields']['properties']
    )
    query = self.get_query(
        chart_type,
        params,
        time,
        group_map,
        summary=True,
        fields=fields,
        query=query,
        timezone=timezone,
    )
    # summary=True yields a single point aggregating the whole period
    point = self.get_list_query(query)[0]
    time_value = point.pop('time')
    top = Counter(point).most_common(number)
    if get_fields:
        return [field for field, _ in top]
    result = {'time': time_value}
    result.update(dict(top))
    return [result]
318
366
319
367
def _format(self, point):
    """
    Flatten one aggregation bucket into ``{'time': ..., field: value}``.

    ``point['key']`` is the bucket timestamp in milliseconds; it is
    converted to seconds precision. Nested dicts two levels deep with a
    ``value`` entry are collected via ``_transform_field``.
    """
    formatted = {'time': int(point['key'] / 1000)}
    for outer in point.values():
        if not isinstance(outer, dict):
            continue
        for name, inner in outer.items():
            if isinstance(inner, dict):
                formatted[name] = self._transform_field(name, inner['value'])
    return formatted
327
377
328
378
def _transform_field (self , field , value ):
@@ -338,12 +388,10 @@ def _transform_field(self, field, value):
338
388
def default_chart_query(self, tags):
    """
    Return a deep copy of the module-level default chart query,
    dropping the tag filter clauses when no ``tags`` are given.
    """
    query = deepcopy(default_chart_query)
    if not tags:
        must = query['query']['nested']['query']['bool']['must']
        # NOTE(review): pop(0) shifts the remaining items down, so the
        # following pop(1) removes what was originally the third
        # clause — confirm this index pair is intentional
        must.pop(0)
        must.pop(1)
    return query
344
394
345
-
346
- # TODO:
347
- # Fix Average - currently it's computing average over all fields!
348
- # Time Interval - fix range
349
- # Device query
395
def _device_data(self, key, tags, fields, **kwargs):
    """Return the last snapshot of ``device_data``."""
    return self.read(
        key=key, fields=fields, tags=tags, time_format='isoformat'
    )
0 commit comments