@@ -75,6 +75,7 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
75
75
76
76
require 'logstash/inputs/elasticsearch/paginated_search'
77
77
require 'logstash/inputs/elasticsearch/aggregation'
78
+ require 'logstash/inputs/elasticsearch/cursor_tracker'
78
79
79
80
include LogStash ::PluginMixins ::ECSCompatibilitySupport ( :disabled , :v1 , :v8 => :v1 )
80
81
include LogStash ::PluginMixins ::ECSCompatibilitySupport ::TargetCheck
@@ -126,6 +127,22 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
126
127
# by this pipeline input.
127
128
config :slices , :validate => :number
128
129
130
+ # Enable tracking the value of a given field to be used as a cursor
131
+ # TODO: main concerns
132
+ # * schedule overlap needs to be disabled (hardcoded as enabled)
133
+ # * using anything other than _event.timestamp easily leads to data loss
134
+ # * the first "synchronization" run can take a long time
135
+ # * checkpointing is only safe to do after each run (not per document)
136
+ config :tracking_field , :validate => :string
137
+
138
+ # Define the initial seed value of the tracking_field
139
+ config :tracking_field_seed , :validate => :string
140
+
141
+ # The location of where the tracking field value will be stored
142
+ # The value is persisted after each scheduled run (and not per result)
143
+ # If it's not set it defaults to '${path.data}/plugins/inputs/elasticsearch/last_run_value'
144
+ config :last_run_metadata_path , :validate => :string
145
+
129
146
# If set, include Elasticsearch document information such as index, type, and
130
147
# the id in the event.
131
148
#
@@ -261,6 +278,10 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
261
278
# exactly once.
262
279
config :schedule , :validate => :string
263
280
281
# Allow scheduled runs to overlap (enabled by default). Setting to false will
# only start a new scheduled run after the previous one completes.
#
# Validated as a boolean because the value is handed to the scheduler as its
# :overlap option: a string such as "false" is truthy in Ruby and never equals
# `false`, so a string-typed setting could not turn overlapping off.
config :schedule_overlap, :validate => :boolean, :default => true
284
+
264
285
# If set, the _source of each hit will be added nested under the target instead of at the top-level
265
286
config :target , :validate => :field_reference
266
287
@@ -331,18 +352,30 @@ def register
331
352
332
353
setup_query_executor
333
354
355
+ setup_cursor_tracker
356
+
334
357
@client
335
358
end
336
359
337
360
# Executes the query and checkpoints the cursor, either once or on every
# tick of the configured cron schedule.
#
# @param output_queue the queue handed to the query executor for emitted events
def run(output_queue)
  # No schedule configured: a single run followed by a single checkpoint.
  unless @schedule
    @query_executor.do_run(output_queue, get_query_object)
    return @cursor_tracker.checkpoint_cursor
  end

  # The query object is rebuilt on every tick so each run sees the cursor
  # as it stands when that run starts.
  scheduler.cron(@schedule, :overlap => @schedule_overlap) do
    @query_executor.do_run(output_queue, get_query_object)
    @cursor_tracker.checkpoint_cursor
  end
  scheduler.join
end
345
372
373
# Builds the query object for the next run: the cursor tracker injects its
# cursor into @query, the injected query is logged at debug level, and the
# result is parsed with LogStash::Json.
#
# @return whatever LogStash::Json.load returns for the injected query
def get_query_object
  injected_query = @cursor_tracker.inject_cursor(@query)
  @logger.debug("new query is #{injected_query} ")
  # The parsed query is the return value; no throwaway local is needed.
  LogStash::Json.load(injected_query)
end
378
+
346
379
##
347
380
# This can be called externally from the query_executor
348
381
public
@@ -351,6 +384,7 @@ def push_hit(hit, output_queue, root_field = '_source')
351
384
set_docinfo_fields ( hit , event ) if @docinfo
352
385
decorate ( event )
353
386
output_queue << event
387
+ @cursor_tracker . record_last_value ( event )
354
388
end
355
389
356
390
def set_docinfo_fields ( hit , event )
@@ -664,6 +698,17 @@ def setup_query_executor
664
698
end
665
699
end
666
700
701
# Chooses the cursor tracker for this input: a CursorTracker when a
# tracking field is configured, otherwise a NoopCursorTracker.
def setup_cursor_tracker
  @cursor_tracker =
    if @tracking_field
      # Without an explicit seed, the current UTC time (ISO8601) is used.
      @tracking_field_seed ||= Time.now.utc.iso8601
      CursorTracker.new(last_run_metadata_path: @last_run_metadata_path,
                        tracking_field: @tracking_field,
                        tracking_field_seed: @tracking_field_seed)
    else
      NoopCursorTracker.new
    end
end
711
+
667
712
module URIOrEmptyValidator
668
713
##
669
714
# @override to provide :uri_or_empty validator
0 commit comments