@@ -20,7 +20,7 @@ use crate::event::error::EventError;
20
20
use crate :: handlers:: http:: fetch_schema;
21
21
use actix_web:: http:: header:: ContentType ;
22
22
use actix_web:: web:: { self , Json } ;
23
- use actix_web:: { FromRequest , HttpRequest , HttpResponse , Responder } ;
23
+ use actix_web:: { Either , FromRequest , HttpRequest , HttpResponse , Responder } ;
24
24
use bytes:: Bytes ;
25
25
use chrono:: { DateTime , Utc } ;
26
26
use datafusion:: common:: tree_node:: TreeNode ;
@@ -43,7 +43,7 @@ use crate::metrics::QUERY_EXECUTE_TIME;
43
43
use crate :: option:: Mode ;
44
44
use crate :: parseable:: { StreamNotFound , PARSEABLE } ;
45
45
use crate :: query:: error:: ExecuteError ;
46
- use crate :: query:: { execute, execute_stream , CountsRequest , CountsResponse , Query as LogicalQuery } ;
46
+ use crate :: query:: { execute, CountsRequest , CountsResponse , Query as LogicalQuery } ;
47
47
use crate :: query:: { TableScanVisitor , QUERY_SESSION } ;
48
48
use crate :: rbac:: Users ;
49
49
use crate :: response:: QueryResponse ;
@@ -104,17 +104,38 @@ pub async fn query(req: HttpRequest, query_request: Query) -> Result<HttpRespons
104
104
105
105
let time = Instant :: now ( ) ;
106
106
107
+ // if the query is `select count(*) from <dataset>`
108
+ // we use the `get_bin_density` method to get the count of records in the dataset
109
+ // instead of executing the query using datafusion
107
110
if let Some ( column_name) = query. is_logical_plan_count_without_filters ( ) {
108
111
return handle_count_query ( & query_request, & table_name, column_name, time) . await ;
109
112
}
110
113
114
+ // if the query request has streaming = false (default)
115
+ // we call `execute` without streaming and expect all records back as batches
111
116
if !query_request. streaming {
112
117
return handle_non_streaming_query ( query, & table_name, & query_request, time) . await ;
113
118
}
114
119
120
+ // if the query request has streaming = true
121
+ // we call `execute` with streaming enabled and expect the records back as a stream
115
122
handle_streaming_query ( query, & table_name, & query_request, time) . await
116
123
}
117
124
125
+ /// Handles count queries (e.g., `SELECT COUNT(*) FROM <dataset-name>`)
126
+ ///
127
+ /// Instead of executing the query through DataFusion, this function uses the
128
+ /// `CountsRequest::get_bin_density` method to quickly retrieve the count of records
129
+ /// in the specified dataset and time range.
130
+ ///
131
+ /// # Arguments
132
+ /// - `query_request`: The original query request from the client.
133
+ /// - `table_name`: The name of the table/dataset to count records in.
134
+ /// - `column_name`: The column being counted (usually `*`).
135
+ /// - `time`: The timer for measuring query execution time.
136
+ ///
137
+ /// # Returns
138
+ /// - `HttpResponse` with the count result as JSON, including fields if requested.
118
139
async fn handle_count_query (
119
140
query_request : & Query ,
120
141
table_name : & str ,
@@ -150,13 +171,35 @@ async fn handle_count_query(
150
171
. json ( response) )
151
172
}
152
173
174
+ /// Handles standard (non-streaming) queries, returning all results in a single JSON response.
175
+ ///
176
+ /// Executes the logical query using DataFusion's batch execution, collects all results,
177
+ /// and serializes them into a single JSON object. The response includes the records,
178
+ /// field names, and other metadata as specified in the query request.
179
+ ///
180
+ /// # Arguments
181
+ /// - `query`: The logical query to execute.
182
+ /// - `table_name`: The name of the table/dataset being queried.
183
+ /// - `query_request`: The original query request from the client.
184
+ /// - `time`: The timer for measuring query execution time.
185
+ ///
186
+ /// # Returns
187
+ /// - `HttpResponse` with the full query result as a JSON object.
153
188
async fn handle_non_streaming_query (
154
189
query : LogicalQuery ,
155
190
table_name : & str ,
156
191
query_request : & Query ,
157
192
time : Instant ,
158
193
) -> Result < HttpResponse , QueryError > {
159
- let ( records, fields) = execute ( query, table_name) . await ?;
194
+ let ( records, fields) = execute ( query, table_name, query_request. streaming ) . await ?;
195
+ let records = match records {
196
+ Either :: Left ( rbs) => rbs,
197
+ Either :: Right ( _) => {
198
+ return Err ( QueryError :: MalformedQuery (
199
+ "Expected batch results, got stream" ,
200
+ ) )
201
+ }
202
+ } ;
160
203
let total_time = format ! ( "{:?}" , time. elapsed( ) ) ;
161
204
let time = time. elapsed ( ) . as_secs_f64 ( ) ;
162
205
@@ -175,13 +218,36 @@ async fn handle_non_streaming_query(
175
218
. json ( response) )
176
219
}
177
220
221
+ /// Handles streaming queries, returning results as newline-delimited JSON (NDJSON).
222
+ ///
223
+ /// Executes the logical query using DataFusion's streaming execution. If the `fields`
224
+ /// flag is set, the first chunk of the response contains the field names as a JSON object.
225
+ /// Each subsequent chunk contains a record batch as a JSON object, separated by newlines.
226
+ /// This allows clients to start processing results before the entire query completes.
227
+ ///
228
+ /// # Arguments
229
+ /// - `query`: The logical query to execute.
230
+ /// - `table_name`: The name of the table/dataset being queried.
231
+ /// - `query_request`: The original query request from the client.
232
+ /// - `time`: The timer for measuring query execution time.
233
+ ///
234
+ /// # Returns
235
+ /// - `HttpResponse` streaming the query results as NDJSON, optionally prefixed with a JSON object containing the field names.
178
236
async fn handle_streaming_query (
179
237
query : LogicalQuery ,
180
238
table_name : & str ,
181
239
query_request : & Query ,
182
240
time : Instant ,
183
241
) -> Result < HttpResponse , QueryError > {
184
- let ( records_stream, fields) = execute_stream ( query, table_name) . await ?;
242
+ let ( records_stream, fields) = execute ( query, table_name, query_request. streaming ) . await ?;
243
+ let records_stream = match records_stream {
244
+ Either :: Left ( _) => {
245
+ return Err ( QueryError :: MalformedQuery (
246
+ "Expected stream results, got batch" ,
247
+ ) )
248
+ }
249
+ Either :: Right ( stream) => stream,
250
+ } ;
185
251
let fields = fields. clone ( ) ;
186
252
let total_time = format ! ( "{:?}" , time. elapsed( ) ) ;
187
253
let time = time. elapsed ( ) . as_secs_f64 ( ) ;
@@ -193,7 +259,7 @@ async fn handle_streaming_query(
193
259
let with_fields = query_request. fields ;
194
260
195
261
let stream = if with_fields {
196
- // send the fields as an initial chunk
262
+ // send the fields json as an initial chunk
197
263
let fields_json = serde_json:: json!( {
198
264
"fields" : fields
199
265
} )
0 commit comments