Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/mito2/src/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,19 @@ impl CacheManager {
}
}

/// Returns the total weighted size of the in-memory SST meta cache.
///
/// Yields 0 when the SST meta cache is not enabled.
pub(crate) fn sst_meta_cache_weighted_size(&self) -> u64 {
    // `map_or` folds the disabled-cache case into a single expression.
    self.sst_meta_cache
        .as_ref()
        .map_or(0, |cache| cache.weighted_size())
}

/// Returns true if the in-memory SST meta cache is enabled.
pub(crate) fn sst_meta_cache_enabled(&self) -> bool {
    // Enabled iff the optional cache was configured at construction time.
    matches!(self.sst_meta_cache, Some(_))
}

/// Gets a vector with repeated value for specific `key`.
pub fn get_repeated_vector(
&self,
Expand Down
44 changes: 44 additions & 0 deletions src/mito2/src/cache/index/result_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ pub enum PredicateKey {
Bloom(BloomFilterKey),
/// Inverted index predicate.
Inverted(InvertedIndexKey),
/// Min-max pruning predicate.
MinMax(MinMaxKey),
}

impl PredicateKey {
Expand All @@ -134,12 +136,18 @@ impl PredicateKey {
Self::Inverted(InvertedIndexKey::new(predicates))
}

/// Creates a new min-max pruning key.
pub fn new_minmax(exprs: Arc<Vec<String>>, schema_version: u64, skip_fields: bool) -> Self {
    let key = MinMaxKey::new(exprs, schema_version, skip_fields);
    Self::MinMax(key)
}

/// Returns the memory usage of this key.
///
/// Each variant carries a precomputed `mem_usage`, so this is a cheap
/// field read regardless of predicate size.
pub fn mem_usage(&self) -> usize {
    match self {
        Self::Fulltext(k) => k.mem_usage,
        Self::Bloom(k) => k.mem_usage,
        Self::Inverted(k) => k.mem_usage,
        Self::MinMax(k) => k.mem_usage,
    }
}
}
Expand Down Expand Up @@ -239,6 +247,30 @@ impl InvertedIndexKey {
}
}

/// Key for min-max pruning.
///
/// Equality and hashing cover the predicate expressions, the schema
/// version and the `skip_fields` flag, so cached results are never reused
/// across schema changes or differing field-skipping behavior.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
pub struct MinMaxKey {
    // String form of the pruning predicate expressions.
    exprs: Arc<Vec<String>>,
    // Schema version the predicates were built against; part of the key
    // so a schema change invalidates cached results.
    schema_version: u64,
    // Skip-fields flag; included so both pruning modes get distinct keys.
    skip_fields: bool,
    // Estimated total size of this key in bytes, precomputed in `new`.
    mem_usage: usize,
}

impl MinMaxKey {
    /// Creates a key, precomputing its estimated memory footprint.
    pub fn new(exprs: Arc<Vec<String>>, schema_version: u64, skip_fields: bool) -> Self {
        // Bytes of string payload held behind the `Arc`.
        let expr_bytes: usize = exprs.iter().map(String::len).sum();
        // Inline struct + `Vec` header + per-`String` headers + payload.
        let mem_usage = size_of::<Self>()
            + size_of::<Vec<String>>()
            + exprs.len() * size_of::<String>()
            + expr_bytes;
        Self {
            exprs,
            schema_version,
            skip_fields,
            mem_usage,
        }
    }
}

#[cfg(test)]
#[allow(clippy::single_range_in_vec_init)]
mod tests {
Expand Down Expand Up @@ -282,6 +314,18 @@ mod tests {
assert!(cache.get(&key, non_existent_file_id).is_none());
}

#[test]
fn test_minmax_key_should_distinguish_schema_version_and_skip_fields() {
    let exprs = Arc::new(vec!["col > 1".to_string()]);

    // Same exprs, different schema version -> distinct keys.
    let v1 = PredicateKey::new_minmax(exprs.clone(), 1, false);
    let v2 = PredicateKey::new_minmax(exprs.clone(), 2, false);
    assert_ne!(v1, v2);

    // Same exprs and version, different skip_fields -> distinct keys.
    let with_skip = PredicateKey::new_minmax(exprs, 1, true);
    assert_ne!(v1, with_skip);
}

#[test]
fn test_cache_capacity_limit() {
// Create a cache with small capacity (100 bytes)
Expand Down
16 changes: 16 additions & 0 deletions src/mito2/src/read/scan_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,10 @@ pub(crate) struct ScanMetricsSet {
bloom_filter_cache_hit: usize,
/// Number of index result cache misses for bloom filter index.
bloom_filter_cache_miss: usize,
/// Number of index result cache hits for minmax pruning.
minmax_cache_hit: usize,
/// Number of index result cache misses for minmax pruning.
minmax_cache_miss: usize,
/// Number of pruner builder cache hits.
pruner_cache_hit: usize,
/// Number of pruner builder cache misses.
Expand Down Expand Up @@ -308,6 +312,8 @@ impl fmt::Debug for ScanMetricsSet {
inverted_index_cache_miss,
bloom_filter_cache_hit,
bloom_filter_cache_miss,
minmax_cache_hit,
minmax_cache_miss,
pruner_cache_hit,
pruner_cache_miss,
pruner_prune_cost,
Expand Down Expand Up @@ -433,6 +439,12 @@ impl fmt::Debug for ScanMetricsSet {
if *bloom_filter_cache_miss > 0 {
write!(f, ", \"bloom_filter_cache_miss\":{bloom_filter_cache_miss}")?;
}
if *minmax_cache_hit > 0 {
write!(f, ", \"minmax_cache_hit\":{minmax_cache_hit}")?;
}
if *minmax_cache_miss > 0 {
write!(f, ", \"minmax_cache_miss\":{minmax_cache_miss}")?;
}
if *pruner_cache_hit > 0 {
write!(f, ", \"pruner_cache_hit\":{pruner_cache_hit}")?;
}
Expand Down Expand Up @@ -639,6 +651,8 @@ impl ScanMetricsSet {
inverted_index_cache_miss,
bloom_filter_cache_hit,
bloom_filter_cache_miss,
minmax_cache_hit,
minmax_cache_miss,
pruner_cache_hit,
pruner_cache_miss,
pruner_prune_cost,
Expand Down Expand Up @@ -680,6 +694,8 @@ impl ScanMetricsSet {
self.inverted_index_cache_miss += *inverted_index_cache_miss;
self.bloom_filter_cache_hit += *bloom_filter_cache_hit;
self.bloom_filter_cache_miss += *bloom_filter_cache_miss;
self.minmax_cache_hit += *minmax_cache_hit;
self.minmax_cache_miss += *minmax_cache_miss;
self.pruner_cache_hit += *pruner_cache_hit;
self.pruner_cache_miss += *pruner_cache_miss;
self.pruner_prune_cost += *pruner_prune_cost;
Expand Down
38 changes: 19 additions & 19 deletions src/mito2/src/read/stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::VecDeque;
use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
use std::time::Instant;

use common_error::ext::BoxedError;
use common_recordbatch::error::{ArrowComputeSnafu, ExternalSnafu};
use common_recordbatch::error::ExternalSnafu;
use common_recordbatch::{DfRecordBatch, RecordBatch};
use datatypes::compute;
use futures::stream::BoxStream;
use futures::{Stream, StreamExt};
use snafu::ResultExt;
Expand All @@ -47,7 +47,7 @@ pub(crate) struct ConvertBatchStream {
projection_mapper: Arc<ProjectionMapper>,
cache_strategy: CacheStrategy,
partition_metrics: PartitionMetrics,
buffer: Vec<DfRecordBatch>,
pending: VecDeque<RecordBatch>,
}

impl ConvertBatchStream {
Expand All @@ -62,7 +62,7 @@ impl ConvertBatchStream {
projection_mapper,
cache_strategy,
partition_metrics,
buffer: Vec::new(),
pending: VecDeque::new(),
}
}

Expand All @@ -79,40 +79,36 @@ impl ConvertBatchStream {
}
}
ScanBatch::Series(series) => {
self.buffer.clear();
debug_assert!(
self.pending.is_empty(),
"ConvertBatchStream should not convert a new SeriesBatch when pending batches exist"
);

match series {
SeriesBatch::PrimaryKey(primary_key_batch) => {
self.buffer.reserve(primary_key_batch.batches.len());
// Safety: Only primary key format returns this batch.
let mapper = self.projection_mapper.as_primary_key().unwrap();

for batch in primary_key_batch.batches {
let record_batch = mapper.convert(&batch, &self.cache_strategy)?;
self.buffer.push(record_batch.into_df_record_batch());
self.pending
.push_back(mapper.convert(&batch, &self.cache_strategy)?);
}
}
SeriesBatch::Flat(flat_batch) => {
self.buffer.reserve(flat_batch.batches.len());
// Safety: Only flat format returns this batch.
let mapper = self.projection_mapper.as_flat().unwrap();

for batch in flat_batch.batches {
let record_batch = mapper.convert(&batch)?;
self.buffer.push(record_batch.into_df_record_batch());
self.pending.push_back(mapper.convert(&batch)?);
}
}
}

let output_schema = self.projection_mapper.output_schema();
let record_batch =
compute::concat_batches(output_schema.arrow_schema(), &self.buffer)
.context(ArrowComputeSnafu)?;

Ok(RecordBatch::from_df_record_batch(
output_schema,
record_batch,
))
Ok(self
.pending
.pop_front()
.unwrap_or_else(|| RecordBatch::new_empty(output_schema)))
}
ScanBatch::RecordBatch(df_record_batch) => {
// Safety: Only flat format returns this batch.
Expand All @@ -128,6 +124,10 @@ impl Stream for ConvertBatchStream {
type Item = common_recordbatch::error::Result<RecordBatch>;

fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
if let Some(batch) = self.pending.pop_front() {
return Poll::Ready(Some(Ok(batch)));
}

let batch = futures::ready!(self.inner.poll_next_unpin(cx));
let Some(batch) = batch else {
return Poll::Ready(None);
Expand Down
Loading