|
| 1 | +/* Check the cardinality estimation of very simple predicates using histograms by running |
| 2 | + the predicate itself and comparing the number of documents matched to the estimate. |
| 3 | +
|
| 4 | + In this test, we use distributions that allow for "perfect" histograms, that is, |
| 5 | + histograms where, even with the information loss, perfect estimates can be made. |
| 6 | +
|
| 7 | + Similarly, the predicates used are those that can be estimated perfectly |
| 8 | + (except for the occasional off-by-one errors) |
| 9 | +*/ |
| 10 | + |
| 11 | +import { |
| 12 | + getAllPlans, |
| 13 | +} from "jstests/libs/query/analyze_plan.js"; |
| 14 | +import {checkSbeFullyEnabled} from "jstests/libs/query/sbe_util.js"; |
| 15 | +import {ArrayDataset} from 'jstests/noPassthroughWithMongod/query/cbr/lib/datasets/array.js'; |
| 16 | +import {BooleanDataset} from 'jstests/noPassthroughWithMongod/query/cbr/lib/datasets/boolean.js'; |
| 17 | +import { |
| 18 | + DateDataset, |
| 19 | + TimestampDataset |
| 20 | +} from 'jstests/noPassthroughWithMongod/query/cbr/lib/datasets/date_time.js'; |
| 21 | +import { |
| 22 | + OneHoleDataset, |
| 23 | + OnePeakDataset, |
| 24 | + SkewedDataset, |
| 25 | + ThreePeakDataset, |
| 26 | + UniformDataset |
| 27 | +} from "jstests/noPassthroughWithMongod/query/cbr/lib/datasets/distributions.js"; |
| 28 | +import { |
| 29 | + MixedNumbersDataset, |
| 30 | + MixedTypesDataset |
| 31 | +} from "jstests/noPassthroughWithMongod/query/cbr/lib/datasets/mixed_types.js"; |
| 32 | +import { |
| 33 | + TwoFieldDataset |
| 34 | +} from "jstests/noPassthroughWithMongod/query/cbr/lib/datasets/multifield.js"; |
| 35 | +import {NumberDataset} from "jstests/noPassthroughWithMongod/query/cbr/lib/datasets/number.js"; |
| 36 | +import {StringDataset} from "jstests/noPassthroughWithMongod/query/cbr/lib/datasets/string.js"; |
| 37 | + |
// TODO SERVER-92589: Remove this exemption
// Skip the whole test when SBE is fully enabled.
const sbeFullyEnabled = checkSbeFullyEnabled(db);
if (sbeFullyEnabled) {
    jsTestLog(`Skipping ${jsTestName()} as SBE executor is not supported yet`);
    quit();
}

// The test collection is named after the test itself.
const collName = jsTestName();
const coll = db[collName];
| 46 | + |
/**
 * Loads 'dataset' into the test collection, creates the requested indexes, runs 'analyze' on
 * the requested fields and then, for every predicate the dataset provides, checks that each
 * plan reported by explain carries a cardinality estimate within one document of the number
 * of documents the predicate actually matches.
 *
 * The 'planRankerMode' server parameter is set to "histogramCE" for the duration of the call
 * and set back to "multiPlanning" afterwards, even if an assertion fails.
 *
 * @param {Object} args
 * @param {Object} args.dataset - Object exposing docs() (documents to insert) and
 *     predicates() (filters to check).
 * @param {Object[]} [args.indexes] - Index key patterns to create; none when omitted.
 * @param {string[]} [args.analyze] - Fields to run 'analyze' on; ["a"] when omitted.
 * @param {number} [args.numberBuckets=1000] - 'numberBuckets' passed to each analyze command.
 */
function runOneTest({dataset, indexes, analyze, numberBuckets = 1000}) {
    try {
        assert.commandWorked(db.adminCommand({setParameter: 1, planRankerMode: "histogramCE"}));

        coll.drop();
        assert.commandWorked(coll.insertMany(dataset.docs()));

        for (const index of indexes || []) {
            assert.commandWorked(coll.createIndex(index));
        }

        for (const analyzeKey of analyze || ["a"]) {
            const analyzeCmd = {analyze: collName, key: analyzeKey, numberBuckets: numberBuckets};
            assert.commandWorked(coll.runCommand(analyzeCmd));
        }

        for (const predicate of dataset.predicates()) {
            const cursor = coll.find(predicate);
            const actualDocuments = cursor.count();

            const explain = cursor.explain();
            const plans = getAllPlans(explain);
            for (const plan of plans) {
                assert(plan.hasOwnProperty("cardinalityEstimate"),
                       `Plan has no cardinalityEstimate: ${tojson(plan)}`);
                const cardinalityEstimate = plan.cardinalityEstimate;

                // 'Histogram', 'Code' and 'Metadata' all imply a confident estimate,
                // so we accept all of them.
                const ceSource = plan.estimatesMetadata.ceSource;
                assert(ceSource === "Histogram" || ceSource === "Code" || ceSource === "Metadata",
                       `Unexpected ceSource ${tojson(ceSource)} for predicate: ${
                           tojson(predicate)}`);

                printjsononeline(predicate);
                print(`actualDocuments: ${actualDocuments}; cardinalityEstimate: ${
                    cardinalityEstimate}`);

                // Off-by-one differences are tolerated; anything larger is a failure.
                if (Math.abs(actualDocuments - cardinalityEstimate) > 1) {
                    printjsononeline(plan);
                    // 'indexes' is an array of objects, so it must go through tojson();
                    // plain interpolation would render it as "[object Object]".
                    assert(
                        false,
                        `Got cardinalityEstimate = ${cardinalityEstimate} but actualDocuments = ${
                            actualDocuments} for predicate: ${tojson(predicate)}; dataset: ${
                            dataset.constructor.name}; indexes: ${tojson(indexes)};`);
                }
            }
        }
    } finally {
        // Make sure that we restore the default no matter what
        assert.commandWorked(db.adminCommand({setParameter: 1, planRankerMode: "multiPlanning"}));
    }
}
| 100 | + |
// Single-field predicates, run once without any index and once with an index on 'a'.
for (const indexes of [[], [{a: 1}]]) {
    // Datasets checked with the default number of histogram buckets.
    const defaultBucketDatasets = [
        new ArrayDataset(),
        new BooleanDataset(),
        new DateDataset(),
        new TimestampDataset(),
        new SkewedDataset(),
        new MixedTypesDataset(),
        new MixedNumbersDataset(),
        new NumberDataset(),
        new StringDataset(),
    ];
    for (const dataset of defaultBucketDatasets) {
        runOneTest({dataset, indexes});
    }

    /* Skewed datasets under a constrained number of buckets. We give each
       dataset just enough buckets so that it can be estimated accurately.
     */
    const constrainedBucketTests = [
        {dataset: new UniformDataset(), numberBuckets: 2},
        {dataset: new OnePeakDataset(), numberBuckets: 4},
        {dataset: new OneHoleDataset(), numberBuckets: 3},
        {dataset: new ThreePeakDataset(), numberBuckets: 8},
        {dataset: new SkewedDataset(), numberBuckets: 10},
    ];
    for (const {dataset, numberBuckets} of constrainedBucketTests) {
        runOneTest({dataset, indexes, numberBuckets});
    }
}
| 126 | + |
// Multi-field predicates: run once with a single compound index on (a, b) and once with
// separate indexes on 'a' and 'b'. Both fields are analyzed in either case.
const multiFieldIndexSets = [
    [{a: 1, b: 1}],
    [{a: 1}, {b: 1}],
];
for (const indexes of multiFieldIndexSets) {
    runOneTest({dataset: new TwoFieldDataset(), indexes, analyze: ["a", "b"]});
}
0 commit comments