mongodb
diff --git a/‎jstests/noPassthroughWithMongod/query/cbr/histogramCE_accuracy.js
Lines changed: 131 additions & 0 deletions b/‎jstests/noPassthroughWithMongod/query/cbr/histogramCE_accuracy.js
Lines changed: 131 additions & 0 deletions
diff --git a/‎jstests/noPassthroughWithMongod/query/cbr/lib/datasets/array.js
Lines changed: 36 additions & 0 deletions b/‎jstests/noPassthroughWithMongod/query/cbr/lib/datasets/array.js
Lines changed: 36 additions & 0 deletions
diff --git a/‎jstests/noPassthroughWithMongod/query/cbr/lib/datasets/boolean.js
Lines changed: 41 additions & 0 deletions b/‎jstests/noPassthroughWithMongod/query/cbr/lib/datasets/boolean.js
Lines changed: 41 additions & 0 deletions
diff --git a/‎jstests/noPassthroughWithMongod/query/cbr/lib/datasets/date_time.js
Lines changed: 69 additions & 0 deletions b/‎jstests/noPassthroughWithMongod/query/cbr/lib/datasets/date_time.js
Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,131 @@
+/* Check the cardinality estimation of very simple predicates using histograms by running
+   the predicate itself and comparing the number of documents matched to the estimate.
+
+   In this test, we use distributions that allow for "perfect" histograms, that is,
+   histograms where, even with the information loss, perfect estimates can be made.
+
+   Similarly, the predicates used are those that can be estimated perfectly
+   (except for the occasional off-by-one errors)
+*/
+
+import {
+    getAllPlans,
+} from "jstests/libs/query/analyze_plan.js";
+import {checkSbeFullyEnabled} from "jstests/libs/query/sbe_util.js";
+import {ArrayDataset} from 'jstests/noPassthroughWithMongod/query/cbr/lib/datasets/array.js';
+import {BooleanDataset} from 'jstests/noPassthroughWithMongod/query/cbr/lib/datasets/boolean.js';
+import {
+    DateDataset,
+    TimestampDataset
+} from 'jstests/noPassthroughWithMongod/query/cbr/lib/datasets/date_time.js';
+import {
+    OneHoleDataset,
+    OnePeakDataset,
+    SkewedDataset,
+    ThreePeakDataset,
+    UniformDataset
+} from "jstests/noPassthroughWithMongod/query/cbr/lib/datasets/distributions.js";
+import {
+    MixedNumbersDataset,
+    MixedTypesDataset
+} from "jstests/noPassthroughWithMongod/query/cbr/lib/datasets/mixed_types.js";
+import {
+    TwoFieldDataset
+} from "jstests/noPassthroughWithMongod/query/cbr/lib/datasets/multifield.js";
+import {NumberDataset} from "jstests/noPassthroughWithMongod/query/cbr/lib/datasets/number.js";
+import {StringDataset} from "jstests/noPassthroughWithMongod/query/cbr/lib/datasets/string.js";
+
+// The collection used by every test case is named after this test file.
+const collName = jsTestName();
+const coll = db[collName];
+
+// TODO SERVER-92589: Remove this exemption
+// Stop the whole script early when checkSbeFullyEnabled() reports true for this server.
+if (checkSbeFullyEnabled(db)) {
+    jsTestLog(`Skipping ${jsTestName()} as SBE executor is not supported yet`);
+    quit();
+}
+
+/**
+ * Loads 'dataset' into the test collection, runs 'analyze' on the requested fields and then
+ * checks, for every predicate of the dataset and every plan reported by explain, that the
+ * plan's cardinality estimate is within one document of the number of documents matched.
+ *
+ * The 'planRankerMode' server parameter is set to "histogramCE" for the duration of the call
+ * and is set back to "multiPlanning" afterwards, even if an assertion fails.
+ *
+ * @param {Object} args
+ * @param {Object} args.dataset - Object providing docs() (documents to insert) and
+ *     predicates() (filters to check).
+ * @param {Object[]} [args.indexes] - Index key patterns to create; none when omitted.
+ * @param {string[]} [args.analyze] - Fields to run 'analyze' on; ["a"] when omitted.
+ * @param {number} [args.numberBuckets=1000] - 'numberBuckets' passed to each 'analyze' command.
+ */
+function runOneTest({dataset, indexes, analyze, numberBuckets = 1000}) {
+    // 'Histogram', 'Code' and 'Metadata' all imply a confident estimate,
+    // so we accept all of them.
+    const acceptedCeSources = ["Histogram", "Code", "Metadata"];
+
+    try {
+        assert.commandWorked(db.adminCommand({setParameter: 1, planRankerMode: "histogramCE"}));
+
+        coll.drop();
+        assert.commandWorked(coll.insertMany(dataset.docs()));
+
+        for (const index of indexes || []) {
+            assert.commandWorked(coll.createIndex(index));
+        }
+
+        for (const analyzeKey of analyze || ["a"]) {
+            const analyzeCmd = {analyze: collName, key: analyzeKey, numberBuckets: numberBuckets};
+
+            assert.commandWorked(coll.runCommand(analyzeCmd));
+        }
+
+        for (const predicate of dataset.predicates()) {
+            const cursor = coll.find(predicate);
+            const actualDocuments = cursor.count();
+
+            const explain = cursor.explain();
+            for (const plan of getAllPlans(explain)) {
+                assert(plan.hasOwnProperty("cardinalityEstimate"));
+                const cardinalityEstimate = plan.cardinalityEstimate;
+
+                assert(acceptedCeSources.includes(plan.estimatesMetadata.ceSource), predicate);
+
+                printjsononeline(predicate);
+                print(`actualDocuments: ${actualDocuments}; cardinalityEstimate: ${
+                    cardinalityEstimate}`);
+
+                // An off-by-one difference is tolerated; anything larger fails the test.
+                if (Math.abs(actualDocuments - cardinalityEstimate) > 1) {
+                    printjsononeline(plan);
+                    // 'indexes' is an array of key-pattern objects, so it is serialized with
+                    // tojson(); plain interpolation would print "[object Object]".
+                    assert(
+                        false,
+                        `Got cardinalityEstimate = ${cardinalityEstimate} but actualDocuments = ${
+                            actualDocuments} for predicate: ${tojson(predicate)}; dataset: ${
+                            dataset.constructor.name}; indexes: ${tojson(indexes)};`);
+                }
+            }
+        }
+    } finally {
+        // Make sure that we restore the default no matter what
+        assert.commandWorked(db.adminCommand({setParameter: 1, planRankerMode: "multiPlanning"}));
+    }
+}
+
+// Single-field datasets: each one runs without any index and with an index on 'a'.
+for (const indexes of [[], [{a: 1}]]) {
+    // These use the default number of buckets of runOneTest().
+    const defaultBucketDatasets = [
+        new ArrayDataset(),
+        new BooleanDataset(),
+        new DateDataset(),
+        new TimestampDataset(),
+        new SkewedDataset(),
+        new MixedTypesDataset(),
+        new MixedNumbersDataset(),
+        new NumberDataset(),
+        new StringDataset(),
+    ];
+    for (const dataset of defaultBucketDatasets) {
+        runOneTest({dataset: dataset, indexes: indexes});
+    }
+
+    /* Skewed datasets under a constrained number of buckets. Each dataset is given
+       just enough buckets so that it can be estimated accurately.
+    */
+    const constrainedBucketTests = [
+        {dataset: new UniformDataset(), numberBuckets: 2},
+        {dataset: new OnePeakDataset(), numberBuckets: 4},
+        {dataset: new OneHoleDataset(), numberBuckets: 3},
+        {dataset: new ThreePeakDataset(), numberBuckets: 8},
+        {dataset: new SkewedDataset(), numberBuckets: 10},
+    ];
+    for (const {dataset, numberBuckets} of constrainedBucketTests) {
+        runOneTest({dataset: dataset, numberBuckets: numberBuckets, indexes: indexes});
+    }
+}
+
+// Multi-field predicates: once with a compound index on (a, b) and once with separate
+// single-field indexes; both fields are analyzed.
+const multiFieldIndexSets = [[{a: 1, b: 1}], [{a: 1}, {b: 1}]];
+for (const indexes of multiFieldIndexSets) {
+    runOneTest({dataset: new TwoFieldDataset(), indexes: indexes, analyze: ["a", "b"]});
+}
@@ -0,0 +1,36 @@
+/*
+  A dataset with an array column and a set of relevant predicates.
+*/
+
+/**
+ * Documents whose field 'a' is an array of integers, plus the predicates to check on them.
+ */
+export class ArrayDataset {
+    /**
+     * @returns {Object[]} 100 documents; the document at position n holds
+     *     a = [0, 1, ..., n - 1], so the first document has an empty array.
+     */
+    docs() {
+        // Every document gets its own freshly built array.
+        return Array.from({length: 100}, (_, size) => {
+            return {a: Array.from({length: size}, (_, value) => value)};
+        });
+    }
+
+    /**
+     * @returns {Object[]} The filters on 'a' to check; entries that are commented out are
+     *     tracked by the ticket named next to them.
+     */
+    predicates() {
+        // -1 is never stored by docs(); 50 is stored in every array longer than 50 elements.
+        const absentValue = -1;
+        const presentValue = 50;
+
+        return [
+            // TODO(SERVER-99630): {a: null},
+            {a: absentValue},
+            {a: presentValue},
+            // TODO(SERVER-99634): {a: {$all:[]}},
+            {a: {$all: [absentValue]}},
+            {a: {$all: [absentValue, presentValue]}},
+            // TODO(SERVER-98085): {a: {$all:[50,75]}},
+            // Not estimated via histograms: {a: {$size: 50}},
+            // TODO(SERVER-99025): {a: {$gt: 900}},
+            // TODO(SERVER-99025): {a: {$gt: 250, $lt: 750}},
+
+            /* TODO(SERVER-100451): Not supported under histogramCE:
+            {a: {$elemMatch: {$eq: 50}}},
+            {a: {$elemMatch: {$gt: 50}}},
+            {a: {$elemMatch: {$ne: 50}}}
+            */
+        ];
+    }
+}
@@ -0,0 +1,41 @@
+/*
+  A dataset with a boolean column and relevant predicates for it
+*/
+
+/**
+ * Documents mixing boolean values of 'a' with numbers, an empty string, null and a missing
+ * 'a' field, plus comparison predicates against boolean constants.
+ */
+export class BooleanDataset {
+    /**
+     * @returns {Object[]} 800 documents: the same group of eight documents repeated 100 times.
+     */
+    docs() {
+        const rows = [];
+
+        for (let round = 0; round < 100; round++) {
+            // New object literals on every pass, so no two entries are the same object.
+            rows.push({a: 0},
+                      {a: 1},
+                      {a: 1.0},
+                      {a: ""},
+                      {a: true},
+                      {a: false},
+                      {a: null},
+                      {b: 123});
+        }
+
+        return rows;
+    }
+
+    /**
+     * @returns {Object[]} For each constant, an equality predicate followed by $gt, $gte, $lt,
+     *     $lte and $ne predicates on 'a'.
+     */
+    predicates() {
+        const constants = [
+            true,
+            false,
+            // TODO(SERVER-98094): null
+        ];
+        const comparisonOps = ["$gt", "$gte", "$lt", "$lte", "$ne"];
+
+        const filters = [];
+        for (const constant of constants) {
+            filters.push({a: constant});
+            for (const op of comparisonOps) {
+                filters.push({a: {[op]: constant}});
+            }
+        }
+        return filters;
+    }
+}
@@ -0,0 +1,69 @@
+/*
+  Datasets and relevant predicates for Date and Timestamp
+*/
+
+/**
+ * Documents whose field 'a' is a date, plus equality, range and $ne predicates on it.
+ */
+export class DateDataset {
+    /**
+     * @returns {Object[]} 2001 documents: 1000 dates that differ only in the year, 1000 dates
+     *     that differ only in the millisecond, and one ISODate(0) document.
+     */
+    docs() {
+        const date_docs = [];
+
+        // Vary the year component of the date (years 2000 through 2999)
+        for (let i = 0; i < 1000; i++) {
+            // Keep the counter numeric; the zero-padded text lives in its own constant.
+            const yearSuffix = String(i).padStart(3, '0');
+            date_docs.push({a: ISODate(`2${yearSuffix}-01-01T01:01:01.001`)});
+        }
+
+        // Vary the subsecond component of the date (.000 through .999)
+        for (let i = 0; i < 1000; i++) {
+            const millis = String(i).padStart(3, '0');
+            date_docs.push({a: ISODate(`2050-05-05T05:05:05.${millis}`)});
+        }
+
+        // NOTE(review): presumably meant to be the Unix epoch; the shell's ISODate() may treat
+        // a falsy argument as "now" - confirm, and use new Date(0) if the epoch is intended.
+        date_docs.push({a: ISODate(0)});
+        return date_docs;
+    }
+
+    /**
+     * @returns {Object[]} Predicates around one date from each batch produced by docs(), plus
+     *     predicates on ISODate(0).
+     */
+    predicates() {
+        // Each corresponds to one of the two batches of dates from docs()
+        const date1 = ISODate("2010-01-01T01:01:01.001");
+        const date2 = ISODate("2050-05-05T05:05:05.050");
+        return [
+            {a: date1},
+            {a: {$gt: date1}},
+            {a: {$gte: date1}},
+            {a: {$lt: date1}},
+            {a: {$lte: date1}},
+            {a: {$ne: date1}},
+
+            {a: date2},
+            {a: {$lt: date2}},
+            {a: {$gt: date2}},
+
+            {a: ISODate(0)},
+            {a: {$ne: ISODate(0)}},
+            {a: {$gt: ISODate(0)}},
+        ];
+    }
+}
+
+/**
+ * Documents whose field 'a' is a Timestamp, plus equality, $gt and $ne predicates on it.
+ */
+export class TimestampDataset {
+    /**
+     * @returns {Object[]} 200 documents: for each i in [0, 100), Timestamp(0, i) followed by
+     *     Timestamp(i, 0).
+     */
+    docs() {
+        const rows = [];
+        let step = 0;
+        while (step < 100) {
+            rows.push({a: Timestamp(0, step)}, {a: Timestamp(step, 0)});
+            step += 1;
+        }
+        return rows;
+    }
+
+    /**
+     * @returns {Object[]} Equality, $gt and $ne predicates for Timestamp(0, 50), followed by
+     *     the same three for Timestamp(50, 0).
+     */
+    predicates() {
+        // Factories, so every predicate receives its own Timestamp value.
+        const pivotFactories = [
+            () => Timestamp(0, 50),
+            () => Timestamp(50, 0),
+        ];
+
+        const filters = [];
+        for (const makePivot of pivotFactories) {
+            filters.push({a: makePivot()});
+            filters.push({a: {$gt: makePivot()}});
+            filters.push({a: {$ne: makePivot()}});
+        }
+        return filters;
+    }
+}