SERVER-102989 Refactor PBT runner to use a workload model (#34208)

mattBoros · MongoDB Bot · commit 6e26f553f512 · 2025-04-10T03:33:22.000Z
GitOrigin-RevId: 9256f7f
diff --git a/jstests/aggregation/sources/agg_stages_basic_behavior_pbt.js b/jstests/aggregation/sources/agg_stages_basic_behavior_pbt.js
@@ -29,6 +29,7 @@ import {
     limitArb,
     sortArb
 } from "jstests/libs/property_test_helpers/models/query_models.js";
+import {makeWorkloadModel} from "jstests/libs/property_test_helpers/models/workload_models.js";
 import {testProperty} from "jstests/libs/property_test_helpers/property_testing_utils.js";
 import {isSlowBuild} from "jstests/libs/query/aggregation_pipeline_utils.js";
 import {fc} from "jstests/third_party/fast_check/fc-3.1.0.js";
@@ -216,15 +217,18 @@ for (const {stageArb, checkResultsFn, failMsg} of testCases) {
                          });
 
     // Run the property with a regular collection.
-    testProperty(propFn,
-                 {experimentColl},
-                 {collModel: getCollectionModel(), aggModel},
-                 {numRuns, numQueriesPerRun: 20});
+    testProperty(
+        propFn,
+        {experimentColl},
+        makeWorkloadModel({collModel: getCollectionModel(), aggModel, numQueriesPerRun: 20}),
+        numRuns);
 
     // TODO SERVER-101271 re-enable timeseries PBT testing.
     // Run the property with a TS collection.
     // testProperty(propFn,
     //              {experimentColl},
-    //              {collModel: getCollectionModel({isTS: true}), aggModel},
-    //              {numRuns, numQueriesPerRun: 20});
+    //              makeWorkloadModel(
+    //                  {collModel: getCollectionModel({isTS: true}), aggModel, numQueriesPerRun:
+    //                  20}),
+    //              numRuns);
 }
diff --git a/jstests/core/query/index_correctness_pbt.js b/jstests/core/query/index_correctness_pbt.js
@@ -19,6 +19,7 @@
 import {createCorrectnessProperty} from "jstests/libs/property_test_helpers/common_properties.js";
 import {getCollectionModel} from "jstests/libs/property_test_helpers/models/collection_models.js";
 import {getAggPipelineModel} from "jstests/libs/property_test_helpers/models/query_models.js";
+import {makeWorkloadModel} from "jstests/libs/property_test_helpers/models/workload_models.js";
 import {testProperty} from "jstests/libs/property_test_helpers/property_testing_utils.js";
 import {isSlowBuild} from "jstests/libs/query/aggregation_pipeline_utils.js";
 
@@ -37,8 +38,8 @@ const aggModel = getAggPipelineModel();
 // Test with a regular collection.
 testProperty(correctnessProperty,
              {controlColl, experimentColl},
-             {collModel: getCollectionModel(), aggModel},
-             {numRuns, numQueriesPerRun});
+             makeWorkloadModel({collModel: getCollectionModel(), aggModel, numQueriesPerRun}),
+             numRuns);
 
 // TODO SERVER-101271 re-enable PBT testing for time-series
 // // Test with a TS collection.
@@ -51,7 +52,9 @@ testProperty(correctnessProperty,
 //     }
 //     return true;
 // });
-// testProperty(correctnessProperty,
-//              {controlColl, experimentColl},
-//              {collModel: getCollectionModel({isTS: true}), aggModel: tsAggModel},
-//              {numRuns, numQueriesPerRun});
+// testProperty(
+//     correctnessProperty,
+//     {controlColl, experimentColl},
+//     makeWorkloadModel(
+//         {collModel: getCollectionModel({isTS: true}), aggModel: tsAggModel, numQueriesPerRun}),
+//     numRuns);
diff --git a/jstests/core/query/plan_cache/cache_correctness_pbt.js b/jstests/core/query/plan_cache/cache_correctness_pbt.js
@@ -22,6 +22,7 @@ import {
 } from "jstests/libs/property_test_helpers/common_properties.js";
 import {getCollectionModel} from "jstests/libs/property_test_helpers/models/collection_models.js";
 import {getAggPipelineModel} from "jstests/libs/property_test_helpers/models/query_models.js";
+import {makeWorkloadModel} from "jstests/libs/property_test_helpers/models/workload_models.js";
 import {testProperty} from "jstests/libs/property_test_helpers/property_testing_utils.js";
 import {isSlowBuild} from "jstests/libs/query/aggregation_pipeline_utils.js";
 
@@ -41,8 +42,8 @@ const aggModel = getAggPipelineModel();
 // Test with a regular collection.
 testProperty(correctnessProperty,
              {controlColl, experimentColl},
-             {collModel: getCollectionModel(), aggModel},
-             {numRuns, numQueriesPerRun});
+             makeWorkloadModel({collModel: getCollectionModel(), aggModel, numQueriesPerRun}),
+             numRuns);
 
 // TODO SERVER-101271 re-enable PBT testing for time-series
 // // Test with a TS collection.
@@ -55,7 +56,9 @@ testProperty(correctnessProperty,
 //     }
 //     return true;
 // });
-// testProperty(correctnessProperty,
-//              {controlColl, experimentColl},
-//              {collModel: getCollectionModel({isTS: true}), aggModel: tsAggModel},
-//              {numRuns, numQueriesPerRun});
+// testProperty(
+//     correctnessProperty,
+//     {controlColl, experimentColl},
+//     makeWorkloadModel(
+//         {collModel: getCollectionModel({isTS: true}), aggModel: tsAggModel, numQueriesPerRun}),
+//     numRuns);
diff --git a/jstests/core/query/plan_cache/cache_usage_pbt.js b/jstests/core/query/plan_cache/cache_usage_pbt.js
@@ -16,6 +16,7 @@
  */
 import {getCollectionModel} from "jstests/libs/property_test_helpers/models/collection_models.js";
 import {getAggPipelineModel} from "jstests/libs/property_test_helpers/models/query_models.js";
+import {makeWorkloadModel} from "jstests/libs/property_test_helpers/models/workload_models.js";
 import {
     getPlanCache,
     testProperty
@@ -83,11 +84,13 @@ function repeatQueriesUseCache(getQuery, testHelpers) {
 
 const aggModel = getAggPipelineModel();
 
-testProperty(repeatQueriesUseCache,
-             {experimentColl},
-             {collModel: getCollectionModel({isTS: false}), aggModel},
-             {numRuns, numQueriesPerRun});
-testProperty(repeatQueriesUseCache,
-             {experimentColl},
-             {collModel: getCollectionModel({isTS: true}), aggModel},
-             {numRuns, numQueriesPerRun});
+testProperty(
+    repeatQueriesUseCache,
+    {experimentColl},
+    makeWorkloadModel({collModel: getCollectionModel({isTS: false}), aggModel, numQueriesPerRun}),
+    numRuns);
+testProperty(
+    repeatQueriesUseCache,
+    {experimentColl},
+    makeWorkloadModel({collModel: getCollectionModel({isTS: true}), aggModel, numQueriesPerRun}),
+    numRuns);
diff --git a/jstests/core/query/plan_cache/queries_create_one_cache_entry_pbt.js b/jstests/core/query/plan_cache/queries_create_one_cache_entry_pbt.js
@@ -15,6 +15,7 @@
  */
 import {getCollectionModel} from "jstests/libs/property_test_helpers/models/collection_models.js";
 import {getAggPipelineModel} from "jstests/libs/property_test_helpers/models/query_models.js";
+import {makeWorkloadModel} from "jstests/libs/property_test_helpers/models/workload_models.js";
 import {
     getPlanCache,
     testProperty
@@ -60,11 +61,13 @@ function identicalQueryCreatesAtMostOneCacheEntry(getQuery, testHelpers) {
 
 const aggModel = getAggPipelineModel({allowOrs: false});
 
-testProperty(identicalQueryCreatesAtMostOneCacheEntry,
-             {experimentColl},
-             {collModel: getCollectionModel({isTS: false}), aggModel},
-             {numRuns, numQueriesPerRun});
-testProperty(identicalQueryCreatesAtMostOneCacheEntry,
-             {experimentColl},
-             {collModel: getCollectionModel({isTS: true}), aggModel},
-             {numRuns, numQueriesPerRun});
+testProperty(
+    identicalQueryCreatesAtMostOneCacheEntry,
+    {experimentColl},
+    makeWorkloadModel({collModel: getCollectionModel({isTS: false}), aggModel, numQueriesPerRun}),
+    numRuns);
+testProperty(
+    identicalQueryCreatesAtMostOneCacheEntry,
+    {experimentColl},
+    makeWorkloadModel({collModel: getCollectionModel({isTS: true}), aggModel, numQueriesPerRun}),
+    numRuns);
diff --git a/jstests/core/query/run_all_plans_pbt.js b/jstests/core/query/run_all_plans_pbt.js
@@ -28,6 +28,7 @@
 import {getDifferentlyShapedQueries} from "jstests/libs/property_test_helpers/common_properties.js";
 import {getCollectionModel} from "jstests/libs/property_test_helpers/models/collection_models.js";
 import {getAggPipelineModel} from "jstests/libs/property_test_helpers/models/query_models.js";
+import {makeWorkloadModel} from "jstests/libs/property_test_helpers/models/workload_models.js";
 import {
     runDeoptimized,
     testProperty
@@ -95,8 +96,8 @@ const aggModel = getAggPipelineModel();
 // Test with a regular collection.
 testProperty(hintedQueryHasSameResultsAsControlCollScan,
              {controlColl, experimentColl},
-             {collModel: getCollectionModel(), aggModel},
-             {numRuns, numQueriesPerRun});
+             makeWorkloadModel({collModel: getCollectionModel(), aggModel, numQueriesPerRun}),
+             numRuns);
 
 // TODO SERVER-101271 re-enable PBT testing for time-series
 // // Test with a TS collection.
@@ -112,6 +113,10 @@ testProperty(hintedQueryHasSameResultsAsControlCollScan,
 //     });
 //     testProperty(hintedQueryHasSameResultsAsControlCollScan,
 //                  {controlColl, experimentColl},
-//                  {collModel: getCollectionModel({isTS: true}), aggModel: tsAggModel},
-//                  {numRuns, numQueriesPerRun});
+//                  makeWorkloadModel({
+//                      collModel: getCollectionModel({isTS: true}),
+//                      aggModel: tsAggModel,
+//                      numQueriesPerRun
+//                  }),
+//                  numRuns);
 // }
diff --git a/jstests/libs/property_test_helpers/README.md b/jstests/libs/property_test_helpers/README.md
@@ -46,7 +46,40 @@ There are inconsistencies in our query language that are accepted behavior, but
 
 Floating point values are another area the PBTs avoid. Results can differ depending on the order of floating point operations. These differences can propogate. For this reason the only number values allowed are integers.
 
-#### Schema
+## Modeling Workloads
+
+A workload model is built from a collection model and an aggregation model. Each workload it generates has the following format:
+
+```
+{
+   collSpec: {
+       isTS:      true/false to indicate if the collection should be time-series
+       docs:      a list of documents
+       indexes:   a list of indexes
+   },
+   queries:  a list of aggregation pipelines
+}
+```
+
+Using one workload model instead of separate (and independent) collection models and agg models allows them to be interrelated.
+For example, if we want to model a PBT to test partial indexes where every query should satisfy the partial index filter, we can write:
+
+```
+fc.record({
+    partialFilter: partialFilterPredicateModel,
+    docs: docsModel,
+    indexes: indexesModel,
+    aggs: aggsModel
+}).map(({partialFilter, docs, indexes, aggs}) => {
+    // Append {partialFilterExpression: partialFilter} to all index options
+    // Prefix every query with {$match: partialFilter}
+    // Return our workload object.
+});
+```
+
+and this is a valid workload model. If the collection and aggregation models were passed separately, they would be independent and unable to coordinate through shared arbitraries (like `partialFilter`).
+
+### Schema
 
 The Core PBT schema is:
 
diff --git a/jstests/libs/property_test_helpers/models/collection_models.js b/jstests/libs/property_test_helpers/models/collection_models.js
@@ -13,15 +13,14 @@ import {
 import {fc} from "jstests/third_party/fast_check/fc-3.1.0.js";
 
 // Maximum number of documents that our collection model can generate.
-const kMaxNumQueries = 250;
+const kMaxNumDocs = 250;
 
 // An array of [0...249] to label our documents with.
 const docIds = [];
-for (let i = 0; i < kMaxNumQueries; i++) {
+for (let i = 0; i < kMaxNumDocs; i++) {
     docIds.push(i);
 }
-const uniqueIdsArb =
-    fc.shuffledSubarray(docIds, {minLength: kMaxNumQueries, maxLength: kMaxNumQueries});
+const uniqueIdsArb = fc.shuffledSubarray(docIds, {minLength: kMaxNumDocs, maxLength: kMaxNumDocs});
 
 function getDocsModel(isTS) {
     const docModel = isTS ? timeseriesDocModel : defaultDocModel;
@@ -31,7 +30,7 @@ function getDocsModel(isTS) {
     // failure, fast-check will still minimize down to 1 document if possible.
     // These docs are 'unlabeled' because we have not assigned them unique _ids yet.
     const unlabeledDocsModel =
-        fc.array(docModel, {minLength: 1, maxLength: kMaxNumQueries, size: '+2'});
+        fc.array(docModel, {minLength: 1, maxLength: kMaxNumDocs, size: '+2'});
     // Now label the docs with unique _ids.
     return fc.record({unlabeledDocs: unlabeledDocsModel, _ids: uniqueIdsArb})
         .map(({unlabeledDocs, _ids}) => {
diff --git a/jstests/libs/property_test_helpers/models/query_models.js b/jstests/libs/property_test_helpers/models/query_models.js
@@ -28,8 +28,8 @@ export function getSingleFieldProjectArb(isInclusion, {simpleFieldsOnly = false}
             return {$project: {_id: includeIdVal, [field]: includeFieldVal}};
         });
 }
-const projectArb = fc.oneof(getSingleFieldProjectArb(true /*isInclusion*/),
-                            getSingleFieldProjectArb(false /*isInclusion*/));
+const projectArb = oneof(getSingleFieldProjectArb(true /*isInclusion*/),
+                         getSingleFieldProjectArb(false /*isInclusion*/));
 
 // Project from one field to another. {$project {a: '$b'}}
 const computedProjectArb = fc.tuple(fieldArb, dollarFieldArb).map(function([destField, srcField]) {
diff --git a/jstests/libs/property_test_helpers/models/workload_models.js b/jstests/libs/property_test_helpers/models/workload_models.js
@@ -0,0 +1,41 @@
+/*
+ * Fast-check models for workloads. A workload is a collection model and an aggregation model.
+ * See property_test_helpers/README.md for more detail on the design.
+ */
+import {fc} from "jstests/third_party/fast_check/fc-3.1.0.js";
+
+function typeCheckSingleAggModel(aggregation) {
+    // Should be a list of objects.
+    assert(Array.isArray(aggregation), 'Each aggregation pipeline should be an array.');
+    for (const aggStage of aggregation) {
+        assert.eq(typeof aggStage, 'object', 'Each aggregation stage should be an object.');
+    }
+}
+
+// Sample once from the aggsModel to do some type checking. This can prevent accidentally passing
+// models to the wrong parameters.
+function typeCheckManyAggsModel(aggsModel) {
+    const aggregations = fc.sample(aggsModel, {numRuns: 1})[0];
+    // Should be a list of aggregation pipelines.
+    assert(Array.isArray(aggregations), 'aggsModel should generate an array');
+    assert.gt(aggregations.length, 0, 'aggsModel should generate a non-empty array');
+    aggregations.forEach(agg => typeCheckSingleAggModel(agg));
+}
+
+/*
+ * Creates a workload model from the given collection model and aggregation model.
+ * Can be passed:
+ *    - `aggsModel` which generates multiple aggregation pipelines at a time or
+ *    - `aggModel` and `numQueriesPerRun` which will be used to create an `aggsModel`
+ */
+export function makeWorkloadModel({collModel, aggModel, aggsModel, numQueriesPerRun} = {}) {
+    assert(!aggsModel || !aggModel, 'Cannot  specify both `aggsModel` and `aggModel`');
+    assert(
+        !aggsModel || !numQueriesPerRun,
+        'Cannot specify `aggsModel` and `numQueriesPerRun`, since `numQueriesPerRun` is only used when provided `aggModel`.');
+    if (aggModel) {
+        aggsModel = fc.array(aggModel, {minLength: numQueriesPerRun, maxLength: numQueriesPerRun});
+    }
+    typeCheckManyAggsModel(aggsModel);
+    return fc.record({collSpec: collModel, queries: aggsModel});
+}
diff --git a/jstests/libs/property_test_helpers/property_testing_utils.js b/jstests/libs/property_test_helpers/property_testing_utils.js
@@ -77,20 +77,21 @@ const okIndexCreationErrorCodes = [
  * TODO SERVER-98132 redesign getQuery to be more opaque about how many query shapes and constants
  * there are.
  */
-function runProperty(propertyFn, namespaces, collectionSpec, queries) {
+function runProperty(propertyFn, namespaces, workload) {
+    const {collSpec, queries} = workload;
     const {controlColl, experimentColl} = namespaces;
 
     // Setup the control/experiment collections, define the helper functions, then run the property.
     if (controlColl) {
         assert(controlColl.drop());
         createColl(controlColl);
-        assert.commandWorked(controlColl.insert(collectionSpec.docs));
+        assert.commandWorked(controlColl.insert(collSpec.docs));
     }
 
     assert(experimentColl.drop());
-    createColl(experimentColl, collectionSpec.isTS);
-    assert.commandWorked(experimentColl.insert(collectionSpec.docs));
-    collectionSpec.indexes.forEach((indexSpec, num) => {
+    createColl(experimentColl, collSpec.isTS);
+    assert.commandWorked(experimentColl.insert(collSpec.docs));
+    collSpec.indexes.forEach((indexSpec, num) => {
         const name = "index_" + num;
         assert.commandWorkedOrFailedWithCode(
             experimentColl.createIndex(indexSpec.def, Object.extend(indexSpec.options, {name})),
@@ -123,9 +124,9 @@ function reporter(propertyFn, namespaces) {
             // about the property failure.
             jsTestLog('Failed property: ' + propertyFn.name);
             jsTestLog(runDetails);
-            const {collSpec, queries} = runDetails.counterexample[0];
-            jsTestLog({collSpec, queries});
-            jsTestLog(runProperty(propertyFn, namespaces, collSpec, queries));
+            const workload = runDetails.counterexample[0];
+            jsTestLog(workload);
+            jsTestLog(runProperty(propertyFn, namespaces, workload));
             assert(false);
         }
     };
@@ -137,8 +138,12 @@ function reporter(propertyFn, namespaces) {
  * failure, `runProperty` is called again in the reporter, and prints out more details about the
  * failed property.
  */
-export function testProperty(
-    propertyFn, namespaces, {collModel, aggModel}, {numRuns, numQueriesPerRun}) {
+export function testProperty(propertyFn, namespaces, workloadModel, numRuns) {
+    assert.eq(typeof propertyFn, 'function');
+    assert(Object.keys(namespaces)
+               .every(collName => collName === 'controlColl' || collName === 'experimentColl'));
+    assert.eq(typeof numRuns, 'number');
+
     const seed = 4;
     jsTestLog('Running property `' + propertyFn.name + '` from test file `' + jsTestName() +
               '`, seed = ' + seed);
@@ -150,21 +155,17 @@ export function testProperty(
     // True PBT failures (uncaught) are still readable and have stack traces.
     TestData.traceExceptions = false;
 
-    const nPipelinesModel =
-        fc.array(aggModel, {minLength: numQueriesPerRun, maxLength: numQueriesPerRun});
-    const scenarioArb = fc.record({collSpec: collModel, queries: nPipelinesModel});
-
     let alwaysPassed = true;
-    fc.assert(fc.property(scenarioArb, ({collSpec, queries}) => {
+    fc.assert(fc.property(workloadModel, workload => {
         // Only return if the property passed or not. On failure,
         // `runProperty` is called again and more details are exposed.
-        const result = runProperty(propertyFn, namespaces, collSpec, queries);
+        const result = runProperty(propertyFn, namespaces, workload);
         // If it failed for the first time, print that out so we have the first failure available
         // in case shrinking fails.
         if (!result.passed && alwaysPassed) {
             jsTestLog('The property ' + propertyFn.name + ' from ' + jsTestName() + ' failed');
             jsTestLog('Initial inputs **before minimization**');
-            jsTestLog({collSpec, queries});
+            jsTestLog(workload);
             jsTestLog('Initial failure details **before minimization**');
             jsTestLog(result);
             alwaysPassed = false;
diff --git a/jstests/libs/property_test_helpers/self_tests/pbt_model_test.js b/jstests/libs/property_test_helpers/self_tests/pbt_model_test.js