Skip to content
Closed
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
82790ae
design doc
itamargolan Feb 21, 2026
69efe6f
FE communication and ERD additions
itamargolan Feb 21, 2026
772eed9
ui reporting events in flow
itamargolan Feb 23, 2026
444a002
changes
itamargolan Feb 24, 2026
5bee1ad
add reason to TrialItemRun
itamargolan Feb 24, 2026
35d9ec9
[NA] [SDK] feat: add greenfield optimization framework package
itamargolan Feb 26, 2026
9e672b5
Adjustments for UI and framework review
itamargolan Feb 26, 2026
fd5c4fd
fix: address PR review comments - dict access bug and theme color
itamargolan Feb 27, 2026
828dbf9
fix: address remaining PR review comments
itamargolan Feb 27, 2026
fe12451
fix: separate experiment scores from feedback scores and handle singl…
itamargolan Feb 27, 2026
899b6b2
fix: extract shared getBestOptimizationScore helper to deduplicate logic
itamargolan Feb 27, 2026
36238e6
Merge branch 'main' into itamar/new-optimizer-framework
itamargolan Feb 27, 2026
8af967c
fix: evaluate baseline on full dataset instead of validation split only
itamargolan Feb 27, 2026
6f02d7b
feat: enrich GEPA experiment metadata for optimization visualization
itamargolan Mar 2, 2026
d9439cd
fix: address PR review comments and simplify optimizer factory
itamargolan Mar 2, 2026
c3e4b93
Merge branch 'main' into itamar/new-optimizer-framework
itamargolan Mar 2, 2026
e16df93
Merge branch 'main' into itamar/new-optimizer-framework
itamargolan Mar 2, 2026
e4757a3
fix: lineage-based step_index and parent_candidate_ids for GEPA exper…
itamargolan Mar 2, 2026
ae3b902
refactor: remove unused config_hash and merge event emitters
itamargolan Mar 2, 2026
624e7d2
refactor: make CandidateConfig a plain dict and pass baseline_config …
itamargolan Mar 3, 2026
3eae9fb
refactor: move gepa tests to library_integration to avoid unit suite …
itamargolan Mar 3, 2026
2dd844d
Merge branch 'main' into itamar/new-optimizer-framework
itamargolan Mar 3, 2026
f802868
refactor: remove event_emitter from optimizer interface, auto-emit st…
itamargolan Mar 3, 2026
2142952
test: assert on actual log messages in event emitter tests
itamargolan Mar 3, 2026
65a46c2
fix: set evaluation_method on optimizer trial experiments for correct…
itamargolan Mar 3, 2026
4cb8ffd
Merge branch 'main' into itamar/new-optimizer-framework
itamargolan Mar 3, 2026
a363730
fix: validate dataset is evaluation suite before running suite evalua…
itamargolan Mar 3, 2026
049c72e
refactor: extract _run_suite_evaluation to deduplicate suite evaluati…
itamargolan Mar 3, 2026
2646f79
[OPIK-4687] [SDK] feat: GEPA v2 optimizer with reflection-based promp…
alexkuzmik Mar 6, 2026
e84f963
Merge branch 'main' into itamar/new-optimizer-framework
itamargolan Mar 9, 2026
5a3d79f
Merge branch 'main' into itamar/new-optimizer-framework
itamargolan Mar 10, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/build_and_push_docker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ jobs:
uses: docker/build-push-action@v6
with:
context: apps/${{ inputs.image }}/
build-contexts: ${{ inputs.image == 'opik-python-backend' && 'opik-optimizer=apps/opik-optimizer/' || '' }}
platforms: linux/${{ matrix.platform }}
cache-from: type=registry,ref=${{ env.DOCKER_REGISTRY }}/${{ steps.set_vars.outputs.image_name }}:main
provenance: false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ public record Optimization(
@JsonView({Optimization.View.Public.class}) @Schema(accessMode = Schema.AccessMode.READ_ONLY) Long numTrials,
@JsonView({
Optimization.View.Public.class}) @Schema(accessMode = Schema.AccessMode.READ_ONLY) List<FeedbackScoreAverage> feedbackScores,
@JsonView({
Optimization.View.Public.class}) @Schema(accessMode = Schema.AccessMode.READ_ONLY) List<FeedbackScoreAverage> experimentScores,
@JsonView({Optimization.View.Public.class}) @Schema(accessMode = Schema.AccessMode.READ_ONLY) Instant createdAt,
@JsonView({Optimization.View.Public.class}) @Schema(accessMode = Schema.AccessMode.READ_ONLY) String createdBy,
@JsonView({Optimization.View.Public.class,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,8 @@ WITH optimization_final AS (
), experiments_final AS (
SELECT
id,
optimization_id
optimization_id,
experiment_scores
FROM experiments
WHERE workspace_id = :workspace_id
AND optimization_id IN (SELECT id FROM optimization_final)
Expand Down Expand Up @@ -222,15 +223,35 @@ LEFT JOIN (
HAVING length(fs.name) > 0
) as fs_avg
GROUP BY experiment_id
), experiment_scores_parsed AS (
SELECT
e.id AS experiment_id,
JSON_VALUE(score, '$.name') AS name,
CAST(JSON_VALUE(score, '$.value') AS Float64) AS value
FROM experiments_final AS e
ARRAY JOIN JSONExtractArrayRaw(e.experiment_scores) AS score
WHERE length(e.experiment_scores) > 2
AND length(JSON_VALUE(score, '$.name')) > 0
), experiment_scores_agg AS (
SELECT
experiment_id,
mapFromArrays(
groupArray(name),
groupArray(value)
) AS experiment_scores
FROM experiment_scores_parsed
GROUP BY experiment_id
)
SELECT
o.*,
o.id as id,
COUNT(DISTINCT e.id) FILTER (WHERE e.id != '') AS num_trials,
maxMap(fs.feedback_scores) AS feedback_scores
maxMap(fs.feedback_scores) AS feedback_scores,
maxMap(es.experiment_scores) AS experiment_scores
FROM optimization_final AS o
LEFT JOIN experiments_final AS e ON o.id = e.optimization_id
LEFT JOIN feedback_scores_agg AS fs ON e.id = fs.experiment_id
LEFT JOIN experiment_scores_agg AS es ON e.id = es.experiment_id
GROUP BY o.*
ORDER BY o.id DESC
<if(limit)> LIMIT :limit <endif> <if(offset)> OFFSET :offset <endif>
Expand Down Expand Up @@ -647,6 +668,7 @@ private Publisher<Optimization> mapToDto(Result result) {
.createdBy(row.get("created_by", String.class))
.lastUpdatedBy(row.get("last_updated_by", String.class))
.feedbackScores(getFeedbackScores(row, "feedback_scores"))
.experimentScores(getFeedbackScores(row, "experiment_scores"))
.numTrials(row.get("num_trials", Long.class))
.build();
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -366,8 +366,9 @@ private void enqueueStudioOptimizationJob(Optimization optimization, String work
.opikApiKey(opikApiKey)
.build();

// Enqueue to Redis RQ
queueProducer.enqueue(Queue.OPTIMIZER_CLOUD, jobMessage)
// Route to the appropriate queue based on optimizer type
var queue = resolveQueue(optimization);
queueProducer.enqueue(queue, jobMessage)
.doOnSuccess(
jobId -> log.info("Studio optimization job enqueued successfully for id: '{}', jobId: '{}'",
optimization.id(), jobId))
Expand All @@ -379,6 +380,20 @@ private void enqueueStudioOptimizationJob(Optimization optimization, String work
.subscribe();
}

// Optimizer type names, in lowercase, that resolveQueue keeps on Queue.OPTIMIZER_CLOUD.
// Any other non-null optimizer type is routed to Queue.OPTIMIZER_FRAMEWORK.
private static final java.util.Set<String> LEGACY_OPTIMIZER_TYPES = java.util.Set.of(
"gepa", "evolutionary", "hierarchical_reflective");

/**
 * Picks the queue a Studio optimization job is enqueued to.
 *
 * <p>Jobs stay on {@link Queue#OPTIMIZER_CLOUD} when the optimization has no studio config,
 * no optimizer, no optimizer type, or a type listed in {@code LEGACY_OPTIMIZER_TYPES}
 * (compared case-insensitively). Every other optimizer type goes to
 * {@link Queue#OPTIMIZER_FRAMEWORK}.
 *
 * @param optimization the optimization whose studio config determines the routing
 * @return the queue the job should be enqueued to; never {@code null}
 */
private Queue resolveQueue(Optimization optimization) {
    var studioConfig = optimization.studioConfig();
    if (studioConfig == null) {
        return Queue.OPTIMIZER_CLOUD;
    }

    var optimizer = studioConfig.optimizer();
    if (optimizer == null) {
        return Queue.OPTIMIZER_CLOUD;
    }

    var optimizerType = optimizer.type();
    if (optimizerType == null) {
        return Queue.OPTIMIZER_CLOUD;
    }

    // Normalize with Locale.ROOT: the no-arg toLowerCase() uses the JVM default locale, and under
    // Turkish/Azeri locales 'I' lowercases to a dotless 'ı', so "HIERARCHICAL_REFLECTIVE" would
    // miss the legacy set and be routed to the wrong queue.
    var normalizedType = optimizerType.toLowerCase(java.util.Locale.ROOT);
    return LEGACY_OPTIMIZER_TYPES.contains(normalizedType)
            ? Queue.OPTIMIZER_CLOUD
            : Queue.OPTIMIZER_FRAMEWORK;
}

private void cancelOptimization(UUID optimizationId, String workspaceId) {
var optimizationUpdate = OptimizationUpdate.builder()
.status(OptimizationStatus.CANCELLED)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
public enum Queue {

OPTIMIZER_CLOUD("opik:optimizer-cloud", "opik_backend.rq_worker.process_optimizer_job"),
OPTIMIZER_FRAMEWORK("opik:optimizer-framework", "opik_backend.rq_worker.process_framework_optimizer_job"),
;

@JsonValue
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ public Optimization.OptimizationBuilder createPartialOptimization() {
.status(OptimizationStatus.INITIALIZED)
.numTrials(0L)
.feedbackScores(null)
.experimentScores(null)
.studioConfig(null);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -769,8 +769,9 @@ void createStudioOptimization__thenVerifyRedisJobEnqueued() {
.studioConfig(studioConfig)
.build();

// Get initial queue size
String queueKey = "rq:queue:" + Queue.OPTIMIZER_CLOUD.toString();
// Get initial queue size (PODAM generates a random optimizer type which is not a legacy type,
// so resolveQueue routes to OPTIMIZER_FRAMEWORK)
String queueKey = "rq:queue:" + Queue.OPTIMIZER_FRAMEWORK.toString();
RQueueReactive<String> queue = redisClient.getQueue(queueKey, StringCodec.INSTANCE);
Integer initialSize = queue.size().block();
assertThat(initialSize).isNotNull();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,17 @@ import { JsonParam, StringParam, useQueryParam } from "use-query-params";
import isArray from "lodash/isArray";

import {
AggregatedFeedbackScore,
COLUMN_FEEDBACK_SCORES_ID,
COLUMN_ID_ID,
COLUMN_NAME_ID,
ROW_HEIGHT,
} from "@/types/shared";
import { Experiment, EXPERIMENT_TYPE } from "@/types/datasets";
import { OPTIMIZATION_ACTIVE_REFETCH_INTERVAL } from "@/lib/optimizations";
import {
OPTIMIZATION_ACTIVE_REFETCH_INTERVAL,
checkIsEvaluationSuite,
} from "@/lib/optimizations";
import { migrateSelectedColumns } from "@/lib/table";
import useAppStore from "@/store/AppStore";
import useBreadcrumbsStore from "@/store/BreadcrumbsStore";
Expand Down Expand Up @@ -50,6 +54,15 @@ const DEFAULT_COLUMNS_ORDER: string[] = [

const DEFAULT_SORTING: ColumnSort[] = [{ id: COLUMN_ID_ID, desc: false }];

/**
 * Returns the experiment-level scores that are not already represented among the
 * feedback scores (matched by `name`).
 *
 * Despite the name, the result contains only the *additional* entries taken from
 * `experimentScores`; the caller is responsible for appending them to the feedback scores.
 *
 * @param feedbackScores - scores already present on the experiment; may be absent
 * @param experimentScores - experiment-level scores to pick from; may be absent
 * @returns entries of `experimentScores`, in their original order, whose name does not occur in
 *   `feedbackScores`; an empty array when there are no experiment scores
 */
const mergeExperimentScores = (
  feedbackScores: AggregatedFeedbackScore[] | undefined,
  experimentScores: AggregatedFeedbackScore[] | undefined,
): AggregatedFeedbackScore[] => {
  if (experimentScores == null || experimentScores.length === 0) {
    return [];
  }

  // Collect the names that are already taken so the lookup below is O(1) per score.
  const takenNames = new Set<AggregatedFeedbackScore["name"]>();
  for (const score of feedbackScores ?? []) {
    takenNames.add(score.name);
  }

  return experimentScores.filter(({ name }) => !takenNames.has(name));
};

export const useCompareOptimizationsData = () => {
const navigate = useNavigate();
const workspaceName = useAppStore((state) => state.activeWorkspaceName);
Expand Down Expand Up @@ -151,7 +164,37 @@ export const useCompareOptimizationsData = () => {
const title = optimization?.name || optimizationId;
const noData = !search;
const noDataText = noData ? "There are no trials yet" : "No search results";
const experiments = useMemo(() => data?.content ?? [], [data?.content]);

const isEvaluationSuite = useMemo(
() => checkIsEvaluationSuite(data?.content ?? []),
[data?.content],
);

// Experiments as consumed by the rest of this hook: each row's feedback_scores is extended
// with the experiment_scores whose names are not already present, and, for evaluation suites
// with a known objective, narrowed to the objective score only.
const experiments = useMemo(() => {
const content = data?.content ?? [];
const objectiveName = optimization?.objective_name;

return content.map((experiment) => {
// Experiment-level scores that do not collide by name with existing feedback scores.
const additional = mergeExperimentScores(
experiment.feedback_scores,
experiment.experiment_scores,
);

// Feedback scores take precedence; additional experiment scores are appended after them.
let feedbackScores = additional.length
? [...(experiment.feedback_scores ?? []), ...additional]
: experiment.feedback_scores;

// For evaluation suites keep only the score named after the optimization objective.
if (isEvaluationSuite && objectiveName && feedbackScores) {
feedbackScores = feedbackScores.filter((s) => s.name === objectiveName);
}

// Nothing was added and no filtering applies: return the original object unchanged.
if (!additional.length && !isEvaluationSuite) return experiment;
return {
...experiment,
feedback_scores: feedbackScores,
};
});
}, [data?.content, isEvaluationSuite, optimization?.objective_name]);

useEffect(() => {
title &&
Expand Down Expand Up @@ -209,6 +252,7 @@ export const useCompareOptimizationsData = () => {
optimizationId,
optimization,
experiments,
isEvaluationSuite,
rows,
title,
noDataText,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@ type CompareTrialsDetailsProps = {
optimization?: Optimization;
experimentsIds: string[];
experiments: Experiment[];
isEvaluationSuite?: boolean;
};

const CompareTrialsDetails: React.FC<CompareTrialsDetailsProps> = ({
optimization,
experiments,
experimentsIds,
isEvaluationSuite = false,
}) => {
const setBreadcrumbParam = useBreadcrumbsStore((state) => state.setParam);
const { getColor } = useWorkspaceColorMap();
Expand All @@ -38,13 +40,22 @@ const CompareTrialsDetails: React.FC<CompareTrialsDetailsProps> = ({

const objectiveName = optimization?.objective_name;

// For evaluation suite experiments, only show the aggregated objective score
// The pass_rate is stored in experiment_scores (not feedback_scores)
if (isEvaluationSuite && objectiveName) {
const objectiveScore =
experiment.feedback_scores.find((s) => s.name === objectiveName) ??
experiment.experiment_scores?.find((s) => s.name === objectiveName);
return objectiveScore ? [objectiveScore] : [];
}

// Sort scores: main objective first, then alphabetically
return [...experiment.feedback_scores].sort((a, b) => {
if (a.name === objectiveName) return -1;
if (b.name === objectiveName) return 1;
return a.name.localeCompare(b.name, undefined, { sensitivity: "base" });
});
}, [experiment, isCompare, optimization?.objective_name]);
}, [experiment, isCompare, isEvaluationSuite, optimization?.objective_name]);

const colorMap = useMemo(() => {
if (!optimization?.objective_name || scores.length === 0) return {};
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import React from "react";
import React, { useMemo } from "react";
import isUndefined from "lodash/isUndefined";
import { JsonParam, StringParam, useQueryParam } from "use-query-params";

Expand All @@ -10,13 +10,18 @@ import CompareTrialsDetails from "@/components/pages/CompareTrialsPage/CompareTr
import PageBodyScrollContainer from "@/components/layout/PageBodyScrollContainer/PageBodyScrollContainer";
import PageBodyStickyContainer from "@/components/layout/PageBodyStickyContainer/PageBodyStickyContainer";
import useExperimentsByIds from "@/api/datasets/useExperimenstByIds";
import useExperimentsList from "@/api/datasets/useExperimentsList";
import useDeepMemo from "@/hooks/useDeepMemo";
import { Experiment } from "@/types/datasets";
import { Experiment, EXPERIMENT_TYPE } from "@/types/datasets";
import useOptimizationById from "@/api/optimizations/useOptimizationById";
import useAppStore from "@/store/AppStore";
import { checkIsEvaluationSuite } from "@/lib/optimizations";
import { keepPreviousData } from "@tanstack/react-query";
import { useParams } from "@tanstack/react-router";

const CompareTrialsPage: React.FunctionComponent = () => {
const workspaceName = useAppStore((state) => state.activeWorkspaceName);

const [tab = "prompt", setTab] = useQueryParam("tab", StringParam, {
updateType: "replaceIn",
});
Expand Down Expand Up @@ -44,6 +49,19 @@ const CompareTrialsPage: React.FunctionComponent = () => {
},
);

const { data: optimizationExperimentsData } = useExperimentsList(
{
workspaceName,
optimizationId,
types: [EXPERIMENT_TYPE.TRIAL, EXPERIMENT_TYPE.MINI_BATCH],
page: 1,
size: 100,
},
{
enabled: !!optimizationId,
},
);

const isPending = response.reduce<boolean>(
(acc, r) => acc || r.isPending,
false,
Expand All @@ -57,13 +75,22 @@ const CompareTrialsPage: React.FunctionComponent = () => {
return experiments ?? [];
}, [experiments]);

// Evaluation-suite detection runs over both the experiments selected for comparison and the
// experiments fetched for the optimization (optimizationExperimentsData).
// NOTE(review): that list is requested with page 1 / size 100, so larger optimizations are
// presumably only partially inspected — confirm this is acceptable for checkIsEvaluationSuite.
const isEvaluationSuite = useMemo(() => {
const allExperiments = [
...memorizedExperiments,
...(optimizationExperimentsData?.content ?? []),
];
return checkIsEvaluationSuite(allExperiments);
}, [memorizedExperiments, optimizationExperimentsData?.content]);

return (
<PageBodyScrollContainer>
<PageBodyStickyContainer direction="horizontal" limitWidth>
<CompareTrialsDetails
optimization={optimization}
experimentsIds={experimentsIds}
experiments={memorizedExperiments}
isEvaluationSuite={isEvaluationSuite}
/>
</PageBodyStickyContainer>
<Tabs
Expand Down Expand Up @@ -98,6 +125,7 @@ const CompareTrialsPage: React.FunctionComponent = () => {
datasetId={datasetId}
experimentsIds={experimentsIds}
experiments={memorizedExperiments}
isEvaluationSuite={isEvaluationSuite}
/>
</TabsContent>
<TabsContent value="config">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import IdCell from "@/components/shared/DataTableCells/IdCell";
import AutodetectCell from "@/components/shared/DataTableCells/AutodetectCell";
import CompareExperimentsOutputCell from "@/components/pages-shared/experiments/CompareExperimentsOutputCell/CompareExperimentsOutputCell";
import CompareExperimentsFeedbackScoreCell from "@/components/pages-shared/experiments/CompareExperimentsFeedbackScoreCell/CompareExperimentsFeedbackScoreCell";
import TrialPassedCell from "./TrialPassedCell";
import TraceDetailsPanel from "@/components/pages-shared/traces/TraceDetailsPanel/TraceDetailsPanel";
import CompareExperimentsNameCell from "@/components/pages-shared/experiments/CompareExperimentsNameCell/CompareExperimentsNameCell";
import CompareExperimentsNameHeader from "@/components/pages-shared/experiments/CompareExperimentsNameHeader/CompareExperimentsNameHeader";
Expand Down Expand Up @@ -115,13 +116,15 @@ export type TrialItemsTabProps = {
datasetId: string;
experimentsIds: string[];
experiments?: Experiment[];
isEvaluationSuite?: boolean;
};

const TrialItemsTab: React.FC<TrialItemsTabProps> = ({
objectiveName,
datasetId,
experimentsIds = [],
experiments,
isEvaluationSuite = false,
}) => {
const workspaceName = useAppStore((state) => state.activeWorkspaceName);
const [traceId = "", setTraceId] = useQueryParam("trace", StringParam, {
Expand Down Expand Up @@ -322,6 +325,21 @@ const TrialItemsTab: React.FC<TrialItemsTabProps> = ({
}, [dynamicOutputColumns, experiments, experimentsIds, setTraceId]);

const scoresColumnsData = useMemo(() => {
// For evaluation suite experiments, show a single "passed" column
if (isEvaluationSuite) {
return [
{
id: "score_passed",
label: "passed",
type: COLUMN_TYPE.string,
cell: TrialPassedCell as never,
customMeta: {
experimentsIds,
},
},
] as ColumnData<ExperimentsCompare>[];
}

// Extract all unique feedback score names from experiments
const feedbackScoreNames = new Set<string>();

Expand Down Expand Up @@ -367,7 +385,7 @@ const TrialItemsTab: React.FC<TrialItemsTabProps> = ({
)?.value,
},
})) as ColumnData<ExperimentsCompare>[];
}, [experiments, experimentsIds, objectiveName]);
}, [experiments, experimentsIds, objectiveName, isEvaluationSuite]);

// Auto-select all score columns when they become available
useEffect(() => {
Expand Down
Loading
Loading