Skip to content

Commit d766e84

Browse files
authored
feat: experimental --output-last-message flag to exec subcommand (#1037)
This introduces an experimental `--output-last-message` flag that specifies a file to which the final message from the agent will be written.

Two use cases:

- Ultimately, we will likely add a `--quiet` option to `exec`, but even if the user does not want any output written to the terminal, they probably want to know what the agent did. Writing the output to a file makes it possible to get that information in a clean way.
- Relatedly, when using `exec` in CI, it is easier to review the transcript written "normally" (i.e., not as JSON or something with extra escapes), but getting programmatic access to the last message is likely helpful, so writing the last message to a file gets the best of both worlds.

I am calling this "experimental" because it is possible that we are overfitting and will want a more general solution to this problem that would justify removing this flag.
1 parent a4bfdf6 commit d766e84

11 files changed

+79
-20
lines changed

codex-rs/core/src/codex.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ use crate::protocol::ReviewDecision;
7777
use crate::protocol::SandboxPolicy;
7878
use crate::protocol::SessionConfiguredEvent;
7979
use crate::protocol::Submission;
80+
use crate::protocol::TaskCompleteEvent;
8081
use crate::rollout::RolloutRecorder;
8182
use crate::safety::SafetyCheck;
8283
use crate::safety::assess_command_safety;
@@ -766,6 +767,7 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
766767
}
767768

768769
let mut pending_response_input: Vec<ResponseInputItem> = vec![ResponseInputItem::from(input)];
770+
let last_agent_message: Option<String>;
769771
loop {
770772
let mut net_new_turn_input = pending_response_input
771773
.drain(..)
@@ -795,7 +797,7 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
795797

796798
// 2. Update the in-memory transcript so that future turns
797799
// include these items as part of the history.
798-
transcript.record_items(net_new_turn_input);
800+
transcript.record_items(&net_new_turn_input);
799801

800802
// Note that `transcript.record_items()` does some filtering
801803
// such that `full_transcript` may include items that were
@@ -830,7 +832,6 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
830832
.into_iter()
831833
.flatten()
832834
.collect::<Vec<ResponseInputItem>>();
833-
let last_assistant_message = get_last_assistant_message_from_turn(&items);
834835

835836
// Only attempt to take the lock if there is something to record.
836837
if !items.is_empty() {
@@ -839,16 +840,17 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
839840

840841
// For ZDR we also need to keep a transcript clone.
841842
if let Some(transcript) = sess.state.lock().unwrap().zdr_transcript.as_mut() {
842-
transcript.record_items(items);
843+
transcript.record_items(&items);
843844
}
844845
}
845846

846847
if responses.is_empty() {
847848
debug!("Turn completed");
849+
last_agent_message = get_last_assistant_message_from_turn(&items);
848850
sess.maybe_notify(UserNotification::AgentTurnComplete {
849851
turn_id: sub_id.clone(),
850852
input_messages: turn_input_messages,
851-
last_assistant_message,
853+
last_assistant_message: last_agent_message.clone(),
852854
});
853855
break;
854856
}
@@ -871,7 +873,7 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
871873
sess.remove_task(&sub_id);
872874
let event = Event {
873875
id: sub_id,
874-
msg: EventMsg::TaskComplete,
876+
msg: EventMsg::TaskComplete(TaskCompleteEvent { last_agent_message }),
875877
};
876878
sess.tx_event.send(event).await.ok();
877879
}

codex-rs/core/src/conversation_history.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ impl ConversationHistory {
2525
/// `items` is ordered from oldest to newest.
2626
pub(crate) fn record_items<I>(&mut self, items: I)
2727
where
28-
I: IntoIterator<Item = ResponseItem>,
28+
I: IntoIterator,
29+
I::Item: std::ops::Deref<Target = ResponseItem>,
2930
{
3031
for item in items {
3132
if is_api_message(&item) {

codex-rs/core/src/protocol.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ pub enum EventMsg {
321321
TaskStarted,
322322

323323
/// Agent has completed all actions
324-
TaskComplete,
324+
TaskComplete(TaskCompleteEvent),
325325

326326
/// Agent text output message
327327
AgentMessage(AgentMessageEvent),
@@ -365,6 +365,11 @@ pub struct ErrorEvent {
365365
pub message: String,
366366
}
367367

368+
#[derive(Debug, Clone, Deserialize, Serialize)]
369+
pub struct TaskCompleteEvent {
370+
pub last_agent_message: Option<String>,
371+
}
372+
368373
#[derive(Debug, Clone, Deserialize, Serialize)]
369374
pub struct AgentMessageEvent {
370375
pub message: String,

codex-rs/core/tests/live_agent.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ async fn live_streaming_and_prev_id_reset() {
9898

9999
match ev.msg {
100100
EventMsg::AgentMessage(_) => saw_message_before_complete = true,
101-
EventMsg::TaskComplete => break,
101+
EventMsg::TaskComplete(_) => break,
102102
EventMsg::Error(ErrorEvent { message }) => {
103103
panic!("agent reported error in task1: {message}")
104104
}
@@ -136,7 +136,7 @@ async fn live_streaming_and_prev_id_reset() {
136136
{
137137
got_expected = true;
138138
}
139-
EventMsg::TaskComplete => break,
139+
EventMsg::TaskComplete(_) => break,
140140
EventMsg::Error(ErrorEvent { message }) => {
141141
panic!("agent reported error in task2: {message}")
142142
}
@@ -204,7 +204,7 @@ async fn live_shell_function_call() {
204204
assert!(stdout.contains(MARKER));
205205
saw_end_with_output = true;
206206
}
207-
EventMsg::TaskComplete => break,
207+
EventMsg::TaskComplete(_) => break,
208208
EventMsg::Error(codex_core::protocol::ErrorEvent { message }) => {
209209
panic!("agent error during shell test: {message}")
210210
}

codex-rs/core/tests/previous_response_id.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ async fn keeps_previous_response_id_between_tasks() {
132132
.await
133133
.unwrap()
134134
.unwrap();
135-
if matches!(ev.msg, EventMsg::TaskComplete) {
135+
if matches!(ev.msg, EventMsg::TaskComplete(_)) {
136136
break;
137137
}
138138
}
@@ -154,7 +154,7 @@ async fn keeps_previous_response_id_between_tasks() {
154154
.unwrap()
155155
.unwrap();
156156
match ev.msg {
157-
EventMsg::TaskComplete => break,
157+
EventMsg::TaskComplete(_) => break,
158158
EventMsg::Error(ErrorEvent { message }) => {
159159
panic!("unexpected error: {message}")
160160
}

codex-rs/core/tests/stream_no_completed.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use std::time::Duration;
66
use codex_core::Codex;
77
use codex_core::ModelProviderInfo;
88
use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
9+
use codex_core::protocol::EventMsg;
910
use codex_core::protocol::InputItem;
1011
use codex_core::protocol::Op;
1112
mod test_support;
@@ -118,7 +119,7 @@ async fn retries_on_early_close() {
118119
.await
119120
.unwrap()
120121
.unwrap();
121-
if matches!(ev.msg, codex_core::protocol::EventMsg::TaskComplete) {
122+
if matches!(ev.msg, EventMsg::TaskComplete(_)) {
122123
break;
123124
}
124125
}

codex-rs/exec/src/cli.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@ pub struct Cli {
4141
#[arg(long = "color", value_enum, default_value_t = Color::Auto)]
4242
pub color: Color,
4343

44+
/// Specifies file where the last message from the agent should be written.
45+
#[arg(long = "output-last-message")]
46+
pub last_message_file: Option<PathBuf>,
47+
4448
/// Initial instructions for the agent.
4549
pub prompt: String,
4650
}

codex-rs/exec/src/event_processor.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use codex_core::protocol::McpToolCallEndEvent;
1313
use codex_core::protocol::PatchApplyBeginEvent;
1414
use codex_core::protocol::PatchApplyEndEvent;
1515
use codex_core::protocol::SessionConfiguredEvent;
16+
use codex_core::protocol::TaskCompleteEvent;
1617
use owo_colors::OwoColorize;
1718
use owo_colors::Style;
1819
use shlex::try_join;
@@ -117,7 +118,9 @@ impl EventProcessor {
117118
let msg = format!("Task started: {id}");
118119
ts_println!("{}", msg.style(self.dimmed));
119120
}
120-
EventMsg::TaskComplete => {
121+
EventMsg::TaskComplete(TaskCompleteEvent {
122+
last_agent_message: _,
123+
}) => {
121124
let msg = format!("Task complete: {id}");
122125
ts_println!("{}", msg.style(self.bold));
123126
}

codex-rs/exec/src/lib.rs

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ mod cli;
22
mod event_processor;
33

44
use std::io::IsTerminal;
5+
use std::path::Path;
56
use std::sync::Arc;
67

78
pub use cli::Cli;
@@ -14,6 +15,7 @@ use codex_core::protocol::EventMsg;
1415
use codex_core::protocol::InputItem;
1516
use codex_core::protocol::Op;
1617
use codex_core::protocol::SandboxPolicy;
18+
use codex_core::protocol::TaskCompleteEvent;
1719
use codex_core::util::is_inside_git_repo;
1820
use event_processor::EventProcessor;
1921
use tracing::debug;
@@ -32,6 +34,7 @@ pub async fn run_main(cli: Cli) -> anyhow::Result<()> {
3234
skip_git_repo_check,
3335
disable_response_storage,
3436
color,
37+
last_message_file,
3538
prompt,
3639
} = cli;
3740

@@ -137,7 +140,14 @@ pub async fn run_main(cli: Cli) -> anyhow::Result<()> {
137140
let initial_images_event_id = codex.submit(Op::UserInput { items }).await?;
138141
info!("Sent images with event ID: {initial_images_event_id}");
139142
while let Ok(event) = codex.next_event().await {
140-
if event.id == initial_images_event_id && matches!(event.msg, EventMsg::TaskComplete) {
143+
if event.id == initial_images_event_id
144+
&& matches!(
145+
event.msg,
146+
EventMsg::TaskComplete(TaskCompleteEvent {
147+
last_agent_message: _,
148+
})
149+
)
150+
{
141151
break;
142152
}
143153
}
@@ -151,13 +161,40 @@ pub async fn run_main(cli: Cli) -> anyhow::Result<()> {
151161
// Run the loop until the task is complete.
152162
let mut event_processor = EventProcessor::create_with_ansi(stdout_with_ansi);
153163
while let Some(event) = rx.recv().await {
154-
let last_event =
155-
event.id == initial_prompt_task_id && matches!(event.msg, EventMsg::TaskComplete);
164+
let (is_last_event, last_assistant_message) = match &event.msg {
165+
EventMsg::TaskComplete(TaskCompleteEvent { last_agent_message }) => {
166+
(true, last_agent_message.clone())
167+
}
168+
_ => (false, None),
169+
};
156170
event_processor.process_event(event);
157-
if last_event {
171+
if is_last_event {
172+
handle_last_message(last_assistant_message, last_message_file.as_deref())?;
158173
break;
159174
}
160175
}
161176

162177
Ok(())
163178
}
179+
180+
fn handle_last_message(
181+
last_agent_message: Option<String>,
182+
last_message_file: Option<&Path>,
183+
) -> std::io::Result<()> {
184+
match (last_agent_message, last_message_file) {
185+
(Some(last_agent_message), Some(last_message_file)) => {
186+
// Last message and a file to write to.
187+
std::fs::write(last_message_file, last_agent_message)?;
188+
}
189+
(None, Some(last_message_file)) => {
190+
eprintln!(
191+
"Warning: No last message to write to file: {}",
192+
last_message_file.to_string_lossy()
193+
);
194+
}
195+
(_, None) => {
196+
// No file to write to, so there is nothing to do (whether or not there is a last message).
197+
}
198+
}
199+
Ok(())
200+
}

codex-rs/mcp-server/src/codex_tool_runner.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use codex_core::protocol::Event;
99
use codex_core::protocol::EventMsg;
1010
use codex_core::protocol::InputItem;
1111
use codex_core::protocol::Op;
12+
use codex_core::protocol::TaskCompleteEvent;
1213
use mcp_types::CallToolResult;
1314
use mcp_types::CallToolResultContent;
1415
use mcp_types::JSONRPC_VERSION;
@@ -125,7 +126,9 @@ pub async fn run_codex_tool_session(
125126
.await;
126127
break;
127128
}
128-
EventMsg::TaskComplete => {
129+
EventMsg::TaskComplete(TaskCompleteEvent {
130+
last_agent_message: _,
131+
}) => {
129132
let result = if let Some(msg) = last_agent_message {
130133
CallToolResult {
131134
content: vec![CallToolResultContent::TextContent(TextContent {

codex-rs/tui/src/chatwidget.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ use codex_core::protocol::McpToolCallBeginEvent;
1717
use codex_core::protocol::McpToolCallEndEvent;
1818
use codex_core::protocol::Op;
1919
use codex_core::protocol::PatchApplyBeginEvent;
20+
use codex_core::protocol::TaskCompleteEvent;
2021
use crossterm::event::KeyEvent;
2122
use ratatui::buffer::Buffer;
2223
use ratatui::layout::Constraint;
@@ -246,7 +247,9 @@ impl ChatWidget<'_> {
246247
self.bottom_pane.set_task_running(true);
247248
self.request_redraw();
248249
}
249-
EventMsg::TaskComplete => {
250+
EventMsg::TaskComplete(TaskCompleteEvent {
251+
last_agent_message: _,
252+
}) => {
250253
self.bottom_pane.set_task_running(false);
251254
self.request_redraw();
252255
}

0 commit comments

Comments
 (0)