Skip to content

Commit 2b122da

Browse files
authored
feat: add support for AGENTS.md in Rust CLI (#885)
The TypeScript CLI already has support for including the contents of `AGENTS.md` in the instructions sent with the first turn of a conversation. This PR brings this functionality to the Rust CLI. To be considered, `AGENTS.md` must be in the `cwd` of the session, or in one of the parent folders up to a Git/filesystem root (whichever is encountered first). By default, a maximum of 32 KiB of `AGENTS.md` will be included, though this is configurable using the new-in-this-PR `project_doc_max_bytes` option in `config.toml`.
1 parent b42ad67 commit 2b122da

File tree

6 files changed

+301
-1
lines changed

6 files changed

+301
-1
lines changed

AGENTS.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Rust/codex-rs
2+
3+
In the codex-rs folder where the Rust code lives:
4+
5+
- Never add or modify any code related to `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR`. You operate in a sandbox where `CODEX_SANDBOX_NETWORK_DISABLED=1` will be set whenever you use the `shell` tool. Any existing code that uses `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR` was authored with this fact in mind. It is often used to early exit out of tests that the author knew you would not be able to run given your sandbox limitations.

codex-rs/README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,3 +250,7 @@ To have Codex use this script for notifications, you would configure it via `not
250250
```toml
251251
notify = ["python3", "/Users/mbolin/.codex/notify.py"]
252252
```
253+
254+
### project_doc_max_bytes
255+
256+
Maximum number of bytes to read from an `AGENTS.md` file to include in the instructions sent with the first turn of a session. Defaults to 32 KiB.

codex-rs/core/src/codex.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ use crate::models::FunctionCallOutputPayload;
5252
use crate::models::ResponseInputItem;
5353
use crate::models::ResponseItem;
5454
use crate::models::ShellToolCallParams;
55+
use crate::project_doc::create_full_instructions;
5556
use crate::protocol::AskForApproval;
5657
use crate::protocol::Event;
5758
use crate::protocol::EventMsg;
@@ -83,10 +84,12 @@ impl Codex {
8384
pub async fn spawn(config: Config, ctrl_c: Arc<Notify>) -> CodexResult<(Codex, String)> {
8485
let (tx_sub, rx_sub) = async_channel::bounded(64);
8586
let (tx_event, rx_event) = async_channel::bounded(64);
87+
88+
let instructions = create_full_instructions(&config).await;
8689
let configure_session = Op::ConfigureSession {
8790
provider: config.model_provider.clone(),
8891
model: config.model.clone(),
89-
instructions: config.instructions.clone(),
92+
instructions,
9093
approval_policy: config.approval_policy,
9194
sandbox_policy: config.sandbox_policy.clone(),
9295
disable_response_storage: config.disable_response_storage,

codex-rs/core/src/config.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ use std::path::PathBuf;
1515
/// correctly even if the user has not created `~/.codex/instructions.md`.
1616
const EMBEDDED_INSTRUCTIONS: &str = include_str!("../prompt.md");
1717

18+
/// Maximum number of bytes of the documentation that will be embedded. Larger
19+
/// files are truncated to this size (a warning is logged) so we do not take up too much of
20+
/// the context window.
21+
pub(crate) const PROJECT_DOC_MAX_BYTES: usize = 32 * 1024; // 32 KiB
22+
1823
/// Application configuration loaded from disk and merged with overrides.
1924
#[derive(Debug, Clone)]
2025
pub struct Config {
@@ -72,6 +77,9 @@ pub struct Config {
7277

7378
/// Combined provider map (defaults merged with user-defined overrides).
7479
pub model_providers: HashMap<String, ModelProviderInfo>,
80+
81+
/// Maximum number of bytes to include from an AGENTS.md project doc file.
82+
pub project_doc_max_bytes: usize,
7583
}
7684

7785
/// Base config deserialized from ~/.codex/config.toml.
@@ -111,6 +119,9 @@ pub struct ConfigToml {
111119
/// User-defined provider entries that extend/override the built-in list.
112120
#[serde(default)]
113121
pub model_providers: HashMap<String, ModelProviderInfo>,
122+
123+
/// Maximum number of bytes to include from an AGENTS.md project doc file.
124+
pub project_doc_max_bytes: Option<usize>,
114125
}
115126

116127
impl ConfigToml {
@@ -267,6 +278,7 @@ impl Config {
267278
instructions,
268279
mcp_servers: cfg.mcp_servers,
269280
model_providers,
281+
project_doc_max_bytes: cfg.project_doc_max_bytes.unwrap_or(PROJECT_DOC_MAX_BYTES),
270282
};
271283
Ok(config)
272284
}

codex-rs/core/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ mod model_provider_info;
2828
pub use model_provider_info::ModelProviderInfo;
2929
pub use model_provider_info::WireApi;
3030
mod models;
31+
mod project_doc;
3132
pub mod protocol;
3233
mod rollout;
3334
mod safety;

codex-rs/core/src/project_doc.rs

Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
//! Project-level documentation discovery.
2+
//!
3+
//! Project-level documentation can be stored in a file named `AGENTS.md`.
4+
//! Currently, we include only the contents of the first file found as follows:
5+
//!
6+
//! 1. Look for the doc file in the current working directory (as determined
7+
//! by the `Config`).
8+
//! 2. If not found, walk *upwards* until the Git repository root is reached
9+
//! (detected by the presence of a `.git` directory/file), or failing that,
10+
//! the filesystem root.
11+
//! 3. If the Git root is encountered, look for the doc file there. If it
12+
//! exists, the search stops – we do **not** walk past the Git root.
13+
14+
use crate::config::Config;
15+
use std::path::Path;
16+
use tokio::io::AsyncReadExt;
17+
use tracing::error;
18+
19+
/// Currently, we only match the filename `AGENTS.md` exactly.
20+
const CANDIDATE_FILENAMES: &[&str] = &["AGENTS.md"];
21+
22+
/// When both `Config::instructions` and the project doc are present, they will
23+
/// be concatenated with the following separator.
24+
const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n";
25+
26+
/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single
27+
/// string of instructions.
28+
pub(crate) async fn create_full_instructions(config: &Config) -> Option<String> {
29+
match find_project_doc(config).await {
30+
Ok(Some(project_doc)) => match &config.instructions {
31+
Some(original_instructions) => Some(format!(
32+
"{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}"
33+
)),
34+
None => Some(project_doc),
35+
},
36+
Ok(None) => config.instructions.clone(),
37+
Err(e) => {
38+
error!("error trying to find project doc: {e:#}");
39+
config.instructions.clone()
40+
}
41+
}
42+
}
43+
44+
/// Attempt to locate and load the project documentation. Currently, the search
45+
/// starts from `Config::cwd`, but if we may want to consider other directories
46+
/// in the future, e.g., additional writable directories in the `SandboxPolicy`.
47+
///
48+
/// On success returns `Ok(Some(contents))`. If no documentation file is found
49+
/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as
50+
/// `Err` so callers can decide how to handle them.
51+
async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
52+
let max_bytes = config.project_doc_max_bytes;
53+
54+
// Attempt to load from the working directory first.
55+
if let Some(doc) = load_first_candidate(&config.cwd, CANDIDATE_FILENAMES, max_bytes).await? {
56+
return Ok(Some(doc));
57+
}
58+
59+
// Walk up towards the filesystem root, stopping once we encounter the Git
60+
// repository root. The presence of **either** a `.git` *file* or
61+
// *directory* counts.
62+
let mut dir = config.cwd.clone();
63+
64+
// Canonicalize the path so that we do not end up in an infinite loop when
65+
// `cwd` contains `..` components.
66+
if let Ok(canon) = dir.canonicalize() {
67+
dir = canon;
68+
}
69+
70+
while let Some(parent) = dir.parent() {
71+
// `.git` can be a *file* (for worktrees or submodules) or a *dir*.
72+
let git_marker = dir.join(".git");
73+
let git_exists = match tokio::fs::metadata(&git_marker).await {
74+
Ok(_) => true,
75+
Err(e) if e.kind() == std::io::ErrorKind::NotFound => false,
76+
Err(e) => return Err(e),
77+
};
78+
79+
if git_exists {
80+
// We are at the repo root – attempt one final load.
81+
if let Some(doc) = load_first_candidate(&dir, CANDIDATE_FILENAMES, max_bytes).await? {
82+
return Ok(Some(doc));
83+
}
84+
break;
85+
}
86+
87+
dir = parent.to_path_buf();
88+
}
89+
90+
Ok(None)
91+
}
92+
93+
/// Attempt to load the first candidate file found in `dir`. Returns the file
94+
/// contents (truncated if it exceeds `max_bytes`) when successful.
95+
async fn load_first_candidate(
96+
dir: &Path,
97+
names: &[&str],
98+
max_bytes: usize,
99+
) -> std::io::Result<Option<String>> {
100+
for name in names {
101+
let candidate = dir.join(name);
102+
103+
let file = match tokio::fs::File::open(&candidate).await {
104+
Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue,
105+
Err(e) => return Err(e),
106+
Ok(f) => f,
107+
};
108+
109+
let size = file.metadata().await?.len();
110+
111+
let reader = tokio::io::BufReader::new(file);
112+
let mut data = Vec::with_capacity(std::cmp::min(size as usize, max_bytes));
113+
let mut limited = reader.take(max_bytes as u64);
114+
limited.read_to_end(&mut data).await?;
115+
116+
if size as usize > max_bytes {
117+
tracing::warn!(
118+
"Project doc `{}` exceeds {max_bytes} bytes - truncating.",
119+
candidate.display(),
120+
);
121+
}
122+
123+
let contents = String::from_utf8_lossy(&data).to_string();
124+
if contents.trim().is_empty() {
125+
// Empty file – treat as not found.
126+
continue;
127+
}
128+
129+
return Ok(Some(contents));
130+
}
131+
132+
Ok(None)
133+
}
134+
135+
#[cfg(test)]
mod tests {
    // Fixture setup failures should simply abort the test, so `unwrap` is fine.
    #![allow(clippy::unwrap_used)]

    use super::*;
    use crate::config::Config;
    use std::fs;
    use tempfile::TempDir;

    /// Helper that returns a `Config` pointing at `root` and using `limit` as
    /// the maximum number of bytes to embed from AGENTS.md. The caller can
    /// optionally specify a custom `instructions` string – when `None` the
    /// value is cleared to mimic a scenario where no system instructions have
    /// been configured.
    fn make_config(root: &TempDir, limit: usize, instructions: Option<&str>) -> Config {
        let mut cfg = Config::load_default_config_for_test();
        cfg.cwd = root.path().to_path_buf();
        cfg.project_doc_max_bytes = limit;

        cfg.instructions = instructions.map(ToOwned::to_owned);
        cfg
    }

    /// AGENTS.md missing – should yield `None`.
    #[tokio::test]
    async fn no_doc_file_returns_none() {
        let tmp = tempfile::tempdir().expect("tempdir");

        let res = create_full_instructions(&make_config(&tmp, 4096, None)).await;
        assert!(
            res.is_none(),
            "Expected None when AGENTS.md is absent and no system instructions provided"
        );
    }

    /// Small file within the byte-limit is returned unmodified.
    #[tokio::test]
    async fn doc_smaller_than_limit_is_returned() {
        let tmp = tempfile::tempdir().expect("tempdir");
        fs::write(tmp.path().join("AGENTS.md"), "hello world").unwrap();

        let res = create_full_instructions(&make_config(&tmp, 4096, None))
            .await
            .expect("doc expected");

        assert_eq!(
            res, "hello world",
            "The document should be returned verbatim when it is smaller than the limit and there are no existing instructions"
        );
    }

    /// Oversize file is truncated to `project_doc_max_bytes`.
    #[tokio::test]
    async fn doc_larger_than_limit_is_truncated() {
        const LIMIT: usize = 1024;
        let tmp = tempfile::tempdir().expect("tempdir");

        let huge = "A".repeat(LIMIT * 2); // 2 KiB
        fs::write(tmp.path().join("AGENTS.md"), &huge).unwrap();

        let res = create_full_instructions(&make_config(&tmp, LIMIT, None))
            .await
            .expect("doc expected");

        assert_eq!(res.len(), LIMIT, "doc should be truncated to LIMIT bytes");
        assert_eq!(res, huge[..LIMIT]);
    }

    /// When `cwd` is nested inside a repo, the search should locate AGENTS.md
    /// placed at the repository root (identified by `.git`).
    #[tokio::test]
    async fn finds_doc_in_repo_root() {
        let repo = tempfile::tempdir().expect("tempdir");

        // Simulate a git repository. Note .git can be a file or a directory.
        fs::write(
            repo.path().join(".git"),
            "gitdir: /path/to/actual/git/dir\n",
        )
        .unwrap();

        // Put the doc at the repo root.
        fs::write(repo.path().join("AGENTS.md"), "root level doc").unwrap();

        // Now create a nested working directory: repo/workspace/crate_a
        let nested = repo.path().join("workspace/crate_a");
        fs::create_dir_all(&nested).unwrap();

        // Build config pointing at the nested dir.
        let mut cfg = make_config(&repo, 4096, None);
        cfg.cwd = nested;

        let res = create_full_instructions(&cfg).await.expect("doc expected");
        assert_eq!(res, "root level doc");
    }

    /// Explicitly setting the byte-limit to zero disables project docs.
    #[tokio::test]
    async fn zero_byte_limit_disables_docs() {
        let tmp = tempfile::tempdir().expect("tempdir");
        fs::write(tmp.path().join("AGENTS.md"), "something").unwrap();

        let res = create_full_instructions(&make_config(&tmp, 0, None)).await;
        assert!(
            res.is_none(),
            "With limit 0 the function should return None"
        );
    }

    /// When both system instructions *and* a project doc are present the two
    /// should be concatenated with the separator.
    #[tokio::test]
    async fn merges_existing_instructions_with_project_doc() {
        let tmp = tempfile::tempdir().expect("tempdir");
        fs::write(tmp.path().join("AGENTS.md"), "proj doc").unwrap();

        const INSTRUCTIONS: &str = "base instructions";

        let res = create_full_instructions(&make_config(&tmp, 4096, Some(INSTRUCTIONS)))
            .await
            .expect("should produce a combined instruction string");

        let expected = format!("{INSTRUCTIONS}{PROJECT_DOC_SEPARATOR}{}", "proj doc");

        assert_eq!(res, expected);
    }

    /// If there are existing system instructions but the project doc is
    /// missing we expect the original instructions to be returned unchanged.
    #[tokio::test]
    async fn keeps_existing_instructions_when_doc_missing() {
        let tmp = tempfile::tempdir().expect("tempdir");

        const INSTRUCTIONS: &str = "some instructions";

        let res = create_full_instructions(&make_config(&tmp, 4096, Some(INSTRUCTIONS))).await;

        assert_eq!(res, Some(INSTRUCTIONS.to_string()));
    }
}

0 commit comments

Comments
 (0)