|
| 1 | +//! Project-level documentation discovery. |
| 2 | +//! |
| 3 | +//! Project-level documentation can be stored in a file named `AGENTS.md`. |
| 4 | +//! Currently, we include only the contents of the first file found as follows: |
| 5 | +//! |
| 6 | +//! 1. Look for the doc file in the current working directory (as determined |
| 7 | +//! by the `Config`). |
| 8 | +//! 2. If not found, walk *upwards* until the Git repository root is reached |
| 9 | +//! (detected by the presence of a `.git` directory/file), or failing that, |
| 10 | +//! the filesystem root. |
| 11 | +//! 3. If the Git root is encountered, look for the doc file there. If it |
| 12 | +//! exists, the search stops – we do **not** walk past the Git root. |
| 13 | +
|
| 14 | +use crate::config::Config; |
| 15 | +use std::path::Path; |
| 16 | +use tokio::io::AsyncReadExt; |
| 17 | +use tracing::error; |
| 18 | + |
/// File names probed, in order, inside each directory that is searched for a
/// project doc. Currently, we only match the filename `AGENTS.md` exactly.
const CANDIDATE_FILENAMES: &[&str] = &["AGENTS.md"];

/// When both `Config::instructions` and the project doc are present, they will
/// be concatenated with the following separator (instructions first, then the
/// separator, then the project doc).
const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n";
| 25 | + |
| 26 | +/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single |
| 27 | +/// string of instructions. |
| 28 | +pub(crate) async fn create_full_instructions(config: &Config) -> Option<String> { |
| 29 | + match find_project_doc(config).await { |
| 30 | + Ok(Some(project_doc)) => match &config.instructions { |
| 31 | + Some(original_instructions) => Some(format!( |
| 32 | + "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}" |
| 33 | + )), |
| 34 | + None => Some(project_doc), |
| 35 | + }, |
| 36 | + Ok(None) => config.instructions.clone(), |
| 37 | + Err(e) => { |
| 38 | + error!("error trying to find project doc: {e:#}"); |
| 39 | + config.instructions.clone() |
| 40 | + } |
| 41 | + } |
| 42 | +} |
| 43 | + |
| 44 | +/// Attempt to locate and load the project documentation. Currently, the search |
| 45 | +/// starts from `Config::cwd`, but if we may want to consider other directories |
| 46 | +/// in the future, e.g., additional writable directories in the `SandboxPolicy`. |
| 47 | +/// |
| 48 | +/// On success returns `Ok(Some(contents))`. If no documentation file is found |
| 49 | +/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as |
| 50 | +/// `Err` so callers can decide how to handle them. |
| 51 | +async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> { |
| 52 | + let max_bytes = config.project_doc_max_bytes; |
| 53 | + |
| 54 | + // Attempt to load from the working directory first. |
| 55 | + if let Some(doc) = load_first_candidate(&config.cwd, CANDIDATE_FILENAMES, max_bytes).await? { |
| 56 | + return Ok(Some(doc)); |
| 57 | + } |
| 58 | + |
| 59 | + // Walk up towards the filesystem root, stopping once we encounter the Git |
| 60 | + // repository root. The presence of **either** a `.git` *file* or |
| 61 | + // *directory* counts. |
| 62 | + let mut dir = config.cwd.clone(); |
| 63 | + |
| 64 | + // Canonicalize the path so that we do not end up in an infinite loop when |
| 65 | + // `cwd` contains `..` components. |
| 66 | + if let Ok(canon) = dir.canonicalize() { |
| 67 | + dir = canon; |
| 68 | + } |
| 69 | + |
| 70 | + while let Some(parent) = dir.parent() { |
| 71 | + // `.git` can be a *file* (for worktrees or submodules) or a *dir*. |
| 72 | + let git_marker = dir.join(".git"); |
| 73 | + let git_exists = match tokio::fs::metadata(&git_marker).await { |
| 74 | + Ok(_) => true, |
| 75 | + Err(e) if e.kind() == std::io::ErrorKind::NotFound => false, |
| 76 | + Err(e) => return Err(e), |
| 77 | + }; |
| 78 | + |
| 79 | + if git_exists { |
| 80 | + // We are at the repo root – attempt one final load. |
| 81 | + if let Some(doc) = load_first_candidate(&dir, CANDIDATE_FILENAMES, max_bytes).await? { |
| 82 | + return Ok(Some(doc)); |
| 83 | + } |
| 84 | + break; |
| 85 | + } |
| 86 | + |
| 87 | + dir = parent.to_path_buf(); |
| 88 | + } |
| 89 | + |
| 90 | + Ok(None) |
| 91 | +} |
| 92 | + |
| 93 | +/// Attempt to load the first candidate file found in `dir`. Returns the file |
| 94 | +/// contents (truncated if it exceeds `max_bytes`) when successful. |
| 95 | +async fn load_first_candidate( |
| 96 | + dir: &Path, |
| 97 | + names: &[&str], |
| 98 | + max_bytes: usize, |
| 99 | +) -> std::io::Result<Option<String>> { |
| 100 | + for name in names { |
| 101 | + let candidate = dir.join(name); |
| 102 | + |
| 103 | + let file = match tokio::fs::File::open(&candidate).await { |
| 104 | + Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue, |
| 105 | + Err(e) => return Err(e), |
| 106 | + Ok(f) => f, |
| 107 | + }; |
| 108 | + |
| 109 | + let size = file.metadata().await?.len(); |
| 110 | + |
| 111 | + let reader = tokio::io::BufReader::new(file); |
| 112 | + let mut data = Vec::with_capacity(std::cmp::min(size as usize, max_bytes)); |
| 113 | + let mut limited = reader.take(max_bytes as u64); |
| 114 | + limited.read_to_end(&mut data).await?; |
| 115 | + |
| 116 | + if size as usize > max_bytes { |
| 117 | + tracing::warn!( |
| 118 | + "Project doc `{}` exceeds {max_bytes} bytes - truncating.", |
| 119 | + candidate.display(), |
| 120 | + ); |
| 121 | + } |
| 122 | + |
| 123 | + let contents = String::from_utf8_lossy(&data).to_string(); |
| 124 | + if contents.trim().is_empty() { |
| 125 | + // Empty file – treat as not found. |
| 126 | + continue; |
| 127 | + } |
| 128 | + |
| 129 | + return Ok(Some(contents)); |
| 130 | + } |
| 131 | + |
| 132 | + Ok(None) |
| 133 | +} |
| 134 | + |
#[cfg(test)]
mod tests {
    // Tests may unwrap freely: a failed setup step should abort the test.
    #![allow(clippy::unwrap_used)]

    use super::*;
    use crate::config::Config;
    use std::fs;
    use tempfile::TempDir;

    /// Helper that returns a `Config` pointing at `root` and using `limit` as
    /// the maximum number of bytes to embed from AGENTS.md. The caller can
    /// optionally specify a custom `instructions` string – when `None` the
    /// value is cleared to mimic a scenario where no system instructions have
    /// been configured.
    fn make_config(root: &TempDir, limit: usize, instructions: Option<&str>) -> Config {
        let mut cfg = Config::load_default_config_for_test();
        cfg.cwd = root.path().to_path_buf();
        cfg.project_doc_max_bytes = limit;

        cfg.instructions = instructions.map(ToOwned::to_owned);
        cfg
    }

    /// AGENTS.md missing – should yield `None`.
    #[tokio::test]
    async fn no_doc_file_returns_none() {
        let tmp = tempfile::tempdir().expect("tempdir");

        let res = create_full_instructions(&make_config(&tmp, 4096, None)).await;
        assert!(
            res.is_none(),
            "Expected None when AGENTS.md is absent and no system instructions provided"
        );
    }

    /// Small file within the byte-limit is returned unmodified.
    #[tokio::test]
    async fn doc_smaller_than_limit_is_returned() {
        let tmp = tempfile::tempdir().expect("tempdir");
        fs::write(tmp.path().join("AGENTS.md"), "hello world").unwrap();

        let res = create_full_instructions(&make_config(&tmp, 4096, None))
            .await
            .expect("doc expected");

        assert_eq!(
            res, "hello world",
            "The document should be returned verbatim when it is smaller than the limit and there are no existing instructions"
        );
    }

    /// Oversize file is truncated to `project_doc_max_bytes`.
    #[tokio::test]
    async fn doc_larger_than_limit_is_truncated() {
        const LIMIT: usize = 1024;
        let tmp = tempfile::tempdir().expect("tempdir");

        let huge = "A".repeat(LIMIT * 2); // 2 KiB
        fs::write(tmp.path().join("AGENTS.md"), &huge).unwrap();

        let res = create_full_instructions(&make_config(&tmp, LIMIT, None))
            .await
            .expect("doc expected");

        assert_eq!(res.len(), LIMIT, "doc should be truncated to LIMIT bytes");
        assert_eq!(res, huge[..LIMIT]);
    }

    /// When `cwd` is nested inside a repo, the search should locate AGENTS.md
    /// placed at the repository root (identified by `.git`).
    #[tokio::test]
    async fn finds_doc_in_repo_root() {
        let repo = tempfile::tempdir().expect("tempdir");

        // Simulate a git repository. Note .git can be a file or a directory.
        fs::write(
            repo.path().join(".git"),
            "gitdir: /path/to/actual/git/dir\n",
        )
        .unwrap();

        // Put the doc at the repo root.
        fs::write(repo.path().join("AGENTS.md"), "root level doc").unwrap();

        // Now create a nested working directory: repo/workspace/crate_a
        let nested = repo.path().join("workspace/crate_a");
        fs::create_dir_all(&nested).unwrap();

        // Build config pointing at the nested dir.
        let mut cfg = make_config(&repo, 4096, None);
        cfg.cwd = nested;

        let res = create_full_instructions(&cfg).await.expect("doc expected");
        assert_eq!(res, "root level doc");
    }

    /// Explicitly setting the byte-limit to zero disables project docs.
    #[tokio::test]
    async fn zero_byte_limit_disables_docs() {
        let tmp = tempfile::tempdir().expect("tempdir");
        fs::write(tmp.path().join("AGENTS.md"), "something").unwrap();

        let res = create_full_instructions(&make_config(&tmp, 0, None)).await;
        assert!(
            res.is_none(),
            "With limit 0 the function should return None"
        );
    }

    /// When both system instructions *and* a project doc are present the two
    /// should be concatenated with the separator.
    #[tokio::test]
    async fn merges_existing_instructions_with_project_doc() {
        let tmp = tempfile::tempdir().expect("tempdir");
        fs::write(tmp.path().join("AGENTS.md"), "proj doc").unwrap();

        const INSTRUCTIONS: &str = "base instructions";

        let res = create_full_instructions(&make_config(&tmp, 4096, Some(INSTRUCTIONS)))
            .await
            .expect("should produce a combined instruction string");

        let expected = format!("{INSTRUCTIONS}{PROJECT_DOC_SEPARATOR}{}", "proj doc");

        assert_eq!(res, expected);
    }

    /// If there are existing system instructions but the project doc is
    /// missing we expect the original instructions to be returned unchanged.
    #[tokio::test]
    async fn keeps_existing_instructions_when_doc_missing() {
        let tmp = tempfile::tempdir().expect("tempdir");

        const INSTRUCTIONS: &str = "some instructions";

        let res = create_full_instructions(&make_config(&tmp, 4096, Some(INSTRUCTIONS))).await;

        assert_eq!(res, Some(INSTRUCTIONS.to_string()));
    }
}
0 commit comments