Skip to content

Commit a60a524

Browse files
committed
download file parse filename add RFC 5987 support
support percent-encoded filename Signed-off-by: zuisong <[email protected]>
1 parent be990ac commit a60a524

File tree

6 files changed

+205
-12
lines changed

6 files changed

+205
-12
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ log = "0.4.21"
6060
rustls = { version = "0.23.25", optional = true, default-features = false, features = ["logging"] }
6161
tracing = { version = "0.1.41", default-features = false, features = ["log"] }
6262
reqwest_cookie_store = { version = "0.8.0", features = ["serde"] }
63+
percent-encoding = "2.3.1"
6364

6465
[dependencies.reqwest]
6566
version = "0.12.3"

src/content_disposition.rs

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
use percent_encoding::percent_decode;
2+
3+
/// Parse filename from Content-Disposition header
4+
/// Prioritizes filename* parameter if present, otherwise uses filename parameter
5+
pub fn parse_filename_from_content_disposition(content_disposition: &str) -> Option<String> {
6+
let parts: Vec<&str> = content_disposition
7+
.split(';')
8+
.map(|part| part.trim())
9+
.collect();
10+
11+
// First try to find filename* parameter
12+
for part in parts.iter() {
13+
if part.starts_with("filename*=") {
14+
if let Some(filename) = parse_encoded_filename(part) {
15+
return Some(filename);
16+
}
17+
}
18+
}
19+
20+
// If filename* is not found or parsing failed, try regular filename parameter
21+
for part in parts {
22+
if part.starts_with("filename=") {
23+
return parse_regular_filename(part);
24+
}
25+
}
26+
27+
None
28+
}
29+
30+
/// Parse a regular `filename` parameter.
///
/// `part` is a single trimmed parameter of the form `filename=value`, where
/// `value` may or may not be wrapped in double quotes. Returns `None` when the
/// resulting filename is empty.
///
/// Backslash-escaped characters inside a quoted value are deliberately not
/// interpreted: escaping is rarely seen in practice, and filenames with
/// special characters are expected to be sent via the `filename*` parameter.
fn parse_regular_filename(part: &str) -> Option<String> {
    // `strip_prefix` removes the prefix exactly once, so a value that itself
    // begins with "filename=" is left intact.
    let raw = part.strip_prefix("filename=").unwrap_or(part);

    // Only a complete pair of surrounding quotes is removed. A value that is a
    // single `"` has no matching closing quote and is kept verbatim rather
    // than being sliced with an inverted range.
    let filename = raw
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .unwrap_or(raw);

    if filename.is_empty() {
        return None;
    }

    Some(filename.to_string())
}
56+
57+
/// Parse RFC 5987 encoded filename (filename*)
58+
/// Format: charset'language'encoded-value
59+
fn parse_encoded_filename(part: &str) -> Option<String> {
60+
// Remove "filename*=" prefix
61+
let content = part.trim_start_matches("filename*=");
62+
63+
// According to RFC 5987, format should be: charset'language'encoded-value
64+
let parts: Vec<&str> = content.splitn(3, '\'').collect();
65+
if parts.len() != 3 {
66+
return None;
67+
}
68+
69+
let encoded_filename = parts[2];
70+
71+
// Decode using percent-encoding
72+
let decoded = percent_decode(encoded_filename.as_bytes())
73+
.decode_utf8()
74+
.ok()?;
75+
76+
Some(decoded.into_owned())
77+
}
78+
79+
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that parsing `header` produces exactly `expected`.
    #[track_caller]
    fn assert_filename(header: &str, expected: Option<&str>) {
        assert_eq!(
            parse_filename_from_content_disposition(header),
            expected.map(String::from)
        );
    }

    /// A quoted `filename` value is returned without its quotes.
    #[test]
    fn test_simple_filename() {
        assert_filename(r#"attachment; filename="example.pdf""#, Some("example.pdf"));
    }

    /// An unquoted `filename` value is returned as-is.
    #[test]
    fn test_filename_without_quotes() {
        assert_filename("attachment; filename=example.pdf", Some("example.pdf"));
    }

    /// A percent-encoded UTF-8 `filename*` value is decoded
    /// (here the Chinese filename "测试.pdf").
    #[test]
    fn test_encoded_filename() {
        assert_filename(
            "attachment; filename*=UTF-8''%E6%B5%8B%E8%AF%95.pdf",
            Some("测试.pdf"),
        );
    }

    /// When both `filename` and `filename*` are present, `filename*` wins.
    #[test]
    fn test_both_filenames() {
        assert_filename(
            r#"attachment; filename="fallback.pdf"; filename*=UTF-8''%E6%B5%8B%E8%AF%95.pdf"#,
            Some("测试.pdf"),
        );
    }

    /// A header without any filename parameter yields `None`.
    #[test]
    fn test_no_filename() {
        assert_filename("attachment", None);
    }
}

src/download.rs

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@ use std::io::{self, ErrorKind, IsTerminal};
33
use std::path::{Path, PathBuf};
44
use std::time::Instant;
55

6+
use crate::content_disposition;
7+
use crate::decoder::{decompress, get_compression_type};
8+
use crate::utils::{copy_largebuf, test_pretend_term, HeaderValueExt};
69
use anyhow::{anyhow, Context, Result};
710
use indicatif::{HumanBytes, ProgressBar, ProgressStyle};
811
use mime2ext::mime2ext;
@@ -13,9 +16,6 @@ use reqwest::{
1316
StatusCode,
1417
};
1518

16-
use crate::decoder::{decompress, get_compression_type};
17-
use crate::utils::{copy_largebuf, test_pretend_term, HeaderValueExt};
18-
1919
fn get_content_length(headers: &HeaderMap) -> Option<u64> {
2020
headers
2121
.get(CONTENT_LENGTH)
@@ -27,20 +27,12 @@ fn get_content_length(headers: &HeaderMap) -> Option<u64> {
2727
// of PathBufs
2828
fn get_file_name(response: &Response, orig_url: &reqwest::Url) -> String {
2929
fn from_header(response: &Response) -> Option<String> {
30-
let quoted = Regex::new("filename=\"([^\"]*)\"").unwrap();
31-
// Alternative form:
32-
let unquoted = Regex::new("filename=([^;=\"]*)").unwrap();
33-
// TODO: support "filename*" version
34-
3530
let header = response
3631
.headers()
3732
.get(CONTENT_DISPOSITION)?
3833
.to_utf8_str()
3934
.ok()?;
40-
let caps = quoted
41-
.captures(header)
42-
.or_else(|| unquoted.captures(header))?;
43-
Some(caps[1].to_string())
35+
content_disposition::parse_filename_from_content_disposition(header)
4436
}
4537

4638
fn from_url(url: &reqwest::Url) -> Option<String> {

src/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
mod auth;
33
mod buffer;
44
mod cli;
5+
mod content_disposition;
56
mod decoder;
67
mod download;
78
mod error_reporting;

tests/cases/download.rs

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,77 @@ fn download_supplied_unicode_filename() {
119119
);
120120
}
121121

122+
/// A `filename*` (RFC 5987) parameter with a plain ASCII value names the
/// downloaded file.
#[test]
fn download_support_filename_rfc_5987() {
    let workdir = tempdir().unwrap();
    let server = server::http(|_req| async move {
        let builder = hyper::Response::builder().header(
            "Content-Disposition",
            r#"attachment; filename*=UTF-8''abcd1234.txt"#,
        );
        builder.body("file".into()).unwrap()
    });

    get_command()
        .args(["--download", &server.base_url()])
        .current_dir(&workdir)
        .assert()
        .success();

    let saved = workdir.path().join("abcd1234.txt");
    assert_eq!(fs::read_to_string(saved).unwrap(), "file");
}
145+
/// A percent-encoded UTF-8 `filename*` value is decoded before being used as
/// the name of the downloaded file.
#[test]
fn download_support_filename_rfc_5987_percent_encoded() {
    let workdir = tempdir().unwrap();
    let server = server::http(|_req| async move {
        let builder = hyper::Response::builder().header(
            "Content-Disposition",
            r#"attachment; filename*=UTF-8''%E6%B5%8B%E8%AF%95.txt"#,
        );
        builder.body("file".into()).unwrap()
    });

    get_command()
        .args(["--download", &server.base_url()])
        .current_dir(&workdir)
        .assert()
        .success();

    let saved = workdir.path().join("测试.txt");
    assert_eq!(fs::read_to_string(saved).unwrap(), "file");
}
168+
169+
/// When the header carries both `filename` and `filename*`, the file is saved
/// under the `filename*` name.
#[test]
fn download_filename_star_with_high_priority() {
    let workdir = tempdir().unwrap();
    let server = server::http(|_req| async move {
        let builder = hyper::Response::builder().header(
            "Content-Disposition",
            r#"attachment; filename="fallback.txt"; filename*=UTF-8''%E6%B5%8B%E8%AF%95.txt"#,
        );
        builder.body("file".into()).unwrap()
    });

    get_command()
        .args(["--download", &server.base_url()])
        .current_dir(&workdir)
        .assert()
        .success();

    let saved = workdir.path().join("测试.txt");
    assert_eq!(fs::read_to_string(saved).unwrap(), "file");
}
192+
122193
#[test]
123194
fn download_supplied_unquoted_filename() {
124195
let dir = tempdir().unwrap();

0 commit comments

Comments
 (0)