Skip to content

Commit 30bd6af

Browse files
committed
download file parse filename add RFC 5987 support
support percent-encoded filename Signed-off-by: zuisong <com.me@foxmail.com>
1 parent be990ac commit 30bd6af

File tree

6 files changed

+192
-12
lines changed

6 files changed

+192
-12
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ log = "0.4.21"
6060
rustls = { version = "0.23.25", optional = true, default-features = false, features = ["logging"] }
6161
tracing = { version = "0.1.41", default-features = false, features = ["log"] }
6262
reqwest_cookie_store = { version = "0.8.0", features = ["serde"] }
63+
percent-encoding = "2.3.1"
6364

6465
[dependencies.reqwest]
6566
version = "0.12.3"

src/content_disposition.rs

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
use percent_encoding::percent_decode;
2+
3+
/// Parse filename from Content-Disposition header
4+
/// Prioritizes filename* parameter if present, otherwise uses filename parameter
5+
pub fn parse_filename_from_content_disposition(content_disposition: &str) -> Option<String> {
6+
let parts: Vec<&str> = content_disposition
7+
.split(';')
8+
.map(|part| part.trim())
9+
.collect();
10+
11+
// First try to find filename* parameter
12+
for part in parts.iter() {
13+
if part.starts_with("filename*=") {
14+
if let Some(filename) = parse_encoded_filename(part) {
15+
return Some(filename);
16+
}
17+
}
18+
}
19+
20+
// If filename* is not found or parsing failed, try regular filename parameter
21+
for part in parts {
22+
if part.starts_with("filename=") {
23+
return parse_regular_filename(part);
24+
}
25+
}
26+
27+
None
28+
}
29+
30+
/// Parses the value of a plain `filename=` parameter.
///
/// `part` is a single header parameter such as `filename="report.pdf"` or
/// `filename=report.pdf`. A well-formed quoted value has exactly one pair of
/// surrounding quotes removed and its backslash escapes (`\"`, `\\`) undone.
/// Any other value is returned with stray leading/trailing quotes trimmed.
///
/// Returns `None` when the resulting name is empty.
fn parse_regular_filename(part: &str) -> Option<String> {
    // `strip_prefix` removes the parameter name exactly once, whereas
    // `trim_start_matches` would also eat a value that begins with "filename=".
    let raw = part.strip_prefix("filename=").unwrap_or(part).trim();

    let is_quoted = raw.len() >= 2 && raw.starts_with('"') && raw.ends_with('"');
    let filename = if is_quoted {
        // Both delimiters are single-byte ASCII quotes, so slicing is safe.
        let inner = &raw[1..raw.len() - 1];
        let mut unescaped = String::with_capacity(inner.len());
        let mut chars = inner.chars();
        while let Some(ch) = chars.next() {
            if ch == '\\' {
                // A backslash stands for the character that follows it; a
                // dangling backslash at the very end is kept literally.
                unescaped.push(chars.next().unwrap_or('\\'));
            } else {
                unescaped.push(ch);
            }
        }
        unescaped
    } else {
        // Token form, or a malformed quoted value: stay lenient and just drop
        // any quotes at either end.
        raw.trim_matches('"').to_string()
    };

    if filename.is_empty() {
        None
    } else {
        Some(filename)
    }
}
43+
44+
/// Parse RFC 5987 encoded filename (filename*)
45+
/// Format: charset'language'encoded-value
46+
fn parse_encoded_filename(part: &str) -> Option<String> {
47+
// Remove "filename*=" prefix
48+
let content = part.trim_start_matches("filename*=");
49+
50+
// According to RFC 5987, format should be: charset'language'encoded-value
51+
let parts: Vec<&str> = content.splitn(3, '\'').collect();
52+
if parts.len() != 3 {
53+
return None;
54+
}
55+
56+
let encoded_filename = parts[2];
57+
58+
// Decode using percent-encoding
59+
let decoded = percent_decode(encoded_filename.as_bytes())
60+
.decode_utf8()
61+
.ok()?;
62+
63+
Some(decoded.into_owned())
64+
}
65+
66+
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that parsing `header` produces `expected` (`None` meaning no file name).
    fn check(header: &str, expected: Option<&str>) {
        let parsed = parse_filename_from_content_disposition(header);
        assert_eq!(parsed, expected.map(String::from));
    }

    /// A quoted plain `filename` is returned without its quotes.
    #[test]
    fn test_simple_filename() {
        check(r#"attachment; filename="example.pdf""#, Some("example.pdf"));
    }

    /// An unquoted plain `filename` is returned as-is.
    #[test]
    fn test_filename_without_quotes() {
        check("attachment; filename=example.pdf", Some("example.pdf"));
    }

    /// A percent-encoded UTF-8 `filename*` is decoded ("测试.pdf").
    #[test]
    fn test_encoded_filename() {
        check(
            "attachment; filename*=UTF-8''%E6%B5%8B%E8%AF%95.pdf",
            Some("测试.pdf"),
        );
    }

    /// When both parameters are present, `filename*` takes precedence.
    #[test]
    fn test_both_filenames() {
        check(
            r#"attachment; filename="fallback.pdf"; filename*=UTF-8''%E6%B5%8B%E8%AF%95.pdf"#,
            Some("测试.pdf"),
        );
    }

    /// A header without any filename parameter yields `None`.
    #[test]
    fn test_no_filename() {
        check("attachment", None);
    }
}

src/download.rs

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@ use std::io::{self, ErrorKind, IsTerminal};
33
use std::path::{Path, PathBuf};
44
use std::time::Instant;
55

6+
use crate::content_disposition;
7+
use crate::decoder::{decompress, get_compression_type};
8+
use crate::utils::{copy_largebuf, test_pretend_term, HeaderValueExt};
69
use anyhow::{anyhow, Context, Result};
710
use indicatif::{HumanBytes, ProgressBar, ProgressStyle};
811
use mime2ext::mime2ext;
@@ -13,9 +16,6 @@ use reqwest::{
1316
StatusCode,
1417
};
1518

16-
use crate::decoder::{decompress, get_compression_type};
17-
use crate::utils::{copy_largebuf, test_pretend_term, HeaderValueExt};
18-
1919
fn get_content_length(headers: &HeaderMap) -> Option<u64> {
2020
headers
2121
.get(CONTENT_LENGTH)
@@ -27,20 +27,12 @@ fn get_content_length(headers: &HeaderMap) -> Option<u64> {
2727
// of PathBufs
2828
fn get_file_name(response: &Response, orig_url: &reqwest::Url) -> String {
2929
fn from_header(response: &Response) -> Option<String> {
30-
let quoted = Regex::new("filename=\"([^\"]*)\"").unwrap();
31-
// Alternative form:
32-
let unquoted = Regex::new("filename=([^;=\"]*)").unwrap();
33-
// TODO: support "filename*" version
34-
3530
let header = response
3631
.headers()
3732
.get(CONTENT_DISPOSITION)?
3833
.to_utf8_str()
3934
.ok()?;
40-
let caps = quoted
41-
.captures(header)
42-
.or_else(|| unquoted.captures(header))?;
43-
Some(caps[1].to_string())
35+
content_disposition::parse_filename_from_content_disposition(header)
4436
}
4537

4638
fn from_url(url: &reqwest::Url) -> Option<String> {

src/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
mod auth;
33
mod buffer;
44
mod cli;
5+
mod content_disposition;
56
mod decoder;
67
mod download;
78
mod error_reporting;

tests/cases/download.rs

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,77 @@ fn download_supplied_unicode_filename() {
119119
);
120120
}
121121

122+
/// A download whose response carries only an RFC 5987 `filename*` parameter
/// (plain ASCII value) is saved under that name.
#[test]
fn download_support_filename_rfc_5987() {
    let workdir = tempdir().unwrap();
    let srv = server::http(|_req| async move {
        let builder = hyper::Response::builder().header(
            "Content-Disposition",
            r#"attachment; filename*=UTF-8''abcd1234.txt"#,
        );
        builder.body("file".into()).unwrap()
    });

    let url = srv.base_url();
    get_command()
        .args(["--download", &url])
        .current_dir(&workdir)
        .assert()
        .success();

    // The body must have been written to the name taken from the header.
    let saved = workdir.path().join("abcd1234.txt");
    assert_eq!(fs::read_to_string(saved).unwrap(), "file");
}
145+
/// A download whose `filename*` parameter is percent-encoded UTF-8 is saved
/// under the decoded name.
#[test]
fn download_support_filename_rfc_5987_percent_encoded() {
    let workdir = tempdir().unwrap();
    let srv = server::http(|_req| async move {
        let builder = hyper::Response::builder().header(
            "Content-Disposition",
            r#"attachment; filename*=UTF-8''%E6%B5%8B%E8%AF%95.txt"#,
        );
        builder.body("file".into()).unwrap()
    });

    let url = srv.base_url();
    get_command()
        .args(["--download", &url])
        .current_dir(&workdir)
        .assert()
        .success();

    // The file name on disk is the decoded form of the header value.
    let saved = workdir.path().join("测试.txt");
    assert_eq!(fs::read_to_string(saved).unwrap(), "file");
}
168+
169+
/// When the response offers both `filename` and `filename*`, the download is
/// saved under the decoded `filename*` value rather than the fallback.
#[test]
fn download_filename_star_with_high_priority() {
    let workdir = tempdir().unwrap();
    let srv = server::http(|_req| async move {
        let builder = hyper::Response::builder().header(
            "Content-Disposition",
            r#"attachment; filename="fallback.txt"; filename*=UTF-8''%E6%B5%8B%E8%AF%95.txt"#,
        );
        builder.body("file".into()).unwrap()
    });

    let url = srv.base_url();
    get_command()
        .args(["--download", &url])
        .current_dir(&workdir)
        .assert()
        .success();

    // Reading the decoded name succeeds only if `filename*` was preferred.
    let saved = workdir.path().join("测试.txt");
    assert_eq!(fs::read_to_string(saved).unwrap(), "file");
}
192+
122193
#[test]
123194
fn download_supplied_unquoted_filename() {
124195
let dir = tempdir().unwrap();

0 commit comments

Comments
 (0)