Skip to content

Commit 3387a72

Browse files
authored
qcow: Only process allocated clusters on export from raw (#6769)
Implements an optimization similar to the "hybrid" mode in `vhd-tool`: when exporting to qcow from raw, if the VDI is backed by a QCOW file, read its header, determine the allocated clusters, and only export these. This allows skipping over zero clusters in a sparse disk. Unlike `vhd-tool`, however, this is implemented in a modular way - `qcow-stream-tool` gets a new `read_headers` command that outputs the list of allocated clusters (and other info) in JSON format, which allows it to be consumed by the Python `qcow-to-stdout` script (and by `vhd-tool` in future stages of this work, see below). This is the first step of improving handling of sparse VDIs in xapi. I've got the rest working, but I'll be opening PRs step-by-step for the following once this PR gets merged: 1. `vhd-tool` gets a `read_headers` command outputting list of allocated blocks as well 2. `stream_vdi` uses `read_headers` for both VHD and QCOW to avoid reading zero blocks on XVA export (greatly speeds up handling of sparse disks and avoids issues with timeouts) 3. `vhd-tool` and `qcow-to-stdout` can read headers of the opposite format, allowing faster export of sparse VDIs backed by a different format. Best reviewed by commit.
2 parents fb2a1ab + 8914076 commit 3387a72

File tree

6 files changed

+256
-46
lines changed

6 files changed

+256
-46
lines changed

ocaml/qcow-stream-tool/dune

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,9 @@
77
qcow-stream
88
cmdliner
99
unix
10+
lwt.unix
11+
lwt
12+
qcow-types
13+
yojson
1014
)
1115
)
Lines changed: 62 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,53 @@
1+
open Cmdliner
2+
13
module Impl = struct
24
let stream_decode output =
35
Qcow_stream.stream_decode Unix.stdin output ;
46
`Ok ()
7+
8+
let read_headers qcow_path =
9+
let open Lwt.Syntax in
10+
let t =
11+
let* fd = Lwt_unix.openfile qcow_path [Unix.O_RDONLY] 0 in
12+
let* virtual_size, cluster_bits, _, data_cluster_map =
13+
Qcow_stream.start_stream_decode fd
14+
in
15+
let clusters = Qcow_types.Cluster.Map.bindings data_cluster_map in
16+
let clusters =
17+
List.map
18+
(fun (_, virt_address) ->
19+
let ( >> ) = Int64.shift_right_logical in
20+
let address =
21+
Int64.to_int (virt_address >> Int32.to_int cluster_bits)
22+
in
23+
`Int address
24+
)
25+
clusters
26+
in
27+
let json =
28+
`Assoc
29+
[
30+
("virtual_size", `Int (Int64.to_int virtual_size))
31+
; ("cluster_bits", `Int (Int32.to_int cluster_bits))
32+
; ("data_clusters", `List clusters)
33+
]
34+
in
35+
let json_string = Yojson.to_string json in
36+
let* () = Lwt_io.print json_string in
37+
let* () = Lwt_io.flush Lwt_io.stdout in
38+
Lwt.return_unit
39+
in
40+
Lwt_main.run t ; `Ok ()
541
end
642

743
module Cli = struct
8-
open Cmdliner
44+
let output default =
45+
let doc = Printf.sprintf "Path to the output file." in
46+
Arg.(value & pos 0 string default & info [] ~doc)
47+
48+
let input =
49+
let doc = Printf.sprintf "Path to the input file." in
50+
Arg.(required & pos 0 (some string) None & info [] ~doc)
951

1052
let stream_decode_cmd =
1153
let doc = "decode qcow2 formatted data from stdin and write a raw image" in
@@ -15,15 +57,28 @@ module Cli = struct
1557
; `P "Decode qcow2 formatted data from stdin and write to a raw file."
1658
]
1759
in
18-
let output default =
19-
let doc = Printf.sprintf "Path to the output file." in
20-
Arg.(value & pos 0 string default & info [] ~doc)
21-
in
2260
Cmd.v
2361
(Cmd.info "stream_decode" ~doc ~man)
2462
Term.(ret (const Impl.stream_decode $ output "test.raw"))
2563

26-
let main () = Cmd.eval stream_decode_cmd
64+
let read_headers_cmd =
65+
let doc =
66+
"Determine allocated clusters by parsing qcow2 file at the provided \
67+
path. Returns JSON like the following: {'virtual_size': X, \
68+
'cluster_bits': Y, 'data_clusters': [1,2,3]}"
69+
in
70+
let man = [`S "DESCRIPTION"; `P doc] in
71+
Cmd.v
72+
(Cmd.info "read_headers" ~doc ~man)
73+
Term.(ret (const Impl.read_headers $ input))
74+
75+
let cmds = [stream_decode_cmd; read_headers_cmd]
2776
end
2877

29-
let () = exit (Cli.main ())
78+
let info =
79+
let doc = "minimal CLI for qcow-stream" in
80+
Cmd.info "qcow-stream-tool" ~version:"1.0.0" ~doc
81+
82+
let () =
83+
let cmd = Cmd.group info Cli.cmds in
84+
exit (Cmd.eval cmd)

ocaml/xapi/qcow_tool_wrapper.ml

Lines changed: 63 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,15 @@ module D = Debug.Make (struct let name = __MODULE__ end)
1616

1717
open D
1818

19-
let run_qcow_tool qcow_tool ?input_fd ?output_fd (_progress_cb : int -> unit)
20-
(args : string list) =
19+
let run_qcow_tool qcow_tool ?(replace_fds = []) ?input_fd ?output_fd
20+
(_progress_cb : int -> unit) (args : string list) =
2121
info "Executing %s %s" qcow_tool (String.concat " " args) ;
2222
let open Forkhelpers in
2323
match
2424
with_logfile_fd "qcow-tool" (fun log_fd ->
2525
let pid =
26-
safe_close_and_exec input_fd output_fd (Some log_fd) [] qcow_tool args
26+
safe_close_and_exec input_fd output_fd (Some log_fd) replace_fds
27+
qcow_tool args
2728
in
2829
let _, status = waitpid pid in
2930
if status <> Unix.WEXITED 0 then (
@@ -46,14 +47,70 @@ let update_task_progress (__context : Context.t) (x : int) =
4647

4748
let receive (progress_cb : int -> unit) (unix_fd : Unix.file_descr)
4849
(path : string) =
49-
let args = [path] in
50+
let args = ["stream_decode"; path] in
5051
let qcow_tool = !Xapi_globs.qcow_stream_tool in
5152
run_qcow_tool qcow_tool progress_cb args ~input_fd:unix_fd
5253

54+
let read_header qcow_path =
55+
let args = ["read_headers"; qcow_path] in
56+
let qcow_tool = !Xapi_globs.qcow_stream_tool in
57+
let pipe_reader, pipe_writer = Unix.pipe ~cloexec:true () in
58+
59+
let progress_cb _ = () in
60+
Xapi_stdext_pervasives.Pervasiveext.finally
61+
(fun () -> run_qcow_tool qcow_tool progress_cb args ~output_fd:pipe_writer)
62+
(fun () -> Unix.close pipe_writer) ;
63+
pipe_reader
64+
65+
let parse_header qcow_path =
66+
let pipe_reader = read_header qcow_path in
67+
let ic = Unix.in_channel_of_descr pipe_reader in
68+
let buf = Buffer.create 4096 in
69+
let json = Yojson.Basic.from_channel ~buf ~fname:"qcow_header.json" ic in
70+
In_channel.close ic ;
71+
let cluster_size =
72+
1 lsl Yojson.Basic.Util.(member "cluster_bits" json |> to_int)
73+
in
74+
let cluster_list =
75+
Yojson.Basic.Util.(member "data_clusters" json |> to_list |> List.map to_int)
76+
in
77+
(cluster_size, cluster_list)
78+
5379
let send ?relative_to (progress_cb : int -> unit) (unix_fd : Unix.file_descr)
5480
(path : string) (_size : Int64.t) =
81+
let qcow_of_device =
82+
Vhd_tool_wrapper.backing_file_of_device ~driver:"qcow2"
83+
in
84+
let qcow_path = qcow_of_device path in
85+
86+
(* If VDI is backed by QCOW, parse the header to determine nonzero clusters
87+
to avoid reading all of the raw disk *)
88+
let input_fd = Option.map read_header qcow_path in
89+
90+
(* Parse the header of the VDI we are diffing against as well *)
91+
let relative_to_qcow_path = Option.bind relative_to qcow_of_device in
92+
let diff_fd = Option.map read_header relative_to_qcow_path in
93+
94+
let unique_string = Uuidx.(to_string (make ())) in
5595
let args =
56-
[path] @ match relative_to with None -> [] | Some vdi -> ["--diff"; vdi]
96+
[path]
97+
@ (match relative_to with None -> [] | Some vdi -> ["--diff"; vdi])
98+
@ ( match relative_to_qcow_path with
99+
| None ->
100+
[]
101+
| Some _ ->
102+
["--json-header-diff"; unique_string]
103+
)
104+
@ match qcow_path with None -> [] | Some _ -> ["--json-header"]
57105
in
58106
let qcow_tool = !Xapi_globs.qcow_to_stdout in
59-
run_qcow_tool qcow_tool progress_cb args ~output_fd:unix_fd
107+
let replace_fds = Option.map (fun fd -> [(unique_string, fd)]) diff_fd in
108+
Xapi_stdext_pervasives.Pervasiveext.finally
109+
(fun () ->
110+
run_qcow_tool qcow_tool progress_cb args ?input_fd ~output_fd:unix_fd
111+
?replace_fds
112+
)
113+
(fun () ->
114+
Option.iter Unix.close input_fd ;
115+
Option.iter Unix.close diff_fd
116+
)

ocaml/xapi/qcow_tool_wrapper.mli

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,5 @@ val send :
2323
-> string
2424
-> int64
2525
-> unit
26+
27+
val parse_header : string -> int * int list

ocaml/xapi/vhd_tool_wrapper.ml

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -149,25 +149,27 @@ let find_backend_device path =
149149
raise Not_found
150150
with _ -> None
151151

152-
(** [vhd_of_device path] returns (Some vhd) where 'vhd' is the vhd leaf backing a particular device [path] or None.
153-
[path] may either be a blktap2 device *or* a blkfront device backed by a blktap2 device. If the latter then
154-
the script must be run in the same domain as blkback. *)
155-
let vhd_of_device path =
152+
(** [backing_file_of_device path] returns (Some backing_file) where 'backing_file'
153+
is the leaf backing a particular device [path] (with a driver of type
154+
[driver] or None. [path] may either be a blktap2 device *or* a blkfront
155+
device backed by a blktap2 device. If the latter then the script must be
156+
run in the same domain as blkback. *)
157+
let backing_file_of_device path ~driver =
156158
let tapdisk_of_path path =
157159
try
158160
match Tapctl.of_device (Tapctl.create ()) path with
159-
| _, _, Some ("vhd", vhd) ->
160-
Some vhd
161+
| _, _, Some (typ, backing_file) when typ = driver ->
162+
Some backing_file
161163
| _, _, _ ->
162164
raise Not_found
163165
with
164166
| Tapctl.Not_blktap -> (
165167
debug "Device %s is not controlled by blktap" path ;
166-
(* Check if it is a VHD behind a NBD deivce *)
168+
(* Check if it is a [driver] behind a NBD device *)
167169
Stream_vdi.(get_nbd_device path |> image_behind_nbd_device) |> function
168-
| Some ("vhd", vhd) ->
169-
debug "%s is a VHD behind NBD device %s" vhd path ;
170-
Some vhd
170+
| Some (typ, backing_file) when typ = driver ->
171+
debug "%s is a %s behind NBD device %s" backing_file driver path ;
172+
Some backing_file
171173
| _ ->
172174
None
173175
)
@@ -182,6 +184,7 @@ let vhd_of_device path =
182184

183185
let send progress_cb ?relative_to (protocol : string) (dest_format : string)
184186
(s : Unix.file_descr) (path : string) (size : Int64.t) (prefix : string) =
187+
let vhd_of_device = backing_file_of_device ~driver:"vhd" in
185188
let s' = Uuidx.(to_string (make ())) in
186189
let source_format, source =
187190
match (Stream_vdi.get_nbd_device path, vhd_of_device path, relative_to) with

0 commit comments

Comments
 (0)