Skip to content

Commit cabcd93

Browse files
jonie001 and evelyn-ys authored
{Storage} Migrate az storage file upload/upload-batch to track2 sdk (#22803)
* migrate_storage_share_track2 * fix conflict * fix conflict * fix checks * fix checks * fix checks * Update src/azure-cli/azure/cli/command_modules/storage/_help.py Co-authored-by: Yishi Wang <[email protected]> * Update _transformers.py * fix conflict * fix changes * fix conflicts * fix conflicts * fix conflicts * fix conflicts * fix conflicts Co-authored-by: Yishi Wang <[email protected]>
1 parent abce081 commit cabcd93

File tree

7 files changed

+637
-446
lines changed

7 files changed

+637
-446
lines changed

src/azure-cli/azure/cli/command_modules/storage/_help.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1895,6 +1895,12 @@
18951895
crafted: true
18961896
"""
18971897

1898+
helps['storage file download'] = """
1899+
type: command
1900+
short-summary: Download a file to a file path, with automatic chunking and progress notifications.
1901+
long-summary: Return an instance of File with properties and metadata.
1902+
"""
1903+
18981904
helps['storage file download-batch'] = """
18991905
type: command
19001906
short-summary: Download files from an Azure Storage File Share to a local directory in a batch operation.

src/azure-cli/azure/cli/command_modules/storage/_params.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1952,27 +1952,35 @@ def load_arguments(self, _): # pylint: disable=too-many-locals, too-many-statem
19521952
c.extra('timeout', help='Request timeout in seconds. Applies to each call to the service.', type=int)
19531953

19541954
with self.argument_context('storage file upload') as c:
1955+
from ._validators import add_progress_callback_v2
19551956
t_file_content_settings = self.get_sdk('file.models#ContentSettings')
19561957

19571958
c.register_path_argument(default_file_param='local_file_path')
19581959
c.register_content_settings_argument(t_file_content_settings, update=False, guess_from_file='local_file_path')
1959-
c.argument('local_file_path', options_list='--source', type=file_type, completer=FilesCompleter())
1960-
c.extra('no_progress', progress_type)
1961-
c.argument('max_connections', type=int)
1960+
c.argument('local_file_path', options_list='--source', type=file_type, completer=FilesCompleter(),
1961+
help='Path of the local file to upload as the file content.')
1962+
c.extra('no_progress', progress_type, validator=add_progress_callback_v2)
1963+
c.argument('max_connections', type=int, help='Maximum number of parallel connections to use.')
1964+
c.extra('share_name', share_name_type, required=True)
1965+
c.argument('validate_content', action='store_true', min_api='2016-05-31',
1966+
help='If true, calculates an MD5 hash for each range of the file. The storage service checks the '
1967+
'hash of the content that has arrived with the hash that was sent. This is primarily valuable '
1968+
'for detecting bitflips on the wire if using http instead of https as https (the default) will '
1969+
'already validate. Note that this MD5 hash is not stored with the file.')
19621970

19631971
with self.argument_context('storage file url') as c:
19641972
c.register_path_argument(fileshare=True)
19651973
c.extra('share_name', share_name_type, required=True)
19661974
c.argument('protocol', arg_type=get_enum_type(['http', 'https'], 'https'), help='Protocol to use.')
19671975

19681976
with self.argument_context('storage file upload-batch') as c:
1969-
from ._validators import process_file_upload_batch_parameters
1977+
from ._validators import process_file_upload_batch_parameters, add_progress_callback_v2
19701978
c.argument('source', options_list=('--source', '-s'), validator=process_file_upload_batch_parameters)
19711979
c.argument('destination', options_list=('--destination', '-d'))
19721980
c.argument('max_connections', arg_group='Download Control', type=int)
19731981
c.argument('validate_content', action='store_true', min_api='2016-05-31')
19741982
c.register_content_settings_argument(t_file_content_settings, update=False, arg_group='Content Settings')
1975-
c.extra('no_progress', progress_type)
1983+
c.extra('no_progress', progress_type, validator=add_progress_callback_v2)
19761984

19771985
with self.argument_context('storage file download-batch') as c:
19781986
from ._validators import process_file_download_batch_parameters

src/azure-cli/azure/cli/command_modules/storage/_validators.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1141,6 +1141,28 @@ def _update_progress(current, total):
11411141
message = getattr(_update_progress, 'message', 'Alive')
11421142
reuse = getattr(_update_progress, 'reuse', False)
11431143

1144+
if total:
1145+
hook.add(message=message, value=current, total_val=total)
1146+
if total == current and not reuse:
1147+
hook.end()
1148+
hook = cmd.cli_ctx.get_progress_controller(det=True)
1149+
_update_progress.hook = hook
1150+
1151+
if not namespace.no_progress:
1152+
namespace.progress_callback = _update_progress
1153+
del namespace.no_progress
1154+
1155+
1156+
def add_progress_callback_v2(cmd, namespace):
1157+
def _update_progress(response):
1158+
if response.http_response.status_code not in [200, 201]:
1159+
return
1160+
1161+
message = getattr(_update_progress, 'message', 'Alive')
1162+
reuse = getattr(_update_progress, 'reuse', False)
1163+
current = response.context['upload_stream_current']
1164+
total = response.context['data_stream_total']
1165+
11441166
if total:
11451167
hook.add(message=message, value=current, total_val=total)
11461168
if total == current and not reuse:
@@ -1281,6 +1303,7 @@ def process_file_upload_batch_parameters(cmd, namespace):
12811303
namespace.account_name = identifier.account_name
12821304

12831305
namespace.source = os.path.realpath(namespace.source)
1306+
namespace.share_name = namespace.destination
12841307

12851308

12861309
def process_file_download_batch_parameters(cmd, namespace):

src/azure-cli/azure/cli/command_modules/storage/commands.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -672,8 +672,6 @@ def get_custom_sdk(custom_module, client_factory, resource_type=ResourceType.DAT
672672
from ._format import transform_boolean_for_table, transform_file_output
673673
from ._exception_handler import file_related_exception_handler
674674
g.storage_command('download', 'get_file_to_path', exception_handler=file_related_exception_handler)
675-
g.storage_command('upload', 'create_file_from_path', exception_handler=file_related_exception_handler)
676-
g.storage_custom_command('upload-batch', 'storage_file_upload_batch')
677675
g.storage_custom_command(
678676
'download-batch', 'storage_file_download_batch')
679677
g.storage_custom_command('delete-batch', 'storage_file_delete_batch')
@@ -703,6 +701,10 @@ def get_custom_sdk(custom_module, client_factory, resource_type=ResourceType.DAT
703701
g.storage_command('copy cancel', 'abort_copy')
704702
g.storage_custom_command('copy start-batch', 'storage_file_copy_batch', client_factory=cf_share_client)
705703

704+
g.storage_custom_command('upload', 'storage_file_upload', exception_handler=file_related_exception_handler)
705+
g.storage_custom_command('upload-batch', 'storage_file_upload_batch',
706+
custom_command_type=get_custom_sdk('file', client_factory=cf_share_client))
707+
706708
with self.command_group('storage cors', get_custom_sdk('cors', multi_service_properties_factory)) as g:
707709
from ._transformers import transform_cors_list_output
708710

src/azure-cli/azure/cli/command_modules/storage/operations/file.py

Lines changed: 44 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,30 @@ def list_share_files(cmd, client, directory_name=None, timeout=None, exclude_dir
137137
return results
138138

139139

140+
def storage_file_upload(client, local_file_path, content_settings=None,
                        metadata=None, validate_content=False, progress_callback=None, max_connections=2, timeout=None):
    """Upload a local file to an Azure file share using the track2 ShareFileClient.

    :param client: ShareFileClient addressing the destination file.
    :param local_file_path: Path of the local file whose contents are uploaded.
    :param content_settings: Optional ContentSettings to set on the file.
    :param metadata: Optional name/value metadata pairs for the file.
    :param validate_content: If True, an MD5 hash is computed for each range and
        verified by the storage service.
    :param progress_callback: Optional callable installed as the SDK's
        ``raw_response_hook`` to report upload progress.
    :param max_connections: Maximum number of parallel connections
        (mapped to the SDK's ``max_concurrency``).
    :param timeout: Server-side timeout in seconds applied to each service call.
    :return: The properties dict returned by ``upload_file``; a bytearray
        ``content_md5`` entry is rendered as concatenated ``hex()`` values.
    """
    options = dict(content_settings=content_settings,
                   metadata=metadata,
                   validate_content=validate_content,
                   max_concurrency=max_connections,
                   timeout=timeout)
    if progress_callback:
        options['raw_response_hook'] = progress_callback

    # Stream the file instead of reading it into memory: the upload may be
    # arbitrarily large, and upload_file() consumes the stream in batches,
    # which avoids OOM on big files.
    size = os.path.getsize(local_file_path)
    with open(local_file_path, 'rb') as data:
        result = client.upload_file(data=data, length=size, **options)

    # Render a raw MD5 bytearray as a printable string for CLI output.
    if 'content_md5' in result and isinstance(result['content_md5'], bytearray):
        result['content_md5'] = ''.join(hex(byte) for byte in result['content_md5'])

    return result
162+
163+
140164
def storage_file_upload_batch(cmd, client, destination, source, destination_path=None, pattern=None, dryrun=False,
141165
validate_content=False, content_settings=None, max_connections=1, metadata=None,
142166
progress_callback=None):
@@ -145,37 +169,41 @@ def storage_file_upload_batch(cmd, client, destination, source, destination_path
145169
from azure.cli.command_modules.storage.util import glob_files_locally, normalize_blob_file_path
146170

147171
source_files = list(glob_files_locally(source, pattern))
148-
settings_class = cmd.get_models('file.models#ContentSettings')
172+
settings_class = cmd.get_models('_models#ContentSettings')
149173

150174
if dryrun:
151175
logger.info('upload files to file share')
152176
logger.info(' account %s', client.account_name)
153177
logger.info(' share %s', destination)
154178
logger.info(' total %d', len(source_files))
155-
return [{'File': client.make_file_url(destination, os.path.dirname(dst) or None, os.path.basename(dst)),
179+
dst = None
180+
kwargs = {
181+
'dir_name': os.path.dirname(dst),
182+
'file_name': os.path.basename(dst)
183+
}
184+
185+
return [{'File': create_file_url(client, **kwargs),
156186
'Type': guess_content_type(src, content_settings, settings_class).content_type} for src, dst in
157187
source_files]
158188

159189
# TODO: Performance improvement
160190
# 1. Upload files in parallel
161-
def _upload_action(src, dst):
162-
dst = normalize_blob_file_path(destination_path, dst)
163-
dir_name = os.path.dirname(dst)
164-
file_name = os.path.basename(dst)
191+
def _upload_action(src, dst2):
192+
dst2 = normalize_blob_file_path(destination_path, dst2)
193+
dir_name = os.path.dirname(dst2)
194+
file_name = os.path.basename(dst2)
165195

166-
_make_directory_in_files_share(client, destination, dir_name)
167-
create_file_args = {'share_name': destination, 'directory_name': dir_name, 'file_name': file_name,
168-
'local_file_path': src, 'progress_callback': progress_callback,
169-
'content_settings': guess_content_type(src, content_settings, settings_class),
170-
'metadata': metadata, 'max_connections': max_connections}
171-
172-
if cmd.supported_api_version(min_api='2016-05-31'):
173-
create_file_args['validate_content'] = validate_content
196+
_make_directory_in_files_share(client, destination, dir_name, V2=True)
174197

175198
logger.warning('uploading %s', src)
176-
client.create_file_from_path(**create_file_args)
199+
storage_file_upload(client.get_file_client(dst2), src, content_settings, metadata, validate_content,
200+
progress_callback, max_connections)
177201

178-
return client.make_file_url(destination, dir_name, file_name)
202+
args = {
203+
'dir_name': dir_name,
204+
'file_name': file_name
205+
}
206+
return create_file_url(client, **args)
179207

180208
return list(_upload_action(src, dst) for src, dst in source_files)
181209

@@ -384,7 +412,6 @@ def _create_file_and_directory_from_file(cmd, file_service, source_file_service,
384412
def _make_directory_in_files_share(file_service, file_share, directory_path, existing_dirs=None, V2=False):
385413
"""
386414
Create directories recursively.
387-
388415
This method accept a existing_dirs set which serves as the cache of existing directory. If the
389416
parameter is given, the method will search the set first to avoid repeatedly create directory
390417
which already exists.

0 commit comments

Comments
 (0)