Skip to content

Commit 013c9c3

Browse files
authored
Update topo_scale.py
More informative error message: print the missing timestamps when a KeyError is raised while selecting the requested time steps from the climate data (both plev and surf).
1 parent d0db42e commit 013c9c3

File tree

1 file changed

+54
-2
lines changed

1 file changed

+54
-2
lines changed

TopoPyScale/topo_scale.py

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,34 @@ def _subset_climate_dataset(ds_, row, type='plev'):
442442
ds_ = None
443443
ds_tmp = None
444444

445-
ds_plev = _open_dataset_climate(flist_PLEV).sel(time=tvec.values)
445+
446+
try:
    # Select exactly the requested timestamps from the pressure-level files.
    ds_plev = _open_dataset_climate(flist_PLEV).sel(time=tvec.values)
except KeyError:
    # The label-based selection failed: reopen the files without the time
    # selection and report which requested timestamps have no counterpart.
    # NOTE(review): the KeyError is not re-raised, so execution continues with
    # ds_plev holding the dataset WITHOUT the time selection applied - confirm
    # this is intended.
    ds_plev = _open_dataset_climate(flist_PLEV)
    available_times = pd.to_datetime(ds_plev.time.values)
    requested_times = pd.to_datetime(tvec.values)

    # A requested timestamp counts as missing when even the closest available
    # timestamp is more than one minute away from it.
    tolerance = pd.Timedelta('1min')
    missing_times = [
        wanted
        for wanted in requested_times
        if abs(available_times - wanted).min() > tolerance
    ]

    if missing_times:
        # Only the first 10 missing timestamps are listed; the rest are counted.
        missing_str = ', '.join(stamp.strftime('%Y-%m-%d %H:%M') for stamp in missing_times[:10])
        if len(missing_times) > 10:
            missing_str += f" ... and {len(missing_times) - 10} more"

        print(f"WARNING: {len(missing_times)} timesteps are missing from the climate data:")
        print(f"Available time range: {available_times.min()} to {available_times.max()}")
        print(f"Requested time range: {requested_times.min()} to {requested_times.max()}")
        print(f"Missing timesteps: {missing_str}")
471+
472+
446473
# to avoid chunk warning
447474

448475
#with dask.config.set(**{'array.slicing.split_large_chunks': True}):
@@ -469,7 +496,32 @@ def _subset_climate_dataset(ds_, row, type='plev'):
469496
tu.multithread_pooling(_subset_climate_dataset, fun_param, n_threads=n_core)
470497
fun_param = None
471498
ds_plev = None
472-
ds_surf = _open_dataset_climate(flist_SURF).sel(time=tvec.values)
499+
try:
    # Select exactly the requested timestamps from the surface files.
    # The result is bound to ds_surf (not ds_plev): the statements that follow
    # this block read ds_surf, and ds_plev has just been released.
    ds_surf = _open_dataset_climate(flist_SURF).sel(time=tvec.values)
except KeyError:
    # The label-based selection failed: reopen the files without the time
    # selection and report which requested timestamps have no counterpart.
    # NOTE(review): the KeyError is not re-raised, so execution continues with
    # ds_surf holding the dataset WITHOUT the time selection applied - confirm
    # this is intended.
    ds_surf = _open_dataset_climate(flist_SURF)
    available_times = pd.to_datetime(ds_surf.time.values)
    requested_times = pd.to_datetime(tvec.values)

    # Find missing timesteps: a requested timestamp counts as missing when
    # even the closest available timestamp is more than one minute away.
    missing_times = []
    for req_time in requested_times:
        time_diffs = abs(available_times - req_time)
        if time_diffs.min() > pd.Timedelta('1min'):
            missing_times.append(req_time)

    if missing_times:
        # Only the first 10 missing timestamps are listed; the rest are counted.
        missing_str = ', '.join([t.strftime('%Y-%m-%d %H:%M') for t in missing_times[:10]])
        if len(missing_times) > 10:
            missing_str += f" ... and {len(missing_times) - 10} more"

        print(f"WARNING: {len(missing_times)} timesteps are missing from the climate data:")
        print(f"Available time range: {available_times.min()} to {available_times.max()}")
        print(f"Requested time range: {requested_times.min()} to {requested_times.max()}")
        print(f"Missing timesteps: {missing_str}")
524+
473525
ds_list = []
474526
for _, _ in df_centroids.iterrows():
475527
ds_list.append(ds_surf)

0 commit comments

Comments
 (0)