1515from .hdf5dtype import getTypeItem , createDataType , Reference , special_dtype
1616from .array_util import jsonToArray , bytesArrayToList
1717from .dset_util import resize_dataset
18+ from .shape_util import getShapeClass , getShapeDims
1819from .filters import getFiltersJson
1920from .objid import createObjId , getCollectionForId , isValidUuid , getUuidFromId , getHashTagForId
2021from . import selections
2425from .h5writer import H5Writer , H5NullWriter
2526
2627
28+ def _getDatasetUpdates (dset_json ):
29+ """ return a list of value updates for the datset.
30+ initalize one if not already present. """
31+ if "updates" not in dset_json :
32+ dset_json ["updates" ] = []
33+ return dset_json ["updates" ]
34+
35+
2736class Hdf5db :
2837 """
2938 This class is used to manage id lookup tables for primary HDF objects (Groups, Datasets,
@@ -109,10 +118,12 @@ def root_id(self):
109118
def is_new(self, obj_id):
    """ return true if this is a new object (has not been persisted) """
    # membership is tested with the value returned by getHashTagForId,
    # not with the raw obj_id that was passed in
    tag = getHashTagForId(obj_id)
    return tag in self._new_objects
113123
def is_dirty(self, obj_id):
    """ return true if this object has been modified

    An object reported as new by is_new() is always considered dirty.
    """
    tag = getHashTagForId(obj_id)
    # new objects count as dirty; otherwise consult the dirty set
    return self.is_new(tag) or tag in self._dirty_objects
@@ -131,7 +142,7 @@ def deleted_objects(self):
131142
132143 def make_dirty (self , obj_id ):
133144 """ Mark the object as dirty and update the lastModified timestamp """
134-
145+ obj_id = getHashTagForId ( obj_id )
135146 if obj_id not in self .db :
136147 self .log .error ("make dirty called on deleted object" )
137148 raise KeyError (f"obj_id: { obj_id } not found" )
@@ -236,8 +247,8 @@ def close(self):
236247 """ close reader and writer handles """
237248 self .log .info ("Hdf5db __close" )
238249
239- self .flush ()
240- if self .writer :
250+ if self .writer and not isinstance ( self . writer , H5NullWriter ):
251+ self .flush ()
241252 self .writer .close ()
242253 if self .reader :
243254 self .reader .close ()
@@ -280,13 +291,13 @@ def _checkWriter(self):
def getObjectById(self, obj_id, refresh=False):
    """ return object with given id

    The object is read from the reader when its key is not already in
    self.db, or when refresh is set; the value read is stored in self.db.
    The entry held in self.db is what gets returned.
    """
    self._checkReader()
    key = getHashTagForId(obj_id)
    if refresh or key not in self.db:
        # load the obj from the reader
        self.log.debug(f"getObjectById - fetching {key} from reader")
        self.db[key] = self.reader.getObjectById(key)
    return self.db[key]
292303
@@ -299,6 +310,9 @@ def getObjectIdByPath(self, h5path, parent_id=None):
299310
300311 if parent_id is None :
301312 parent_id = self .root_id
313+ else :
314+ parent_id = getHashTagForId (parent_id )
315+
302316 self .log .debug (f"getObjectIdDByPath(h5path: { h5path } parent_id: { parent_id } " )
303317
304318 obj_json = self .getObjectById (parent_id )
@@ -359,7 +373,7 @@ def getObjectByPath(self, path):
359373 return obj_json
360374
361375 def getDtype (self , obj_json ):
362- """ Return numpy data type for given object id
376+ """ Return numpy data type for given dataset, datatype, or attribute
363377 """
364378
365379 if "type" not in obj_json :
@@ -546,81 +560,99 @@ def getDatasetValues(self, dset_id, sel):
546560 If a slices list or tuple is provided, it should have the same
547561 number of elements as the rank of the dataset.
548562 """
563+
564+ def init_arr (dtype , cpl ):
565+ """ create an ndarray with the given shape, dtype and fill_value
566+ (if the latter is found in the creation properties list) """
567+ arr_shape = sel .count if isinstance (sel .count , tuple ) else (sel .count , )
568+ arr = np .zeros (arr_shape , dtype = dtype )
569+ if "fillValue" in cpl :
570+ fillValue = cpl ["fillValue" ]
571+ # TBD: fix for compound types
572+ arr [...] = fillValue
573+ return arr
574+
575+ dset_id = getHashTagForId (dset_id )
549576 self .log .info (f"getDatasetValues dset_id: { dset_id } , sel: { sel } " )
550577
551- self ._checkReader ()
552578 dset_json = self .getObjectById (dset_id )
553579 shape_json = dset_json ["shape" ]
554580 if not isinstance (sel , selections .Selection ):
555581 raise TypeError ("Expected Selection class" )
556582
557- if shape_json ["class" ] == "H5S_NULL" :
558- return None
559-
560- if shape_json ["class" ] == "H5S_SCALAR" :
561- if sel .select_type != selections .H5S_SELECT_ALL :
562- # TBD: support other selection types
563- raise ValueError ("Only SELECT_ALL selections are supported for scalar datasets" )
564- if sel .shape != ():
565- raise ValueError ("Selection shape does not match dataset shape" )
566- rank = 0
567- else :
568- dims = tuple (shape_json ["dims" ])
569- if sel .shape != dims :
570- raise ValueError ("Selection shape does not match dataset shape" )
571- rank = len (dims )
572-
573583 dtype = self .getDtype (dset_json )
574584
575585 if "creationProperties" in dset_json :
576586 cpl = dset_json ["creationProperties" ]
577587 else :
578588 cpl = {}
579589
580- # determine if we need to make a read request or not
581- if dset_id in self ._new_objects :
590+ updates = _getDatasetUpdates (dset_json )
591+
592+ shape_class = getShapeClass (shape_json )
593+
594+ if shape_class == "H5S_NULL" :
595+ # return None for selections on null space
596+ return None
597+
598+ if sel .shape != getShapeDims (shape_json ):
599+ raise ValueError ("Selection shape does not match dataset shape" )
600+
601+ if shape_class == "H5S_SCALAR" :
602+ if sel .select_type != selections .H5S_SELECT_ALL :
603+ # TBD: support other selection types
604+ raise ValueError ("Only SELECT_ALL selections are supported for scalar datasets" )
605+ if sel .shape != ():
606+ raise ValueError ("Selection shape does not match dataset shape" )
607+ if updates :
608+ # for scalars the update has to be the requested value
609+ (update_sel , arr ) = updates [- 1 ]
610+ elif dset_id in self ._new_objects :
611+ arr = init_arr (dtype , cpl )
612+ else :
613+ # fetch from the server
614+ arr = self .reader .getDatasetValues (dset_id , sel , dtype = dtype )
615+ if arr is None :
616+ raise KeyError (f"Data for dataset { dset_id } not returned" )
617+ # done with NULL and SCALAR cases
618+ return arr
619+
620+ # simple dataset
621+ arr = None
622+ fetch = True
623+
624+ # determine if we need to get data from the reader
625+ if isinstance (self ._reader , H5NullReader ) or dset_id in self ._new_objects :
582626 fetch = False
583627 else :
584- fetch = True
585- # check against pending updates
586- if "updates" in dset_json :
587- updates = dset_json ["updates" ]
588- for (update_sel , update_val ) in updates :
589- if selections .contained (sel , update_sel ):
590- fetch = False
591- break
592-
593- # send a reader request unless an update already covers the sel area
594- if fetch :
595- arr = self .reader .getDatasetValues (dset_id , sel , dtype = dtype )
596- else :
597- if "fillValue" in cpl :
598- fillValue = cpl ["fillValue" ]
599- # TBD: fix for compound types
600- arr = np .zeros (sel .mshape , dtype = dtype )
601- arr [...] = fillValue
602- else :
603- arr = np .zeros (sel .mshape , dtype = dtype )
604-
605- if "updates" in dset_json :
606- # apply any non-flushed changes that intersect the current selection
607- updates = dset_json ["updates" ]
608628 for (update_sel , update_val ) in updates :
609629 sel_inter = selections .intersect (sel , update_sel )
610630 if sel_inter .nselect == 0 :
611631 continue
612- # update portion of arr, that intersects update_val
613- slices = []
614- for dim in range (rank ):
615- start = sel_inter .start [dim ] - sel .start [dim ]
616- stop = start + sel_inter .count [dim ]
617- slices .append (slice (start , stop , 1 ))
618- slices = tuple (slices )
619- # TBD: needs updating to work in the general case!
620- if slices == ():
621- arr [slices ] = update_val [slices ]
622- else :
623- arr [slices ] = update_val
632+ if selections .contained (sel , update_sel ):
633+ # desired selection is wholly contained in this update
634+ # TBD: determine if multiple updates would contain all the
635+ # required elements
636+ fetch = False
637+ break
638+ if fetch :
639+ # get last saved version of the data from the reader
640+ arr = self .reader .getDatasetValues (dset_id , sel , dtype = dtype )
641+ else :
642+ # initialize an array with fill value if given
643+ arr = init_arr (dtype , cpl )
644+
645+ # apply any updates that impact this selection
646+ for (update_sel , update_val ) in updates :
647+ # get the part of the update that is in common with the requested selection
648+ x_sel = selections .intersect (sel , update_sel )
649+ if x_sel .nselect == 0 :
650+ # this update doesn't affect the selection, so ignore
651+ continue
652+ # apply the update to the array to be returned
653+ src_sel = selections .translate (update_sel , x_sel )
654+ tgt_sel = selections .translate (sel , x_sel )
655+ arr [tgt_sel .slices ] = update_val [src_sel .slices ]
624656
625657 return arr
626658
@@ -641,22 +673,32 @@ def setDatasetValues(self, dset_id, sel, arr):
641673 src_dt = arr .dtype
642674 if src_dt != tgt_dt :
643675 raise TypeError ("arr.dtype doesn't match dataset dtype" )
644-
645- if shape_json [ "class" ] == "H5S_NULL" :
676+ shape_class = getShapeClass ( shape_json )
677+ if shape_class == "H5S_NULL" :
646678 raise ValueError ("writing to null space dataset not supported" )
647- if shape_json [ "class" ] == "H5S_SCALAR" :
679+ if shape_class == "H5S_SCALAR" :
648680 if sel .shape != ():
649681 raise ValueError ("Selection shape does not match dataset shape" )
650682 if len (arr .shape ) > 0 :
651683 raise TypeError ("Expected scalar ndarray for scalar dataset" )
652684 else :
653- dims = tuple (shape_json [ "dims" ] )
685+ dims = getShapeDims (shape_json )
654686 if sel .shape != dims :
655687 raise ValueError ("Selection shape does not match dataset shape" )
656- if "updates" not in dset_json or sel .select_type == selections .H5S_SELECT_ALL :
688+ updates = _getDatasetUpdates (dset_json )
689+ if sel .select_type == selections .H5S_SELECT_ALL :
657690 # for select all, throw out any existing updates since this will overwrite them
658- dset_json ["updates" ] = []
659- updates = dset_json ["updates" ]
691+ updates .clear ()
692+ arr = arr .copy () # make a copy in case the client updates it later
693+ rank = len (sel .shape )
694+ if len (arr .shape ) < rank :
695+ # reshape to keep compatibility with dataset rank
696+ if sel .select_type == selections .H5S_SELECT_ALL :
697+ # this should not result in a dimension reduction
698+ raise ValueError ("unexpected selection shape" )
699+ if sel .select_type != selections .H5S_SELECT_HYPERSLABS :
700+ raise ValueError ("tbd" )
701+ arr = arr .reshape (sel .mshape )
660702 updates .append ((sel , arr .copy ()))
661703 self .make_dirty (dset_id )
662704
0 commit comments