pyhdf5_handler.src.hdf5_handler
from __future__ import annotations

import os
import h5py
import numpy as np
import numbers
import pandas as pd
import datetime
import time
import importlib

from ..src import object_handler
from ..src import constant

import gc
import re


def close_all_hdf5_file():
    """
    Close all hdf5 files opened in the current session.

    Scans every object tracked by the garbage collector and closes each
    h5py.File instance found; files already closed are ignored.
    """

    for obj in gc.get_objects():  # Browse through ALL objects
        if isinstance(obj, h5py.File):  # Just HDF5 files
            try:
                print(f"try closing {obj}")
                obj.close()
            except Exception:
                # Narrowed from a bare `except:` so KeyboardInterrupt /
                # SystemExit are not swallowed. A failing close simply
                # means the file was already closed.
                pass


def open_hdf5(path, read_only=False, replace=False, wait_time=0):
    """
    Open or create an HDF5 file.

    Parameters
    ----------
    path : str
        The file path.

    read_only : bool
        If True the hdf5 file is opened in read-only mode. Several
        processes can read the same hdf5 file simultaneously; this is not
        possible with the append 'a' or write 'w' access modes.

    replace : bool
        If True, any existing hdf5 file is erased.

    wait_time : int
        If the hdf5 file is unavailable (typically locked by another
        process or thread), retry roughly once per second for at most
        `wait_time` seconds before giving up. Useful when several programs
        or threads read/write the same hdf5 database simultaneously.

    Returns
    -------
    f : h5py.File | None
        The opened h5py object, or None if the file could not be opened.

    Examples
    --------
    >>> hdf5=pyhdf5_handler.open_hdf5("./my_hdf5.hdf5")
    >>> hdf5.keys()
    >>> hdf5.attrs.keys()
    """
    f = None
    wait = 0
    while wait <= wait_time:

        f = None
        exist_file = True

        try:
            if read_only:
                if os.path.isfile(path):
                    f = h5py.File(path, "r")
                else:
                    exist_file = False
                    raise ValueError(f"File {path} does not exist.")
            else:
                if replace:
                    f = h5py.File(path, "w")
                elif os.path.isfile(path):
                    f = h5py.File(path, "a")
                else:
                    f = h5py.File(path, "w")
        except Exception:
            # The file is probably locked by another process: retry below.
            # Narrowed from a bare `except:`.
            pass

        if f is not None:
            # BUGFIX: exit as soon as the file is opened instead of
            # falling through to the retry/sleep logic.
            break

        if not exist_file:
            print(f"File {path} does not exist.")
            return f

        print(f"The file {path} is unvailable, waiting {wait}/{wait_time}s")

        wait = wait + 1

        if wait_time > 0:
            time.sleep(1)
        else:
            break

    return f


def add_hdf5_sub_group(hdf5, subgroup=None):
    """
    Create a new subgroup in a HDF5 object.

    Parameters
    ----------
    hdf5 : h5py.File
        An hdf5 object opened with open_hdf5()

    subgroup : str | None
        Path of the subgroup to create; an empty string means the current
        location ("./"). None is a no-op.

    Returns
    -------
    hdf5 : h5py.File
        the h5py object.

    Examples
    --------
    >>> hdf5=pyhdf5_handler.open_hdf5("./model_subgroup.hdf5", replace=True)
    >>> hdf5=pyhdf5_handler.add_hdf5_sub_group(hdf5, subgroup="mygroup")
    >>> hdf5.keys()
    >>> hdf5.attrs.keys()
    """
    if subgroup is not None:
        if subgroup == "":
            subgroup = "./"

        hdf5.require_group(subgroup)

    return hdf5


def _dump_object_to_hdf5_from_list_attribute(hdf5, instance, list_attr):
    """
    Dump an object to an hdf5 file from a list of attributes.

    Parameters
    ----------
    hdf5 : h5py.File
        an hdf5 object

    instance : object
        a custom python object.

    list_attr : list
        a list of attributes (str, nested list or dict)

    Raises
    ------
    ValueError
        If `list_attr` is not a list or contains an unsupported item.
    """
    if not isinstance(list_attr, list):
        raise ValueError(f"{list_attr} must be a instance of list.")

    for attr in list_attr:
        if isinstance(attr, str):
            _dump_object_to_hdf5_from_str_attribute(hdf5, instance, attr)

        elif isinstance(attr, list):
            _dump_object_to_hdf5_from_list_attribute(hdf5, instance, attr)

        elif isinstance(attr, dict):
            _dump_object_to_hdf5_from_dict_attribute(hdf5, instance, attr)

        else:
            raise ValueError(
                f"inconsistent {attr} in {list_attr}. {attr} must be a an instance of dict, list or str"
            )


def _dump_object_to_hdf5_from_dict_attribute(hdf5, instance, dict_attr):
    """
    Dump an object to an hdf5 file from a dictionary of attributes.

    Parameters
    ----------
    hdf5 : h5py.File
        an hdf5 object

    instance : object
        a custom python object.

    dict_attr : dict
        a dictionary of attributes; each key becomes a subgroup.

    Raises
    ------
    ValueError
        If `dict_attr` is not a dict or a value is unsupported.
    """
    if not isinstance(dict_attr, dict):
        raise ValueError(f"{dict_attr} must be a instance of dict.")

    for attr, value in dict_attr.items():
        hdf5 = add_hdf5_sub_group(hdf5, subgroup=attr)

        try:
            sub_instance = getattr(instance, attr)
        except Exception:
            # `instance` may be a plain dict, or not carry `attr` at all;
            # in the latter case keep descending with the instance itself.
            if isinstance(instance, dict):
                sub_instance = instance[attr]
            else:
                sub_instance = instance

        if isinstance(value, dict):
            _dump_object_to_hdf5_from_dict_attribute(hdf5[attr], sub_instance, value)

        elif isinstance(value, list):
            _dump_object_to_hdf5_from_list_attribute(hdf5[attr], sub_instance, value)

        elif isinstance(value, str):
            _dump_object_to_hdf5_from_str_attribute(hdf5[attr], sub_instance, value)

        else:
            raise ValueError(
                f"inconsistent '{attr}' in '{dict_attr}'. Dict({attr}) must be a instance of dict, list or str"
            )


def _dump_object_to_hdf5_from_str_attribute(hdf5, instance, str_attr):
    """
    Dump an object to an hdf5 file from a string attribute.

    Parameters
    ----------
    hdf5 : h5py.File
        an hdf5 object

    instance : object
        a custom python object.

    str_attr : str
        name of the attribute (or dict key) to dump

    Raises
    ------
    ValueError
        If `str_attr` is not a str or the value cannot be dumped.
    """
    if not isinstance(str_attr, str):
        raise ValueError(f"{str_attr} must be a instance of str.")

    try:
        value = getattr(instance, str_attr)
    except Exception:
        # fall back to dict access, else dump the instance itself
        if isinstance(instance, dict):
            value = instance[str_attr]
        else:
            value = instance

    try:
        # "/" and " " are not safe inside hdf5 dataset names
        attribute_name = str(str_attr)
        for character in "/ ":
            attribute_name = attribute_name.replace(character, "_")

        if isinstance(value, dict):
            hdf5 = add_hdf5_sub_group(hdf5, subgroup=attribute_name)
            save_dict_to_hdf5(hdf5[attribute_name], value)
        else:
            hdf5_dataset_creator(hdf5, attribute_name, value)

    except Exception as err:
        # chain the original error instead of hiding it (was a bare except)
        raise ValueError(
            f"Unable to dump attribute {str_attr} with value {value} from {instance}"
        ) from err


def _dump_object_to_hdf5_from_iteratable(hdf5, instance, iteratable=None):
    """
    Dump an object to an hdf5 file from an iterable (list or dict) of
    attributes.

    Parameters
    ----------
    hdf5 : h5py.File
        an hdf5 object
    instance : object
        a custom python object.
    iteratable : list | dict
        a list or a dict of attributes

    Examples
    --------
    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)
    >>> model.run(inplace=True)
    >>>
    >>> hdf5=pyhdf5_handler.open_hdf5("./model.hdf5", replace=True)
    >>> hdf5=pyhdf5_handler.add_hdf5_sub_group(hdf5, subgroup="model1")
    pyhdf5_handler._dump_object_to_hdf5_from_iteratable(hdf5["model1"], model)
    """
    if isinstance(iteratable, list):
        _dump_object_to_hdf5_from_list_attribute(hdf5, instance, iteratable)

    elif isinstance(iteratable, dict):
        _dump_object_to_hdf5_from_dict_attribute(hdf5, instance, iteratable)

    else:
        raise ValueError(f"{iteratable} must be a instance of list or dict.")
def _hdf5_handle_str(name, value):
    """Build the dataset description for a python str (stored utf-8)."""
    dataset = {
        "name": name,
        "attr_value": str(type(value)),
        "dataset_value": value,
        "shape": 1,
        "dtype": h5py.string_dtype(encoding="utf-8"),
    }

    return dataset


def _hdf5_handle_numbers(name: str, value: numbers.Number):
    """Build the dataset description for a scalar number (1-element array)."""
    arr = np.array([value])
    dataset = {
        "name": name,
        "attr_value": str(type(value)),
        "dataset_value": arr,
        "shape": arr.shape,
        "dtype": arr.dtype,
    }

    return dataset


def _hdf5_handle_none(name: str, value: None):
    """Build the dataset description for None (sentinel string "_None_")."""
    dataset = {
        "name": name,
        "attr_value": "_None_",
        "dataset_value": "_None_",
        "shape": 1,
        "dtype": h5py.string_dtype(encoding="utf-8"),
    }

    return dataset


def _hdf5_handle_timestamp(
    name: str, value: pd.Timestamp | np.datetime64 | datetime.date
):
    """Build the dataset description for a date-like scalar ("%Y-%m-%d %H:%M")."""
    dtype = type(value)

    if isinstance(value, (np.datetime64)):
        # np.datetime64 has no strftime; convert to datetime first
        value = value.tolist()

    dataset = {
        "name": name,
        "attr_value": str(dtype),
        "dataset_value": value.strftime("%Y-%m-%d %H:%M"),
        "shape": 1,
        "dtype": h5py.string_dtype(encoding="utf-8"),
    }

    return dataset


def _hdf5_handle_DatetimeIndex(name: str, value: pd.DatetimeIndex):
    """Build the dataset description for a pandas DatetimeIndex (as array)."""
    return _hdf5_handle_array(name, value)


def _hdf5_handle_list(name: str, value: list | tuple):
    """Build the dataset description for a list/tuple (converted to ndarray)."""
    arr = np.array(value)

    return _hdf5_handle_array(name, arr)


def _hdf5_handle_exclude_obj(name: str, value: list | tuple):
    """Build a placeholder dataset for an excluded (non-serializable) object."""
    dtype = type(value)

    dataset = {
        "name": name,
        "attr_value": str(dtype),
        "dataset_value": f"excluded data type {str(dtype)}",
        "shape": 1,
        "dtype": h5py.string_dtype(encoding="utf-8"),
    }

    return dataset


def _hdf5_skip_cls(value):
    """
    Return True when the top-level module of `value`'s type is listed in
    constant.EXCLUDE_PYTHON_OBJ (object must not be dumped).
    """
    type_str = str(type(value))
    # "<class 'module.submodule.Name'>" -> "module"
    module_name = type_str.split("'")[1].split('.')[0]

    return module_name in constant.EXCLUDE_PYTHON_OBJ


def _hdf5_handle_array(name: str, value: np.ndarray):
    """
    Build the dataset description for a plain (non-structured) ndarray.

    Datetime arrays are converted to "%Y-%m-%d %H:%M" strings; object and
    unicode arrays are stored as utf-8 strings.
    """
    dtype_attr = type(value)
    dtype = value.dtype

    if value.dtype.char == "M":
        # datetime64 array -> list of formatted strings
        dates = value.tolist()
        value = np.array([date.strftime("%Y-%m-%d %H:%M") for date in dates])
        value = value.astype("O")
        dtype = h5py.string_dtype(encoding="utf-8")

    elif value.dtype == "object":
        value = value.astype("S")
        dtype = h5py.string_dtype(encoding="utf-8")

    elif value.dtype.char == "U":
        value = value.astype("S")
        dtype = h5py.string_dtype(encoding="utf-8")

    dataset = {
        "name": name,
        "attr_value": str(dtype_attr),
        "dataset_value": value,
        "shape": value.shape,
        "dtype": dtype,
    }

    return dataset


def _hdf5_handle_ndarray(hdf5: h5py.File, name: str, value: np.ndarray):
    """Store a structured ndarray as its own sub-group of datasets."""
    hdf5 = add_hdf5_sub_group(hdf5, subgroup=name)
    _dump_ndarray_to_hdf5(hdf5[name], value)


def _hdf5_create_dataset(hdf5: h5py.File, dataset: dict):
    """
    (Re)create a gzip-compressed dataset from a description dict and store
    the original python type in the companion attribute "_<name>".
    """
    if dataset["name"] in hdf5.keys():
        del hdf5[dataset["name"]]

    hdf5.create_dataset(
        dataset["name"],
        shape=dataset["shape"],
        dtype=dataset["dtype"],
        data=dataset["dataset_value"],
        compression="gzip",
        chunks=True,
    )

    if "_" + dataset["name"] in list(hdf5.attrs.keys()):
        del hdf5.attrs["_" + dataset["name"]]

    hdf5.attrs["_" + dataset["name"]] = dataset["attr_value"]


def hdf5_dataset_creator(hdf5: h5py.File, name: str, value):
    """
    Write any value in an hdf5 object.

    Dispatches on the type of `value` to build a dataset description, then
    creates the dataset plus a "_<name>" attribute keeping the original
    python type. Structured ndarrays and generic python objects are stored
    recursively as sub-groups instead.

    Parameters
    ----------
    hdf5 : h5py.File
        an hdf5 object

    name : str
        name of the dataset

    value : any
        value to write in the hdf5
    """

    if _hdf5_skip_cls(value):
        dataset = _hdf5_handle_exclude_obj(name, value)

    elif isinstance(value, str):
        dataset = _hdf5_handle_str(name, value)

    elif isinstance(value, numbers.Number):
        dataset = _hdf5_handle_numbers(name, value)

    elif value is None:
        dataset = _hdf5_handle_none(name, value)

    elif isinstance(value, (pd.Timestamp, np.datetime64, datetime.date)):
        dataset = _hdf5_handle_timestamp(name, value)

    elif isinstance(value, pd.DatetimeIndex):
        dataset = _hdf5_handle_DatetimeIndex(name, value)

    elif isinstance(value, (list, tuple)):
        dataset = _hdf5_handle_list(name, value)

    elif isinstance(value, np.ndarray):
        if value.dtype.names:
            # structured array: stored as its own group of datasets
            _hdf5_handle_ndarray(hdf5, name, value)
            return
        dataset = _hdf5_handle_array(name, value)

    else:
        # generic python object: recurse into a sub-group
        hdf5 = add_hdf5_sub_group(hdf5, subgroup=name)
        newdict = object_handler.read_object_as_dict(value)
        save_dict_to_hdf5(hdf5[name], newdict)
        # BUGFIX: this branch previously fell through to
        # _hdf5_create_dataset() with `dataset` unbound -> NameError.
        return

    _hdf5_create_dataset(hdf5, dataset)
def _dump_ndarray_to_hdf5(hdf5, value):
    """
    Dump a structured ndarray to an hdf5 file: create a group "ndarray_ds"
    and store each component of the ndarray as a dataset, plus 2 datasets
    holding the dtypes ("ndarray_dtype") and labels ("ndarray_indexes").

    Parameters
    ----------
    hdf5 : h5py.File
        an hdf5 object

    value : ndarray
        a structured ndarray (fields with different datatypes)
    """
    # save ndarray datastructure
    hdf5 = add_hdf5_sub_group(hdf5, subgroup="ndarray_ds")
    hdf5_data = hdf5["ndarray_ds"]

    # one dataset per field of the structured array
    for item in value.dtype.names:
        hdf5_dataset_creator(hdf5=hdf5_data, name=item, value=value[item])

    descr = np.array(value.dtype.descr)
    index = descr[:, 0].astype("O")
    dtype = descr[:, 1].astype("O")
    data_type = h5py.string_dtype(encoding="utf-8")

    def _write_meta(name, data):
        # (re)create a utf-8 string dataset holding the metadata
        if name in hdf5_data.keys():
            del hdf5_data[name]

        hdf5_data.create_dataset(
            name,
            shape=data.shape,
            dtype=data_type,
            data=data,
            compression="gzip",
            chunks=True,
        )

    _write_meta("ndarray_dtype", dtype)
    _write_meta("ndarray_indexes", index)


def _read_ndarray_datastructure(hdf5):
    """
    Read a structured-ndarray data structure written by
    _dump_ndarray_to_hdf5().

    Parameters
    ----------
    hdf5 : h5py.File
        an hdf5 object at the root of the ndarray datastructure

    Return
    ------
    ndarray | None
        The rebuilt structured ndarray, or None when the "ndarray_ds"
        group is absent.
    """
    if "ndarray_ds" not in list(hdf5.keys()):
        # explicit None (the original fell off the end of the function)
        return None

    list_dtypes = [it.decode() for it in hdf5["ndarray_ds/ndarray_dtype"][:]]
    list_indexes = [it.decode() for it in hdf5["ndarray_ds/ndarray_indexes"][:]]

    len_data = len(hdf5[f"ndarray_ds/{list_indexes[0]}"][:])

    datatype = np.dtype(list(zip(list_indexes, list_dtypes)))
    ndarray = np.zeros(len_data, dtype=datatype)

    for name, expected_type in zip(list_indexes, list_dtypes):
        ndarray[name] = hdf5_read_dataset(
            hdf5[f"ndarray_ds/{name}"], expected_type
        )

    return ndarray


def save_dict_to_hdf5(hdf5, dictionary):
    """
    Dump a dictionary to an hdf5 file.

    Parameters
    ----------
    hdf5 : h5py.File
        an hdf5 object

    dictionary : dict
        a custom python dictionary

    Raises
    ------
    ValueError
        If `dictionary` is not a dict or an entry cannot be saved.
    """
    if not isinstance(dictionary, dict):
        raise ValueError(f"{dictionary} must be a instance of dict.")

    for attr, value in dictionary.items():
        try:
            # "/" and " " are not safe inside hdf5 dataset names
            attribute_name = str(attr)
            for character in "/ ":
                attribute_name = attribute_name.replace(character, "_")

            if isinstance(value, dict):
                hdf5 = add_hdf5_sub_group(hdf5, subgroup=attribute_name)
                save_dict_to_hdf5(hdf5[attribute_name], value)
            else:
                hdf5_dataset_creator(hdf5, attribute_name, value)

        except Exception as err:
            # chain the original error instead of hiding it (was a bare except)
            raise ValueError(
                f"Unable to save attribute {str(attr)} with value {value}"
            ) from err
def save_dict_to_hdf5file(
    path_to_hdf5, dictionary=None, location="./", replace=False, wait_time=0
):
    """
    Dump a dictionary to an hdf5 file.

    Parameters
    ----------
    path_to_hdf5 : str
        path to the hdf5 file

    dictionary : dict | None
        a dictionary containing the data to be saved

    location : str
        path location or subgroup where to write data in the hdf5 file

    replace : bool
        replace an existing hdf5 file. Default is False

    wait_time : int
        If the hdf5 is unavailable, retry for at most `wait_time` seconds
        before giving up. Useful when several programs or threads
        read/write the same hdf5 database simultaneously.

    Examples
    --------
    >>> dict={"a":1,"b":2}
    >>> pyhdf5_handler.save_dict_to_hdf5("saved_dictionary.hdf5",dict)
    """
    # validate before touching the filesystem
    if not isinstance(dictionary, dict):
        raise ValueError(f"The input {dictionary} must be a instance of dict.")

    hdf5 = open_hdf5(path_to_hdf5, replace=replace, wait_time=wait_time)

    if hdf5 is None:
        return

    # BUGFIX: close the file even if the dump raises
    try:
        hdf5 = add_hdf5_sub_group(hdf5, subgroup=location)
        save_dict_to_hdf5(hdf5[location], dictionary)
    finally:
        hdf5.close()


def save_object_to_hdf5(
    hdf5,
    instance,
    keys_data=None,
    location="./",
    sub_data=None,
    replace=False,
    wait_time=0,
):
    """
    Dump an object to an hdf5 file. The hdf5 handle is closed on return.

    Parameters
    ----------
    hdf5 : h5py.File
        An opened hdf5 file

    instance : object
        A custom python object to be saved into an hdf5

    keys_data : list | dict | None
        optional, a list or a dictionary of the attributes to be saved;
        when None the structure is generated from `instance`.

    location : str
        path location or subgroup where to write data in the hdf5 file

    sub_data : dict | None
        optional, an extra dictionary containing extra data to be saved
        along the object

    replace : bool
        kept for interface compatibility (unused here; the file is
        already open)

    wait_time : int
        kept for interface compatibility (unused here; the file is
        already open)
    """
    # nothing to do without an open file; check before any expensive work
    if hdf5 is None:
        return None

    if keys_data is None:
        keys_data = object_handler.generate_object_structure(
            instance, include_method=False
        )

    # BUGFIX: close the file even if the dump raises
    try:
        hdf5 = add_hdf5_sub_group(hdf5, subgroup=location)

        _dump_object_to_hdf5_from_iteratable(hdf5[location], instance, keys_data)

        if isinstance(sub_data, dict):
            save_dict_to_hdf5(hdf5[location], sub_data)
    finally:
        hdf5.close()


def save_object_to_hdf5file(
    path_to_hdf5,
    instance,
    keys_data=None,
    location="./",
    sub_data=None,
    replace=False,
    wait_time=0,
):
    """
    Dump an object to an hdf5 file identified by its path.

    Parameters
    ----------
    path_to_hdf5 : str
        path to the hdf5 file

    instance : object
        A custom python object to be saved into an hdf5

    keys_data : list | dict | None
        optional, a list or a dictionary of the attributes to be saved

    location : str
        path location or subgroup where to write data in the hdf5 file

    sub_data : dict | None
        optional, an extra dictionary containing extra data to be saved
        along the object

    replace : bool
        replace an existing hdf5 file. Default is False

    wait_time : int
        If the hdf5 is unavailable, retry for at most `wait_time` seconds
        before giving up (the function then does nothing).
    """
    hdf5 = open_hdf5(path_to_hdf5, replace=replace, wait_time=wait_time)

    # save_object_to_hdf5 handles hdf5 is None and closes the file
    save_object_to_hdf5(
        hdf5,
        instance,
        keys_data=keys_data,
        location=location,
        sub_data=sub_data,
        replace=replace,
        wait_time=wait_time,
    )
def read_hdf5file_as_dict(
    path_to_hdf5, location="./", wait_time=0, read_attrs=True, read_dataset_attrs=False
):
    """
    Open, read and close an hdf5 file.

    Parameters
    ----------
    path_to_hdf5 : str
        path to the hdf5 file

    location : str
        place in the hdf5 from which we start reading the file

    wait_time : int
        If the hdf5 is unavailable, retry for at most `wait_time` seconds
        before giving up and returning None.

    read_attrs : bool
        read and import attributes in the dictionary.

    read_dataset_attrs : bool
        read and import the special per-dataset attributes created by
        pyhdf5_handler (they store the original datatype of each dataset).

    Return
    ------
    dictionary : dict | None
        a dictionary of all keys and attributes included in the hdf5
        file, or None if the file could not be opened.

    Examples
    --------
    read an hdf5 file
    dictionary=hdf5_handler.read_hdf5file_as_dict(hdf5["model1"])
    """
    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5 is None:
        return None

    # BUGFIX: close the file even if reading raises (e.g. bad location)
    try:
        dictionary = read_hdf5_as_dict(
            hdf5[location], read_attrs=read_attrs, read_dataset_attrs=read_dataset_attrs
        )
    finally:
        hdf5.close()

    return dictionary


def read_hdf5_as_dict(hdf5, read_attrs=True, read_dataset_attrs=False):
    """
    Load an hdf5 object (file or group) as a dictionary.

    Parameters
    ----------
    hdf5 : h5py.File
        an instance of hdf5, open with the function open_hdf5()

    read_attrs : bool
        read and import attributes in the dictionary.

    read_dataset_attrs : bool
        read and import the special per-dataset attributes created by
        pyhdf5_handler (they store the original datatype of each dataset).

    Return
    ------
    dictionary : dict
        a dictionary of all keys and attributes included in the hdf5 file.
        NOTE: when the group contains an "ndarray_ds" sub-group, the
        rebuilt structured ndarray is returned instead of a dict.

    Examples
    --------
    read only a part of an hdf5 file
    >>> hdf5=hdf5_handler.open_hdf5("./multi_model.hdf5")
    >>> dictionary=hdf5_handler.read_hdf5_as_dict(hdf5["model1"])
    >>> dictionary.keys()
    """
    if not isinstance(hdf5, (h5py.File, h5py.Group, h5py.Dataset, h5py.Datatype)):
        print("Error: input arg is not an instance of hdf5.File()")
        return {}

    dictionary = {}

    for key, item in hdf5.items():

        if str(type(item)).find("group") != -1:

            if key == "ndarray_ds":
                # the whole group represents one structured ndarray
                return _read_ndarray_datastructure(hdf5)

            # BUGFIX: propagate the read flags through the recursion
            # (they were silently reset to the defaults before)
            dictionary.update(
                {
                    key: read_hdf5_as_dict(
                        item,
                        read_attrs=read_attrs,
                        read_dataset_attrs=read_dataset_attrs,
                    )
                }
            )

        if str(type(item)).find("dataset") != -1:

            if "_" + key in hdf5.attrs.keys():
                expected_type = hdf5.attrs["_" + key]
                values = hdf5_read_dataset(item, expected_type)
            else:
                values = item[:]

            dictionary.update({key: values})

    list_attribute = []
    if read_attrs or read_dataset_attrs:
        tmp_list_attribute = list(hdf5.attrs.keys())
        # per-dataset type attributes are named "_<dataset name>"
        hdf5_item_matching_attributes = ["_" + element for element in list(hdf5.keys())]

        if read_attrs:
            list_attribute.extend(
                a for a in tmp_list_attribute if a not in hdf5_item_matching_attributes
            )

        if read_dataset_attrs:
            list_attribute.extend(
                a for a in tmp_list_attribute if a in hdf5_item_matching_attributes
            )

    for key in list_attribute:
        dictionary.update({key: hdf5.attrs[key]})

    return dictionary


def _is_numeric_str_class(class_str):
    """
    Check whether the input string is the repr of a python class that is
    a subclass of numbers.Number.

    Parameters
    ----------
    class_str : str
        string representation of a class like "<class 'module.ClassName'>"
        or "<class 'ClassName'>"

    Returns
    -------
    bool
        True if the class is a subclass of numbers.Number, False otherwise.
    """
    # BUGFIX: guard against non-string input (e.g. expected_type=None),
    # which previously raised AttributeError on .strip()
    if not isinstance(class_str, str):
        return False

    # extract the (optional) module path and the class name
    match = re.match(r"<class '(?:([^']+)\.)?([^']+)'>", class_str.strip())
    if not match:
        return False

    module_name, class_name = match.groups()

    try:
        if module_name:
            # import the module and fetch the class
            module = __import__(module_name, fromlist=[class_name])
            cls = getattr(module, class_name)
        else:
            # BUGFIX: `__builtins__` is a plain dict in imported modules,
            # so getattr() on it silently returned None; use the builtins
            # module instead for built-in types (int, float, ...).
            import builtins

            cls = globals().get(class_name, None)
            if cls is None:
                cls = getattr(builtins, class_name, None)

        if cls is None:
            return False

        return issubclass(cls, numbers.Number)
    except (ImportError, AttributeError, TypeError):
        return False


def hdf5_read_dataset(item, expected_type=None):
    """
    Read a dataset stored in an hdf5 database.

    Parameters
    ----------
    item : h5py.Dataset
        an hdf5 dataset/item

    expected_type : str | None
        the expected dtype as string, i.e. str(type(...)); special values
        are "_None_" (stored None) and date-class reprs.

    Return
    ------
    value : any
        the value read from the hdf5, converted back to the expected type
    """
    if expected_type == str(type("str")):
        values = item[0].decode()

    elif expected_type == str(type(1)):
        values = int(item[0])  # builtin int type

    elif expected_type == str(type(1.0)):
        values = float(item[0])  # builtin float type

    elif _is_numeric_str_class(expected_type):
        values = item[0]  # other int/float type like np.int64/np.float64

    elif expected_type == "_None_":
        values = None

    elif expected_type in (
        str(pd.Timestamp),
        str(np.datetime64),
        str(datetime.datetime),
    ):
        decoded = item[0].decode()

        if expected_type == str(pd.Timestamp):
            values = pd.Timestamp(decoded)
        elif expected_type == str(np.datetime64):
            values = np.datetime64(decoded)
        else:
            values = datetime.datetime.fromisoformat(decoded)

    else:
        if item[:].dtype.char == "S":
            values = item[:].astype("U")

        elif item[:].dtype.char == "O":
            # decode list of byte-strings if required
            values = [it.decode() for it in item[:]]

        else:
            values = item[:]

    return values


def get_hdf5file_attribute(
    path_to_hdf5=str(), location="./", attribute=None, wait_time=0
):
    """
    Get the value of an attribute in the hdf5file.

    Parameters
    ----------
    path_to_hdf5 : str
        the path to the hdf5file

    location : str
        path inside the hdf5 where the attribute is stored

    attribute : str
        attribute name

    wait_time : int
        If the hdf5 is unavailable, retry for at most `wait_time` seconds
        before giving up and returning None.

    Return
    ------
    return_attribute : any
        the value of the attribute

    Examples
    --------
    get an attribute
    >>> attribute=hdf5_handler.get_hdf5_attribute("./multi_model.hdf5",attribute=my_attribute_name)
    """
    hdf5_base = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5_base is None:
        return None

    # BUGFIX: close the file even when location/attribute lookup raises
    try:
        hdf5 = hdf5_base[location]
        return_attribute = hdf5.attrs[attribute]
    finally:
        hdf5_base.close()

    return return_attribute


def get_hdf5file_dataset(path_to_hdf5=str(), location="./", dataset=None, wait_time=0):
    """
    Get the value of a dataset in the hdf5file.

    Parameters
    ----------
    path_to_hdf5 : str
        the path to the hdf5file

    location : str
        path inside the hdf5 where the dataset is stored

    dataset : str
        dataset name

    wait_time : int
        If the hdf5 is unavailable, retry for at most `wait_time` seconds
        before giving up and returning None.

    Return
    ------
    return_dataset : any
        the value of the dataset

    Examples
    --------
    get a dataset
    >>> dataset=hdf5_handler.get_hdf5_dataset("./multi_model.hdf5",dataset=my_dataset_name)
    """
    hdf5_base = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5_base is None:
        return None

    # BUGFIX: close the file even when location/dataset lookup raises
    try:
        hdf5 = hdf5_base[location]

        if "_" + dataset in hdf5.attrs.keys():
            # use the stored python type to convert the dataset back
            expected_type = hdf5.attrs["_" + dataset]
            return_dataset = hdf5_read_dataset(hdf5[dataset], expected_type)
        else:
            return_dataset = hdf5[dataset][:]
    finally:
        hdf5_base.close()

    return return_dataset
def get_hdf5file_item(
    path_to_hdf5=str(), location="./", item=None, wait_time=0, search_attrs=False
):
    """
    Get a custom item in an hdf5file.

    Parameters
    ----------
    path_to_hdf5 : str
        the path to the hdf5file

    location : str
        path inside the hdf5 where the item is stored. If item is None,
        item is set to basename(location)

    item : str | None
        item name

    wait_time : int
        If the hdf5 is unavailable, retry for at most `wait_time` seconds
        before giving up and returning None.

    search_attrs : bool
        Default is False. If True, the function will also search the item
        in the attributes first.

    Return
    ------
    any
        custom value: can be a dict (group content), a numpy array, a
        string, a float, an int ...

    Examples
    --------
    get the dataset 'dataset'
    >>> dataset=hdf5_handler.get_hdf5_item("./multi_model.hdf5",location="path/in/hdf5/dataset")
    """
    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5 is None:
        return None

    # BUGFIX: close the file even if the item lookup raises
    try:
        hdf5_item = get_hdf5_item(
            hdf5_instance=hdf5, location=location, item=item, search_attrs=search_attrs
        )
    finally:
        hdf5.close()

    return hdf5_item


def get_hdf5_item(hdf5_instance=None, location="./", item=None, search_attrs=False):
    """
    Get a custom item in an opened hdf5 file.

    Parameters
    ----------
    hdf5_instance : h5py.File
        an instance of an hdf5

    location : str
        path inside the hdf5 where the item is stored. If item is None,
        item is set to basename(location)

    item : str | None
        item name

    search_attrs : bool
        Default is False. If True, the function will search the item in
        the attributes first.

    Return
    ------
    any
        custom value: can be a dict (group content), a numpy array, a
        string, a float, an int ... None when the item is not found.

    Examples
    --------
    get the dataset 'dataset'
    >>> dataset=hdf5_handler.get_hdf5_item("./multi_model.hdf5",location="path/in/hdf5/dataset")
    """
    # derive the item name from the location when not given explicitly
    if item is None and isinstance(location, str):
        head, tail = os.path.split(location)
        if len(tail) > 0:
            item = tail
            location = head

    if not isinstance(item, str):
        print(f"Bad search item:{item}")
        return None

    # BUGFIX: a stray unconditional `return None` here made the rest of
    # this function unreachable; it has been removed.

    hdf5 = hdf5_instance[location]

    # first search in the attributes
    if search_attrs:
        list_attribute = hdf5.attrs.keys()
        if item in list_attribute:
            return hdf5.attrs[item]

    # then search in groups and datasets
    list_keys = hdf5.keys()
    if item not in list_keys:
        return None

    hdf5_item = hdf5[item]

    if str(type(hdf5_item)).find("group") != -1:

        if item == "ndarray_ds":
            # the group encodes one structured ndarray
            return _read_ndarray_datastructure(hdf5)

        return read_hdf5_as_dict(hdf5_item)

    elif str(type(hdf5_item)).find("dataset") != -1:

        if "_" + item in hdf5.attrs.keys():
            # use the stored python type to convert the dataset back
            expected_type = hdf5.attrs["_" + item]
            values = hdf5_read_dataset(hdf5_item, expected_type)
        else:
            values = hdf5_item[:]

        return values

    else:
        return hdf5_item


def search_in_hdf5file(
    path_to_hdf5, key=None, location="./", wait_time=0, search_attrs=False
):
    """
    Search key in an hdf5 file and return a list of [locations, datatype,
    key name, values]. Value and key are returned only if the key is an
    attribute or a dataset (None otherwise).

    Parameters
    ----------
    path_to_hdf5 : str
        the path to the hdf5file

    key : str | None
        key to search in the hdf5file

    location : str
        path inside the hdf5 where to start the search

    wait_time : int
        If the hdf5 is unavailable, retry for at most `wait_time` seconds
        before giving up and returning None.

    search_attrs : bool
        Default False, also search in the attributes

    Return
    ------
    list | None
        the list of matches, [] when key is None, None when the file
        could not be opened.

    Examples
    --------
    search in a hdf5file
    >>> matchkey=hdf5_handler.search_in_hdf5file(hdf5filename, key='Nom_du_BV',location="./")
    """
    if key is None:
        print("Nothing to search, use key=")
        return []

    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5 is None:
        return None

    # BUGFIX: close the file even if the search raises
    try:
        results = search_in_hdf5(hdf5, key, location=location, search_attrs=search_attrs)
    finally:
        hdf5.close()

    return results
Value and key are returned only if the key is an attribute or a dataset (None otherwise) 1383 1384 Parameters 1385 ---------- 1386 1387 path_to_hdf5 : str 1388 the path to the hdf5file 1389 1390 key: str 1391 key to search in the hdf5file 1392 1393 location : str 1394 path inside the hdf5 where to start the research 1395 1396 wait_time: int 1397 If the hdf5 is unavailable, the function will try to access serveral time and will wait wait_time seconds maximum. If this time is elapsed, the file won't be opened and the funciton will return None. This parameter is usefull if several program or threads need to read/write simultaneously in the same hdf5 database. 1398 1399 search_attrs : Bool 1400 Default false, search in the attributes 1401 1402 Return 1403 ------ 1404 1405 return_dataset : the value of the attribute 1406 1407 Examples 1408 -------- 1409 1410 search in a hdf5file 1411 >>> matchkey=hdf5_handler.search_in_hdf5file(hdf5filename, key='Nom_du_BV',location="./") 1412 1413 """ 1414 if key is None: 1415 print("Nothing to search, use key=") 1416 return [] 1417 1418 hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time) 1419 1420 if hdf5 is None: 1421 return None 1422 1423 results = search_in_hdf5(hdf5, key, location=location, search_attrs=search_attrs) 1424 1425 hdf5.close() 1426 1427 return results 1428 1429 1430def search_in_hdf5(hdf5_base, key=None, location="./", search_attrs=False): 1431 """ 1432 1433 Search key in an hdf5 and return a list of [locations, datatype, key name, values]. 
Value and key are returned only if the key is an attribute or a dataset (None otherwise) 1434 1435 Parameters 1436 ---------- 1437 1438 hdf5_base : h5py.File 1439 opened instance of the hdf5 1440 1441 key: str 1442 key to search in the hdf5file 1443 1444 location : str 1445 path inside the hdf5 where to start the research 1446 1447 search_attrs : Bool 1448 Default false, search in the attributes 1449 1450 Return 1451 ------ 1452 1453 return_dataset : the value of the attribute 1454 1455 Examples 1456 -------- 1457 1458 search in a hdf5 1459 >>> hdf5=hdf5_handler.open_hdf5(hdf5_file) 1460 >>> matchkey=hdf5_handler.search_in_hdf5(hdf5, key='Nom_du_BV',location="./") 1461 >>> hdf5.close() 1462 1463 """ 1464 if key is None: 1465 print("Nothing to search, use key=") 1466 return [] 1467 1468 result = [] 1469 1470 hdf5 = hdf5_base[location] 1471 1472 if search_attrs: 1473 list_attribute = hdf5.attrs.keys() 1474 1475 if key in list_attribute: 1476 result.append( 1477 { 1478 "path": location, 1479 "key": key, 1480 "datatype": "attribute", 1481 "value": hdf5.attrs[key], 1482 } 1483 ) 1484 1485 for hdf5_key, item in hdf5.items(): 1486 1487 if str(type(item)).find("group") != -1: 1488 1489 sub_location = os.path.join(location, hdf5_key) 1490 1491 # print(hdf5_key,sub_location,list(hdf5.keys())) 1492 1493 if hdf5_key == key: 1494 1495 if "ndarray_ds" in item.keys(): 1496 1497 result.append( 1498 { 1499 "path": sub_location, 1500 "key": None, 1501 "datatype": "ndarray", 1502 "value": _read_ndarray_datastructure(item), 1503 } 1504 ) 1505 1506 else: 1507 1508 result.append( 1509 { 1510 "path": sub_location, 1511 "key": None, 1512 "datatype": "group", 1513 "value": None, 1514 } 1515 ) 1516 1517 res = search_in_hdf5(hdf5_base, key, sub_location) 1518 1519 if len(res) > 0: 1520 for element in res: 1521 result.append(element) 1522 1523 if str(type(item)).find("dataset") != -1: 1524 1525 if hdf5_key == key: 1526 1527 if item[:].dtype.char == "S": 1528 1529 values = item[:].astype("U") 
1530 1531 elif item[:].dtype.char == "O": 1532 1533 # decode list if required 1534 decoded_item = list() 1535 for it in item[:]: 1536 decoded_item.append(it.decode()) 1537 1538 values = decoded_item 1539 1540 else: 1541 1542 values = item[:] 1543 1544 result.append( 1545 {"path": location, "key": key, "datatype": "dataset", "value": values} 1546 ) 1547 1548 return result 1549 1550 1551def hdf5file_view( 1552 path_to_hdf5, 1553 location="./", 1554 max_depth=None, 1555 level_base=">", 1556 level_sep="--", 1557 depth=None, 1558 wait_time=0, 1559 list_attrs=True, 1560 list_dataset_attrs=False, 1561 return_view=False, 1562): 1563 """ 1564 1565 Search key in an hdf5 and return a list of [locations, datatype, key name, values]. Value and key are returned only if the key is an attribute or a dataset (None otherwise) 1566 1567 Parameters 1568 ---------- 1569 1570 1571 path_to_hdf5 : str 1572 Path to an hdf5 database 1573 1574 location : str 1575 path inside the hdf5 where to start the research 1576 1577 max_depth: str 1578 Max deph of the search in the hdf5 1579 1580 level_base: str 1581 string used as separator at the lower level (default '>') 1582 1583 level_sep: str 1584 string used as separator at higher level (default '--') 1585 1586 depth: int 1587 current depth level 1588 1589 list_attrs: bool 1590 default is True, list the attributes 1591 1592 list_dataset_attrs: bool 1593 default is False, list the special attributes defined for each dataset by pyhdf5_handler 1594 1595 return_view: bool 1596 retrun the object view in a dictionnary (do not print at screen) 1597 1598 wait_time: int 1599 If the hdf5 is unavailable, the function will try to access serveral time and will wait wait_time seconds maximum. If this time is elapsed, the file won't be opened and the funciton will return None. This parameter is usefull if several program or threads need to read/write simultaneously in the same hdf5 database. 
1600 1601 Return 1602 -------- 1603 1604 dictionnary : optional, the view of the hdf5 1605 1606 Examples 1607 -------- 1608 1609 search in a hdf5file 1610 >>> matchkey=hdf5_handler.search_in_hdf5file(hdf5filename, key='Nom_du_BV',location="./") 1611 1612 """ 1613 1614 hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time) 1615 1616 if hdf5 is None: 1617 return None 1618 1619 results = hdf5_view( 1620 hdf5, 1621 location=location, 1622 max_depth=max_depth, 1623 level_base=level_base, 1624 level_sep=level_sep, 1625 depth=depth, 1626 list_attrs=list_attrs, 1627 list_dataset_attrs=list_dataset_attrs, 1628 return_view=return_view, 1629 ) 1630 1631 hdf5.close() 1632 1633 return results 1634 1635 1636def hdf5file_ls(path_to_hdf5, location="./"): 1637 """ 1638 List dataset in an hdf5file. 1639 1640 Parameters 1641 ---------- 1642 1643 path_to_hdf5 : str 1644 path to a hdf5file 1645 1646 location: str 1647 path inside the hdf5 where to start the research 1648 1649 Example 1650 ------- 1651 1652 >>> hdf5file_ls(test.hdf5) 1653 1654 """ 1655 1656 hdf5 = open_hdf5(path_to_hdf5, read_only=True) 1657 1658 hdf5_view( 1659 hdf5, 1660 location=location, 1661 max_depth=0, 1662 level_base=">", 1663 level_sep="--", 1664 list_attrs=False, 1665 return_view=False, 1666 ) 1667 1668 1669def hdf5_ls(hdf5): 1670 """ 1671 List dataset in an hdf5 instance. 
1672 1673 Parameters 1674 ---------- 1675 1676 hdf5 : h5py.File 1677 hdf5 instance 1678 1679 location: str 1680 path inside the hdf5 where to start the research 1681 1682 Example 1683 ------- 1684 1685 >>> hdf5 = open_hdf5(path_to_hdf5, read_only=True) 1686 >>> hdf5_ls(hdf5) 1687 1688 """ 1689 1690 hdf5_view( 1691 hdf5, 1692 location="./", 1693 max_depth=0, 1694 level_base=">", 1695 level_sep="--", 1696 list_attrs=False, 1697 return_view=False, 1698 ) 1699 1700 1701def hdf5_view( 1702 hdf5_obj, 1703 location="./", 1704 max_depth=None, 1705 level_base=">", 1706 level_sep="--", 1707 depth=None, 1708 list_attrs=True, 1709 list_dataset_attrs=False, 1710 return_view=False, 1711): 1712 """ 1713 List recursively all dataset (and attributes) in an hdf5 object. 1714 1715 Parameters 1716 ---------- 1717 1718 hdf5_obj : h5py.File 1719 opened instance of the hdf5 1720 1721 location : str 1722 path inside the hdf5 where to start the research 1723 1724 max_depth: str 1725 Max deph of the search in the hdf5 1726 1727 level_base: str 1728 string used as separator at the lower level (default '>') 1729 1730 level_sep: str 1731 string used as separator at higher level (default '--') 1732 1733 depth: int 1734 current level depth 1735 1736 list_attrs: bool 1737 default is True, list the attributes 1738 1739 list_dataset_attrs: bool 1740 default is False, list the special attributes defined for each dataset by pyhdf5_handler 1741 1742 return_view: bool 1743 retrun the object view in a dictionnary 1744 1745 Return 1746 -------- 1747 1748 dictionnary : optional, the view of the hdf5 1749 1750 Examples 1751 -------- 1752 1753 search in a hdf5 1754 >>> hdf5=hdf5_handler.open_hdf5(hdf5_file) 1755 >>> matchkey=hdf5_handler.search_in_hdf5(hdf5, key='Nom_du_BV',location="./") 1756 >>> hdf5.close() 1757 1758 """ 1759 1760 result = [] 1761 1762 if max_depth is not None: 1763 1764 if depth is not None: 1765 depth = depth + 1 1766 else: 1767 depth = 0 1768 1769 if depth > max_depth: 1770 return 
result 1771 1772 hdf5 = hdf5_obj[location] 1773 1774 list_attribute = [] 1775 if list_attrs or list_dataset_attrs: 1776 tmp_list_attribute = list(hdf5.attrs.keys()) 1777 list_keys_matching_attributes = ["_" + element for element in list(hdf5.keys())] 1778 1779 if list_attrs: 1780 1781 list_attribute.extend( 1782 list( 1783 filter( 1784 lambda l: l not in list_keys_matching_attributes, tmp_list_attribute 1785 ) 1786 ) 1787 ) 1788 1789 if list_dataset_attrs: 1790 1791 list_attribute.extend( 1792 list(filter(lambda l: l in list_keys_matching_attributes, tmp_list_attribute)) 1793 ) 1794 1795 for key in list_attribute: 1796 values = hdf5.attrs[key] 1797 sub_location = os.path.join(location, key) 1798 if isinstance( 1799 values, (int, float, np.int64, np.float64, np.int32, np.float32, np.bool) 1800 ): 1801 result.append( 1802 f"{level_base}| {sub_location}, attribute, type={type(hdf5.attrs[key])}, value={values}" 1803 ) 1804 elif isinstance(values, (str)) and len(values) < 20: 1805 result.append( 1806 f"{level_base}| {sub_location}, attribute, type={type(hdf5.attrs[key])}, len={len(values)}, value={values}" 1807 ) 1808 else: 1809 result.append( 1810 f"{level_base}| {sub_location}, attribute, type={type(hdf5.attrs[key])}, len={len(values)}, value={values[0:20]}..." 
1811 ) 1812 1813 for hdf5_key, item in hdf5.items(): 1814 1815 if str(type(item)).find("group") != -1: 1816 1817 sub_location = os.path.join(location, hdf5_key) 1818 1819 if "ndarray_ds" in item.keys(): 1820 result.append(f"{level_base}| {sub_location}, ndarray") 1821 else: 1822 result.append(f"{level_base}| {sub_location}, group") 1823 1824 res = hdf5_view( 1825 hdf5_obj, 1826 sub_location, 1827 max_depth=max_depth, 1828 level_base=level_base + level_sep, 1829 depth=depth, 1830 return_view=True, 1831 ) 1832 1833 # if len(res)>0: 1834 for key, item in enumerate(res): 1835 result.append(item) 1836 1837 if str(type(item)).find("dataset") != -1: 1838 1839 if item[:].dtype.char == "S": 1840 values = item[:].astype("U") 1841 else: 1842 values = item[:] 1843 1844 sub_location = os.path.join(location, hdf5_key) 1845 1846 result.append( 1847 f"{level_base}| {sub_location}, dataset, type={type(values)}, shape={values.shape}" 1848 ) 1849 1850 if return_view: 1851 return result 1852 else: 1853 for res in result: 1854 print(res)
def close_all_hdf5_file():
    """
    Close all hdf5 files opened in the current session.

    Scans every object tracked by the garbage collector and closes those
    that are h5py.File instances.
    """

    for obj in gc.get_objects():  # Browse through ALL objects
        if isinstance(obj, h5py.File):  # Just HDF5 files
            try:
                print(f"try closing {obj}")
                obj.close()
            # BUGFIX: narrowed from a bare `except:` so KeyboardInterrupt /
            # SystemExit are no longer swallowed; a file that is already
            # closed must not abort the sweep.
            except Exception:
                pass  # Was already closed
Close all HDF5 files opened in the current session
def open_hdf5(path, read_only=False, replace=False, wait_time=0):
    """

    Open or create an HDF5 file.

    Parameters
    ----------

    path : str
        The file path.

    read_only : boolean
        If true the access to the hdf5 file is in read-only mode. Multiple processes can read the same hdf5 file simultaneously. This is not possible when access modes are append 'a' or write 'w'.

    replace: Boolean
        If true, the existing hdf5 file is erased

    wait_time: int
        If the hdf5 is unavailable, the function will retry for at most wait_time seconds. If this time elapses, the file won't be opened. Useful if several programs or threads need to read/write simultaneously in the same hdf5 database.

    Returns
    -------

    f :
        A h5py object, or None if the file could not be opened.

    Examples
    --------

    >>> hdf5=pyhdf5_handler.open_hdf5("./my_hdf5.hdf5")
    >>> hdf5.keys()
    >>> hdf5.attrs.keys()

    """
    f = None
    wait = 0
    # retry loop: one attempt per second until wait_time is exhausted
    while wait <= wait_time:

        f = None
        exist_file = True

        try:

            if read_only:
                if os.path.isfile(path):
                    f = h5py.File(path, "r")

                else:
                    exist_file = False
                    raise ValueError(f"File {path} does not exist.")

            else:
                if replace:
                    f = h5py.File(path, "w")

                else:
                    if os.path.isfile(path):
                        f = h5py.File(path, "a")

                    else:
                        f = h5py.File(path, "w")

        # BUGFIX: narrowed from a bare `except:` — h5py raises OSError when
        # the file is locked by another process, and the missing-file case
        # raises ValueError above; both are handled by the retry logic below.
        except (OSError, ValueError):
            pass

        if f is None:
            if not exist_file:
                print(f"File {path} does not exist.")
                return f
            else:
                # typo fixed in the message: "unvailable" -> "unavailable"
                print(f"The file {path} is unavailable, waiting {wait}/{wait_time}s")

            wait = wait + 1

            if wait_time > 0:
                time.sleep(1)

            else:
                break
        else:
            # file successfully opened: stop retrying
            break

    return f
Open or create an HDF5 file.
Parameters
path : str The file path.
read_only : boolean If true the access to the hdf5 file is in read-only mode. Multiple processes can read the same hdf5 file simultaneously. This is not possible when access modes are append 'a' or write 'w'.
replace: Boolean If true, the existing hdf5file is erased
wait_time: int If the hdf5 is unavailable, the function will try to access it several times and will wait at most wait_time seconds. If this time elapses, the file won't be opened. This parameter is useful if several programs or threads need to read/write simultaneously in the same hdf5 database.
Returns
f : A h5py object.
Examples
>>> hdf5=pyhdf5_handler.open_hdf5("./my_hdf5.hdf5")
>>> hdf5.keys()
>>> hdf5.attrs.keys()
def add_hdf5_sub_group(hdf5, subgroup=None):
    """
    Create a new subgroup in a HDF5 object.

    Parameters
    ----------

    hdf5 : h5py.File
        An hdf5 object opened with open_hdf5()

    subgroup: str
        Path to a subgroup that must be created; an empty string means the
        current location. None is a no-op.

    Returns
    -------

    hdf5 :
        the h5py object.

    Examples
    --------

    >>> hdf5=pyhdf5_handler.open_hdf5("./model_subgroup.hdf5", replace=True)
    >>> hdf5=pyhdf5_handler.add_hdf5_sub_group(hdf5, subgroup="mygroup")
    >>> hdf5.keys()
    >>> hdf5.attrs.keys()

    """
    if subgroup is not None:
        # require_group is idempotent: it creates the group only if missing
        target = "./" if subgroup == "" else subgroup
        hdf5.require_group(target)

    return hdf5
Create a new subgroup in a HDF5 object
Parameters
hdf5 : h5py.File An hdf5 object opened with open_hdf5()
subgroup: str Path to a subgroup that must be created
Returns
hdf5 : the h5py object.
Examples
>>> hdf5=pyhdf5_handler.open_hdf5("./model_subgroup.hdf5", replace=True)
>>> hdf5=pyhdf5_handler.add_hdf5_sub_group(hdf5, subgroup="mygroup")
>>> hdf5.keys()
>>> hdf5.attrs.keys()
def hdf5_dataset_creator(hdf5: h5py.File, name: str, value):
    """
    Write any value in an hdf5 object.

    Dispatches on the type of `value` to the matching _hdf5_handle_* helper
    and then creates the dataset. Structured ndarrays (named fields) and
    generic python objects are written as sub-groups instead.

    Parameters
    ----------

    hdf5 : h5py.File
        an hdf5 object

    name : str
        name of the dataset

    value : any
        value to write in the hdf5

    """

    # NOTE(review): dispatch order matters — e.g. str is tested before
    # numbers.Number, and None before the date/time types.
    if _hdf5_skip_cls(value):
        dataset = _hdf5_handle_exclude_obj(name, value)

    elif isinstance(value, str):
        dataset = _hdf5_handle_str(name, value)

    elif isinstance(value, numbers.Number):
        dataset = _hdf5_handle_numbers(name, value)

    elif value is None:
        dataset = _hdf5_handle_none(name, value)

    elif isinstance(value, (pd.Timestamp, np.datetime64, datetime.date)):
        dataset = _hdf5_handle_timestamp(name, value)

    elif isinstance(value, pd.DatetimeIndex):
        dataset = _hdf5_handle_DatetimeIndex(name, value)

    elif isinstance(value, list):
        dataset = _hdf5_handle_list(name, value)

    elif isinstance(value, tuple):
        dataset = _hdf5_handle_list(name, value)

    elif isinstance(value, np.ndarray):

        # structured arrays (named dtype fields) use a dedicated group layout
        if len(value.dtype) > 0 and len(value.dtype.names) > 0:
            _hdf5_handle_ndarray(hdf5, name, value)
            return
        else:
            dataset = _hdf5_handle_array(name, value)

    else:

        # any other object: serialize it as a nested dict in a sub-group
        hdf5 = add_hdf5_sub_group(hdf5, subgroup=name)

        newdict = object_handler.read_object_as_dict(value)

        save_dict_to_hdf5(hdf5[name], newdict)

        # BUGFIX: return here, mirroring the structured-ndarray branch above;
        # otherwise `dataset` is unbound in the call below (NameError).
        return

    _hdf5_create_dataset(hdf5, dataset)
Write any value in an hdf5 object
Parameters
hdf5 : h5py.File an hdf5 object
name : str name of the dataset
value : any value to write in the hdf5
def save_dict_to_hdf5(hdf5, dictionary):
    """

    dump a dictionary to an hdf5 file

    Parameters
    ----------

    hdf5 : h5py.File
        an hdf5 object

    dictionary : dict
        a custom python dictionary

    Raises
    ------

    ValueError
        if `dictionary` is not a dict, or if one of its values cannot be
        written to the hdf5.

    """
    if isinstance(dictionary, dict):
        for attr, value in dictionary.items():
            try:

                # "/" and " " are not valid in hdf5 key names; sanitize them
                attribute_name = str(attr)
                for character in "/ ":
                    attribute_name = attribute_name.replace(character, "_")

                if isinstance(value, dict):
                    # nested dict -> nested hdf5 group

                    hdf5 = add_hdf5_sub_group(hdf5, subgroup=attribute_name)
                    save_dict_to_hdf5(hdf5[attribute_name], value)

                else:

                    hdf5_dataset_creator(hdf5, attribute_name, value)

            # BUGFIX: narrowed from a bare `except:` and chained with
            # `from err` so the original cause is not lost.
            except Exception as err:

                raise ValueError(
                    f"Unable to save attribute {str(attr)} with value {value}"
                ) from err

    else:

        raise ValueError(f"{dictionary} must be a instance of dict.")
dump a dictionary to an hdf5 file
Parameters
hdf5 : h5py.File an hdf5 object
dictionary : dict a custom python dictionary
def save_dict_to_hdf5file(
    path_to_hdf5, dictionary=None, location="./", replace=False, wait_time=0
):
    """

    dump a dictionary to an hdf5 file

    Parameters
    ----------

    path_to_hdf5 : str
        path to the hdf5 file

    dictionary : dict | None
        a dictionary containing the data to be saved

    location : str
        path location or subgroup where to write data in the hdf5 file

    replace : Boolean
        replace an existing hdf5 file. Default is False

    wait_time: int
        If the hdf5 is unavailable, the function will retry for at most wait_time seconds. If this time elapses, the file won't be opened and the function will return None. Useful if several programs or threads need to read/write simultaneously in the same hdf5 database.

    Examples
    --------

    >>> dict={"a":1,"b":2}
    >>> pyhdf5_handler.save_dict_to_hdf5("saved_dictionary.hdf5",dict)

    """
    # guard clause: reject anything that is not a dict before touching the file
    if not isinstance(dictionary, dict):
        raise ValueError(f"The input {dictionary} must be a instance of dict.")

    hdf5 = open_hdf5(path_to_hdf5, replace=replace, wait_time=wait_time)

    # file unavailable: open_hdf5 already reported the problem
    if hdf5 is None:
        return

    hdf5 = add_hdf5_sub_group(hdf5, subgroup=location)
    save_dict_to_hdf5(hdf5[location], dictionary)

    hdf5.close()
dump a dictionary to an hdf5 file
Parameters
path_to_hdf5 : str path to the hdf5 file
dictionary : dict | None a dictionary containing the data to be saved
location : str path location or subgroup where to write data in the hdf5 file
replace : Boolean replace an existing hdf5 file. Default is False
wait_time: int If the hdf5 is unavailable, the function will try to access it several times and will wait at most wait_time seconds. If this time is elapsed, the file won't be opened and the function will return None. This parameter is useful if several programs or threads need to read/write simultaneously in the same hdf5 database.
Examples
>>> dict={"a":1,"b":2}
>>> pyhdf5_handler.save_dict_to_hdf5("saved_dictionary.hdf5",dict)
def save_object_to_hdf5(
    hdf5,
    instance,
    keys_data=None,
    location="./",
    sub_data=None,
    replace=False,
    wait_time=0,
):
    """

    dump an object to an hdf5 file

    Parameters
    ----------

    hdf5 : instance of h5py
        An opened hdf5 file; it is closed before returning.

    instance : object
        A custom python object to be saved into an hdf5

    keys_data : list | dict
        optional, a list or a dictionary of the attributes to be saved;
        derived automatically from `instance` when omitted.

    location : str
        path location or subgroup where to write data in the hdf5 file

    sub_data : dict | None
        optional, an extra dictionary containing extra data to be saved along the object

    replace : Boolean
        accepted for interface compatibility; not used here.

    wait_time: int
        accepted for interface compatibility; not used here.

    """

    if keys_data is None:
        keys_data = object_handler.generate_object_structure(
            instance, include_method=False
        )

    if hdf5 is None:
        return None

    hdf5 = add_hdf5_sub_group(hdf5, subgroup=location)

    _dump_object_to_hdf5_from_iteratable(hdf5[location], instance, keys_data)

    # optional side-car data saved next to the object
    if isinstance(sub_data, dict):
        save_dict_to_hdf5(hdf5[location], sub_data)

    hdf5.close()
dump an object to an hdf5 file
Parameters
hdf5 : instance of h5py An opened hdf5 file
instance : object A custom python object to be saved into an hdf5
keys_data : list | dict optional, a list or a dictionary of the attribute to be saved
location : str path location or subgroup where to write data in the hdf5 file
sub_data : dict | None optional, a extra dictionary containing extra-data to be saved along the object
replace : Boolean replace an existing hdf5 file. Default is False
wait_time: int If the hdf5 is unavailable, the function will try to access serveral time and will wait wait_time seconds maximum. If this time is elapsed, the file won't be opened and the funciton will return None. This parameter is usefull if several program or threads need to read/write simultaneously in the same hdf5 database.
def save_object_to_hdf5file(
    path_to_hdf5,
    instance,
    keys_data=None,
    location="./",
    sub_data=None,
    replace=False,
    wait_time=0,
):
    """

    dump an object to an hdf5 file

    Parameters
    ----------

    path_to_hdf5 : str
        path to the hdf5 file

    instance : object
        A custom python object to be saved into an hdf5

    keys_data : list | dict
        optional, a list or a dictionary of the attributes to be saved

    location : str
        path location or subgroup where to write data in the hdf5 file

    sub_data : dict | None
        optional, an extra dictionary containing extra data to be saved along the object

    replace : Boolean
        replace an existing hdf5 file. Default is False

    wait_time: int
        If the hdf5 is unavailable, the function will retry for at most wait_time seconds. If this time elapses, the file won't be opened and the function will return None. Useful if several programs or threads need to read/write simultaneously in the same hdf5 database.

    """

    # open (or create) the target file, then delegate the actual dump;
    # save_object_to_hdf5 handles a None handle and closes the file itself.
    hdf5_instance = open_hdf5(path_to_hdf5, replace=replace, wait_time=wait_time)

    save_object_to_hdf5(
        hdf5_instance,
        instance,
        keys_data=keys_data,
        location=location,
        sub_data=sub_data,
        replace=replace,
        wait_time=wait_time,
    )
dump an object to an hdf5 file
Parameters
path_to_hdf5 : str path to the hdf5 file
instance : object A custom python object to be saved into an hdf5
keys_data : list | dict optional, a list or a dictionary of the attribute to be saved
location : str path location or subgroup where to write data in the hdf5 file
sub_data : dict | None optional, a extra dictionary containing extra-data to be saved along the object
replace : Boolean replace an existing hdf5 file. Default is False
wait_time: int If the hdf5 is unavailable, the function will try to access serveral time and will wait wait_time seconds maximum. If this time is elapsed, the file won't be opened and the funciton will return None. This parameter is usefull if several program or threads need to read/write simultaneously in the same hdf5 database.
def read_hdf5file_as_dict(
    path_to_hdf5, location="./", wait_time=0, read_attrs=True, read_dataset_attrs=False
):
    """

    Open an hdf5 file, read it as a dictionary and close it.

    Parameters
    ----------

    path_to_hdf5 : str
        path to the hdf5 file

    location: str
        place in the hdf5 from which we start reading the file

    wait_time: int
        If the hdf5 is unavailable, the function will retry for at most wait_time seconds. If this time elapses, the file won't be opened and the function will return None. Useful if several programs or threads need to read/write simultaneously in the same hdf5 database.

    read_attrs : bool
        read and import attributes in the dictionary.

    read_dataset_attrs : bool
        read and import special attributes linked to any dataset and created by pyhdf5_handler. These attributes only store the original datatype of the data stored in the dataset.

    Return
    --------

    dictionary : dict, a dictionary of all keys and attributes included in the hdf5 file

    Examples
    --------

    read an hdf5 file
    dictionary=hdf5_handler.read_hdf5file_as_dict(hdf5["model1"])
    """

    hdf5_instance = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5_instance is None:
        return None

    result = read_hdf5_as_dict(
        hdf5_instance[location],
        read_attrs=read_attrs,
        read_dataset_attrs=read_dataset_attrs,
    )

    hdf5_instance.close()

    return result
Open, read and close an hdf5 file
Parameters
path_to_hdf5 : str path to the hdf5 file
location: str place in the hdf5 from which we start reading the file
read_attrs : bool read and import attributes in the dictionary.
read_dataset_attrs : bool read and import special attributes linked to any dataset and created by pyhdf5_handler. These attributes only store the original dataype of the data stored in the dataset.
Return
dictionary : dict, a dictionary of all keys and attribute included in the hdf5 file
wait_time: int If the hdf5 is unavailable, the function will try to access serveral time and will wait wait_time seconds maximum. If this time is elapsed, the file won't be opened and the funciton will return None. This parameter is usefull if several program or threads need to read/write simultaneously in the same hdf5 database.
Examples
read an hdf5 file dictionary=hdf5_handler.read_hdf5file_as_dict(hdf5["model1"])
def read_hdf5_as_dict(hdf5, read_attrs=True, read_dataset_attrs=False):
    """
    Load an hdf5 object recursively into a dictionary.

    Parameters
    ----------

    hdf5 : h5py.File
        an instance of hdf5, open with the function open_hdf5()

    read_attrs : bool
        read and import attributes in the dictionary.

    read_dataset_attrs : bool
        read and import special attributes linked to any dataset and created by pyhdf5_handler. These attributes only store the original datatype of the data stored in the dataset.

    Return
    --------

    dictionary : dict, a dictionary of all keys and attributes included in the hdf5 file

    Examples
    --------

    read only a part of an hdf5 file
    >>> hdf5=hdf5_handler.open_hdf5("./multi_model.hdf5")
    >>> dictionary=hdf5_handler.read_hdf5_as_dict(hdf5["model1"])
    >>> dictionary.keys()

    """

    if not isinstance(hdf5, (h5py.File, h5py.Group, h5py.Dataset, h5py.Datatype)):
        print("Error: input arg is not an instance of hdf5.File()")
        return {}

    dictionary = {}

    for key, item in hdf5.items():

        if str(type(item)).find("group") != -1:

            if key == "ndarray_ds":

                # a group holding "ndarray_ds" is a serialized structured
                # ndarray: it replaces the whole current level
                return _read_ndarray_datastructure(hdf5)

            else:

                # BUGFIX: propagate the attribute flags into the recursion
                # (they previously reset to the defaults for nested groups)
                dictionary.update(
                    {
                        key: read_hdf5_as_dict(
                            item,
                            read_attrs=read_attrs,
                            read_dataset_attrs=read_dataset_attrs,
                        )
                    }
                )

        if str(type(item)).find("dataset") != -1:

            # "_<key>" attributes record the dataset's original python type
            if "_" + key in hdf5.attrs.keys():
                expected_type = hdf5.attrs["_" + key]
                values = hdf5_read_dataset(item, expected_type)

            else:

                values = item[:]

            dictionary.update({key: values})

    list_attribute = []
    if read_attrs or read_dataset_attrs:
        tmp_list_attribute = list(hdf5.attrs.keys())
        hdf5_item_matching_attributes = ["_" + element for element in list(hdf5.keys())]

        if read_attrs:

            list_attribute.extend(
                list(
                    filter(
                        lambda l: l not in hdf5_item_matching_attributes, tmp_list_attribute
                    )
                )
            )

        if read_dataset_attrs:

            list_attribute.extend(
                list(filter(lambda l: l in hdf5_item_matching_attributes, tmp_list_attribute))
            )

    for key in list_attribute:
        dictionary.update({key: hdf5.attrs[key]})

    return dictionary
Load an hdf5 file
Parameters
hdf5 : h5py.File an instance of hdf5, open with the function open_hdf5()
read_attrs : bool read and import attributes in the dicitonnary.
read_dataset_attrs : bool read and import special attributes linked to any dataset and created by pyhdf5_handler. These attributes only store the original datatype of the data stored in the dataset.
Return
dictionary : dict, a dictionary of all keys and attribute included in the hdf5 file
Examples
read only a part of an hdf5 file
>>> hdf5=hdf5_handler.open_hdf5("./multi_model.hdf5")
>>> dictionary=hdf5_handler.read_hdf5_as_dict(hdf5["model1"])
>>> dictionary.keys()
def hdf5_read_dataset(item, expected_type=None):
    """
    Read a dataset stored in an hdf5 database and restore its original type.

    Parameters
    ----------

    item : h5py.File
        an hdf5 dataset/item

    expected_type: str
        the expected dtype as string str(type())

    Return
    --------

    value : the value read from the hdf5, any type matching the expected type


    """

    if expected_type == str(type("str")):
        # scalar string stored as bytes
        return item[0].decode()

    if expected_type == str(type(1)):
        return int(item[0])  # buildin int type

    if expected_type == str(type(1.0)):
        return float(item[0])  # buildin float type

    if _is_numeric_str_class(expected_type):
        return item[0]  # other int/float type like np.int64/np.float64

    if expected_type == "_None_":
        return None

    if expected_type in (str(pd.Timestamp), str(np.datetime64), str(datetime.datetime)):

        raw = item[0].decode()

        if expected_type == str(pd.Timestamp):
            return pd.Timestamp(raw)

        if expected_type == str(np.datetime64):
            return np.datetime64(raw)

        # remaining case: datetime.datetime, stored in ISO format
        return datetime.datetime.fromisoformat(raw)

    # no recorded type: fall back on the stored dtype
    data = item[:]

    if data.dtype.char == "S":
        return data.astype("U")

    if data.dtype.char == "O":
        # object dtype stores variable-length bytes; decode each element
        return [element.decode() for element in data]

    return data
Read a dataset stored in an hdf5 database
Parameters
item : h5py.File an hdf5 dataset/item
expected_type: str the expected dtype as string str(type())
Return
value : the value read from the hdf5, any type matching the expected type
def get_hdf5file_attribute(
    path_to_hdf5=str(), location="./", attribute=None, wait_time=0
):
    """
    Get the value of an attribute in the hdf5file

    Parameters
    ----------

    path_to_hdf5 : str
        the path to the hdf5file

    location : str
        path inside the hdf5 where the attribute is stored

    attribute: str
        attribute name

    wait_time: int
        If the hdf5 is unavailable, the function will try to access it several
        times and will wait wait_time seconds at most. If this time has
        elapsed, the file won't be opened and the function will return None.
        This parameter is useful if several programs or threads need to
        read/write simultaneously in the same hdf5 database.

    Return
    --------

    return_attribute : the value of the attribute

    Raises
    ------
    KeyError
        if `location` or `attribute` does not exist in the file.

    Examples
    --------

    get an attribute
    >>> attribute=hdf5_handler.get_hdf5_attribute("./multi_model.hdf5",attribute=my_attribute_name)

    """

    hdf5_base = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5_base is None:
        return None

    try:
        # close the file even if location/attribute lookup raises KeyError,
        # otherwise the handle leaks and blocks subsequent writers
        return_attribute = hdf5_base[location].attrs[attribute]
    finally:
        hdf5_base.close()

    return return_attribute
Get the value of an attribute in the hdf5file
Parameters
path_to_hdf5 : str the path to the hdf5file
location : str path inside the hdf5 where the attribute is stored
attribute: str attribute name
wait_time: int If the hdf5 is unavailable, the function will try to access it several times and will wait wait_time seconds at most. If this time has elapsed, the file won't be opened and the function will return None. This parameter is useful if several programs or threads need to read/write simultaneously in the same hdf5 database.
Return
return_attribute : the value of the attribute
Examples
get an attribute
>>> attribute=hdf5_handler.get_hdf5_attribute("./multi_model.hdf5",attribute=my_attribute_name)
def get_hdf5file_dataset(path_to_hdf5=str(), location="./", dataset=None, wait_time=0):
    """
    Get the value of a dataset in the hdf5file

    Parameters
    ----------

    path_to_hdf5 : str
        the path to the hdf5file

    location : str
        path inside the hdf5 where the dataset is stored

    dataset: str
        dataset name

    wait_time: int
        If the hdf5 is unavailable, the function will try to access it several
        times and will wait wait_time seconds at most. If this time has
        elapsed, the file won't be opened and the function will return None.
        This parameter is useful if several programs or threads need to
        read/write simultaneously in the same hdf5 database.

    Return
    --------

    return_dataset : the value of the dataset

    Raises
    ------
    KeyError
        if `location` or `dataset` does not exist in the file.

    Examples
    --------

    get a dataset
    >>> dataset=hdf5_handler.get_hdf5_dataset("./multi_model.hdf5",dataset=my_dataset_name)

    """

    hdf5_base = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5_base is None:
        return None

    try:
        # close the file even if the dataset lookup raises, otherwise the
        # handle leaks and blocks subsequent writers
        hdf5 = hdf5_base[location]

        # "_" + dataset is the special attribute written by pyhdf5_handler
        # that stores the original datatype of the dataset
        if "_" + dataset in hdf5.attrs.keys():
            expected_type = hdf5.attrs["_" + dataset]
            return_dataset = hdf5_read_dataset(hdf5[dataset], expected_type)
        else:
            return_dataset = hdf5[dataset][:]
    finally:
        hdf5_base.close()

    return return_dataset
Get the value of an attribute in the hdf5file
Parameters
path_to_hdf5 : str the path to the hdf5file
location : str path inside the hdf5 where the attribute is stored
dataset: str dataset name
wait_time: int If the hdf5 is unavailable, the function will try to access it several times and will wait wait_time seconds at most. If this time has elapsed, the file won't be opened and the function will return None. This parameter is useful if several programs or threads need to read/write simultaneously in the same hdf5 database.
Return
return_dataset : the value of the attribute
Examples
get a dataset
>>> dataset=hdf5_handler.get_hdf5_dataset("./multi_model.hdf5",dataset=my_dataset_name)
def get_hdf5file_item(
    path_to_hdf5=str(), location="./", item=None, wait_time=0, search_attrs=False
):
    """

    Get a custom item in an hdf5file

    Parameters
    ----------

    path_to_hdf5 : str
        the path to the hdf5file

    location : str
        path inside the hdf5 where the item is stored. If item is None, item
        is set to basename(location)

    item: str
        item name

    wait_time: int
        If the hdf5 is unavailable, the function will try to access it several
        times and will wait wait_time seconds at most. If this time has
        elapsed, the file won't be opened and the function will return None.
        This parameter is useful if several programs or threads need to
        read/write simultaneously in the same hdf5 database.

    search_attrs: bool
        Default is False. If True, the function will also search the item in
        the attributes first.

    Return
    --------

    hdf5_item : custom value. Can be an hdf5 object (group), a numpy array, a
        string, a float, an int ...

    Examples
    --------

    get the dataset 'dataset'
    >>> dataset=hdf5_handler.get_hdf5_item("./multi_model.hdf5",location="path/in/hdf5/dataset")

    """

    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5 is None:
        return None

    try:
        # close the file even if the item lookup raises, otherwise the handle
        # leaks and blocks subsequent writers
        hdf5_item = get_hdf5_item(
            hdf5_instance=hdf5, location=location, item=item, search_attrs=search_attrs
        )
    finally:
        hdf5.close()

    return hdf5_item
Get a custom item in an hdf5file
Parameters
path_to_hdf5 : str the path to the hdf5file
location : str path inside the hdf5 where the attribute is stored. If item is None, item is set to basename(location)
item: str item name
wait_time: int If the hdf5 is unavailable, the function will try to access it several times and will wait wait_time seconds at most. If this time has elapsed, the file won't be opened and the function will return None. This parameter is useful if several programs or threads need to read/write simultaneously in the same hdf5 database.
search_attrs: bool Default is False. If True, the function will also search in the item in the attribute first.
Return
return : custom value. can be an hdf5 object (group), an numpy array, a string, a float, an int ...
Examples
get the dataset 'dataset'
>>> dataset=hdf5_handler.get_hdf5_item("./multi_model.hdf5",location="path/in/hdf5/dataset")
def get_hdf5_item(hdf5_instance=None, location="./", item=None, search_attrs=False):
    """

    Get a custom item in an hdf5file

    Parameters
    ----------

    hdf5_instance : h5py.File
        an instance of an hdf5

    location : str
        path inside the hdf5 where the item is stored. If item is None, item
        is set to basename(location)

    item: str
        item name

    search_attrs: bool
        Default is False. If True, the function will search the item in the
        attributes first.

    Return
    ------

    custom value. Can be an hdf5 object (group), a numpy array, a string, a
    float, an int ... None if the item is not found.

    Examples
    --------

    get the dataset 'dataset'
    >>> dataset=hdf5_handler.get_hdf5_item("./multi_model.hdf5",location="path/in/hdf5/dataset")

    """

    # when no item is given, use the last component of `location` as the item
    if item is None and isinstance(location, str):
        head, tail = os.path.split(location)
        if len(tail) > 0:
            item = tail
            location = head

    if not isinstance(item, str):
        print(f"Bad search item:{item}")
        return None

    # BUGFIX: a stray unconditional `return None` used to sit here, making
    # everything below unreachable so the function always returned None.

    hdf5 = hdf5_instance[location]

    # first search in the attributes
    if search_attrs:
        if item in hdf5.attrs.keys():
            return hdf5.attrs[item]

    # then search in groups and datasets
    if item in hdf5.keys():

        hdf5_item = hdf5[item]

        if str(type(hdf5_item)).find("group") != -1:

            if item == "ndarray_ds":
                # group is actually a serialized numpy ndarray
                return _read_ndarray_datastructure(hdf5)
            else:
                return read_hdf5_as_dict(hdf5_item)

        elif str(type(hdf5_item)).find("dataset") != -1:

            # "_" + item is the special attribute storing the original dtype
            if "_" + item in hdf5.attrs.keys():
                expected_type = hdf5.attrs["_" + item]
                return hdf5_read_dataset(hdf5_item, expected_type)
            else:
                return hdf5_item[:]

        else:
            return hdf5_item

    else:
        return None
Get a custom item in an hdf5file
Parameters
hdf5_instance : h5py.File an instance of an hdf5
location : str path inside the hdf5 where the attribute is stored. If item is None, item is set to basename(location)
item: str item name
search_attrs: bool Default is False. If True, the function will search in the item in the attribute first.
Return
return : custom value. can be an hdf5 object (group), an numpy array, a string, a float, an int ...
Examples
get the dataset 'dataset'
>>> dataset=hdf5_handler.get_hdf5_item("./multi_model.hdf5",location="path/in/hdf5/dataset")
def search_in_hdf5file(
    path_to_hdf5, key=None, location="./", wait_time=0, search_attrs=False
):
    """

    Search key in an hdf5 file and return a list of dictionaries
    {path, key, datatype, value}. Value and key are returned only if the key
    is an attribute or a dataset (None otherwise)

    Parameters
    ----------

    path_to_hdf5 : str
        the path to the hdf5file

    key: str
        key to search in the hdf5file

    location : str
        path inside the hdf5 where to start the search

    wait_time: int
        If the hdf5 is unavailable, the function will try to access it several
        times and will wait wait_time seconds at most. If this time has
        elapsed, the file won't be opened and the function will return None.
        This parameter is useful if several programs or threads need to
        read/write simultaneously in the same hdf5 database.

    search_attrs : bool
        Default False, also search in the attributes

    Return
    ------

    results : list of match dictionaries, [] if key is None, None if the file
        could not be opened

    Examples
    --------

    search in a hdf5file
    >>> matchkey=hdf5_handler.search_in_hdf5file(hdf5filename, key='Nom_du_BV',location="./")

    """
    if key is None:
        print("Nothing to search, use key=")
        return []

    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5 is None:
        return None

    try:
        # close the file even if the recursive search raises, otherwise the
        # handle leaks and blocks subsequent writers
        results = search_in_hdf5(hdf5, key, location=location, search_attrs=search_attrs)
    finally:
        hdf5.close()

    return results
Search key in an hdf5 and return a list of [locations, datatype, key name, values]. Value and key are returned only if the key is an attribute or a dataset (None otherwise)
Parameters
path_to_hdf5 : str the path to the hdf5file
key: str key to search in the hdf5file
location : str path inside the hdf5 where to start the research
wait_time: int If the hdf5 is unavailable, the function will try to access it several times and will wait wait_time seconds at most. If this time has elapsed, the file won't be opened and the function will return None. This parameter is useful if several programs or threads need to read/write simultaneously in the same hdf5 database.
search_attrs : Bool Default false, search in the attributes
Return
return_dataset : the value of the attribute
Examples
search in a hdf5file
>>> matchkey=hdf5_handler.search_in_hdf5file(hdf5filename, key='Nom_du_BV',location="./")
def search_in_hdf5(hdf5_base, key=None, location="./", search_attrs=False):
    """

    Search key in an hdf5 and return a list of dictionaries
    {path, key, datatype, value}. Value and key are returned only if the key
    is an attribute or a dataset (None otherwise)

    Parameters
    ----------

    hdf5_base : h5py.File
        opened instance of the hdf5

    key: str
        key to search in the hdf5file

    location : str
        path inside the hdf5 where to start the search

    search_attrs : bool
        Default False, also search in the attributes

    Return
    ------

    result : list of match dictionaries, [] if key is None or nothing matches

    Examples
    --------

    search in a hdf5
    >>> hdf5=hdf5_handler.open_hdf5(hdf5_file)
    >>> matchkey=hdf5_handler.search_in_hdf5(hdf5, key='Nom_du_BV',location="./")
    >>> hdf5.close()

    """
    if key is None:
        print("Nothing to search, use key=")
        return []

    result = []

    hdf5 = hdf5_base[location]

    # optionally match the key against the attributes of the current group
    if search_attrs:
        if key in hdf5.attrs.keys():
            result.append(
                {
                    "path": location,
                    "key": key,
                    "datatype": "attribute",
                    "value": hdf5.attrs[key],
                }
            )

    for hdf5_key, item in hdf5.items():

        if str(type(item)).find("group") != -1:

            sub_location = os.path.join(location, hdf5_key)

            if hdf5_key == key:

                if "ndarray_ds" in item.keys():
                    # group is actually a serialized numpy ndarray
                    result.append(
                        {
                            "path": sub_location,
                            "key": None,
                            "datatype": "ndarray",
                            "value": _read_ndarray_datastructure(item),
                        }
                    )
                else:
                    result.append(
                        {
                            "path": sub_location,
                            "key": None,
                            "datatype": "group",
                            "value": None,
                        }
                    )

            # BUGFIX: forward search_attrs so attributes are also searched at
            # deeper levels (it was previously dropped by the recursion)
            result.extend(
                search_in_hdf5(hdf5_base, key, sub_location, search_attrs=search_attrs)
            )

        elif str(type(item)).find("dataset") != -1:

            if hdf5_key == key:

                # read the dataset ONCE instead of once per dtype test
                data = item[:]

                if data.dtype.char == "S":
                    # fixed-length byte strings -> unicode
                    values = data.astype("U")

                elif data.dtype.char == "O":
                    # variable-length byte strings -> decoded python list
                    values = [it.decode() for it in data]

                else:
                    values = data

                result.append(
                    {"path": location, "key": key, "datatype": "dataset", "value": values}
                )

    return result
Search key in an hdf5 and return a list of [locations, datatype, key name, values]. Value and key are returned only if the key is an attribute or a dataset (None otherwise)
Parameters
hdf5_base : h5py.File opened instance of the hdf5
key: str key to search in the hdf5file
location : str path inside the hdf5 where to start the research
search_attrs : Bool Default false, search in the attributes
Return
return_dataset : the value of the attribute
Examples
search in a hdf5
>>> hdf5=hdf5_handler.open_hdf5(hdf5_file)
>>> matchkey=hdf5_handler.search_in_hdf5(hdf5, key='Nom_du_BV',location="./")
>>> hdf5.close()
def hdf5file_view(
    path_to_hdf5,
    location="./",
    max_depth=None,
    level_base=">",
    level_sep="--",
    depth=None,
    wait_time=0,
    list_attrs=True,
    list_dataset_attrs=False,
    return_view=False,
):
    """

    List recursively all datasets (and attributes) of an hdf5 file.

    Parameters
    ----------


    path_to_hdf5 : str
        Path to an hdf5 database

    location : str
        path inside the hdf5 where to start the listing

    max_depth: int
        Max depth of the search in the hdf5

    level_base: str
        string used as separator at the lower level (default '>')

    level_sep: str
        string used as separator at higher level (default '--')

    depth: int
        current depth level

    list_attrs: bool
        default is True, list the attributes

    list_dataset_attrs: bool
        default is False, list the special attributes defined for each dataset
        by pyhdf5_handler

    return_view: bool
        return the object view in a list (do not print at screen)

    wait_time: int
        If the hdf5 is unavailable, the function will try to access it several
        times and will wait wait_time seconds at most. If this time has
        elapsed, the file won't be opened and the function will return None.
        This parameter is useful if several programs or threads need to
        read/write simultaneously in the same hdf5 database.

    Return
    --------

    results : optional, the view of the hdf5

    Examples
    --------

    view a hdf5file
    >>> hdf5_handler.hdf5file_view("./multi_model.hdf5")

    """

    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5 is None:
        return None

    try:
        # close the file even if the recursive listing raises, otherwise the
        # handle leaks and blocks subsequent writers
        results = hdf5_view(
            hdf5,
            location=location,
            max_depth=max_depth,
            level_base=level_base,
            level_sep=level_sep,
            depth=depth,
            list_attrs=list_attrs,
            list_dataset_attrs=list_dataset_attrs,
            return_view=return_view,
        )
    finally:
        hdf5.close()

    return results
Search key in an hdf5 and return a list of [locations, datatype, key name, values]. Value and key are returned only if the key is an attribute or a dataset (None otherwise)
Parameters
path_to_hdf5 : str Path to an hdf5 database
location : str path inside the hdf5 where to start the research
max_depth: int Max depth of the search in the hdf5
level_base: str string used as separator at the lower level (default '>')
level_sep: str string used as separator at higher level (default '--')
depth: int current depth level
list_attrs: bool default is True, list the attributes
list_dataset_attrs: bool default is False, list the special attributes defined for each dataset by pyhdf5_handler
return_view: bool return the object view in a dictionary (do not print to screen)
wait_time: int If the hdf5 is unavailable, the function will try to access it several times and will wait wait_time seconds at most. If this time has elapsed, the file won't be opened and the function will return None. This parameter is useful if several programs or threads need to read/write simultaneously in the same hdf5 database.
Return
dictionary : optional, the view of the hdf5
Examples
search in a hdf5file
>>> matchkey=hdf5_handler.search_in_hdf5file(hdf5filename, key='Nom_du_BV',location="./")
def hdf5file_ls(path_to_hdf5, location="./"):
    """
    List datasets in an hdf5file.

    Parameters
    ----------

    path_to_hdf5 : str
        path to a hdf5file

    location: str
        path inside the hdf5 where to start the listing

    Example
    -------

    >>> hdf5file_ls(test.hdf5)

    """

    hdf5 = open_hdf5(path_to_hdf5, read_only=True)

    # open_hdf5 returns None when the file does not exist or is unavailable
    if hdf5 is None:
        return

    try:
        hdf5_view(
            hdf5,
            location=location,
            max_depth=0,
            level_base=">",
            level_sep="--",
            list_attrs=False,
            return_view=False,
        )
    finally:
        # BUGFIX: the file handle was never released before
        hdf5.close()
List dataset in an hdf5file.
Parameters
path_to_hdf5 : str path to a hdf5file
location: str path inside the hdf5 where to start the research
Example
>>> hdf5file_ls(test.hdf5)
def hdf5_ls(hdf5):
    """
    List datasets at the root of an hdf5 instance.

    Parameters
    ----------

    hdf5 : h5py.File
        an opened hdf5 instance (the caller keeps ownership and must close it)

    Example
    -------

    >>> hdf5 = open_hdf5(path_to_hdf5, read_only=True)
    >>> hdf5_ls(hdf5)

    """

    hdf5_view(
        hdf5,
        location="./",
        max_depth=0,
        level_base=">",
        level_sep="--",
        list_attrs=False,
        return_view=False,
    )
List dataset in an hdf5 instance.
Parameters
hdf5 : h5py.File hdf5 instance
location: str path inside the hdf5 where to start the research
Example
>>> hdf5 = open_hdf5(path_to_hdf5, read_only=True)
>>> hdf5_ls(hdf5)
def hdf5_view(
    hdf5_obj,
    location="./",
    max_depth=None,
    level_base=">",
    level_sep="--",
    depth=None,
    list_attrs=True,
    list_dataset_attrs=False,
    return_view=False,
):
    """
    List recursively all datasets (and attributes) in an hdf5 object.

    Parameters
    ----------

    hdf5_obj : h5py.File
        opened instance of the hdf5

    location : str
        path inside the hdf5 where to start the listing

    max_depth: int
        Max depth of the search in the hdf5

    level_base: str
        string used as separator at the lower level (default '>')

    level_sep: str
        string used as separator at higher level (default '--')

    depth: int
        current level depth

    list_attrs: bool
        default is True, list the attributes

    list_dataset_attrs: bool
        default is False, list the special attributes defined for each dataset
        by pyhdf5_handler

    return_view: bool
        return the object view in a list instead of printing it

    Return
    --------

    result : optional, the view of the hdf5

    Examples
    --------

    view a hdf5
    >>> hdf5=hdf5_handler.open_hdf5(hdf5_file)
    >>> hdf5_handler.hdf5_view(hdf5)
    >>> hdf5.close()

    """

    result = []

    # depth bookkeeping: stop recursing below max_depth
    if max_depth is not None:

        depth = 0 if depth is None else depth + 1

        if depth > max_depth:
            return result

    hdf5 = hdf5_obj[location]

    # select which attributes to display; the special per-dataset attributes
    # written by pyhdf5_handler are named "_" + dataset_name
    list_attribute = []
    if list_attrs or list_dataset_attrs:
        tmp_list_attribute = list(hdf5.attrs.keys())
        list_keys_matching_attributes = ["_" + element for element in list(hdf5.keys())]

        if list_attrs:
            list_attribute.extend(
                [a for a in tmp_list_attribute if a not in list_keys_matching_attributes]
            )

        if list_dataset_attrs:
            list_attribute.extend(
                [a for a in tmp_list_attribute if a in list_keys_matching_attributes]
            )

    for key in list_attribute:
        values = hdf5.attrs[key]
        sub_location = os.path.join(location, key)
        # BUGFIX: np.bool_ instead of np.bool (alias removed in numpy 1.24)
        if isinstance(
            values, (int, float, np.int64, np.float64, np.int32, np.float32, np.bool_)
        ):
            result.append(
                f"{level_base}| {sub_location}, attribute, type={type(values)}, value={values}"
            )
        elif isinstance(values, str) and len(values) < 20:
            result.append(
                f"{level_base}| {sub_location}, attribute, type={type(values)}, len={len(values)}, value={values}"
            )
        else:
            # long/array-like attribute: truncate the displayed value
            result.append(
                f"{level_base}| {sub_location}, attribute, type={type(values)}, len={len(values)}, value={values[0:20]}..."
            )

    for hdf5_key, item in hdf5.items():

        if str(type(item)).find("group") != -1:

            sub_location = os.path.join(location, hdf5_key)

            if "ndarray_ds" in item.keys():
                # group is actually a serialized numpy ndarray
                result.append(f"{level_base}| {sub_location}, ndarray")
            else:
                result.append(f"{level_base}| {sub_location}, group")

            # recurse; forward every display option so nested levels render
            # consistently (level_sep/list_attrs were previously dropped)
            result.extend(
                hdf5_view(
                    hdf5_obj,
                    sub_location,
                    max_depth=max_depth,
                    level_base=level_base + level_sep,
                    level_sep=level_sep,
                    depth=depth,
                    list_attrs=list_attrs,
                    list_dataset_attrs=list_dataset_attrs,
                    return_view=True,
                )
            )

        elif str(type(item)).find("dataset") != -1:

            # read the dataset ONCE instead of once per dtype test
            data = item[:]
            values = data.astype("U") if data.dtype.char == "S" else data

            sub_location = os.path.join(location, hdf5_key)

            result.append(
                f"{level_base}| {sub_location}, dataset, type={type(values)}, shape={values.shape}"
            )

    if return_view:
        return result
    else:
        for res in result:
            print(res)
List recursively all dataset (and attributes) in an hdf5 object.
Parameters
hdf5_obj : h5py.File opened instance of the hdf5
location : str path inside the hdf5 where to start the research
max_depth: int Max depth of the search in the hdf5
level_base: str string used as separator at the lower level (default '>')
level_sep: str string used as separator at higher level (default '--')
depth: int current level depth
list_attrs: bool default is True, list the attributes
list_dataset_attrs: bool default is False, list the special attributes defined for each dataset by pyhdf5_handler
return_view: bool return the object view in a dictionary
Return
dictionary : optional, the view of the hdf5
Examples
search in a hdf5
>>> hdf5=hdf5_handler.open_hdf5(hdf5_file)
>>> matchkey=hdf5_handler.search_in_hdf5(hdf5, key='Nom_du_BV',location="./")
>>> hdf5.close()