pyhdf5_handler.src.hdf5_handler

   1from __future__ import annotations
   2
   3import os
   4import h5py
   5import numpy as np
   6import numbers
   7import pandas as pd
   8import datetime
   9import time
  10import importlib
  11
  12from ..src import object_handler
  13from ..src import constant
  14
  15import gc
  16import re
  17
  18
  19def close_all_hdf5_file():
  20    """
  21    Close all hdf5 file opened in the current session
  22    """
  23
  24    for obj in gc.get_objects():  # Browse through ALL objects
  25        if isinstance(obj, h5py.File):  # Just HDF5 files
  26            try:
  27                print(f"try closing {obj}")
  28                obj.close()
  29            except:
  30                pass  # Was already closed
  31
  32
  33def open_hdf5(path, read_only=False, replace=False, wait_time=0):
  34    """
  35
  36    Open or create an HDF5 file.
  37
  38    Parameters
  39    ----------
  40
  41    path : str
  42        The file path.
  43
  44    read_only : boolean
  45        If true the access to the hdf5 fil is in read-only mode. Multi process can read the same hdf5 file simulteneously. This is not possible when access mode are append 'a' or write 'w'.
  46
  47    replace: Boolean
  48        If true, the existing hdf5file is erased
  49
  50    wait_time: int
  51        If the hdf5 is unavailable, the function will try to access serveral time and will wait wait_time seconds maximum. If this time is elapsed, the file won't be opened. This parameter is usefull if several program or threads need to read/write simultaneously in the same hdf5 database.
  52
  53    Returns
  54    -------
  55
  56    f :
  57        A h5py object.
  58
  59    Examples
  60    --------
  61
  62    >>> hdf5=pyhdf5_handler.open_hdf5("./my_hdf5.hdf5")
  63    >>> hdf5.keys()
  64    >>> hdf5.attrs.keys()
  65
  66    """
  67    f = None
  68    wait = 0
  69    while wait <= wait_time:
  70
  71        f = None
  72        exist_file = True
  73
  74        try:
  75
  76            if read_only:
  77                if os.path.isfile(path):
  78                    f = h5py.File(path, "r")
  79
  80                else:
  81                    exist_file = False
  82                    raise ValueError(f"File {path} does not exist.")
  83
  84            else:
  85                if replace:
  86                    f = h5py.File(path, "w")
  87
  88                else:
  89                    if os.path.isfile(path):
  90                        f = h5py.File(path, "a")
  91
  92                    else:
  93                        f = h5py.File(path, "w")
  94        except:
  95            pass
  96
  97        if f is None:
  98            if not exist_file:
  99                print(f"File {path} does not exist.")
 100                return f
 101            else:
 102                print(f"The file {path} is unvailable, waiting {wait}/{wait_time}s")
 103
 104            wait = wait + 1
 105
 106            if wait_time > 0:
 107                time.sleep(1)
 108
 109        else:
 110            break
 111
 112    return f
 113
 114
 115def add_hdf5_sub_group(hdf5, subgroup=None):
 116    """
 117    Create a new subgroup in a HDF5 object
 118
 119    Parameters
 120    ----------
 121
 122    hdf5 : h5py.File
 123        An hdf5 object opened with open_hdf5()
 124
 125    subgroup: str
 126        Path to a subgroub that must be created
 127
 128    Returns
 129    -------
 130
 131    hdf5 :
 132        the h5py object.
 133
 134    Examples
 135    --------
 136
 137    >>> hdf5=pyhdf5_handler.open_hdf5("./model_subgroup.hdf5", replace=True)
 138    >>> hdf5=pyhdf5_handler.add_hdf5_sub_group(hdf5, subgroup="mygroup")
 139    >>> hdf5.keys()
 140    >>> hdf5.attrs.keys()
 141
 142    """
 143    if subgroup is not None:
 144        if subgroup == "":
 145            subgroup = "./"
 146
 147        hdf5.require_group(subgroup)
 148
 149    return hdf5
 150
 151
 152def _dump_object_to_hdf5_from_list_attribute(hdf5, instance, list_attr):
 153    """
 154    dump a object to a hdf5 file from a list of attributes
 155
 156    Parameters
 157    ----------
 158    hdf5 : h5py.File
 159        an hdf5 object
 160
 161    instance : object
 162        a custom python object.
 163
 164    list_attr : list
 165        a list of attribute
 166
 167    """
 168    if isinstance(list_attr, list):
 169        for attr in list_attr:
 170            if isinstance(attr, str):
 171                _dump_object_to_hdf5_from_str_attribute(hdf5, instance, attr)
 172
 173            elif isinstance(attr, list):
 174                _dump_object_to_hdf5_from_list_attribute(hdf5, instance, attr)
 175
 176            elif isinstance(attr, dict):
 177                _dump_object_to_hdf5_from_dict_attribute(hdf5, instance, attr)
 178
 179            else:
 180                raise ValueError(
 181                    f"inconsistent {attr} in {list_attr}. {attr} must be a an instance of dict, list or str"
 182                )
 183
 184    else:
 185        raise ValueError(f"{list_attr} must be a instance of list.")
 186
 187
 188def _dump_object_to_hdf5_from_dict_attribute(hdf5, instance, dict_attr):
 189    """
 190    dump a object to a hdf5 file from a dictionary of attributes
 191
 192    Parameters
 193    ----------
 194
 195    hdf5 : h5py.File
 196        an hdf5 object
 197
 198    instance : object
 199        a custom python object.
 200
 201    dict_attr : dict
 202        a dictionary of attribute
 203
 204    """
 205    if isinstance(dict_attr, dict):
 206        for attr, value in dict_attr.items():
 207            hdf5 = add_hdf5_sub_group(hdf5, subgroup=attr)
 208
 209            try:
 210                sub_instance = getattr(instance, attr)
 211
 212            except:
 213                if isinstance(instance, dict):
 214                    sub_instance = instance[attr]
 215                else:
 216                    sub_instance = instance
 217
 218            if isinstance(value, dict):
 219                _dump_object_to_hdf5_from_dict_attribute(hdf5[attr], sub_instance, value)
 220
 221            elif isinstance(value, list):
 222                _dump_object_to_hdf5_from_list_attribute(hdf5[attr], sub_instance, value)
 223
 224            elif isinstance(value, str):
 225                _dump_object_to_hdf5_from_str_attribute(hdf5[attr], sub_instance, value)
 226
 227            else:
 228
 229                raise ValueError(
 230                    f"inconsistent '{attr}' in '{dict_attr}'. Dict({attr}) must be a instance of dict, list or str"
 231                )
 232
 233    else:
 234        raise ValueError(f"{dict_attr} must be a instance of dict.")
 235
 236
 237def _dump_object_to_hdf5_from_str_attribute(hdf5, instance, str_attr):
 238    """
 239    dump a object to a hdf5 file from a string attribute
 240
 241    Parameters
 242    ----------
 243
 244    hdf5 : h5py.File
 245        an hdf5 object
 246
 247    instance : object
 248        a custom python object.
 249
 250    str_attr : str
 251        a string attribute
 252
 253    """
 254
 255    if isinstance(str_attr, str):
 256
 257        try:
 258            value = getattr(instance, str_attr)
 259
 260        except:
 261            if isinstance(instance, dict):
 262                value = instance[str_attr]
 263            else:
 264                value = instance
 265
 266        try:
 267
 268            attribute_name = str(str_attr)
 269            for character in "/ ":
 270                attribute_name = attribute_name.replace(character, "_")
 271
 272            if isinstance(value, dict):
 273
 274                # print("---> dictionary: ", str_attr, value)
 275
 276                hdf5 = add_hdf5_sub_group(hdf5, subgroup=attribute_name)
 277                save_dict_to_hdf5(hdf5[attribute_name], value)
 278
 279            else:
 280
 281                hdf5_dataset_creator(hdf5, attribute_name, value)
 282
 283        except:
 284            raise ValueError(
 285                f"Unable to dump attribute {str_attr} with value {value} from {instance}"
 286            )
 287
 288    else:
 289        raise ValueError(f"{str_attr} must be a instance of str.")
 290
 291
 292def _dump_object_to_hdf5_from_iteratable(hdf5, instance, iteratable=None):
 293    """
 294       dump a object to a hdf5 file from a iteratable object list or dict
 295
 296       Parameters
 297       ----------
 298
 299       hdf5 : h5py.File
 300           an hdf5 object
 301       instance : object
 302           a custom python object.
 303       iteratable : list | dict
 304           a list or a dict of attribute
 305
 306       Examples
 307       --------
 308
 309       >>> setup, mesh = smash.load_dataset("cance")
 310       >>> model = smash.Model(setup, mesh)
 311       >>> model.run(inplace=True)
 312       >>>
 313       >>> hdf5=pyhdf5_handler.open_hdf5("./model.hdf5", replace=True)
 314       >>> hdf5=pyhdf5_handler.add_hdf5_sub_group(hdf5, subgroup="model1")
 315    pyhdf5_handler._dump_object_to_hdf5_from_iteratable(hdf5["model1"], model)
 316
 317    """
 318    if isinstance(iteratable, list):
 319        _dump_object_to_hdf5_from_list_attribute(hdf5, instance, iteratable)
 320
 321    elif isinstance(iteratable, dict):
 322        _dump_object_to_hdf5_from_dict_attribute(hdf5, instance, iteratable)
 323
 324    else:
 325        raise ValueError(f"{iteratable} must be a instance of list or dict.")
 326
 327
 328def _hdf5_handle_str(name, value):
 329
 330    dataset = {
 331        "name": name,
 332        "attr_value": str(type(value)),
 333        "dataset_value": value,
 334        "shape": 1,
 335        "dtype": h5py.string_dtype(encoding="utf-8"),
 336    }
 337
 338    return dataset
 339
 340
 341def _hdf5_handle_numbers(name: str, value: numbers.Number):
 342
 343    arr = np.array([value])
 344    dataset = {
 345        "name": name,
 346        "attr_value": str(type(value)),
 347        "dataset_value": arr,
 348        "shape": arr.shape,
 349        "dtype": arr.dtype,
 350    }
 351
 352    return dataset
 353
 354
 355def _hdf5_handle_none(name: str, value: None):
 356
 357    dataset = {
 358        "name": name,
 359        "attr_value": "_None_",
 360        "dataset_value": "_None_",
 361        "shape": 1,
 362        "dtype": h5py.string_dtype(encoding="utf-8"),
 363    }
 364
 365    return dataset
 366
 367
 368def _hdf5_handle_timestamp(
 369    name: str, value: pd.Timestamp | np.datetime64 | datetime.date
 370):
 371
 372    dtype = type(value)
 373
 374    if isinstance(value, (np.datetime64)):
 375        value = value.tolist()
 376
 377    dataset = {
 378        "name": name,
 379        "attr_value": str(dtype),
 380        "dataset_value": value.strftime("%Y-%m-%d %H:%M"),
 381        "shape": 1,
 382        "dtype": h5py.string_dtype(encoding="utf-8"),
 383    }
 384
 385    return dataset
 386
 387
 388def _hdf5_handle_DatetimeIndex(name: str, value: pd.DatetimeIndex):
 389
 390    dataset = _hdf5_handle_array(name, value)
 391
 392    return dataset
 393
 394
 395def _hdf5_handle_list(name: str, value: list | tuple):
 396
 397    arr = np.array(value)
 398
 399    dataset = _hdf5_handle_array(name, arr)
 400
 401    return dataset
 402
 403
 404def _hdf5_handle_exclude_obj(name: str, value: list | tuple):
 405
 406    dtype = type(value)
 407
 408    dataset = {
 409        "name": name,
 410        "attr_value": str(dtype),
 411        "dataset_value": f"excluded data type {str(dtype)}",
 412        "shape": 1,
 413        "dtype": h5py.string_dtype(encoding="utf-8"),
 414    }
 415
 416    return dataset
 417
 418def _hdf5_skip_cls(value):
 419    
 420    type_str = str(type(value))
 421    module_name = type_str.split("'")[1].split('.')[0]
 422    
 423    if module_name in constant.EXCLUDE_PYTHON_OBJ:
 424        return True
 425    else:
 426        return False
 427
 428def _hdf5_handle_array(name: str, value: np.ndarray):
 429
 430    dtype_attr = type(value)
 431    dtype = value.dtype
 432
 433    if value.dtype.char == "M":
 434
 435        ListDate = value.tolist()
 436        ListDateStr = list()
 437        for date in ListDate:
 438            ListDateStr.append(date.strftime("%Y-%m-%d %H:%M"))
 439        value = np.array(ListDateStr)
 440        value = value.astype("O")
 441        dtype = h5py.string_dtype(encoding="utf-8")
 442
 443    elif value.dtype == "object":
 444
 445        value = value.astype("S")
 446        dtype = h5py.string_dtype(encoding="utf-8")
 447
 448    elif value.dtype.char == "U":
 449        value = value.astype("S")
 450        dtype = h5py.string_dtype(encoding="utf-8")
 451
 452    dataset = {
 453        "name": name,
 454        "attr_value": str(dtype_attr),
 455        "dataset_value": value,
 456        "shape": value.shape,
 457        "dtype": dtype,
 458    }
 459
 460    return dataset
 461
 462
 463def _hdf5_handle_ndarray(hdf5: h5py.File, name: str, value: np.ndarray):
 464
 465    hdf5 = add_hdf5_sub_group(hdf5, subgroup=name)
 466    _dump_ndarray_to_hdf5(hdf5[name], value)
 467
 468
 469def _hdf5_create_dataset(hdf5: h5py.File, dataset: dict):
 470
 471    if dataset["name"] in hdf5.keys():
 472        del hdf5[dataset["name"]]
 473
 474    hdf5.create_dataset(
 475        dataset["name"],
 476        shape=dataset["shape"],
 477        dtype=dataset["dtype"],
 478        data=dataset["dataset_value"],
 479        compression="gzip",
 480        chunks=True,
 481    )
 482
 483    if "_" + dataset["name"] in list(hdf5.attrs.keys()):
 484        del hdf5.attrs["_" + dataset["name"]]
 485
 486    hdf5.attrs["_" + dataset["name"]] = dataset["attr_value"]
 487
 488
 489def hdf5_dataset_creator(hdf5: h5py.File, name: str, value):
 490    """
 491    Write any value in an hdf5 object
 492
 493    Parameters
 494    ----------
 495
 496    hdf5 : h5py.File
 497        an hdf5 object
 498
 499    name : str
 500        name of the dataset
 501
 502    value : any
 503        value to write in the hdf5
 504
 505    """
 506    
 507    if _hdf5_skip_cls(value):
 508        dataset = _hdf5_handle_exclude_obj(name, value)
 509        
 510    elif isinstance(value, str):
 511        dataset = _hdf5_handle_str(name, value)
 512
 513    elif isinstance(value, numbers.Number):
 514        dataset = _hdf5_handle_numbers(name, value)
 515
 516    elif value is None:
 517        dataset = _hdf5_handle_none(name, value)
 518
 519    elif isinstance(value, (pd.Timestamp, np.datetime64, datetime.date)):
 520        dataset = _hdf5_handle_timestamp(name, value)
 521
 522    elif isinstance(value, pd.DatetimeIndex):
 523        dataset = _hdf5_handle_DatetimeIndex(name, value)
 524
 525    elif isinstance(value, list):
 526        dataset = _hdf5_handle_list(name, value)
 527
 528    elif isinstance(value, tuple):
 529        dataset = _hdf5_handle_list(name, value)
 530
 531    elif isinstance(value, np.ndarray):
 532
 533        if len(value.dtype) > 0 and len(value.dtype.names) > 0:
 534            _hdf5_handle_ndarray(hdf5, name, value)
 535            return
 536        else:
 537            dataset = _hdf5_handle_array(name, value)
 538
 539    else:
 540
 541        hdf5 = add_hdf5_sub_group(hdf5, subgroup=name)
 542
 543        newdict = object_handler.read_object_as_dict(value)
 544
 545        save_dict_to_hdf5(hdf5[name], newdict)
 546
 547    _hdf5_create_dataset(hdf5, dataset)
 548
 549
 550def _dump_ndarray_to_hdf5(hdf5, value):
 551    """
 552    dump a ndarray data structure to an hdf5 file: this functions create a group ndarray_ds and store each component of the ndarray as a dataset. Plus it add 2 datasets which store the dtypes (ndarray_dtype) and labels (ndarray_indexes).
 553
 554    Parameters
 555    ----------
 556
 557    hdf5 : h5py.File
 558        an hdf5 object
 559
 560    value : ndarray
 561        an ndarray data structure with different datatype
 562
 563    """
 564    # save ndarray datastructure
 565
 566    hdf5 = add_hdf5_sub_group(hdf5, subgroup="ndarray_ds")
 567    hdf5_data = hdf5["ndarray_ds"]
 568
 569    for item in value.dtype.names:
 570
 571        hdf5_dataset_creator(hdf5=hdf5_data, name=item, value=value[item])
 572
 573    index = np.array(value.dtype.descr)[:, 0]
 574    dtype = np.array(value.dtype.descr)[:, 1]
 575    index = index.astype("O")
 576    dtype = dtype.astype("O")
 577    data_type = h5py.string_dtype(encoding="utf-8")
 578
 579    if "ndarray_dtype" in hdf5_data.keys():
 580        del hdf5_data["ndarray_dtype"]
 581
 582    hdf5_data.create_dataset(
 583        "ndarray_dtype",
 584        shape=dtype.shape,
 585        dtype=data_type,
 586        data=dtype,
 587        compression="gzip",
 588        chunks=True,
 589    )
 590
 591    if "ndarray_indexes" in hdf5_data.keys():
 592        del hdf5_data["ndarray_indexes"]
 593
 594    hdf5_data.create_dataset(
 595        "ndarray_indexes",
 596        shape=index.shape,
 597        dtype=data_type,
 598        data=index,
 599        compression="gzip",
 600        chunks=True,
 601    )
 602
 603
 604def _read_ndarray_datastructure(hdf5):
 605    """
 606    read a ndarray data structure from hdf5 file
 607
 608    Parameters
 609    ----------
 610
 611    hdf5 : h5py.File
 612        an hdf5 object at the roots of the ndarray datastructure
 613
 614    Return
 615    ------
 616
 617    ndarray : the ndarray
 618
 619    """
 620
 621    if "ndarray_ds" in list(hdf5.keys()):
 622
 623        decoded_item = list()
 624        for it in hdf5["ndarray_ds/ndarray_dtype"][:]:
 625            decoded_item.append(it.decode())
 626        list_dtypes = decoded_item
 627
 628        decoded_item = list()
 629        for it in hdf5["ndarray_ds/ndarray_indexes"][:]:
 630            decoded_item.append(it.decode())
 631        list_indexes = decoded_item
 632
 633        len_data = len(hdf5[f"ndarray_ds/{list_indexes[0]}"][:])
 634
 635        list_datatype = list()
 636        for i in range(len(list_indexes)):
 637            list_datatype.append((list_indexes[i], list_dtypes[i]))
 638
 639        datatype = np.dtype(list_datatype)
 640
 641        ndarray = np.zeros(len_data, dtype=datatype)
 642
 643        for i in range(len(list_indexes)):
 644
 645            expected_type = list_dtypes[i]
 646
 647            values = hdf5_read_dataset(
 648                hdf5[f"ndarray_ds/{list_indexes[i]}"], expected_type
 649            )
 650
 651            ndarray[list_indexes[i]] = values
 652
 653        return ndarray
 654
 655
 656def save_dict_to_hdf5(hdf5, dictionary):
 657    """
 658
 659    dump a dictionary to an hdf5 file
 660
 661    Parameters
 662    ----------
 663
 664    hdf5 : h5py.File
 665        an hdf5 object
 666
 667    dictionary : dict
 668        a custom python dictionary
 669
 670    """
 671    if isinstance(dictionary, dict):
 672        for attr, value in dictionary.items():
 673            # print("looping:",attr,value)
 674            try:
 675
 676                attribute_name = str(attr)
 677                for character in "/ ":
 678                    attribute_name = attribute_name.replace(character, "_")
 679
 680                if isinstance(value, dict):
 681                    # print("---> dictionary: ",attr, value)
 682
 683                    hdf5 = add_hdf5_sub_group(hdf5, subgroup=attribute_name)
 684                    save_dict_to_hdf5(hdf5[attribute_name], value)
 685
 686                else:
 687
 688                    hdf5_dataset_creator(hdf5, attribute_name, value)
 689
 690            except:
 691
 692                raise ValueError(
 693                    f"Unable to save attribute {str(attr)} with value {value}"
 694                )
 695
 696    else:
 697
 698        raise ValueError(f"{dictionary} must be a instance of dict.")
 699
 700
 701def save_dict_to_hdf5file(
 702    path_to_hdf5, dictionary=None, location="./", replace=False, wait_time=0
 703):
 704    """
 705
 706    dump a dictionary to an hdf5 file
 707
 708    Parameters
 709    ----------
 710
 711    path_to_hdf5 : str
 712        path to the hdf5 file
 713
 714    dictionary : dict | None
 715        a dictionary containing the data to be saved
 716
 717    location : str
 718        path location or subgroup where to write data in the hdf5 file
 719
 720    replace : Boolean
 721        replace an existing hdf5 file. Default is False
 722
 723    wait_time: int
 724        If the hdf5 is unavailable, the function will try to access serveral time and will wait wait_time seconds maximum. If this time is elapsed, the file won't be opened and the funciton will return None. This parameter is usefull if several program or threads need to read/write simultaneously in the same hdf5 database.
 725
 726    Examples
 727    --------
 728
 729    >>> dict={"a":1,"b":2}
 730    >>> pyhdf5_handler.save_dict_to_hdf5("saved_dictionary.hdf5",dict)
 731
 732    """
 733    if isinstance(dictionary, dict):
 734        hdf5 = open_hdf5(path_to_hdf5, replace=replace, wait_time=wait_time)
 735
 736        if hdf5 is None:
 737            return
 738
 739        hdf5 = add_hdf5_sub_group(hdf5, subgroup=location)
 740        save_dict_to_hdf5(hdf5[location], dictionary)
 741
 742    else:
 743        raise ValueError(f"The input {dictionary} must be a instance of dict.")
 744
 745    hdf5.close()
 746
 747
 748def save_object_to_hdf5(
 749    hdf5,
 750    instance,
 751    keys_data=None,
 752    location="./",
 753    sub_data=None,
 754    replace=False,
 755    wait_time=0,
 756):
 757    """
 758
 759    dump an object to an hdf5 file
 760
 761    Parameters
 762    ----------
 763
 764    hdf5 : instance of h5py
 765        An opened hdf5 file
 766
 767    instance : object
 768        A custom python object to be saved into an hdf5
 769
 770    keys_data : list | dict
 771        optional, a list or a dictionary of the attribute to be saved
 772
 773    location : str
 774        path location or subgroup where to write data in the hdf5 file
 775
 776    sub_data : dict | None
 777        optional, a extra dictionary containing extra-data to be saved along the object
 778
 779    replace : Boolean
 780        replace an existing hdf5 file. Default is False
 781
 782    wait_time: int
 783        If the hdf5 is unavailable, the function will try to access serveral time and will wait wait_time seconds maximum. If this time is elapsed, the file won't be opened and the funciton will return None. This parameter is usefull if several program or threads need to read/write simultaneously in the same hdf5 database.
 784
 785    """
 786
 787    if keys_data is None:
 788        keys_data = object_handler.generate_object_structure(
 789            instance, include_method=False
 790        )
 791
 792    if hdf5 is None:
 793        return None
 794
 795    hdf5 = add_hdf5_sub_group(hdf5, subgroup=location)
 796
 797    _dump_object_to_hdf5_from_iteratable(hdf5[location], instance, keys_data)
 798
 799    if isinstance(sub_data, dict):
 800        save_dict_to_hdf5(hdf5[location], sub_data)
 801
 802    hdf5.close()
 803
 804
 805def save_object_to_hdf5file(
 806    path_to_hdf5,
 807    instance,
 808    keys_data=None,
 809    location="./",
 810    sub_data=None,
 811    replace=False,
 812    wait_time=0,
 813):
 814    """
 815
 816    dump an object to an hdf5 file
 817
 818    Parameters
 819    ----------
 820
 821    path_to_hdf5 : str
 822        path to the hdf5 file
 823
 824    instance : object
 825        A custom python object to be saved into an hdf5
 826
 827    keys_data : list | dict
 828        optional, a list or a dictionary of the attribute to be saved
 829
 830    location : str
 831        path location or subgroup where to write data in the hdf5 file
 832
 833    sub_data : dict | None
 834        optional, a extra dictionary containing extra-data to be saved along the object
 835
 836    replace : Boolean
 837        replace an existing hdf5 file. Default is False
 838
 839    wait_time: int
 840        If the hdf5 is unavailable, the function will try to access serveral time and will wait wait_time seconds maximum. If this time is elapsed, the file won't be opened and the funciton will return None. This parameter is usefull if several program or threads need to read/write simultaneously in the same hdf5 database.
 841
 842    """
 843
 844    hdf5 = open_hdf5(path_to_hdf5, replace=replace, wait_time=wait_time)
 845
 846    save_object_to_hdf5(
 847        hdf5,
 848        instance,
 849        keys_data=keys_data,
 850        location=location,
 851        sub_data=sub_data,
 852        replace=replace,
 853        wait_time=wait_time,
 854    )
 855
 856
 857def read_hdf5file_as_dict(
 858    path_to_hdf5, location="./", wait_time=0, read_attrs=True, read_dataset_attrs=False
 859):
 860    """
 861
 862    Open, read and close an hdf5 file
 863
 864    Parameters
 865    ----------
 866
 867    path_to_hdf5 : str
 868        path to the hdf5 file
 869
 870    location: str
 871        place in the hdf5 from which we start reading the file
 872
 873    read_attrs : bool
 874        read and import attributes in the dicitonnary.
 875
 876    read_dataset_attrs : bool
 877        read and import special attributes linked to any dataset and created by pyhdf5_handler. These attributes only store the original dataype of the data stored in the dataset.
 878
 879    Return
 880    --------
 881
 882    dictionary : dict, a dictionary of all keys and attribute included in the hdf5 file
 883
 884    wait_time: int
 885        If the hdf5 is unavailable, the function will try to access serveral time and will wait wait_time seconds maximum. If this time is elapsed, the file won't be opened and the funciton will return None. This parameter is usefull if several program or threads need to read/write simultaneously in the same hdf5 database.
 886
 887    Examples
 888    --------
 889
 890    read an hdf5 file
 891    dictionary=hdf5_handler.read_hdf5file_as_dict(hdf5["model1"])
 892    """
 893
 894    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)
 895
 896    if hdf5 is None:
 897        return None
 898
 899    dictionary = read_hdf5_as_dict(
 900        hdf5[location], read_attrs=read_attrs, read_dataset_attrs=read_dataset_attrs
 901    )
 902
 903    hdf5.close()
 904
 905    return dictionary
 906
 907
 908def read_hdf5_as_dict(hdf5, read_attrs=True, read_dataset_attrs=False):
 909    """
 910    Load an hdf5 file
 911
 912    Parameters
 913    ----------
 914
 915    hdf5 : h5py.File
 916        an instance of hdf5, open with the function open_hdf5()
 917
 918    read_attrs : bool
 919        read and import attributes in the dicitonnary.
 920
 921    read_dataset_attrs : bool
 922        read and import special attributes linked to any dataset and created by pyhdf5_handler. These attributes only store the original datatype of the data stored in the dataset.
 923
 924    Return
 925    --------
 926
 927    dictionary : dict, a dictionary of all keys and attribute included in the hdf5 file
 928
 929    Examples
 930    --------
 931
 932    read only a part of an hdf5 file
 933    >>> hdf5=hdf5_handler.open_hdf5("./multi_model.hdf5")
 934    >>> dictionary=hdf5_handler.read_hdf5_as_dict(hdf5["model1"])
 935    >>> dictionary.keys()
 936
 937    """
 938
 939    if not isinstance(hdf5, (h5py.File, h5py.Group, h5py.Dataset, h5py.Datatype)):
 940        print("Error: input arg is not an instance of hdf5.File()")
 941        return {}
 942
 943    dictionary = {}
 944
 945    for key, item in hdf5.items():
 946
 947        if str(type(item)).find("group") != -1:
 948
 949            if key == "ndarray_ds":
 950
 951                # dictionary.update({key: _read_ndarray_datastructure(hdf5)})
 952                return _read_ndarray_datastructure(hdf5)
 953
 954            else:
 955
 956                dictionary.update({key: read_hdf5_as_dict(item)})
 957
 958        if str(type(item)).find("dataset") != -1:
 959
 960            if "_" + key in hdf5.attrs.keys():
 961                expected_type = hdf5.attrs["_" + key]
 962                values = hdf5_read_dataset(item, expected_type)
 963
 964            else:
 965
 966                values = item[:]
 967
 968            dictionary.update({key: values})
 969
 970    list_attribute = []
 971    if read_attrs or read_dataset_attrs:
 972        tmp_list_attribute = list(hdf5.attrs.keys())
 973        hdf5_item_matching_attributes = ["_" + element for element in list(hdf5.keys())]
 974
 975    if read_attrs:
 976
 977        list_attribute.extend(
 978            list(
 979                filter(
 980                    lambda l: l not in hdf5_item_matching_attributes, tmp_list_attribute
 981                )
 982            )
 983        )
 984
 985    if read_dataset_attrs:
 986
 987        list_attribute.extend(
 988            list(filter(lambda l: l in hdf5_item_matching_attributes, tmp_list_attribute))
 989        )
 990
 991    for key in list_attribute:
 992        dictionary.update({key: hdf5.attrs[key]})
 993
 994    return dictionary
 995
 996
 997def _is_numeric_str_class(class_str):
 998    """
 999    check if the input string is a representation of a python class and if it is a subclass of numbers.Numbers.
1000
1001    Args:
1002        class_str (str): string representation of a class like "<class 'module.ClassName'>" or "<class 'ClassName'>"
1003
1004    Returns:
1005        bool: True if the class is a subclass of numbers.Number, False sinon.
1006    """
1007    # Expression régulière pour extraire le nom complet de la classe
1008    match = re.match(r"<class '(?:([^']+)\.)?([^']+)'>", class_str.strip())
1009    if not match:
1010        return False
1011
1012    module_name, class_name = match.groups()
1013    # full_name = f"{module_name}.{class_name}" if module_name else class_name
1014
1015    try:
1016        # On essaie d'importer le module et de récupérer la classe
1017        if module_name:
1018            module = __import__(module_name, fromlist=[class_name])
1019            cls = getattr(module, class_name)
1020        else:
1021            # Cas des types built-in (int, float, etc.)
1022            cls = globals().get(class_name, None)
1023            if cls is None:
1024                cls = getattr(__builtins__, class_name, None)
1025
1026        if cls is None:
1027            return False
1028
1029        return issubclass(cls, numbers.Number)
1030    except (ImportError, AttributeError, TypeError):
1031        return False
1032
1033
1034# def _is_numeric_str_class(str_class):
1035#     if not str_class.startwith("<class"):
1036#         raise ValueError(f"'{str_class}' is not a string class representation.")
1037
1038#     path = str_class[8:-2]
1039
1040#     path_list_splitted = path.rsplit(".", 1)
1041
1042#     if len(path_list_splitted) == 2:
1043#         module, name = [*path_list_splitted]
1044#         result = issubclass(
1045#             getattr(importlib.import_module(module), name), numbers.Number
1046#         )
1047#     else:
1048#         name = path_list_splitted[0]
1049#         result = issubclass(
1050#             getattr(importlib.import_module("builtins"), name), numbers.Number
1051#         )
1052
1053#     return result
1054
1055
def hdf5_read_dataset(item, expected_type=None):
    """
    Read a dataset stored in an hdf5 database, converting it back to the
    python type recorded at write time.

    Parameters
    ----------

    item : h5py.File
        an hdf5 dataset/item

    expected_type : str
        the expected dtype as string str(type())

    Return
    ------

    value : the value read from the hdf5, any type matching the expected
        type

    """

    if expected_type == str(str):
        return item[0].decode()  # single python string

    if expected_type == str(int):
        return int(item[0])  # builtin int type

    if expected_type == str(float):
        return float(item[0])  # builtin float type

    if _is_numeric_str_class(expected_type):
        return item[0]  # other int/float type like np.int64/np.float64

    if expected_type == "_None_":
        return None  # sentinel written in place of a None value

    if expected_type == str(pd.Timestamp):
        return pd.Timestamp(item[0].decode())

    if expected_type == str(np.datetime64):
        return np.datetime64(item[0].decode())

    if expected_type == str(datetime.datetime):
        return datetime.datetime.fromisoformat(item[0].decode())

    # No (recognised) recorded type: fall back on the stored dtype.
    data = item[:]

    if data.dtype.char == "S":
        return data.astype("U")  # fixed-width bytes -> unicode array

    if data.dtype.char == "O":
        # variable-length strings: decode each element into a list
        return [it.decode() for it in data]

    return data
1131
1132
def get_hdf5file_attribute(
    path_to_hdf5=str(), location="./", attribute=None, wait_time=0
):
    """
    Get the value of an attribute in the hdf5 file.

    Parameters
    ----------

    path_to_hdf5 : str
        the path to the hdf5 file

    location : str
        path inside the hdf5 where the attribute is stored

    attribute: str
        attribute name

    wait_time: int
        If the hdf5 is unavailable, the function will retry for at most
        wait_time seconds; if this time elapses the file won't be opened and
        the function returns None. Useful when several programs or threads
        read/write the same hdf5 database simultaneously.

    Return
    --------

    return_attribute : the value of the attribute

    Examples
    --------

    get an attribute
    >>> attribute=hdf5_handler.get_hdf5_attribute("./multi_model.hdf5",attribute=my_attribute_name)

    """

    hdf5_base = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5_base is None:
        return None

    # close the file even if the location/attribute lookup raises
    try:
        return hdf5_base[location].attrs[attribute]
    finally:
        hdf5_base.close()
1179
1180
def get_hdf5file_dataset(path_to_hdf5=str(), location="./", dataset=None, wait_time=0):
    """
    Get the value of a dataset in the hdf5 file.

    Parameters
    ----------

    path_to_hdf5 : str
        the path to the hdf5 file

    location : str
        path inside the hdf5 where the dataset is stored

    dataset: str
        dataset name

    wait_time: int
        If the hdf5 is unavailable, the function will retry for at most
        wait_time seconds; if this time elapses the file won't be opened and
        the function returns None. Useful when several programs or threads
        read/write the same hdf5 database simultaneously.

    Return
    --------

    return_dataset : the value of the dataset

    Examples
    --------

    get a dataset
    >>> dataset=hdf5_handler.get_hdf5_dataset("./multi_model.hdf5",dataset=my_dataset_name)

    """

    hdf5_base = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5_base is None:
        return None

    # close the file even if the lookup or the read raises
    try:
        hdf5 = hdf5_base[location]

        # a companion attribute "_<dataset>" stores the original python type
        type_attr = "_" + dataset
        if type_attr in hdf5.attrs.keys():
            return hdf5_read_dataset(hdf5[dataset], hdf5.attrs[type_attr])

        return hdf5[dataset][:]
    finally:
        hdf5_base.close()
1230
1231
def get_hdf5file_item(
    path_to_hdf5=str(), location="./", item=None, wait_time=0, search_attrs=False
):
    """

    Get a custom item in an hdf5 file.

    Parameters
    ----------

    path_to_hdf5 : str
        the path to the hdf5 file

    location : str
        path inside the hdf5 where the item is stored. If item is None, item
        is set to basename(location)

    item: str
        item name

    wait_time: int
        If the hdf5 is unavailable, the function will retry for at most
        wait_time seconds; if this time elapses the file won't be opened and
        the function returns None. Useful when several programs or threads
        read/write the same hdf5 database simultaneously.

    search_attrs: bool
        Default is False. If True, the function will also search the item in
        the attributes first.

    Return
    --------

    return : custom value. Can be an hdf5 object (group), a numpy array, a
    string, a float, an int ...

    Examples
    --------

    get the dataset 'dataset'
    >>> dataset=hdf5_handler.get_hdf5_item("./multi_model.hdf5",location="path/in/hdf5/dataset")

    """

    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5 is None:
        return None

    # close the file even if the lookup raises
    try:
        return get_hdf5_item(
            hdf5_instance=hdf5, location=location, item=item, search_attrs=search_attrs
        )
    finally:
        hdf5.close()
1282
1283
def get_hdf5_item(hdf5_instance=None, location="./", item=None, search_attrs=False):
    """

    Get a custom item in an opened hdf5 instance.

    Parameters
    ----------

    hdf5_instance : h5py.File
        an instance of an hdf5

    location : str
        path inside the hdf5 where the item is stored. If item is None, item
        is set to basename(location)

    item: str
        item name

    search_attrs: bool
        Default is False. If True, the function will search the item in the
        attributes first.

    Return
    ------

    return : custom value. Can be an hdf5 object (group), a numpy array, a
    string, a float, an int ... None if the item is not found.

    Examples
    --------

    get the dataset 'dataset'
    >>> dataset=hdf5_handler.get_hdf5_item("./multi_model.hdf5",location="path/in/hdf5/dataset")

    """

    # when no item is given, use the basename of the location as item name
    if item is None and isinstance(location, str):
        head, tail = os.path.split(location)
        if len(tail) > 0:
            item = tail
        location = head

    if not isinstance(item, str):
        print(f"Bad search item:{item}")
        return None

    hdf5 = hdf5_instance[location]

    # first search in the attributes when requested
    if search_attrs and item in hdf5.attrs.keys():
        return hdf5.attrs[item]

    # then search in groups and datasets
    if item not in hdf5.keys():
        return None

    hdf5_item = hdf5[item]

    if str(type(hdf5_item)).find("group") != -1:

        if item == "ndarray_ds":
            # special layout used to store structured numpy arrays
            return _read_ndarray_datastructure(hdf5)

        return read_hdf5_as_dict(hdf5_item)

    if str(type(hdf5_item)).find("dataset") != -1:

        # a companion attribute "_<item>" stores the original python type
        if "_" + item in hdf5.attrs.keys():
            return hdf5_read_dataset(hdf5_item, hdf5.attrs["_" + item])

        return hdf5_item[:]

    # neither group nor dataset: return the raw h5py object
    return hdf5_item
1375
1376
def search_in_hdf5file(
    path_to_hdf5, key=None, location="./", wait_time=0, search_attrs=False
):
    """

    Search key in an hdf5 file and return a list of dictionaries
    {path, key, datatype, value}. Value and key are returned only if the key
    is an attribute or a dataset (None otherwise).

    Parameters
    ----------

    path_to_hdf5 : str
        the path to the hdf5 file

    key: str
        key to search in the hdf5 file

    location : str
        path inside the hdf5 where to start the search

    wait_time: int
        If the hdf5 is unavailable, the function will retry for at most
        wait_time seconds; if this time elapses the file won't be opened and
        the function returns None. Useful when several programs or threads
        read/write the same hdf5 database simultaneously.

    search_attrs : bool
        Default False, also search in the attributes

    Return
    ------

    results : list of dictionaries describing the matches

    Examples
    --------

    search in a hdf5 file
    >>> matchkey=hdf5_handler.search_in_hdf5file(hdf5filename, key='Nom_du_BV',location="./")

    """
    if key is None:
        print("Nothing to search, use key=")
        return []

    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5 is None:
        return None

    # close the file even if the search raises
    try:
        return search_in_hdf5(hdf5, key, location=location, search_attrs=search_attrs)
    finally:
        hdf5.close()
1428
1429
def search_in_hdf5(hdf5_base, key=None, location="./", search_attrs=False):
    """

    Search key in an hdf5 and return a list of dictionaries
    {path, key, datatype, value}. Value and key are returned only if the key
    is an attribute or a dataset (None otherwise).

    Parameters
    ----------

    hdf5_base : h5py.File
        opened instance of the hdf5

    key: str
        key to search in the hdf5 file

    location : str
        path inside the hdf5 where to start the search

    search_attrs : bool
        Default False, also search in the attributes (at every level)

    Return
    ------

    result : list of dictionaries describing the matches

    Examples
    --------

    search in a hdf5
    >>> hdf5=hdf5_handler.open_hdf5(hdf5_file)
    >>> matchkey=hdf5_handler.search_in_hdf5(hdf5, key='Nom_du_BV',location="./")
    >>> hdf5.close()

    """
    if key is None:
        print("Nothing to search, use key=")
        return []

    result = []

    hdf5 = hdf5_base[location]

    if search_attrs and key in hdf5.attrs.keys():
        result.append(
            {
                "path": location,
                "key": key,
                "datatype": "attribute",
                "value": hdf5.attrs[key],
            }
        )

    for hdf5_key, item in hdf5.items():

        if str(type(item)).find("group") != -1:

            sub_location = os.path.join(location, hdf5_key)

            if hdf5_key == key:

                if "ndarray_ds" in item.keys():
                    # special layout used to store structured numpy arrays
                    result.append(
                        {
                            "path": sub_location,
                            "key": None,
                            "datatype": "ndarray",
                            "value": _read_ndarray_datastructure(item),
                        }
                    )
                else:
                    result.append(
                        {
                            "path": sub_location,
                            "key": None,
                            "datatype": "group",
                            "value": None,
                        }
                    )

            # recurse into the sub-group; bug fix: search_attrs was previously
            # dropped here, so attributes were only searched at the top level
            result.extend(
                search_in_hdf5(hdf5_base, key, sub_location, search_attrs=search_attrs)
            )

        if str(type(item)).find("dataset") != -1 and hdf5_key == key:

            data = item[:]

            if data.dtype.char == "S":
                # fixed-length byte strings -> unicode array
                values = data.astype("U")
            elif data.dtype.char == "O":
                # variable-length (object) strings -> decoded python list
                values = [it.decode() for it in data]
            else:
                values = data

            result.append(
                {"path": location, "key": key, "datatype": "dataset", "value": values}
            )

    return result
1549
1550
def hdf5file_view(
    path_to_hdf5,
    location="./",
    max_depth=None,
    level_base=">",
    level_sep="--",
    depth=None,
    wait_time=0,
    list_attrs=True,
    list_dataset_attrs=False,
    return_view=False,
):
    """

    List recursively the content (groups, datasets and optionally attributes)
    of an hdf5 file, printed as an indented tree or returned as a list of
    strings.

    Parameters
    ----------

    path_to_hdf5 : str
        Path to an hdf5 database

    location : str
        path inside the hdf5 where to start the listing

    max_depth: int | None
        Max depth of the listing in the hdf5 (None means unlimited)

    level_base: str
        string used as separator at the lower level (default '>')

    level_sep: str
        string used as separator at higher level (default '--')

    depth: int | None
        current depth level (used internally)

    list_attrs: bool
        default is True, list the attributes

    list_dataset_attrs: bool
        default is False, list the special attributes defined for each dataset
        by pyhdf5_handler

    return_view: bool
        return the view as a list of strings (do not print at screen)

    wait_time: int
        If the hdf5 is unavailable, the function will retry for at most
        wait_time seconds; if this time elapses the file won't be opened and
        the function returns None. Useful when several programs or threads
        read/write the same hdf5 database simultaneously.

    Return
    --------

    list : optional, the view of the hdf5 when return_view is True

    Examples
    --------

    view a hdf5 file
    >>> hdf5_handler.hdf5file_view(hdf5filename, location="./")

    """

    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5 is None:
        return None

    # close the file even if the listing raises
    try:
        return hdf5_view(
            hdf5,
            location=location,
            max_depth=max_depth,
            level_base=level_base,
            level_sep=level_sep,
            depth=depth,
            list_attrs=list_attrs,
            list_dataset_attrs=list_dataset_attrs,
            return_view=return_view,
        )
    finally:
        hdf5.close()
1634
1635
def hdf5file_ls(path_to_hdf5, location="./"):
    """
    List datasets at one level of an hdf5 file.

    Parameters
    ----------

    path_to_hdf5 : str
        path to a hdf5 file

    location: str
        path inside the hdf5 where to start the listing

    Example
    -------

    >>> hdf5file_ls(test.hdf5)

    """

    hdf5 = open_hdf5(path_to_hdf5, read_only=True)

    # open_hdf5 returns None when the file does not exist or is unavailable
    if hdf5 is None:
        return

    # close the file even if the listing raises
    try:
        hdf5_view(
            hdf5,
            location=location,
            max_depth=0,
            level_base=">",
            level_sep="--",
            list_attrs=False,
            return_view=False,
        )
    finally:
        hdf5.close()
1667
1668
def hdf5_ls(hdf5, location="./"):
    """
    List datasets at one level of an hdf5 instance.

    Parameters
    ----------

    hdf5 : h5py.File
        hdf5 instance

    location: str
        path inside the hdf5 where to start the listing (default "./")

    Example
    -------

    >>> hdf5 = open_hdf5(path_to_hdf5, read_only=True)
    >>> hdf5_ls(hdf5)

    """

    hdf5_view(
        hdf5,
        location=location,
        max_depth=0,
        level_base=">",
        level_sep="--",
        list_attrs=False,
        return_view=False,
    )
1699
1700
def hdf5_view(
    hdf5_obj,
    location="./",
    max_depth=None,
    level_base=">",
    level_sep="--",
    depth=None,
    list_attrs=True,
    list_dataset_attrs=False,
    return_view=False,
):
    """
    List recursively all datasets (and attributes) in an hdf5 object.

    Parameters
    ----------

    hdf5_obj : h5py.File
        opened instance of the hdf5

    location : str
        path inside the hdf5 where to start the listing

    max_depth: int | None
        Max depth of the listing in the hdf5 (None means unlimited)

    level_base: str
        string used as separator at the lower level (default '>')

    level_sep: str
        string used as separator at higher level (default '--')

    depth: int | None
        current level depth (used internally by the recursion)

    list_attrs: bool
        default is True, list the attributes

    list_dataset_attrs: bool
        default is False, list the special attributes defined for each dataset
        by pyhdf5_handler (attributes named "_<dataset>")

    return_view: bool
        return the object view as a list of strings instead of printing it

    Return
    --------

    list : optional, the view of the hdf5 when return_view is True

    Examples
    --------

    view a hdf5
    >>> hdf5=hdf5_handler.open_hdf5(hdf5_file)
    >>> hdf5_view(hdf5)
    >>> hdf5.close()

    """

    result = []

    if max_depth is not None:
        # track the recursion depth and stop once max_depth is exceeded
        depth = 0 if depth is None else depth + 1
        if depth > max_depth:
            return result

    hdf5 = hdf5_obj[location]

    list_attribute = []
    if list_attrs or list_dataset_attrs:
        tmp_list_attribute = list(hdf5.attrs.keys())
        # attributes named "_<key>" are type markers written by pyhdf5_handler
        list_keys_matching_attributes = ["_" + element for element in list(hdf5.keys())]

    if list_attrs:
        list_attribute.extend(
            [a for a in tmp_list_attribute if a not in list_keys_matching_attributes]
        )

    if list_dataset_attrs:
        list_attribute.extend(
            [a for a in tmp_list_attribute if a in list_keys_matching_attributes]
        )

    for key in list_attribute:
        values = hdf5.attrs[key]
        sub_location = os.path.join(location, key)
        # bug fix: np.bool was removed in numpy>=1.24; use builtin bool and
        # np.bool_ so boolean attributes never fall into the len() branch
        if isinstance(
            values,
            (int, float, bool, np.int64, np.float64, np.int32, np.float32, np.bool_),
        ):
            result.append(
                f"{level_base}| {sub_location}, attribute, type={type(hdf5.attrs[key])}, value={values}"
            )
        elif isinstance(values, (str)) and len(values) < 20:
            result.append(
                f"{level_base}| {sub_location}, attribute, type={type(hdf5.attrs[key])}, len={len(values)}, value={values}"
            )
        else:
            result.append(
                f"{level_base}| {sub_location}, attribute, type={type(hdf5.attrs[key])}, len={len(values)}, value={values[0:20]}..."
            )

    for hdf5_key, item in hdf5.items():

        if str(type(item)).find("group") != -1:

            sub_location = os.path.join(location, hdf5_key)

            if "ndarray_ds" in item.keys():
                result.append(f"{level_base}| {sub_location}, ndarray")
            else:
                result.append(f"{level_base}| {sub_location}, group")

            # recurse; bug fix: propagate level_sep and the attribute flags,
            # which were previously lost (sub-levels always used the defaults)
            result.extend(
                hdf5_view(
                    hdf5_obj,
                    sub_location,
                    max_depth=max_depth,
                    level_base=level_base + level_sep,
                    level_sep=level_sep,
                    depth=depth,
                    list_attrs=list_attrs,
                    list_dataset_attrs=list_dataset_attrs,
                    return_view=True,
                )
            )

        if str(type(item)).find("dataset") != -1:

            data = item[:]

            if data.dtype.char == "S":
                # fixed-length byte strings -> unicode array
                values = data.astype("U")
            else:
                values = data

            sub_location = os.path.join(location, hdf5_key)

            result.append(
                f"{level_base}| {sub_location}, dataset, type={type(values)}, shape={values.shape}"
            )

    if return_view:
        return result
    else:
        for res in result:
            print(res)
def close_all_hdf5_file():
20def close_all_hdf5_file():
21    """
22    Close all hdf5 file opened in the current session
23    """
24
25    for obj in gc.get_objects():  # Browse through ALL objects
26        if isinstance(obj, h5py.File):  # Just HDF5 files
27            try:
28                print(f"try closing {obj}")
29                obj.close()
30            except:
31                pass  # Was already closed

Close all hdf5 files opened in the current session

def open_hdf5(path, read_only=False, replace=False, wait_time=0):
 34def open_hdf5(path, read_only=False, replace=False, wait_time=0):
 35    """
 36
 37    Open or create an HDF5 file.
 38
 39    Parameters
 40    ----------
 41
 42    path : str
 43        The file path.
 44
 45    read_only : boolean
 46        If true the access to the hdf5 fil is in read-only mode. Multi process can read the same hdf5 file simulteneously. This is not possible when access mode are append 'a' or write 'w'.
 47
 48    replace: Boolean
 49        If true, the existing hdf5file is erased
 50
 51    wait_time: int
 52        If the hdf5 is unavailable, the function will try to access serveral time and will wait wait_time seconds maximum. If this time is elapsed, the file won't be opened. This parameter is usefull if several program or threads need to read/write simultaneously in the same hdf5 database.
 53
 54    Returns
 55    -------
 56
 57    f :
 58        A h5py object.
 59
 60    Examples
 61    --------
 62
 63    >>> hdf5=pyhdf5_handler.open_hdf5("./my_hdf5.hdf5")
 64    >>> hdf5.keys()
 65    >>> hdf5.attrs.keys()
 66
 67    """
 68    f = None
 69    wait = 0
 70    while wait <= wait_time:
 71
 72        f = None
 73        exist_file = True
 74
 75        try:
 76
 77            if read_only:
 78                if os.path.isfile(path):
 79                    f = h5py.File(path, "r")
 80
 81                else:
 82                    exist_file = False
 83                    raise ValueError(f"File {path} does not exist.")
 84
 85            else:
 86                if replace:
 87                    f = h5py.File(path, "w")
 88
 89                else:
 90                    if os.path.isfile(path):
 91                        f = h5py.File(path, "a")
 92
 93                    else:
 94                        f = h5py.File(path, "w")
 95        except:
 96            pass
 97
 98        if f is None:
 99            if not exist_file:
100                print(f"File {path} does not exist.")
101                return f
102            else:
103                print(f"The file {path} is unvailable, waiting {wait}/{wait_time}s")
104
105            wait = wait + 1
106
107            if wait_time > 0:
108                time.sleep(1)
109
110        else:
111            break
112
113    return f

Open or create an HDF5 file.

Parameters

path : str The file path.

read_only : boolean If true the access to the hdf5 file is in read-only mode. Multiple processes can read the same hdf5 file simultaneously. This is not possible when the access mode is append 'a' or write 'w'.

replace: Boolean If true, the existing hdf5file is erased

wait_time: int If the hdf5 is unavailable, the function will try to access serveral time and will wait wait_time seconds maximum. If this time is elapsed, the file won't be opened. This parameter is usefull if several program or threads need to read/write simultaneously in the same hdf5 database.

Returns

f : A h5py object.

Examples

>>> hdf5=pyhdf5_handler.open_hdf5("./my_hdf5.hdf5")
>>> hdf5.keys()
>>> hdf5.attrs.keys()
def add_hdf5_sub_group(hdf5, subgroup=None):
116def add_hdf5_sub_group(hdf5, subgroup=None):
117    """
118    Create a new subgroup in a HDF5 object
119
120    Parameters
121    ----------
122
123    hdf5 : h5py.File
124        An hdf5 object opened with open_hdf5()
125
126    subgroup: str
127        Path to a subgroub that must be created
128
129    Returns
130    -------
131
132    hdf5 :
133        the h5py object.
134
135    Examples
136    --------
137
138    >>> hdf5=pyhdf5_handler.open_hdf5("./model_subgroup.hdf5", replace=True)
139    >>> hdf5=pyhdf5_handler.add_hdf5_sub_group(hdf5, subgroup="mygroup")
140    >>> hdf5.keys()
141    >>> hdf5.attrs.keys()
142
143    """
144    if subgroup is not None:
145        if subgroup == "":
146            subgroup = "./"
147
148        hdf5.require_group(subgroup)
149
150    return hdf5

Create a new subgroup in a HDF5 object

Parameters

hdf5 : h5py.File An hdf5 object opened with open_hdf5()

subgroup: str Path to a subgroub that must be created

Returns

hdf5 : the h5py object.

Examples

>>> hdf5=pyhdf5_handler.open_hdf5("./model_subgroup.hdf5", replace=True)
>>> hdf5=pyhdf5_handler.add_hdf5_sub_group(hdf5, subgroup="mygroup")
>>> hdf5.keys()
>>> hdf5.attrs.keys()
def hdf5_dataset_creator(hdf5: h5py._hl.files.File, name: str, value):
490def hdf5_dataset_creator(hdf5: h5py.File, name: str, value):
491    """
492    Write any value in an hdf5 object
493
494    Parameters
495    ----------
496
497    hdf5 : h5py.File
498        an hdf5 object
499
500    name : str
501        name of the dataset
502
503    value : any
504        value to write in the hdf5
505
506    """
507    
508    if _hdf5_skip_cls(value):
509        dataset = _hdf5_handle_exclude_obj(name, value)
510        
511    elif isinstance(value, str):
512        dataset = _hdf5_handle_str(name, value)
513
514    elif isinstance(value, numbers.Number):
515        dataset = _hdf5_handle_numbers(name, value)
516
517    elif value is None:
518        dataset = _hdf5_handle_none(name, value)
519
520    elif isinstance(value, (pd.Timestamp, np.datetime64, datetime.date)):
521        dataset = _hdf5_handle_timestamp(name, value)
522
523    elif isinstance(value, pd.DatetimeIndex):
524        dataset = _hdf5_handle_DatetimeIndex(name, value)
525
526    elif isinstance(value, list):
527        dataset = _hdf5_handle_list(name, value)
528
529    elif isinstance(value, tuple):
530        dataset = _hdf5_handle_list(name, value)
531
532    elif isinstance(value, np.ndarray):
533
534        if len(value.dtype) > 0 and len(value.dtype.names) > 0:
535            _hdf5_handle_ndarray(hdf5, name, value)
536            return
537        else:
538            dataset = _hdf5_handle_array(name, value)
539
540    else:
541
542        hdf5 = add_hdf5_sub_group(hdf5, subgroup=name)
543
544        newdict = object_handler.read_object_as_dict(value)
545
546        save_dict_to_hdf5(hdf5[name], newdict)
547
548    _hdf5_create_dataset(hdf5, dataset)

Write any value in an hdf5 object

Parameters

hdf5 : h5py.File an hdf5 object

name : str name of the dataset

value : any value to write in the hdf5

def save_dict_to_hdf5(hdf5, dictionary):
657def save_dict_to_hdf5(hdf5, dictionary):
658    """
659
660    dump a dictionary to an hdf5 file
661
662    Parameters
663    ----------
664
665    hdf5 : h5py.File
666        an hdf5 object
667
668    dictionary : dict
669        a custom python dictionary
670
671    """
672    if isinstance(dictionary, dict):
673        for attr, value in dictionary.items():
674            # print("looping:",attr,value)
675            try:
676
677                attribute_name = str(attr)
678                for character in "/ ":
679                    attribute_name = attribute_name.replace(character, "_")
680
681                if isinstance(value, dict):
682                    # print("---> dictionary: ",attr, value)
683
684                    hdf5 = add_hdf5_sub_group(hdf5, subgroup=attribute_name)
685                    save_dict_to_hdf5(hdf5[attribute_name], value)
686
687                else:
688
689                    hdf5_dataset_creator(hdf5, attribute_name, value)
690
691            except:
692
693                raise ValueError(
694                    f"Unable to save attribute {str(attr)} with value {value}"
695                )
696
697    else:
698
699        raise ValueError(f"{dictionary} must be a instance of dict.")

dump a dictionary to an hdf5 file

Parameters

hdf5 : h5py.File an hdf5 object

dictionary : dict a custom python dictionary

def save_dict_to_hdf5file( path_to_hdf5, dictionary=None, location='./', replace=False, wait_time=0):
702def save_dict_to_hdf5file(
703    path_to_hdf5, dictionary=None, location="./", replace=False, wait_time=0
704):
705    """
706
707    dump a dictionary to an hdf5 file
708
709    Parameters
710    ----------
711
712    path_to_hdf5 : str
713        path to the hdf5 file
714
715    dictionary : dict | None
716        a dictionary containing the data to be saved
717
718    location : str
719        path location or subgroup where to write data in the hdf5 file
720
721    replace : Boolean
722        replace an existing hdf5 file. Default is False
723
724    wait_time: int
725        If the hdf5 is unavailable, the function will try to access serveral time and will wait wait_time seconds maximum. If this time is elapsed, the file won't be opened and the funciton will return None. This parameter is usefull if several program or threads need to read/write simultaneously in the same hdf5 database.
726
727    Examples
728    --------
729
730    >>> dict={"a":1,"b":2}
731    >>> pyhdf5_handler.save_dict_to_hdf5("saved_dictionary.hdf5",dict)
732
733    """
734    if isinstance(dictionary, dict):
735        hdf5 = open_hdf5(path_to_hdf5, replace=replace, wait_time=wait_time)
736
737        if hdf5 is None:
738            return
739
740        hdf5 = add_hdf5_sub_group(hdf5, subgroup=location)
741        save_dict_to_hdf5(hdf5[location], dictionary)
742
743    else:
744        raise ValueError(f"The input {dictionary} must be a instance of dict.")
745
746    hdf5.close()

dump a dictionary to an hdf5 file

Parameters

path_to_hdf5 : str path to the hdf5 file

dictionary : dict | None a dictionary containing the data to be saved

location : str path location or subgroup where to write data in the hdf5 file

replace : Boolean replace an existing hdf5 file. Default is False

wait_time: int If the hdf5 is unavailable, the function will try to access serveral time and will wait wait_time seconds maximum. If this time is elapsed, the file won't be opened and the funciton will return None. This parameter is usefull if several program or threads need to read/write simultaneously in the same hdf5 database.

Examples

>>> dict={"a":1,"b":2}
>>> pyhdf5_handler.save_dict_to_hdf5("saved_dictionary.hdf5",dict)
def save_object_to_hdf5( hdf5, instance, keys_data=None, location='./', sub_data=None, replace=False, wait_time=0):
def save_object_to_hdf5(
    hdf5,
    instance,
    keys_data=None,
    location="./",
    sub_data=None,
    replace=False,
    wait_time=0,
):
    """
    Dump a python object into an already opened hdf5 file.

    Parameters
    ----------

    hdf5 : instance of h5py
        An opened hdf5 file. NOTE(review): this handle is closed by this
        function before returning.

    instance : object
        A custom python object to be saved into the hdf5 file.

    keys_data : list | dict
        Optional, a list or a dictionary of the attributes to be saved.
        If None, the structure is generated automatically from the object
        (attributes only, methods excluded).

    location : str
        Path location or subgroup where to write data in the hdf5 file.

    sub_data : dict | None
        Optional, an extra dictionary containing extra data to be saved
        along the object.

    replace : Boolean
        Unused in this function; presumably kept for signature symmetry
        with save_object_to_hdf5file, which opens the file itself.

    wait_time : int
        Unused in this function; presumably kept for signature symmetry
        with save_object_to_hdf5file, which opens the file itself.
    """

    # Build the default key structure (attributes only, no methods) when
    # the caller did not specify which keys to save.
    if keys_data is None:
        keys_data = object_handler.generate_object_structure(
            instance, include_method=False
        )

    # Gracefully handle a failed open (open_hdf5 returns None on timeout).
    if hdf5 is None:
        return None

    # Create (or reuse) the target subgroup, then dump the object inside it.
    hdf5 = add_hdf5_sub_group(hdf5, subgroup=location)

    _dump_object_to_hdf5_from_iteratable(hdf5[location], instance, keys_data)

    # Optionally store extra data next to the object.
    if isinstance(sub_data, dict):
        save_dict_to_hdf5(hdf5[location], sub_data)

    hdf5.close()

dump an object to an hdf5 file

Parameters

hdf5 : instance of h5py An opened hdf5 file

instance : object A custom python object to be saved into an hdf5

keys_data : list | dict optional, a list or a dictionary of the attribute to be saved

location : str path location or subgroup where to write data in the hdf5 file

sub_data : dict | None optional, a extra dictionary containing extra-data to be saved along the object

replace : Boolean replace an existing hdf5 file. Default is False

wait_time: int If the hdf5 is unavailable, the function will try to access it several times and will wait wait_time seconds at most. If this time elapses, the file won't be opened and the function will return None. This parameter is useful if several programs or threads need to read/write simultaneously in the same hdf5 database.

def save_object_to_hdf5file( path_to_hdf5, instance, keys_data=None, location='./', sub_data=None, replace=False, wait_time=0):
def save_object_to_hdf5file(
    path_to_hdf5,
    instance,
    keys_data=None,
    location="./",
    sub_data=None,
    replace=False,
    wait_time=0,
):
    """
    Dump a python object into an hdf5 file on disk.

    Parameters
    ----------

    path_to_hdf5 : str
        path to the hdf5 file

    instance : object
        a custom python object to be saved into the hdf5 file

    keys_data : list | dict
        optional, a list or a dictionary of the attributes to be saved

    location : str
        path location or subgroup where to write data in the hdf5 file

    sub_data : dict | None
        optional, an extra dictionary containing extra data to be saved
        along the object

    replace : Boolean
        replace an existing hdf5 file. Default is False

    wait_time : int
        maximum number of seconds to wait for the file to become
        available; if exceeded, the file is not opened and the function
        returns None
    """

    # Open (or create/replace) the target file, then delegate the dump;
    # the delegate also closes the file handle.
    opened_file = open_hdf5(path_to_hdf5, replace=replace, wait_time=wait_time)

    save_object_to_hdf5(
        opened_file,
        instance,
        keys_data=keys_data,
        location=location,
        sub_data=sub_data,
        replace=replace,
        wait_time=wait_time,
    )

dump an object to an hdf5 file

Parameters

path_to_hdf5 : str path to the hdf5 file

instance : object A custom python object to be saved into an hdf5

keys_data : list | dict optional, a list or a dictionary of the attribute to be saved

location : str path location or subgroup where to write data in the hdf5 file

sub_data : dict | None optional, a extra dictionary containing extra-data to be saved along the object

replace : Boolean replace an existing hdf5 file. Default is False

wait_time: int If the hdf5 is unavailable, the function will try to access serveral time and will wait wait_time seconds maximum. If this time is elapsed, the file won't be opened and the funciton will return None. This parameter is usefull if several program or threads need to read/write simultaneously in the same hdf5 database.

def read_hdf5file_as_dict( path_to_hdf5, location='./', wait_time=0, read_attrs=True, read_dataset_attrs=False):
def read_hdf5file_as_dict(
    path_to_hdf5, location="./", wait_time=0, read_attrs=True, read_dataset_attrs=False
):
    """
    Open an hdf5 file, read its content as a dictionary, and close it.

    Parameters
    ----------

    path_to_hdf5 : str
        path to the hdf5 file

    location : str
        place in the hdf5 from which reading starts

    wait_time : int
        maximum number of seconds to wait for the file to become
        available; if exceeded, the file is not opened and the function
        returns None

    read_attrs : bool
        read and import attributes into the dictionary

    read_dataset_attrs : bool
        read the special per-dataset attributes created by
        pyhdf5_handler (they store the original datatype of the data
        stored in the dataset)

    Returns
    -------

    dict | None
        a dictionary of all keys and attributes included in the hdf5
        file, or None when the file could not be opened

    Examples
    --------

    >>> dictionary = hdf5_handler.read_hdf5file_as_dict("./my_file.hdf5")
    """

    source = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if source is None:
        return None

    content = read_hdf5_as_dict(
        source[location], read_attrs=read_attrs, read_dataset_attrs=read_dataset_attrs
    )
    source.close()

    return content

Open, read and close an hdf5 file

Parameters

path_to_hdf5 : str path to the hdf5 file

location: str place in the hdf5 from which we start reading the file

read_attrs : bool read and import attributes into the dictionary.

read_dataset_attrs : bool read and import special attributes linked to any dataset and created by pyhdf5_handler. These attributes only store the original datatype of the data stored in the dataset.

Return

dictionary : dict, a dictionary of all keys and attribute included in the hdf5 file

wait_time: int If the hdf5 is unavailable, the function will try to access serveral time and will wait wait_time seconds maximum. If this time is elapsed, the file won't be opened and the funciton will return None. This parameter is usefull if several program or threads need to read/write simultaneously in the same hdf5 database.

Examples

read an hdf5 file dictionary=hdf5_handler.read_hdf5file_as_dict(hdf5["model1"])

def read_hdf5_as_dict(hdf5, read_attrs=True, read_dataset_attrs=False):
def read_hdf5_as_dict(hdf5, read_attrs=True, read_dataset_attrs=False):
    """
    Load the content of an (opened) hdf5 node into a dictionary.

    Parameters
    ----------

    hdf5 : h5py.File | h5py.Group
        an instance of hdf5, opened with the function open_hdf5()

    read_attrs : bool
        read and import attributes into the dictionary

    read_dataset_attrs : bool
        read and import the special per-dataset attributes created by
        pyhdf5_handler. These attributes only store the original datatype
        of the data stored in the dataset.

    Returns
    -------

    dictionary : dict
        a dictionary of all keys and attributes included in the hdf5 file

    Examples
    --------

    read only a part of an hdf5 file
    >>> hdf5=hdf5_handler.open_hdf5("./multi_model.hdf5")
    >>> dictionary=hdf5_handler.read_hdf5_as_dict(hdf5["model1"])
    >>> dictionary.keys()
    """

    if not isinstance(hdf5, (h5py.File, h5py.Group, h5py.Dataset, h5py.Datatype)):
        print("Error: input arg is not an instance of hdf5.File()")
        return {}

    dictionary = {}

    for key, item in hdf5.items():

        if str(type(item)).find("group") != -1:

            if key == "ndarray_ds":
                # This subgroup encodes a single ndarray data structure:
                # rebuild it and return it directly in place of a dict.
                return _read_ndarray_datastructure(hdf5)

            # Bug fix: forward the read flags so nested groups honour
            # read_attrs / read_dataset_attrs (they were dropped before).
            dictionary[key] = read_hdf5_as_dict(
                item, read_attrs=read_attrs, read_dataset_attrs=read_dataset_attrs
            )

        if str(type(item)).find("dataset") != -1:

            # A "_<key>" attribute, when present, records the original
            # python type of the stored value so it can be restored.
            if "_" + key in hdf5.attrs.keys():
                expected_type = hdf5.attrs["_" + key]
                dictionary[key] = hdf5_read_dataset(item, expected_type)
            else:
                dictionary[key] = item[:]

    list_attribute = []
    if read_attrs or read_dataset_attrs:
        tmp_list_attribute = list(hdf5.attrs.keys())
        # Attributes named "_<item>" are the pyhdf5_handler type markers.
        hdf5_item_matching_attributes = ["_" + element for element in list(hdf5.keys())]

    if read_attrs:
        # Regular attributes: everything except the type markers.
        list_attribute.extend(
            [a for a in tmp_list_attribute if a not in hdf5_item_matching_attributes]
        )

    if read_dataset_attrs:
        # The type markers themselves, when explicitly requested.
        list_attribute.extend(
            [a for a in tmp_list_attribute if a in hdf5_item_matching_attributes]
        )

    for key in list_attribute:
        dictionary[key] = hdf5.attrs[key]

    return dictionary

Load an hdf5 file

Parameters

hdf5 : h5py.File an instance of hdf5, open with the function open_hdf5()

read_attrs : bool read and import attributes in the dicitonnary.

read_dataset_attrs : bool read and import special attributes linked to any dataset and created by pyhdf5_handler. These attributes only store the original datatype of the data stored in the dataset.

Return

dictionary : dict, a dictionary of all keys and attribute included in the hdf5 file

Examples

read only a part of an hdf5 file

>>> hdf5=hdf5_handler.open_hdf5("./multi_model.hdf5")
>>> dictionary=hdf5_handler.read_hdf5_as_dict(hdf5["model1"])
>>> dictionary.keys()
def hdf5_read_dataset(item, expected_type=None):
def hdf5_read_dataset(item, expected_type=None):
    """
    Read a dataset stored in an hdf5 database and restore its original type.

    Parameters
    ----------

    item : h5py.Dataset
        an hdf5 dataset/item

    expected_type : str | None
        the expected type as a string, i.e. str(type(value)), written by
        the save functions; None (or an unknown string) means "no type
        hint": the raw array is returned, with byte strings decoded.

    Returns
    -------

    value : the value read from the hdf5, converted to the expected type
    """

    if expected_type == str(type("str")):
        values = item[0].decode()

    elif expected_type == str(type(1)):
        values = int(item[0])  # builtin int type

    elif expected_type == str(type(1.0)):
        values = float(item[0])  # builtin float type

    elif _is_numeric_str_class(expected_type):
        values = item[0]  # other int/float types like np.int64/np.float64

    elif expected_type == "_None_":
        values = None  # sentinel written when the saved value was None

    # Timestamps are stored as ISO strings; rebuild the matching type.
    # (Flattened from a nested if/elif whose final else was unreachable.)
    elif expected_type == str(pd.Timestamp):
        values = pd.Timestamp(item[0].decode())

    elif expected_type == str(np.datetime64):
        values = np.datetime64(item[0].decode())

    elif expected_type == str(datetime.datetime):
        values = datetime.datetime.fromisoformat(item[0].decode())

    else:
        # No usable type hint: read the array once (each item[:] access
        # re-reads from the file) and decode byte strings if needed.
        data = item[:]

        if data.dtype.char == "S":
            values = data.astype("U")

        elif data.dtype.char == "O":
            # Variable-length (object) strings: decode element by element.
            values = [it.decode() for it in data]

        else:
            values = data

    return values

Read a dataset stored in an hdf5 database

Parameters

item : h5py.File an hdf5 dataset/item

expected_type: str the expected dtype as string str(type())

Return

value : the value read from the hdf5, any type matching the expected type

def get_hdf5file_attribute(path_to_hdf5='', location='./', attribute=None, wait_time=0):
def get_hdf5file_attribute(
    path_to_hdf5=str(), location="./", attribute=None, wait_time=0
):
    """
    Get the value of an attribute stored in an hdf5 file.

    Parameters
    ----------

    path_to_hdf5 : str
        the path to the hdf5 file

    location : str
        path inside the hdf5 where the attribute is stored

    attribute : str
        attribute name

    wait_time : int
        maximum number of seconds to wait for the file to become
        available; if exceeded, the file is not opened and the function
        returns None

    Returns
    -------

    return_attribute : the value of the attribute

    Raises
    ------

    KeyError
        if the location or the attribute does not exist in the file
        (the file is still closed in that case)

    Examples
    --------

    get an attribute
    >>> attribute=hdf5_handler.get_hdf5_attribute("./multi_model.hdf5",attribute=my_attribute_name)
    """

    hdf5_base = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5_base is None:
        return None

    # Close the file even when the location/attribute lookup raises;
    # previously a failed lookup leaked an open file handle.
    try:
        return_attribute = hdf5_base[location].attrs[attribute]
    finally:
        hdf5_base.close()

    return return_attribute

Get the value of an attribute in the hdf5file

Parameters

path_to_hdf5 : str the path to the hdf5file

location : str path inside the hdf5 where the attribute is stored

attribute: str attribute name

wait_time: int If the hdf5 is unavailable, the function will try to access it several times and will wait wait_time seconds at most. If this time elapses, the file won't be opened and the function will return None. This parameter is useful if several programs or threads need to read/write simultaneously in the same hdf5 database.

Return

return_attribute : the value of the attribute

Examples

get an attribute

>>> attribute=hdf5_handler.get_hdf5_attribute("./multi_model.hdf5",attribute=my_attribute_name)
def get_hdf5file_dataset(path_to_hdf5='', location='./', dataset=None, wait_time=0):
def get_hdf5file_dataset(path_to_hdf5=str(), location="./", dataset=None, wait_time=0):
    """
    Get the value of a dataset stored in an hdf5 file.

    Parameters
    ----------

    path_to_hdf5 : str
        the path to the hdf5 file

    location : str
        path inside the hdf5 where the dataset is stored

    dataset : str
        dataset name

    wait_time : int
        maximum number of seconds to wait for the file to become
        available; if exceeded, the file is not opened and the function
        returns None

    Returns
    -------

    return_dataset : the value of the dataset

    Raises
    ------

    KeyError
        if the location or the dataset does not exist in the file
        (the file is still closed in that case)

    Examples
    --------

    get a dataset
    >>> dataset=hdf5_handler.get_hdf5_dataset("./multi_model.hdf5",dataset=my_dataset_name)
    """

    hdf5_base = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5_base is None:
        return None

    # Close the file even when the lookup raises; previously a failed
    # lookup leaked an open file handle.
    try:
        hdf5 = hdf5_base[location]

        # A "_<dataset>" attribute, when present, records the original
        # python type of the stored value so it can be restored.
        if "_" + dataset in hdf5.attrs.keys():
            expected_type = hdf5.attrs["_" + dataset]
            return_dataset = hdf5_read_dataset(hdf5[dataset], expected_type)
        else:
            return_dataset = hdf5[dataset][:]
    finally:
        hdf5_base.close()

    return return_dataset

Get the value of an attribute in the hdf5file

Parameters

path_to_hdf5 : str the path to the hdf5file

location : str path inside the hdf5 where the attribute is stored

dataset: str dataset name

wait_time: int If the hdf5 is unavailable, the function will try to access serveral time and will wait wait_time seconds maximum. If this time is elapsed, the file won't be opened and the funciton will return None. This parameter is usefull if several program or threads need to read/write simultaneously in the same hdf5 database.

Return

return_dataset : the value of the attribute

Examples

get a dataset

>>> dataset=hdf5_handler.get_hdf5_dataset("./multi_model.hdf5",dataset=my_dataset_name)
def get_hdf5file_item( path_to_hdf5='', location='./', item=None, wait_time=0, search_attrs=False):
def get_hdf5file_item(
    path_to_hdf5=str(), location="./", item=None, wait_time=0, search_attrs=False
):
    """
    Get a custom item from an hdf5 file.

    Parameters
    ----------

    path_to_hdf5 : str
        the path to the hdf5 file

    location : str
        path inside the hdf5 where the item is stored. If item is None,
        item is set to basename(location)

    item : str
        item name

    wait_time : int
        maximum number of seconds to wait for the file to become
        available; if exceeded, the file is not opened and the function
        returns None

    search_attrs : bool
        Default is False. If True, the function searches the attributes
        first.

    Returns
    -------

    custom value : a group read as a dict, a numpy array, a string, a
    float, an int, ... or None if the item was not found

    Examples
    --------

    get the dataset 'dataset'
    >>> dataset=hdf5_handler.get_hdf5_item("./multi_model.hdf5",location="path/in/hdf5/dataset")
    """

    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5 is None:
        return None

    # Close the file even when the lookup raises; previously a failed
    # lookup leaked an open file handle.
    try:
        hdf5_item = get_hdf5_item(
            hdf5_instance=hdf5, location=location, item=item, search_attrs=search_attrs
        )
    finally:
        hdf5.close()

    return hdf5_item

Get a custom item in an hdf5file

Parameters

path_to_hdf5 : str the path to the hdf5file

location : str path inside the hdf5 where the attribute is stored. If item is None, item is set to basename(location)

item: str item name

wait_time: int If the hdf5 is unavailable, the function will try to access serveral time and will wait wait_time seconds maximum. If this time is elapsed, the file won't be opened and the funciton will return None. This parameter is usefull if several program or threads need to read/write simultaneously in the same hdf5 database.

search_attrs: bool Default is False. If True, the function will also search in the item in the attribute first.

Return

return : custom value. can be an hdf5 object (group), an numpy array, a string, a float, an int ...

Examples

get the dataset 'dataset'

>>> dataset=hdf5_handler.get_hdf5_item("./multi_model.hdf5",location="path/in/hdf5/dataset")
def get_hdf5_item(hdf5_instance=None, location='./', item=None, search_attrs=False):
def get_hdf5_item(hdf5_instance=None, location="./", item=None, search_attrs=False):
    """
    Get a custom item from an opened hdf5 file.

    Parameters
    ----------

    hdf5_instance : h5py.File
        an opened instance of an hdf5

    location : str
        path inside the hdf5 where the item is stored. If item is None,
        item is set to basename(location)

    item : str
        item name

    search_attrs : bool
        Default is False. If True, the function searches the attributes
        first.

    Returns
    -------

    custom value : a group read as a dict, a numpy array, a string, a
    float, an int, ... or None if the item was not found

    Examples
    --------

    get the dataset 'dataset'
    >>> dataset=hdf5_handler.get_hdf5_item("./multi_model.hdf5",location="path/in/hdf5/dataset")
    """

    # Derive the item name from the last path component of location.
    if item is None and isinstance(location, str):
        head, tail = os.path.split(location)
        if len(tail) > 0:
            item = tail
        location = head

    if not isinstance(item, str):
        print(f"Bad search item:{item}")
        return None
        # (removed a duplicated, unreachable `return None` that followed)

    hdf5 = hdf5_instance[location]

    # First search in the attributes when requested.
    if search_attrs and item in hdf5.attrs.keys():
        return hdf5.attrs[item]

    # Then search in groups and datasets.
    if item not in hdf5.keys():
        return None

    hdf5_item = hdf5[item]

    if str(type(hdf5_item)).find("group") != -1:

        if item == "ndarray_ds":
            # This subgroup encodes a single ndarray data structure.
            return _read_ndarray_datastructure(hdf5)

        return read_hdf5_as_dict(hdf5_item)

    if str(type(hdf5_item)).find("dataset") != -1:

        # A "_<item>" attribute, when present, records the original
        # python type of the stored value so it can be restored.
        if "_" + item in hdf5.attrs.keys():
            expected_type = hdf5.attrs["_" + item]
            return hdf5_read_dataset(hdf5_item, expected_type)

        return hdf5_item[:]

    return hdf5_item

Get a custom item in an hdf5file

Parameters

hdf5_instance : h5py.File an instance of an hdf5

location : str path inside the hdf5 where the attribute is stored. If item is None, item is set to basename(location)

item: str item name

search_attrs: bool Default is False. If True, the function will search in the item in the attribute first.

Return

return : custom value. can be an hdf5 object (group), an numpy array, a string, a float, an int ...

Examples

get the dataset 'dataset'

>>> dataset=hdf5_handler.get_hdf5_item("./multi_model.hdf5",location="path/in/hdf5/dataset")
def search_in_hdf5file( path_to_hdf5, key=None, location='./', wait_time=0, search_attrs=False):
def search_in_hdf5file(
    path_to_hdf5, key=None, location="./", wait_time=0, search_attrs=False
):
    """
    Search for a key in an hdf5 file and return the list of matches.

    Each match is a dict {"path", "key", "datatype", "value"}; key and
    value are filled only when the match is an attribute or a dataset
    (None otherwise).

    Parameters
    ----------

    path_to_hdf5 : str
        the path to the hdf5 file

    key : str
        key to search for in the hdf5 file

    location : str
        path inside the hdf5 where the search starts

    wait_time : int
        maximum number of seconds to wait for the file to become
        available; if exceeded, the file is not opened and the function
        returns None

    search_attrs : bool
        Default False, also search in the attributes

    Returns
    -------

    list | None
        the list of matches, or None when the file could not be opened

    Examples
    --------

    search in an hdf5 file
    >>> matchkey=hdf5_handler.search_in_hdf5file(hdf5filename, key='Nom_du_BV',location="./")
    """

    # Nothing to do without a key to look for.
    if key is None:
        print("Nothing to search, use key=")
        return []

    database = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)
    if database is None:
        return None

    matches = search_in_hdf5(database, key, location=location, search_attrs=search_attrs)
    database.close()

    return matches

Search key in an hdf5 and return a list of [locations, datatype, key name, values]. Value and key are returned only if the key is an attribute or a dataset (None otherwise)

Parameters

path_to_hdf5 : str the path to the hdf5file

key: str key to search in the hdf5file

location : str path inside the hdf5 where to start the research

wait_time: int If the hdf5 is unavailable, the function will try to access it several times and will wait wait_time seconds at most. If this time elapses, the file won't be opened and the function will return None. This parameter is useful if several programs or threads need to read/write simultaneously in the same hdf5 database.

search_attrs : Bool Default false, search in the attributes

Return

return_dataset : the value of the attribute

Examples

search in a hdf5file

>>> matchkey=hdf5_handler.search_in_hdf5file(hdf5filename, key='Nom_du_BV',location="./")
def search_in_hdf5(hdf5_base, key=None, location='./', search_attrs=False):
def search_in_hdf5(hdf5_base, key=None, location="./", search_attrs=False):
    """
    Search for a key in an opened hdf5 and return the list of matches.

    Each match is a dict {"path", "key", "datatype", "value"}; key and
    value are filled only when the match is an attribute or a dataset
    (None otherwise).

    Parameters
    ----------

    hdf5_base : h5py.File
        opened instance of the hdf5

    key : str
        key to search for in the hdf5 file

    location : str
        path inside the hdf5 where the search starts

    search_attrs : bool
        Default False, also search in the attributes (at every depth)

    Returns
    -------

    result : list
        the list of matches

    Examples
    --------

    search in an hdf5
    >>> hdf5=hdf5_handler.open_hdf5(hdf5_file)
    >>> matchkey=hdf5_handler.search_in_hdf5(hdf5, key='Nom_du_BV',location="./")
    >>> hdf5.close()
    """

    if key is None:
        print("Nothing to search, use key=")
        return []

    result = []

    hdf5 = hdf5_base[location]

    if search_attrs and key in hdf5.attrs.keys():
        result.append(
            {
                "path": location,
                "key": key,
                "datatype": "attribute",
                "value": hdf5.attrs[key],
            }
        )

    for hdf5_key, item in hdf5.items():

        if str(type(item)).find("group") != -1:

            sub_location = os.path.join(location, hdf5_key)

            if hdf5_key == key:

                if "ndarray_ds" in item.keys():
                    # This subgroup encodes a single ndarray: report it
                    # as an ndarray match with its rebuilt value.
                    result.append(
                        {
                            "path": sub_location,
                            "key": None,
                            "datatype": "ndarray",
                            "value": _read_ndarray_datastructure(item),
                        }
                    )
                else:
                    result.append(
                        {
                            "path": sub_location,
                            "key": None,
                            "datatype": "group",
                            "value": None,
                        }
                    )

            # Bug fix: forward search_attrs so attributes in subgroups
            # are also searched (it was silently dropped before).
            result.extend(
                search_in_hdf5(hdf5_base, key, sub_location, search_attrs=search_attrs)
            )

        if str(type(item)).find("dataset") != -1 and hdf5_key == key:

            # Read the dataset once (each item[:] access re-reads from
            # the file) and decode byte strings if needed.
            data = item[:]

            if data.dtype.char == "S":
                values = data.astype("U")
            elif data.dtype.char == "O":
                # Variable-length (object) strings: decode element-wise.
                values = [it.decode() for it in data]
            else:
                values = data

            result.append(
                {"path": location, "key": key, "datatype": "dataset", "value": values}
            )

    return result

Search key in an hdf5 and return a list of [locations, datatype, key name, values]. Value and key are returned only if the key is an attribute or a dataset (None otherwise)

Parameters

hdf5_base : h5py.File opened instance of the hdf5

key: str key to search in the hdf5file

location : str path inside the hdf5 where to start the research

search_attrs : Bool Default false, search in the attributes

Return

return_dataset : the value of the attribute

Examples

search in a hdf5

>>> hdf5=hdf5_handler.open_hdf5(hdf5_file)
>>> matchkey=hdf5_handler.search_in_hdf5(hdf5, key='Nom_du_BV',location="./")
>>> hdf5.close()
def hdf5file_view( path_to_hdf5, location='./', max_depth=None, level_base='>', level_sep='--', depth=None, wait_time=0, list_attrs=True, list_dataset_attrs=False, return_view=False):
def hdf5file_view(
    path_to_hdf5,
    location="./",
    max_depth=None,
    level_base=">",
    level_sep="--",
    depth=None,
    wait_time=0,
    list_attrs=True,
    list_dataset_attrs=False,
    return_view=False,
):
    """
    Open an hdf5 file and list recursively its datasets (and attributes).

    Parameters
    ----------

    path_to_hdf5 : str
        Path to an hdf5 database.

    location : str
        Path inside the hdf5 where to start the listing.

    max_depth : int
        Maximum depth of the search in the hdf5 (None means unlimited).

    level_base : str
        String used as separator at the lower level (default '>').

    level_sep : str
        String used as separator at higher levels (default '--').

    depth : int
        Current depth level (used internally for recursion).

    list_attrs : bool
        Default is True, list the attributes.

    list_dataset_attrs : bool
        Default is False, list the special attributes defined for each
        dataset by pyhdf5_handler.

    return_view : bool
        If True, return the view as a list (do not print on screen).

    wait_time : int
        If the hdf5 file is unavailable, the function will retry for at most
        wait_time seconds; once this time is elapsed the file won't be opened
        and the function will return None. This parameter is useful if several
        programs or threads need to read/write simultaneously in the same
        hdf5 database.

    Return
    --------

    list : optional, the view of the hdf5 (only if return_view is True),
        or None if the file could not be opened.

    Examples
    --------

    view an hdf5 file
    >>> view = hdf5_handler.hdf5file_view(hdf5filename, return_view=True)

    """

    # Read-only mode so that concurrent readers are allowed.
    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5 is None:
        return None

    results = hdf5_view(
        hdf5,
        location=location,
        max_depth=max_depth,
        level_base=level_base,
        level_sep=level_sep,
        depth=depth,
        list_attrs=list_attrs,
        list_dataset_attrs=list_dataset_attrs,
        return_view=return_view,
    )

    hdf5.close()

    return results

Search key in an hdf5 and return a list of [locations, datatype, key name, values]. Value and key are returned only if the key is an attribute or a dataset (None otherwise)

Parameters

path_to_hdf5 : str Path to an hdf5 database

location : str path inside the hdf5 where to start the research

max_depth: int Maximum depth of the search in the hdf5

level_base: str string used as separator at the lower level (default '>')

level_sep: str string used as separator at higher level (default '--')

depth: int current depth level

list_attrs: bool default is True, list the attributes

list_dataset_attrs: bool default is False, list the special attributes defined for each dataset by pyhdf5_handler

return_view: bool return the object view in a dictionary (do not print on screen)

wait_time: int If the hdf5 is unavailable, the function will try to access it several times and will wait wait_time seconds maximum. If this time is elapsed, the file won't be opened and the function will return None. This parameter is useful if several programs or threads need to read/write simultaneously in the same hdf5 database.

Return

dictionary : optional, the view of the hdf5

Examples

search in a hdf5file

>>> matchkey=hdf5_handler.search_in_hdf5file(hdf5filename, key='Nom_du_BV',location="./")
def hdf5file_ls(path_to_hdf5, location='./'):
def hdf5file_ls(path_to_hdf5, location="./"):
    """
    List the datasets of an hdf5 file.

    Parameters
    ----------

    path_to_hdf5 : str
        Path to an hdf5 file.

    location : str
        Path inside the hdf5 where to start the listing.

    Example
    -------

    >>> hdf5file_ls("test.hdf5")

    """

    hdf5 = open_hdf5(path_to_hdf5, read_only=True)

    # open_hdf5 returns None when the file could not be opened.
    if hdf5 is None:
        return

    try:
        hdf5_view(
            hdf5,
            location=location,
            max_depth=0,
            level_base=">",
            level_sep="--",
            list_attrs=False,
            return_view=False,
        )
    finally:
        # The previous implementation leaked the file handle; always close it.
        hdf5.close()

List dataset in an hdf5file.

Parameters

path_to_hdf5 : str path to a hdf5file

location: str path inside the hdf5 where to start the research

Example

>>> hdf5file_ls(test.hdf5)
def hdf5_ls(hdf5):
def hdf5_ls(hdf5, location="./"):
    """
    List the datasets of an opened hdf5 instance.

    Parameters
    ----------

    hdf5 : h5py.File
        Opened hdf5 instance.

    location : str
        Path inside the hdf5 where to start the listing (default "./").
        This parameter was documented but not previously accepted; it now
        defaults to the old hard-coded value, so existing calls behave
        identically.

    Example
    -------

    >>> hdf5 = open_hdf5(path_to_hdf5, read_only=True)
    >>> hdf5_ls(hdf5)

    """

    hdf5_view(
        hdf5,
        location=location,
        max_depth=0,
        level_base=">",
        level_sep="--",
        list_attrs=False,
        return_view=False,
    )

List dataset in an hdf5 instance.

Parameters

hdf5 : h5py.File hdf5 instance

location: str path inside the hdf5 where to start the research

Example

>>> hdf5 = open_hdf5(path_to_hdf5, read_only=True)
>>> hdf5_ls(hdf5)
def hdf5_view( hdf5_obj, location='./', max_depth=None, level_base='>', level_sep='--', depth=None, list_attrs=True, list_dataset_attrs=False, return_view=False):
1702def hdf5_view(
1703    hdf5_obj,
1704    location="./",
1705    max_depth=None,
1706    level_base=">",
1707    level_sep="--",
1708    depth=None,
1709    list_attrs=True,
1710    list_dataset_attrs=False,
1711    return_view=False,
1712):
1713    """
1714    List recursively all dataset (and attributes) in an hdf5 object.
1715
1716    Parameters
1717    ----------
1718
1719    hdf5_obj : h5py.File
1720        opened instance of the hdf5
1721
1722    location : str
1723        path inside the hdf5 where to start the research
1724
1725    max_depth: str
1726        Max deph of the search in the hdf5
1727
1728    level_base: str
1729        string used as separator at the lower level (default '>')
1730
1731    level_sep: str
1732        string used as separator at higher level (default '--')
1733
1734    depth: int
1735        current level depth
1736
1737    list_attrs: bool
1738        default is True, list the attributes
1739
1740    list_dataset_attrs: bool
1741        default is False, list the special attributes defined for each dataset by pyhdf5_handler
1742
1743    return_view: bool
1744        retrun the object view in a dictionnary
1745
1746    Return
1747    --------
1748
1749    dictionnary : optional, the view of the hdf5
1750
1751    Examples
1752    --------
1753
1754    search in a hdf5
1755    >>> hdf5=hdf5_handler.open_hdf5(hdf5_file)
1756    >>> matchkey=hdf5_handler.search_in_hdf5(hdf5, key='Nom_du_BV',location="./")
1757    >>> hdf5.close()
1758
1759    """
1760
1761    result = []
1762
1763    if max_depth is not None:
1764
1765        if depth is not None:
1766            depth = depth + 1
1767        else:
1768            depth = 0
1769
1770        if depth > max_depth:
1771            return result
1772
1773    hdf5 = hdf5_obj[location]
1774
1775    list_attribute = []
1776    if list_attrs or list_dataset_attrs:
1777        tmp_list_attribute = list(hdf5.attrs.keys())
1778        list_keys_matching_attributes = ["_" + element for element in list(hdf5.keys())]
1779
1780    if list_attrs:
1781
1782        list_attribute.extend(
1783            list(
1784                filter(
1785                    lambda l: l not in list_keys_matching_attributes, tmp_list_attribute
1786                )
1787            )
1788        )
1789
1790    if list_dataset_attrs:
1791
1792        list_attribute.extend(
1793            list(filter(lambda l: l in list_keys_matching_attributes, tmp_list_attribute))
1794        )
1795
1796    for key in list_attribute:
1797        values = hdf5.attrs[key]
1798        sub_location = os.path.join(location, key)
1799        if isinstance(
1800            values, (int, float, np.int64, np.float64, np.int32, np.float32, np.bool)
1801        ):
1802            result.append(
1803                f"{level_base}| {sub_location}, attribute, type={type(hdf5.attrs[key])}, value={values}"
1804            )
1805        elif isinstance(values, (str)) and len(values) < 20:
1806            result.append(
1807                f"{level_base}| {sub_location}, attribute, type={type(hdf5.attrs[key])}, len={len(values)}, value={values}"
1808            )
1809        else:
1810            result.append(
1811                f"{level_base}| {sub_location}, attribute, type={type(hdf5.attrs[key])}, len={len(values)}, value={values[0:20]}..."
1812            )
1813
1814    for hdf5_key, item in hdf5.items():
1815
1816        if str(type(item)).find("group") != -1:
1817
1818            sub_location = os.path.join(location, hdf5_key)
1819
1820            if "ndarray_ds" in item.keys():
1821                result.append(f"{level_base}| {sub_location}, ndarray")
1822            else:
1823                result.append(f"{level_base}| {sub_location}, group")
1824
1825            res = hdf5_view(
1826                hdf5_obj,
1827                sub_location,
1828                max_depth=max_depth,
1829                level_base=level_base + level_sep,
1830                depth=depth,
1831                return_view=True,
1832            )
1833
1834            # if len(res)>0:
1835            for key, item in enumerate(res):
1836                result.append(item)
1837
1838        if str(type(item)).find("dataset") != -1:
1839
1840            if item[:].dtype.char == "S":
1841                values = item[:].astype("U")
1842            else:
1843                values = item[:]
1844
1845            sub_location = os.path.join(location, hdf5_key)
1846
1847            result.append(
1848                f"{level_base}| {sub_location}, dataset, type={type(values)}, shape={values.shape}"
1849            )
1850
1851    if return_view:
1852        return result
1853    else:
1854        for res in result:
1855            print(res)

List recursively all dataset (and attributes) in an hdf5 object.

Parameters

hdf5_obj : h5py.File opened instance of the hdf5

location : str path inside the hdf5 where to start the research

max_depth: int Maximum depth of the search in the hdf5

level_base: str string used as separator at the lower level (default '>')

level_sep: str string used as separator at higher level (default '--')

depth: int current level depth

list_attrs: bool default is True, list the attributes

list_dataset_attrs: bool default is False, list the special attributes defined for each dataset by pyhdf5_handler

return_view: bool return the object view in a dictionary

Return

dictionary : optional, the view of the hdf5

Examples

search in a hdf5

>>> hdf5=hdf5_handler.open_hdf5(hdf5_file)
>>> matchkey=hdf5_handler.search_in_hdf5(hdf5, key='Nom_du_BV',location="./")
>>> hdf5.close()