Skip to content

API reference

All public functions are available directly from the top-level pyo_oracle namespace, e.g. pyo_oracle.list_layers(...).

Discovery

pyo_oracle.list_layers(search=None, variables=None, ssp=None, time_period=None, depth=None, dataframe=True, simplify=False, _include_allDatasets=False)

list_layers(search: Optional[Union[str, Iterable[str]]] = None, variables: Optional[Union[Variable, Iterable[Variable]]] = None, ssp: Optional[Union[SSP, Iterable[SSP]]] = None, time_period: Optional[TimePeriod] = None, depth: Optional[Union[Depth, Iterable[Depth]]] = None, dataframe: Literal[True] = True, simplify: bool = False, _include_allDatasets: bool = False) -> pd.DataFrame
list_layers(search: Optional[Union[str, Iterable[str]]] = None, variables: Optional[Union[Variable, Iterable[Variable]]] = None, ssp: Optional[Union[SSP, Iterable[SSP]]] = None, time_period: Optional[TimePeriod] = None, depth: Optional[Union[Depth, Iterable[Depth]]] = None, dataframe: Literal[False] = False, simplify: bool = False, _include_allDatasets: bool = False) -> List[str]

Lists available layers in the Bio-ORACLE server.

Parameters:

Name Type Description Default
search str | list

Natural-language search term, e.g. 'Temperature' or 'Oxygen'.

None
variables str | list

Variables to filter from. Valid values are ['po4','o2','si','ph','sws','phyc','so','thetao','dfe','no3','sithick','tas','siconc','chl','mlotst','clt','terrain'].

None
ssp str | list

Future scenario to choose from. Valid values are ['ssp119', 'ssp126', 'ssp370', 'ssp585', 'ssp460', 'ssp245', 'baseline'].

None
time_period str

Time period to choose from. Valid values are either 'present' or 'future'.

None
depth str | list

Depth category to choose from. Valid values are ['min', 'mean', 'max', 'surf'].

None
dataframe bool

Whether to return a Pandas DataFrame. If False, will return a list.

True
simplify bool

Whether to simplify the output. If True, will return only dataset ID and dataset title. If dataframe=False, this doesn't do anything.

False
_include_allDatasets bool

Internal flag for including all datasets.

False

Returns:

Type Description
Union[DataFrame, List[str]]

pd.DataFrame or list: If 'dataframe' is True (default), returns a Pandas DataFrame containing filtered layers' information. If 'dataframe' is False, returns a list of filtered dataset IDs.

Notes
  • This function queries the Bio-ORACLE server to list available layers based on the provided filters.
  • Filtering can be done by specifying 'search', 'variables', 'ssp', 'time_period', and 'depth'.
  • The function provides flexibility in choosing to return a DataFrame or a list of dataset IDs.
Example

List all available layers

all_layers = list_layers()

List layers for specific variables and future scenarios

filtered_layers = list_layers(variables=['po4', 'o2'], ssp='ssp585', dataframe=True)

Source code in pyo_oracle/main.py
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
def list_layers(
    search: Optional[Union[str, Iterable[str]]] = None,
    variables: Optional[Union[Variable, Iterable[Variable]]] = None,
    ssp: Optional[Union[SSP, Iterable[SSP]]] = None,
    time_period: Optional[TimePeriod] = None,
    depth: Optional[Union[Depth, Iterable[Depth]]] = None,
    dataframe: bool = True,
    simplify: bool = False,
    _include_allDatasets: bool = False,
) -> Union[pd.DataFrame, List[str]]:
    """
    List the layers available on the Bio-ORACLE server.

    The ``variables``, ``ssp``, ``time_period`` and ``depth`` filters are
    validated against the values of their ``Literal`` type aliases, then all
    filters are converted to hashable form and the query is delegated to
    ``_list_layers``.

    Args:
        search (str|list): Natural text search term, e.g. 'Temperature', 'Oxygen'.
        variables (str|list): Variables to filter from. Valid values are ['po4','o2','si','ph','sws','phyc','so','thetao','dfe','no3','sithick','tas','siconc','chl','mlotst','clt','terrain'].
        ssp (str|list): Future scenario to choose from. Valid values are ['ssp119', 'ssp126', 'ssp370', 'ssp585', 'ssp460', 'ssp245', 'baseline'].
        time_period (str): Time period to choose from. Valid values are either 'present' or 'future'.
        depth (str|list): Depth category to choose from. Valid values are ['min', 'mean', 'max', 'surf'].
        dataframe (bool): Whether to return a Pandas DataFrame. If False, a list is returned instead.
        simplify (bool): If True, return only dataset ID and dataset title. Has no effect when ``dataframe=False``.
        _include_allDatasets (bool): Internal flag for including all datasets.

    Returns:
        pd.DataFrame or list: With ``dataframe=True`` (default), a DataFrame describing the matching layers; with ``dataframe=False``, a list of the matching dataset IDs.

    Notes:
        - ``search`` is not validated here; it is forwarded as given.
        - Validation happens in the order variables, ssp, time_period, depth.

    Example:
        # List all available layers
        all_layers = list_layers()

        # List layers for specific variables and future scenarios
        filtered_layers = list_layers(variables=['po4', 'o2'], ssp='ssp585', dataframe=True)
    """
    # Each filter is paired with the Literal alias whose members (read via
    # get_args) enumerate the values that filter may take.
    checks = (
        ("variables", variables, Variable),
        ("ssp", ssp, SSP),
        ("time_period", time_period, TimePeriod),
        ("depth", depth, Depth),
    )
    for label, supplied, literal_type in checks:
        _validate_argument(label, supplied, get_args(literal_type))

    # _list_layers holds the main logic; its filter arguments are made
    # hashable first so that the call can be cached.
    hashable_filters = tuple(
        _ensure_hashable(item)
        for item in (search, variables, ssp, time_period, depth)
    )
    return _list_layers(
        *hashable_filters,
        dataframe,
        simplify,
        _include_allDatasets,
    )

pyo_oracle.info_layer(dataset_id, verbose=True)

Returns metadata about a single layer (dataset).

Mirrors biooracler::info_layer: reports the dimension ranges (time, latitude, longitude, and depth when present) and the available variables together with their units and long names.

Parameters:

Name Type Description Default
dataset_id str

The dataset ID to inspect, e.g. "thetao_baseline_2000_2019_depthsurf".

required
verbose bool

If True (default), pretty-print the metadata as well as returning it.

True

Returns:

Name Type Description
dict Dict[str, Any]

A dictionary with keys dataset_id, dimensions (mapping dim name -> (min, max)), variables (mapping variable name -> {"units", "long_name"}) and griddap_constraints (the full-range constraints dict accepted by download_layers / load_layer).

Example

info = info_layer("thetao_baseline_2000_2019_depthsurf")

Source code in pyo_oracle/main.py
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
def info_layer(dataset_id: str, verbose: bool = True) -> Dict[str, Any]:
    """
    Return (and optionally print) the metadata of a single layer (dataset).

    Mirrors ``biooracler::info_layer``: the metadata covers the dimension
    ranges (time, latitude, longitude, and depth when present) and the
    available variables with their units and long names.

    Args:
        dataset_id (str): The dataset ID to inspect, e.g. "thetao_baseline_2000_2019_depthsurf".
        verbose (bool): If True (default), print a human-readable summary in
            addition to returning the metadata.

    Returns:
        dict: A dictionary with keys ``dataset_id``, ``dimensions``
        (mapping dim name -> (min, max)), ``variables`` (mapping variable name ->
        {"units", "long_name"}) and ``griddap_constraints`` (the full-range
        constraints dict accepted by ``download_layers`` / ``load_layer``).

    Example:
        info = info_layer("thetao_baseline_2000_2019_depthsurf")
    """
    info = _layer_info(dataset_id)
    if not verbose:
        return info

    print(f"Dataset ID: {info['dataset_id']}\n")

    print("Dimensions:")
    for dim_name, (lower, upper) in info["dimensions"].items():
        print(f"\t{dim_name}: {lower} to {upper}")

    print("\nVariables:")
    for var_name, attrs in info["variables"].items():
        unit = attrs.get("units")
        # Fall back to the variable's own name when no long name is recorded.
        description = attrs.get("long_name") or var_name
        suffix = f" [{unit}]" if unit else ""
        print(f"\t{var_name}: {description}{suffix}")
    print()

    return info

Subsetting

pyo_oracle.build_constraints(dataset_id=None, time=None, latitude=None, longitude=None, depth=None, time_step=1, latitude_step=1, longitude_step=1, depth_step=1, validate=True)

Build a griddap constraints dictionary from human-friendly bounds.

Instead of hand-writing the {"time>=": ..., "time<=": ..., "time_step": ...} dictionary, pass (min, max) tuples per dimension and optional strides.

Parameters:

Name Type Description Default
dataset_id str

If given and validate is True, bounds are checked against the dataset's real dimension ranges and a warning is emitted (not an error) when they fall outside.

None
time tuple

(start, end) as ISO strings, e.g. ("2000-01-01T00:00:00Z", "2010-01-01T00:00:00Z").

None
latitude tuple

(min, max) latitude in degrees.

None
longitude tuple

(min, max) longitude in degrees.

None
depth tuple

(min, max) depth.

None
time_step int

Stride along time. Default 1.

1
latitude_step int

Stride along latitude. Default 1.

1
longitude_step int

Stride along longitude. Default 1.

1
depth_step int

Stride along depth. Default 1.

1
validate bool

If True and dataset_id is given, validate bounds.

True

Returns:

Name Type Description
dict Dict[str, Any]

A constraints dictionary suitable for download_layers / load_layer.

Example

constraints = build_constraints(
    time=("2000-01-01T00:00:00Z", "2010-01-01T00:00:00Z"),
    latitude=(0, 10),
    longitude=(0, 10),
)

Source code in pyo_oracle/utils.py
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
def build_constraints(
    dataset_id: Optional[str] = None,
    time: Optional[Tuple[Any, Any]] = None,
    latitude: Optional[Tuple[float, float]] = None,
    longitude: Optional[Tuple[float, float]] = None,
    depth: Optional[Tuple[float, float]] = None,
    time_step: int = 1,
    latitude_step: int = 1,
    longitude_step: int = 1,
    depth_step: int = 1,
    validate: bool = True,
) -> Dict[str, Any]:
    """
    Build a griddap constraints dictionary from human-friendly bounds.

    Instead of hand-writing the ``{"time>=": ..., "time<=": ..., "time_step": ...}``
    dictionary, pass ``(min, max)`` tuples per dimension and optional strides.
    Dimensions whose bounds are ``None`` are left out of the result entirely
    (including their ``*_step`` key).

    Args:
        dataset_id (str, optional): If given and ``validate`` is True, bounds are
            checked against the dataset's real dimension ranges and a warning is
            emitted (not an error) when they fall outside.
        time (tuple, optional): ``(start, end)`` as ISO strings, e.g. ("2000-01-01T00:00:00Z", "2010-01-01T00:00:00Z").
            Time bounds are never range-checked.
        latitude (tuple, optional): ``(min, max)`` latitude in degrees.
        longitude (tuple, optional): ``(min, max)`` longitude in degrees.
        depth (tuple, optional): ``(min, max)`` depth.
        time_step (int): Stride along time. Default 1.
        latitude_step (int): Stride along latitude. Default 1.
        longitude_step (int): Stride along longitude. Default 1.
        depth_step (int): Stride along depth. Default 1.
        validate (bool): If True and ``dataset_id`` is given, validate bounds.

    Returns:
        dict: A constraints dictionary suitable for ``download_layers`` / ``load_layer``.
        For every supplied dimension it holds the keys ``"<dim>>="``,
        ``"<dim><="`` and ``"<dim>_step"``.

    Warns:
        UserWarning: If the dataset ranges cannot be fetched for validation, or
            if a requested numeric range lies outside the dataset's range. The
            warnings are attributed to the caller of this function.

    Example:
        constraints = build_constraints(
            time=("2000-01-01T00:00:00Z", "2010-01-01T00:00:00Z"),
            latitude=(0, 10),
            longitude=(0, 10),
        )
    """
    bounds = {
        "time": (time, time_step),
        "latitude": (latitude, latitude_step),
        "longitude": (longitude, longitude_step),
        "depth": (depth, depth_step),
    }

    real_ranges: Dict[str, Tuple[Any, Any]] = {}
    if validate and dataset_id is not None:
        try:
            real_ranges = _layer_info(dataset_id)["dimensions"]
        except Exception as exc:  # network/validation is best-effort
            # stacklevel=2 makes the warning point at the caller's line,
            # which is where the dataset ID and bounds were chosen.
            warnings.warn(
                f"Could not fetch ranges for '{dataset_id}' to validate constraints: {exc}",
                stacklevel=2,
            )

    constraints: Dict[str, Any] = {}
    for dim, (value, step) in bounds.items():
        if value is None:
            continue
        lo, hi = value
        constraints[f"{dim}>="] = lo
        constraints[f"{dim}<="] = hi
        constraints[f"{dim}_step"] = step

        if dim not in real_ranges:
            continue
        rlo, rhi = real_ranges[dim]
        # Numeric dimensions only; skip time string comparisons, and skip
        # dimensions whose real range is only partially known.
        if dim == "time" or None in (rlo, rhi):
            continue
        try:
            out_of_range = lo < rlo or hi > rhi
        except TypeError:
            # The bounds are not comparable with the dataset range (mixed
            # types); validation is best-effort, so the check is skipped.
            continue
        if out_of_range:
            warnings.warn(
                f"Requested {dim} range ({lo}, {hi}) is outside the "
                f"dataset range ({rlo}, {rhi}) for '{dataset_id}'.",
                stacklevel=2,
            )

    return constraints

Data access

pyo_oracle.load_layer(dataset_id, constraints=None, variables=None, fmt='pandas', verbose=False)

Loads a layer directly into memory instead of writing it to a file.

This is the in-memory counterpart of download_layers and mirrors biooracler::download_layers returning data into the session.

Parameters:

Name Type Description Default
dataset_id str

The dataset ID to load.

required
constraints dict

Constraints to apply. See build_constraints.

None
variables list

Subset of variables to load. If None, all are loaded.

None
fmt str

"pandas" (default) returns a pandas.DataFrame; "xarray" returns an xarray.Dataset (requires the optional xarray extra: pip install pyo_oracle[xarray]).

'pandas'
verbose bool

If True, print selection details.

False

Returns:

Type Description
Any

pandas.DataFrame or xarray.Dataset: The requested data.

Example

df = load_layer("thetao_baseline_2000_2019_depthsurf", constraints=constraints)
ds = load_layer("thetao_baseline_2000_2019_depthsurf", fmt="xarray")

Source code in pyo_oracle/main.py
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
def load_layer(
    dataset_id: str,
    constraints: Optional[Dict[str, Any]] = None,
    variables: Optional[Iterable[str]] = None,
    fmt: Literal["pandas", "xarray"] = "pandas",
    verbose: bool = False,
) -> Any:
    """
    Load a layer straight into memory rather than saving it to disk.

    This is the in-memory counterpart of ``download_layers`` and mirrors
    ``biooracler::download_layers`` returning data into the session.

    Args:
        dataset_id (str): The dataset ID to load.
        constraints (dict, optional): Constraints to apply. See ``build_constraints``.
        variables (list, optional): Subset of variables to load. If None, all are loaded.
        fmt (str): "pandas" (default) returns a ``pandas.DataFrame``; "xarray" returns
            an ``xarray.Dataset`` (requires the optional ``xarray`` extra:
            ``pip install pyo_oracle[xarray]``).
        verbose (bool): If True, print selection details.

    Returns:
        pandas.DataFrame or xarray.Dataset: The requested data.

    Raises:
        ValueError: If ``fmt`` is neither "pandas" nor "xarray". This is checked
            before anything else is done.
        ImportError: If the xarray conversion fails with an ``ImportError``; the
            re-raised error names the extra to install.

    Example:
        df = load_layer("thetao_baseline_2000_2019_depthsurf", constraints=constraints)
        ds = load_layer("thetao_baseline_2000_2019_depthsurf", fmt="xarray")
    """
    supported_formats = ("pandas", "xarray")
    if fmt not in supported_formats:
        raise ValueError(f"Unsupported fmt '{fmt}'. Use 'pandas' or 'xarray'.")

    griddap = _build_griddap_server(dataset_id, variables, constraints, verbose)

    if fmt != "xarray":
        return griddap.to_pandas()

    try:
        dataset = griddap.to_xarray()
    except ImportError as exc:  # pragma: no cover - exercised via install state
        raise ImportError(
            "Loading layers as xarray requires the optional dependencies. "
            "Install them with: pip install pyo_oracle[xarray]"
        ) from exc
    return dataset

pyo_oracle.download_layers(dataset_ids, output_directory=None, response='nc', constraints=None, variables=None, skip_confirmation=None, verbose=True, log=True, timestamp=True, timeout=120, skip_convert_to_lowercase=False, **httpx_kwargs)

Downloads one or more layers.

Parameters:

Name Type Description Default
dataset_ids str or list

Dataset ID(s) to download. A single dataset ID or a list of IDs.

required
output_directory str or Path

Directory where downloaded files will be saved. If not provided, the default directory will be used.

None
response str

Format of the response to download. Default is 'nc'.

'nc'
constraints dict

Constraints to apply to the downloaded data. See build_constraints for a convenient way to build this dictionary.

None
variables list

Subset of variables to download. If not provided, all variables in the dataset are downloaded.

None
skip_confirmation bool

If True, confirmation prompts will be skipped. If None, the value from the configuration will be used.

None
verbose bool

If True, detailed information will be printed during the download process.

True
log bool

If True, a log of the download will be created.

True
timestamp bool

If True, a timestamp will be added to the downloaded files' names.

True
timeout int

Timeout in seconds for the download request.

120
skip_convert_to_lowercase bool

If True, the dataset ID will not be converted to lowercase.

False
httpx_kwargs dict

Additional keyword arguments to pass to the httpx function.

{}

Returns:

Type Description
None

None

Note

This function downloads the specified dataset(s) and saves them to the provided or default output directory.

Example

Download a single dataset with default settings

download_layers(dataset_ids="dataset123")

Download multiple datasets with custom settings, restricting to a single variable

download_layers(
    dataset_ids=["dataset456", "dataset789"],
    output_directory="/path/to/output",
    response="csv",
    variables=["thetao_mean"],
    verbose=False,
)

Source code in pyo_oracle/main.py
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
def download_layers(
    dataset_ids: Union[str, Iterable[str]],
    output_directory: Optional[Union[str, Path]] = None,
    response: str = "nc",
    constraints: Optional[Dict[str, Any]] = None,
    variables: Optional[Iterable[str]] = None,
    skip_confirmation: Optional[bool] = None,
    verbose: bool = True,
    log: bool = True,
    timestamp: bool = True,
    timeout: int = 120,
    skip_convert_to_lowercase: bool = False,
    **httpx_kwargs,
) -> None:
    """
    Download one or more layers.

    When no constraints are given and confirmation is not skipped, the user is
    asked once to confirm before any dataset is downloaded; declining returns
    without downloading anything. Each dataset is then handed to
    ``_download_layer`` in turn.

    Args:
        dataset_ids (str or list): Dataset ID(s) to download. A single dataset ID or a list of IDs.
        output_directory (str or Path, optional): Directory where downloaded files will be saved. If not provided, the default directory will be used.
        response (str, optional): Format of the response to download. Default is 'nc'.
        constraints (dict, optional): Constraints to apply to the downloaded data. See ``build_constraints`` for a convenient way to build this dictionary.
        variables (list, optional): Subset of variables to download. If not provided, all variables in the dataset are downloaded.
        skip_confirmation (bool, optional): If True, confirmation prompts will be skipped. If None, the value from the configuration will be used.
        verbose (bool, optional): If True, detailed information will be printed during the download process.
        log (bool, optional): If True, a log of the download will be created.
        timestamp (bool, optional): If True, a timestamp will be added to the downloaded files' names.
        timeout (int, optional): Timeout in seconds for the download request.
        skip_convert_to_lowercase (bool, optional): If True, the dataset ID will not be converted to lowercase.
        httpx_kwargs (dict, optional): Additional keyword arguments to pass to the httpx function.

    Returns:
        None

    Note:
        A dataset ID for which ``str.islower()`` is false is lowercased (and a
        message is printed) unless ``skip_convert_to_lowercase`` is True.

    Example:
        # Download a single dataset with default settings
        download_layers(dataset_ids="dataset123")

        # Download multiple datasets with custom settings, restricting to a single variable
        download_layers(
            dataset_ids=["dataset456", "dataset789"],
            output_directory="/path/to/output",
            response="csv",
            variables=["thetao_mean"],
            verbose=False,
        )
    """
    # A lone ID is wrapped so the loop below always sees an iterable of IDs.
    ids = (dataset_ids,) if isinstance(dataset_ids, str) else dataset_ids

    if skip_confirmation is None:
        skip_confirmation = _as_bool(config["skip_confirmation"])

    # An unconstrained request fetches whole datasets, so ask first.
    if not (skip_confirmation or constraints):
        question = "No constraints have been set. This will download the full dataset, which may be a few GBs in size."
        proceed = confirm(question)
        if not proceed:
            return

    for requested_id in ids:
        needs_lowercasing = not (requested_id.islower() or skip_convert_to_lowercase)
        if needs_lowercasing:
            print(f"Converting dataset ID '{requested_id}' to lowercase.")
            requested_id = requested_id.lower()
        _download_layer(
            requested_id,
            output_directory,
            response,
            constraints,
            skip_confirmation,
            verbose,
            log,
            timestamp,
            timeout,
            variables=variables,
            **httpx_kwargs,
        )

pyo_oracle.list_local_data(data_directory=None, verbose=True)

Lists datasets that are locally downloaded.

Parameters:

Name Type Description Default
data_directory str

Path to the data directory. If not provided, the path from the configuration will be used.

None
verbose bool

If True, detailed information will be printed. If False, only basic file names will be printed.

True

Returns:

Type Description
None

None

Note

This function lists the datasets available in the specified data directory.

Example

List all datasets in the default data directory with detailed information

list_local_data()

List datasets in a specific directory without verbose output

list_local_data(data_directory="/path/to/data", verbose=False)

Source code in pyo_oracle/main.py
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
def list_local_data(
    data_directory: Optional[Union[str, Path]] = None, verbose: bool = True
) -> None:
    """
    Lists datasets that are locally downloaded.

    Args:
        data_directory (str or Path, optional): Path to the data directory. If not provided, the path from the configuration will be used.
        verbose (bool): If True, each entry is printed with its size, followed by
            the total size of the directory. If False, only file names are
            printed and entries ending in ".log" are left out.

    Returns:
        None

    Note:
        This function lists the entries directly inside the specified data
        directory (it does not descend into sub-directories).

    Example:
        # List all datasets in the default data directory with detailed information
        list_local_data()

        # List datasets in a specific directory without verbose output
        list_local_data(data_directory="/path/to/data", verbose=False)
    """
    if data_directory is None:
        data_directory = config["data_directory"]

    verbose_print(f"Your data directory is '{data_directory}'.\n", verbose)
    verbose_print("Contents of data directory:", verbose)
    # List the directory that was actually requested. Previously the configured
    # default was always globbed, so a caller-supplied ``data_directory`` only
    # changed the printed messages, not the listing itself.
    files = glob(str(Path(data_directory).joinpath("*")))
    if not verbose:
        files = [f for f in files if not str(f).endswith(".log")]
    if files:
        for file in files:
            path = Path(file)
            if verbose:
                print("\t", path.name, "\t", convert_bytes(path.stat().st_size))
            else:
                print("\t", path.name)
        print()
    else:
        print(f"Data directory at '{data_directory}' does not contain any data.")
    if verbose:
        dirsize = sum(Path(f).stat().st_size for f in files)
        dirsize = convert_bytes(dirsize)
        verbose_print(f"Size of data directory is {dirsize}.", verbose)

Configuration

pyo_oracle.create_config = config_module.create_config module-attribute

pyo_oracle.get_config_path = config_module.get_config_path module-attribute

pyo_oracle.print_config_values = config_module.print_config_values module-attribute

pyo_oracle.update_setting = config_module.update_setting module-attribute