Skip to article frontmatterSkip to article content

Sea Surface Altimetry Data Analysis

For this example we will use gridded sea-surface altimetry data from The Copernicus Marine Environment. This is a widely used dataset in physical oceanography and climate.

The dataset has been extracted from Copernicus and stored in google cloud storage in xarray-zarr format. It is catalogues in the Pangeo Cloud Catalog at https://catalog.pangeo.io/browse/master/ocean/sea_surface_height/

import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import hvplot.xarray
plt.rcParams['figure.figsize'] = (15,10)
%matplotlib inline
Loading...

Initialize Dataset

Here we load the dataset from the zarr store. Note that this very large dataset initializes nearly instantly, and we can see the full list of variables and coordinates, including metadata for each variable.

from intake import open_catalog
cat = open_catalog("https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/ocean.yaml")
ds  = cat["sea_surface_height"].to_dask()
ds
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[2], line 3
      1 from intake import open_catalog
      2 cat = open_catalog("https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/ocean.yaml")
----> 3 ds  = cat["sea_surface_height"].to_dask()
      4 ds

File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/intake_xarray/base.py:8, in IntakeXarraySourceAdapter.to_dask(self)
      6 def to_dask(self):
      7     if "chunks" not in self.reader.kwargs:
----> 8         return self.reader(chunks={}).read()
      9     else:
     10         return self.reader.read()

File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/intake/readers/readers.py:121, in BaseReader.read(self, *args, **kwargs)
    119 kw.update(kwargs)
    120 args = kw.pop("args", ()) or args
--> 121 return self._read(*args, **kw)

File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/intake/readers/readers.py:1327, in XArrayDatasetReader._read(self, data, open_local, **kw)
   1325         f = fsspec.open(data.url, **(data.storage_options or {})).open()
   1326         return open_dataset(f, **kw)
-> 1327 return open_dataset(data.url, **kw)

File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/xarray/backends/api.py:750, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, create_default_indexes, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
    738 decoders = _resolve_decoders_kwargs(
    739     decode_cf,
    740     open_backend_dataset_parameters=backend.open_dataset_parameters,
   (...)    746     decode_coords=decode_coords,
    747 )
    749 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 750 backend_ds = backend.open_dataset(
    751     filename_or_obj,
    752     drop_variables=drop_variables,
    753     **decoders,
    754     **kwargs,
    755 )
    756 ds = _dataset_from_backend_dataset(
    757     backend_ds,
    758     filename_or_obj,
   (...)    769     **kwargs,
    770 )
    771 return ds

File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/xarray/backends/zarr.py:1636, in ZarrBackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, synchronizer, consolidated, chunk_store, storage_options, zarr_version, zarr_format, store, engine, use_zarr_fill_value_as_mask, cache_members)
   1634 filename_or_obj = _normalize_path(filename_or_obj)
   1635 if not store:
-> 1636     store = ZarrStore.open_group(
   1637         filename_or_obj,
   1638         group=group,
   1639         mode=mode,
   1640         synchronizer=synchronizer,
   1641         consolidated=consolidated,
   1642         consolidate_on_close=False,
   1643         chunk_store=chunk_store,
   1644         storage_options=storage_options,
   1645         zarr_version=zarr_version,
   1646         use_zarr_fill_value_as_mask=None,
   1647         zarr_format=zarr_format,
   1648         cache_members=cache_members,
   1649     )
   1651 store_entrypoint = StoreBackendEntrypoint()
   1652 with close_on_error(store):

File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/xarray/backends/zarr.py:714, in ZarrStore.open_group(cls, store, mode, synchronizer, group, consolidated, consolidate_on_close, chunk_store, storage_options, append_dim, write_region, safe_chunks, align_chunks, zarr_version, zarr_format, use_zarr_fill_value_as_mask, write_empty, cache_members)
    688 @classmethod
    689 def open_group(
    690     cls,
   (...)    707     cache_members: bool = True,
    708 ):
    709     (
    710         zarr_group,
    711         consolidate_on_close,
    712         close_store_on_close,
    713         use_zarr_fill_value_as_mask,
--> 714     ) = _get_open_params(
    715         store=store,
    716         mode=mode,
    717         synchronizer=synchronizer,
    718         group=group,
    719         consolidated=consolidated,
    720         consolidate_on_close=consolidate_on_close,
    721         chunk_store=chunk_store,
    722         storage_options=storage_options,
    723         zarr_version=zarr_version,
    724         use_zarr_fill_value_as_mask=use_zarr_fill_value_as_mask,
    725         zarr_format=zarr_format,
    726     )
    728     return cls(
    729         zarr_group,
    730         mode,
   (...)    739         cache_members=cache_members,
    740     )

File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/xarray/backends/zarr.py:1840, in _get_open_params(store, mode, synchronizer, group, consolidated, consolidate_on_close, chunk_store, storage_options, zarr_version, use_zarr_fill_value_as_mask, zarr_format)
   1836 group = open_kwargs.pop("path")
   1838 if consolidated:
   1839     # TODO: an option to pass the metadata_key keyword
-> 1840     zarr_root_group = zarr.open_consolidated(store, **open_kwargs)
   1841 elif consolidated is None:
   1842     # same but with more error handling in case no consolidated metadata found
   1843     try:

File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/zarr/api/synchronous.py:222, in open_consolidated(use_consolidated, *args, **kwargs)
    217 def open_consolidated(*args: Any, use_consolidated: Literal[True] = True, **kwargs: Any) -> Group:
    218     """
    219     Alias for :func:`open_group` with ``use_consolidated=True``.
    220     """
    221     return Group(
--> 222         sync(async_api.open_consolidated(*args, use_consolidated=use_consolidated, **kwargs))
    223     )

File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/zarr/core/sync.py:163, in sync(coro, loop, timeout)
    160 return_result = next(iter(finished)).result()
    162 if isinstance(return_result, BaseException):
--> 163     raise return_result
    164 else:
    165     return return_result

File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/zarr/core/sync.py:119, in _runner(coro)
    114 """
    115 Await a coroutine and return the result of running it. If awaiting the coroutine raises an
    116 exception, the exception will be returned.
    117 """
    118 try:
--> 119     return await coro
    120 except Exception as ex:
    121     return ex

File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/zarr/api/asynchronous.py:381, in open_consolidated(use_consolidated, *args, **kwargs)
    376 if use_consolidated is not True:
    377     raise TypeError(
    378         "'use_consolidated' must be 'True' in 'open_consolidated'. Use 'open' with "
    379         "'use_consolidated=False' to bypass consolidated metadata."
    380     )
--> 381 return await open_group(*args, use_consolidated=use_consolidated, **kwargs)

File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/zarr/api/asynchronous.py:844, in open_group(store, mode, cache_attrs, synchronizer, path, chunk_store, storage_options, zarr_version, zarr_format, meta_array, attributes, use_consolidated)
    842 try:
    843     if mode in _READ_MODES:
--> 844         return await AsyncGroup.open(
    845             store_path, zarr_format=zarr_format, use_consolidated=use_consolidated
    846         )
    847 except (KeyError, FileNotFoundError):
    848     pass

File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/zarr/core/group.py:542, in AsyncGroup.open(cls, store, zarr_format, use_consolidated)
    535         raise FileNotFoundError(store_path)
    536 elif zarr_format is None:
    537     (
    538         zarr_json_bytes,
    539         zgroup_bytes,
    540         zattrs_bytes,
    541         maybe_consolidated_metadata_bytes,
--> 542     ) = await asyncio.gather(
    543         (store_path / ZARR_JSON).get(),
    544         (store_path / ZGROUP_JSON).get(),
    545         (store_path / ZATTRS_JSON).get(),
    546         (store_path / str(consolidated_key)).get(),
    547     )
    548     if zarr_json_bytes is not None and zgroup_bytes is not None:
    549         # warn and favor v3
    550         msg = f"Both zarr.json (Zarr format 3) and .zgroup (Zarr format 2) metadata objects exist at {store_path}. Zarr format 3 will be used."

File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/zarr/storage/_common.py:164, in StorePath.get(self, prototype, byte_range)
    162 if prototype is None:
    163     prototype = default_buffer_prototype()
--> 164 return await self.store.get(self.path, prototype=prototype, byte_range=byte_range)

File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/zarr/storage/_fsspec.py:300, in FsspecStore.get(self, key, prototype, byte_range)
    298 try:
    299     if byte_range is None:
--> 300         value = prototype.buffer.from_bytes(await self.fs._cat_file(path))
    301     elif isinstance(byte_range, RangeByteRequest):
    302         value = prototype.buffer.from_bytes(
    303             await self.fs._cat_file(
    304                 path,
   (...)    307             )
    308         )

File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/gcsfs/core.py:1117, in GCSFileSystem._cat_file(self, path, start, end, **kwargs)
   1115 else:
   1116     head = {}
-> 1117 headers, out = await self._call("GET", u2, headers=head)
   1118 return out

File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/gcsfs/core.py:483, in GCSFileSystem._call(self, method, path, json_out, info_out, *args, **kwargs)
    479 async def _call(
    480     self, method, path, *args, json_out=False, info_out=False, **kwargs
    481 ):
    482     logger.debug(f"{method.upper()}: {path}, {args}, {kwargs.get('headers')}")
--> 483     status, headers, info, contents = await self._request(
    484         method, path, *args, **kwargs
    485     )
    486     if json_out:
    487         return json.loads(contents)

File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/decorator.py:224, in decorate.<locals>.fun(*args, **kw)
    222 if not kwsyntax:
    223     args, kw = fix(args, kw, sig)
--> 224 return await caller(func, *(extras + args), **kw)

File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/gcsfs/retry.py:135, in retry_request(func, retries, *args, **kwargs)
    133     if retry > 0:
    134         await asyncio.sleep(min(random.random() + 2 ** (retry - 1), 32))
--> 135     return await func(*args, **kwargs)
    136 except (
    137     HttpError,
    138     requests.exceptions.RequestException,
   (...)    141     aiohttp.client_exceptions.ClientError,
    142 ) as e:
    143     if (
    144         isinstance(e, HttpError)
    145         and e.code == 400
    146         and "requester pays" in e.message
    147     ):

File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/gcsfs/core.py:476, in GCSFileSystem._request(self, method, path, headers, json, data, *args, **kwargs)
    473 info = r.request_info  # for debug only
    474 contents = await r.read()
--> 476 validate_response(status, contents, path, args)
    477 return status, headers, info, contents

File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/gcsfs/retry.py:120, in validate_response(status, content, path, args)
    118     raise requests.exceptions.ProxyError()
    119 elif "invalid" in str(msg):
--> 120     raise ValueError(f"Bad Request: {path}\n{msg}")
    121 elif error and not isinstance(error, str):
    122     raise HttpError(error)

ValueError: Bad Request: https://storage.googleapis.com/download/storage/v1/b/pangeo-cmems-duacs/o/.zgroup?alt=media
User project specified in the request is invalid.

Visually Examine Some of the Data

Let’s do a sanity check that the data looks reasonable. Here we use the hvplot interactive plotting library.

ds.sla.hvplot.image('longitude', 'latitude',
                    rasterize=True, dynamic=True, width=800, height=450, 
                    widget_type='scrubber', widget_location='bottom', cmap='RdBu_r')

Create and Connect to Dask Distributed Cluster

from dask_gateway import Gateway
from dask.distributed import Client

gateway = Gateway()
cluster = gateway.new_cluster()
cluster.adapt(minimum=1, maximum=20)
cluster

** ☝️ Don’t forget to click the link above to view the scheduler dashboard! **

client = Client(cluster)
client

Timeseries of Global Mean Sea Level

Here we make a simple yet fundamental calculation: the rate of increase of global mean sea level over the observational period.

# the number of GB involved in the reduction
ds.sla.nbytes/1e9
# the computationally intensive step
sla_timeseries = ds.sla.mean(dim=('latitude', 'longitude')).load()
sla_timeseries.plot(label='full data')
sla_timeseries.rolling(time=365, center=True).mean().plot(label='rolling annual mean')
plt.ylabel('Sea Level Anomaly [m]')
plt.title('Global Mean Sea Level')
plt.legend()
plt.grid()

In order to understand how the sea level rise is distributed in latitude, we can make a sort of Hovmöller diagram.

sla_hov = ds.sla.mean(dim='longitude').load()
fig, ax = plt.subplots(figsize=(12, 4))
sla_hov.name = 'Sea Level Anomaly [m]'
sla_hov.transpose().plot(vmax=0.2, ax=ax)

We can see that most sea level rise is actually in the Southern Hemisphere.

Sea Level Variability

We can examine the natural variability in sea level by looking at its standard deviation in time.

sla_std = ds.sla.std(dim='time').load()
sla_std.name = 'Sea Level Variability [m]'
ax = sla_std.plot()
_ = plt.title('Sea Level Variability')