For this example we will use gridded sea-surface altimetry data from the Copernicus Marine Environment Monitoring Service (CMEMS). This is a widely used dataset in physical oceanography and climate science.
The dataset has been extracted from Copernicus and stored in Google Cloud Storage in xarray-zarr format. It is catalogued in the Pangeo Cloud Catalog at https://
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import hvplot.xarray
plt.rcParams['figure.figsize'] = (15,10)
%matplotlib inline
Initialize Dataset
Here we load the dataset from the zarr store. Note that this very large dataset initializes nearly instantly, and we can see the full list of variables and coordinates, including metadata for each variable.
from intake import open_catalog
cat = open_catalog("https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/ocean.yaml")
ds = cat["sea_surface_height"].to_dask()
ds
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
(traceback frames through intake, xarray, zarr, and gcsfs omitted)
ValueError: Bad Request: https://storage.googleapis.com/download/storage/v1/b/pangeo-cmems-duacs/o/.zattrs?alt=media
User project specified in the request is invalid.
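This error typically means the bucket is configured as requester pays and the billing project sent to Google Cloud Storage is missing or not valid. One possible workaround, sketched below under the assumption that you are authenticated with Google Cloud and can bill requests to your own project (the project name here is a placeholder), is to open the zarr store directly:
# Possible workaround (a sketch, not part of the original notebook): open the zarr
# store directly from the requester-pays bucket named in the error above,
# billing the reads to your own Google Cloud project.
ds = xr.open_zarr(
    "gs://pangeo-cmems-duacs",
    consolidated=True,
    storage_options={"requester_pays": True, "project": "your-gcp-project"},  # placeholder project
)
ds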
Visually Examine Some of the Data
Let’s do a sanity check that the data looks reasonable. Here we use the hvplot interactive plotting library.
ds.sla.hvplot.image('longitude', 'latitude',
rasterize=True, dynamic=True, width=800, height=450,
widget_type='scrubber', widget_location='bottom', cmap='RdBu_r')
Create and Connect to Dask Distributed Cluster
from dask_gateway import Gateway
from dask.distributed import Client
gateway = Gateway()
cluster = gateway.new_cluster()
cluster.adapt(minimum=1, maximum=20)
cluster
**☝️ Don’t forget to click the link above to view the scheduler dashboard!**
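If you are not running on a Pangeo JupyterHub where Dask Gateway is available, a plain local cluster can stand in for the gateway cluster (a sketch; the worker sizing is only a placeholder, and in that case the next cell, which connects the client to the gateway cluster, is not needed):
# Alternative (assumption: no Dask Gateway available): start a local Dask cluster.
# Client was imported above; adjust n_workers / threads_per_worker to your machine.
client = Client(n_workers=4, threads_per_worker=1)
client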
client = Client(cluster)
client
Timeseries of Global Mean Sea Level
Here we make a simple yet fundamental calculation: the rate of increase of global mean sea level over the observational period.
# the number of GB involved in the reduction
ds.sla.nbytes/1e9
# the computationally intensive step
sla_timeseries = ds.sla.mean(dim=('latitude', 'longitude')).load()
sla_timeseries.plot(label='full data')
sla_timeseries.rolling(time=365, center=True).mean().plot(label='rolling annual mean')
plt.ylabel('Sea Level Anomaly [m]')
plt.title('Global Mean Sea Level')
plt.legend()
plt.grid()
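The figure shows a clear upward trend, but the rate of increase itself is not computed above. One way to estimate it, sketched here under the assumption that the time coordinate is datetime64 (so xarray's polyfit returns coefficients per nanosecond), is a first-degree polynomial fit:
# Estimate the linear trend of global mean sea level (a sketch, not part of the
# original notebook). With a datetime64 time coordinate, polyfit coefficients are
# per nanosecond, so convert to metres per year before printing.
fit = sla_timeseries.polyfit(dim='time', deg=1)
ns_per_year = 1e9 * 60 * 60 * 24 * 365.25
trend = fit.polyfit_coefficients.sel(degree=1).item() * ns_per_year
print(f'Global mean sea level trend: {trend * 1000:.2f} mm/yr')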
In order to understand how the sea level rise is distributed in latitude, we can make a sort of Hovmöller diagram.
sla_hov = ds.sla.mean(dim='longitude').load()
fig, ax = plt.subplots(figsize=(12, 4))
sla_hov.name = 'Sea Level Anomaly [m]'
sla_hov.transpose().plot(vmax=0.2, ax=ax)
We can see that most sea level rise is actually in the Southern Hemisphere.
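A rough way to check this, sketched below without area weighting and with an arbitrary 66°S–66°N cut-off (both are simplifying assumptions), is to compare hemispheric means of sla_hov:
# Rough hemispheric comparison (a sketch): unweighted means of the zonal-mean SLA.
sh = sla_hov.sel(latitude=slice(-66, 0)).mean(dim='latitude')
nh = sla_hov.sel(latitude=slice(0, 66)).mean(dim='latitude')
sh.plot(label='Southern Hemisphere')
nh.plot(label='Northern Hemisphere')
plt.ylabel('Sea Level Anomaly [m]')
plt.legend()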
Sea Level Variability
We can examine the natural variability in sea level by looking at its standard deviation in time.
sla_std = ds.sla.std(dim='time').load()
sla_std.name = 'Sea Level Variability [m]'
ax = sla_std.plot()
_ = plt.title('Sea Level Variability')