For this example we will use gridded sea-surface altimetry data from The Copernicus Marine Environment. This is a widely used dataset in physical oceanography and climate.
The dataset has been extracted from Copernicus and stored in google cloud storage in xarray-zarr format. It is catalogues in the Pangeo Cloud Catalog at https://
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import hvplot.xarray
plt.rcParams['figure.figsize'] = (15,10)
%matplotlib inline
Initialize Dataset¶
Here we load the dataset from the zarr store. Note that this very large dataset initializes nearly instantly, and we can see the full list of variables and coordinates, including metadata for each variable.
from intake import open_catalog
cat = open_catalog("https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/ocean.yaml")
ds = cat["sea_surface_height"].to_dask()
ds
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[2], line 3
1 from intake import open_catalog
2 cat = open_catalog("https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/ocean.yaml")
----> 3 ds = cat["sea_surface_height"].to_dask()
4 ds
File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/intake_xarray/base.py:8, in IntakeXarraySourceAdapter.to_dask(self)
6 def to_dask(self):
7 if "chunks" not in self.reader.kwargs:
----> 8 return self.reader(chunks={}).read()
9 else:
10 return self.reader.read()
File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/intake/readers/readers.py:121, in BaseReader.read(self, *args, **kwargs)
119 kw.update(kwargs)
120 args = kw.pop("args", ()) or args
--> 121 return self._read(*args, **kw)
File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/intake/readers/readers.py:1327, in XArrayDatasetReader._read(self, data, open_local, **kw)
1325 f = fsspec.open(data.url, **(data.storage_options or {})).open()
1326 return open_dataset(f, **kw)
-> 1327 return open_dataset(data.url, **kw)
File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/xarray/backends/api.py:687, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
675 decoders = _resolve_decoders_kwargs(
676 decode_cf,
677 open_backend_dataset_parameters=backend.open_dataset_parameters,
(...) 683 decode_coords=decode_coords,
684 )
686 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 687 backend_ds = backend.open_dataset(
688 filename_or_obj,
689 drop_variables=drop_variables,
690 **decoders,
691 **kwargs,
692 )
693 ds = _dataset_from_backend_dataset(
694 backend_ds,
695 filename_or_obj,
(...) 705 **kwargs,
706 )
707 return ds
File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/xarray/backends/zarr.py:1578, in ZarrBackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, synchronizer, consolidated, chunk_store, storage_options, zarr_version, zarr_format, store, engine, use_zarr_fill_value_as_mask, cache_members)
1576 filename_or_obj = _normalize_path(filename_or_obj)
1577 if not store:
-> 1578 store = ZarrStore.open_group(
1579 filename_or_obj,
1580 group=group,
1581 mode=mode,
1582 synchronizer=synchronizer,
1583 consolidated=consolidated,
1584 consolidate_on_close=False,
1585 chunk_store=chunk_store,
1586 storage_options=storage_options,
1587 zarr_version=zarr_version,
1588 use_zarr_fill_value_as_mask=None,
1589 zarr_format=zarr_format,
1590 cache_members=cache_members,
1591 )
1593 store_entrypoint = StoreBackendEntrypoint()
1594 with close_on_error(store):
File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/xarray/backends/zarr.py:664, in ZarrStore.open_group(cls, store, mode, synchronizer, group, consolidated, consolidate_on_close, chunk_store, storage_options, append_dim, write_region, safe_chunks, align_chunks, zarr_version, zarr_format, use_zarr_fill_value_as_mask, write_empty, cache_members)
638 @classmethod
639 def open_group(
640 cls,
(...) 657 cache_members: bool = True,
658 ):
659 (
660 zarr_group,
661 consolidate_on_close,
662 close_store_on_close,
663 use_zarr_fill_value_as_mask,
--> 664 ) = _get_open_params(
665 store=store,
666 mode=mode,
667 synchronizer=synchronizer,
668 group=group,
669 consolidated=consolidated,
670 consolidate_on_close=consolidate_on_close,
671 chunk_store=chunk_store,
672 storage_options=storage_options,
673 zarr_version=zarr_version,
674 use_zarr_fill_value_as_mask=use_zarr_fill_value_as_mask,
675 zarr_format=zarr_format,
676 )
678 return cls(
679 zarr_group,
680 mode,
(...) 689 cache_members=cache_members,
690 )
File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/xarray/backends/zarr.py:1777, in _get_open_params(store, mode, synchronizer, group, consolidated, consolidate_on_close, chunk_store, storage_options, zarr_version, use_zarr_fill_value_as_mask, zarr_format)
1773 group = open_kwargs.pop("path")
1775 if consolidated:
1776 # TODO: an option to pass the metadata_key keyword
-> 1777 zarr_root_group = zarr.open_consolidated(store, **open_kwargs)
1778 elif consolidated is None:
1779 # same but with more error handling in case no consolidated metadata found
1780 try:
File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/zarr/api/synchronous.py:217, in open_consolidated(use_consolidated, *args, **kwargs)
212 def open_consolidated(*args: Any, use_consolidated: Literal[True] = True, **kwargs: Any) -> Group:
213 """
214 Alias for :func:`open_group` with ``use_consolidated=True``.
215 """
216 return Group(
--> 217 sync(async_api.open_consolidated(*args, use_consolidated=use_consolidated, **kwargs))
218 )
File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/zarr/core/sync.py:163, in sync(coro, loop, timeout)
160 return_result = next(iter(finished)).result()
162 if isinstance(return_result, BaseException):
--> 163 raise return_result
164 else:
165 return return_result
File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/zarr/core/sync.py:119, in _runner(coro)
114 """
115 Await a coroutine and return the result of running it. If awaiting the coroutine raises an
116 exception, the exception will be returned.
117 """
118 try:
--> 119 return await coro
120 except Exception as ex:
121 return ex
File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/zarr/api/asynchronous.py:362, in open_consolidated(use_consolidated, *args, **kwargs)
357 if use_consolidated is not True:
358 raise TypeError(
359 "'use_consolidated' must be 'True' in 'open_consolidated'. Use 'open' with "
360 "'use_consolidated=False' to bypass consolidated metadata."
361 )
--> 362 return await open_group(*args, use_consolidated=use_consolidated, **kwargs)
File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/zarr/api/asynchronous.py:825, in open_group(store, mode, cache_attrs, synchronizer, path, chunk_store, storage_options, zarr_version, zarr_format, meta_array, attributes, use_consolidated)
823 try:
824 if mode in _READ_MODES:
--> 825 return await AsyncGroup.open(
826 store_path, zarr_format=zarr_format, use_consolidated=use_consolidated
827 )
828 except (KeyError, FileNotFoundError):
829 pass
File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/zarr/core/group.py:538, in AsyncGroup.open(cls, store, zarr_format, use_consolidated)
531 raise FileNotFoundError(store_path)
532 elif zarr_format is None:
533 (
534 zarr_json_bytes,
535 zgroup_bytes,
536 zattrs_bytes,
537 maybe_consolidated_metadata_bytes,
--> 538 ) = await asyncio.gather(
539 (store_path / ZARR_JSON).get(),
540 (store_path / ZGROUP_JSON).get(),
541 (store_path / ZATTRS_JSON).get(),
542 (store_path / str(consolidated_key)).get(),
543 )
544 if zarr_json_bytes is not None and zgroup_bytes is not None:
545 # warn and favor v3
546 msg = f"Both zarr.json (Zarr format 3) and .zgroup (Zarr format 2) metadata objects exist at {store_path}. Zarr format 3 will be used."
File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/zarr/storage/_common.py:124, in StorePath.get(self, prototype, byte_range)
122 if prototype is None:
123 prototype = default_buffer_prototype()
--> 124 return await self.store.get(self.path, prototype=prototype, byte_range=byte_range)
File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/zarr/storage/_fsspec.py:230, in FsspecStore.get(self, key, prototype, byte_range)
228 try:
229 if byte_range is None:
--> 230 value = prototype.buffer.from_bytes(await self.fs._cat_file(path))
231 elif isinstance(byte_range, RangeByteRequest):
232 value = prototype.buffer.from_bytes(
233 await self.fs._cat_file(
234 path,
(...) 237 )
238 )
File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/gcsfs/core.py:1115, in GCSFileSystem._cat_file(self, path, start, end, **kwargs)
1113 else:
1114 head = {}
-> 1115 headers, out = await self._call("GET", u2, headers=head)
1116 return out
File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/gcsfs/core.py:481, in GCSFileSystem._call(self, method, path, json_out, info_out, *args, **kwargs)
477 async def _call(
478 self, method, path, *args, json_out=False, info_out=False, **kwargs
479 ):
480 logger.debug(f"{method.upper()}: {path}, {args}, {kwargs.get('headers')}")
--> 481 status, headers, info, contents = await self._request(
482 method, path, *args, **kwargs
483 )
484 if json_out:
485 return json.loads(contents)
File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/decorator.py:224, in decorate.<locals>.fun(*args, **kw)
222 if not kwsyntax:
223 args, kw = fix(args, kw, sig)
--> 224 return await caller(func, *(extras + args), **kw)
File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/gcsfs/retry.py:135, in retry_request(func, retries, *args, **kwargs)
133 if retry > 0:
134 await asyncio.sleep(min(random.random() + 2 ** (retry - 1), 32))
--> 135 return await func(*args, **kwargs)
136 except (
137 HttpError,
138 requests.exceptions.RequestException,
(...) 141 aiohttp.client_exceptions.ClientError,
142 ) as e:
143 if (
144 isinstance(e, HttpError)
145 and e.code == 400
146 and "requester pays" in e.message
147 ):
File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/gcsfs/core.py:474, in GCSFileSystem._request(self, method, path, headers, json, data, *args, **kwargs)
471 info = r.request_info # for debug only
472 contents = await r.read()
--> 474 validate_response(status, contents, path, args)
475 return status, headers, info, contents
File ~/micromamba/envs/po-cookbook-dev/lib/python3.13/site-packages/gcsfs/retry.py:120, in validate_response(status, content, path, args)
118 raise requests.exceptions.ProxyError()
119 elif "invalid" in str(msg):
--> 120 raise ValueError(f"Bad Request: {path}\n{msg}")
121 elif error and not isinstance(error, str):
122 raise HttpError(error)
ValueError: Bad Request: https://storage.googleapis.com/download/storage/v1/b/pangeo-cmems-duacs/o/.zattrs?alt=media
User project specified in the request is invalid.
Visually Examine Some of the Data¶
Let’s do a sanity check that the data looks reasonable. Here we use the hvplot interactive plotting library.
ds.sla.hvplot.image('longitude', 'latitude',
rasterize=True, dynamic=True, width=800, height=450,
widget_type='scrubber', widget_location='bottom', cmap='RdBu_r')
Create and Connect to Dask Distributed Cluster¶
from dask_gateway import Gateway
from dask.distributed import Client
gateway = Gateway()
cluster = gateway.new_cluster()
cluster.adapt(minimum=1, maximum=20)
cluster
** ☝️ Don’t forget to click the link above to view the scheduler dashboard! **
client = Client(cluster)
client
Timeseries of Global Mean Sea Level¶
Here we make a simple yet fundamental calculation: the rate of increase of global mean sea level over the observational period.
# the number of GB involved in the reduction
ds.sla.nbytes/1e9
# the computationally intensive step
sla_timeseries = ds.sla.mean(dim=('latitude', 'longitude')).load()
sla_timeseries.plot(label='full data')
sla_timeseries.rolling(time=365, center=True).mean().plot(label='rolling annual mean')
plt.ylabel('Sea Level Anomaly [m]')
plt.title('Global Mean Sea Level')
plt.legend()
plt.grid()
In order to understand how the sea level rise is distributed in latitude, we can make a sort of Hovmöller diagram.
sla_hov = ds.sla.mean(dim='longitude').load()
fig, ax = plt.subplots(figsize=(12, 4))
sla_hov.name = 'Sea Level Anomaly [m]'
sla_hov.transpose().plot(vmax=0.2, ax=ax)
We can see that most sea level rise is actually in the Southern Hemisphere.
Sea Level Variability¶
We can examine the natural variability in sea level by looking at its standard deviation in time.
sla_std = ds.sla.std(dim='time').load()
sla_std.name = 'Sea Level Variability [m]'
ax = sla_std.plot()
_ = plt.title('Sea Level Variability')