This notebook shows how to load and analyze ocean data from an out-of-the-box MOM6/CESM G-case simulation (coupled ocean/sea ice).
NOTE: MOM6/CESM is not ready to be used for research.
%matplotlib inline
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import holoviews as hv
import datashader
from holoviews.operation.datashader import regrid, shade, datashade
hv.extension('bokeh', width=100)
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Load MOM6/CESM Data¶
This data is stored in xarray-zarr format in Google Cloud Storage. This format is optimized for parallel distributed reads from within the cloud environment.
The full data catalog is located here: https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/ocean.yaml
import intake
# Open the Pangeo ocean intake catalog from its YAML definition on GitHub.
cat = intake.open_catalog("https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/ocean.yaml")
# Load the "cesm_mom6_example" entry as an xarray dataset via to_dask().
# NOTE(review): in the recorded run this call raised
# "ValueError: Bad Request ... User project specified in the request is invalid."
# while fetching `.zgroup` from the `pangeo-cesm-mom6` bucket, so `ds` was never
# created and every later cell that uses `ds` cannot run. Presumably the bucket
# access settings (or the catalog entry) need updating -- confirm before relying
# on this notebook.
ds = cat["cesm_mom6_example"].to_dask()
ds
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[2], line 3
1 import intake
2 cat = intake.open_catalog("https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/ocean.yaml")
----> 3 ds = cat["cesm_mom6_example"].to_dask()
4 ds
File ~/micromamba/envs/po-cookbook-dev/lib/python3.14/site-packages/intake_xarray/base.py:8, in IntakeXarraySourceAdapter.to_dask(self)
6 def to_dask(self):
7 if "chunks" not in self.reader.kwargs:
----> 8 return self.reader(chunks={}).read()
9 else:
10 return self.reader.read()
File ~/micromamba/envs/po-cookbook-dev/lib/python3.14/site-packages/intake/readers/readers.py:121, in BaseReader.read(self, *args, **kwargs)
119 kw.update(kwargs)
120 args = kw.pop("args", ()) or args
--> 121 return self._read(*args, **kw)
File ~/micromamba/envs/po-cookbook-dev/lib/python3.14/site-packages/intake/readers/readers.py:1327, in XArrayDatasetReader._read(self, data, open_local, **kw)
1325 f = fsspec.open(data.url, **(data.storage_options or {})).open()
1326 return open_dataset(f, **kw)
-> 1327 return open_dataset(data.url, **kw)
File ~/micromamba/envs/po-cookbook-dev/lib/python3.14/site-packages/xarray/backends/api.py:607, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, create_default_indexes, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
595 decoders = _resolve_decoders_kwargs(
596 decode_cf,
597 open_backend_dataset_parameters=backend.open_dataset_parameters,
(...) 603 decode_coords=decode_coords,
604 )
606 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 607 backend_ds = backend.open_dataset(
608 filename_or_obj,
609 drop_variables=drop_variables,
610 **decoders,
611 **kwargs,
612 )
613 ds = _dataset_from_backend_dataset(
614 backend_ds,
615 filename_or_obj,
(...) 626 **kwargs,
627 )
628 return ds
File ~/micromamba/envs/po-cookbook-dev/lib/python3.14/site-packages/xarray/backends/zarr.py:1683, in ZarrBackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, synchronizer, consolidated, chunk_store, storage_options, zarr_version, zarr_format, store, engine, use_zarr_fill_value_as_mask, cache_members)
1681 filename_or_obj = _normalize_path(filename_or_obj)
1682 if not store:
-> 1683 store = ZarrStore.open_group(
1684 filename_or_obj,
1685 group=group,
1686 mode=mode,
1687 synchronizer=synchronizer,
1688 consolidated=consolidated,
1689 consolidate_on_close=False,
1690 chunk_store=chunk_store,
1691 storage_options=storage_options,
1692 zarr_version=zarr_version,
1693 use_zarr_fill_value_as_mask=None,
1694 zarr_format=zarr_format,
1695 cache_members=cache_members,
1696 )
1698 store_entrypoint = StoreBackendEntrypoint()
1699 with close_on_error(store):
File ~/micromamba/envs/po-cookbook-dev/lib/python3.14/site-packages/xarray/backends/zarr.py:722, in ZarrStore.open_group(cls, store, mode, synchronizer, group, consolidated, consolidate_on_close, chunk_store, storage_options, append_dim, write_region, safe_chunks, align_chunks, zarr_version, zarr_format, use_zarr_fill_value_as_mask, write_empty, cache_members)
696 @classmethod
697 def open_group(
698 cls,
(...) 715 cache_members: bool = True,
716 ):
717 (
718 zarr_group,
719 consolidate_on_close,
720 close_store_on_close,
721 use_zarr_fill_value_as_mask,
--> 722 ) = _get_open_params(
723 store=store,
724 mode=mode,
725 synchronizer=synchronizer,
726 group=group,
727 consolidated=consolidated,
728 consolidate_on_close=consolidate_on_close,
729 chunk_store=chunk_store,
730 storage_options=storage_options,
731 zarr_version=zarr_version,
732 use_zarr_fill_value_as_mask=use_zarr_fill_value_as_mask,
733 zarr_format=zarr_format,
734 )
736 return cls(
737 zarr_group,
738 mode,
(...) 747 cache_members=cache_members,
748 )
File ~/micromamba/envs/po-cookbook-dev/lib/python3.14/site-packages/xarray/backends/zarr.py:1887, in _get_open_params(store, mode, synchronizer, group, consolidated, consolidate_on_close, chunk_store, storage_options, zarr_version, use_zarr_fill_value_as_mask, zarr_format)
1883 group = open_kwargs.pop("path")
1885 if consolidated:
1886 # TODO: an option to pass the metadata_key keyword
-> 1887 zarr_root_group = zarr.open_consolidated(store, **open_kwargs)
1888 elif consolidated is None:
1889 # same but with more error handling in case no consolidated metadata found
1890 try:
File ~/micromamba/envs/po-cookbook-dev/lib/python3.14/site-packages/zarr/api/synchronous.py:238, in open_consolidated(use_consolidated, *args, **kwargs)
233 def open_consolidated(*args: Any, use_consolidated: Literal[True] = True, **kwargs: Any) -> Group:
234 """
235 Alias for [`open_group`][zarr.api.synchronous.open_group] with ``use_consolidated=True``.
236 """
237 return Group(
--> 238 sync(async_api.open_consolidated(*args, use_consolidated=use_consolidated, **kwargs))
239 )
File ~/micromamba/envs/po-cookbook-dev/lib/python3.14/site-packages/zarr/core/sync.py:159, in sync(coro, loop, timeout)
156 return_result = next(iter(finished)).result()
158 if isinstance(return_result, BaseException):
--> 159 raise return_result
160 else:
161 return return_result
File ~/micromamba/envs/po-cookbook-dev/lib/python3.14/site-packages/zarr/core/sync.py:119, in _runner(coro)
114 """
115 Await a coroutine and return the result of running it. If awaiting the coroutine raises an
116 exception, the exception will be returned.
117 """
118 try:
--> 119 return await coro
120 except Exception as ex:
121 return ex
File ~/micromamba/envs/po-cookbook-dev/lib/python3.14/site-packages/zarr/api/asynchronous.py:415, in open_consolidated(use_consolidated, *args, **kwargs)
410 if use_consolidated is not True:
411 raise TypeError(
412 "'use_consolidated' must be 'True' in 'open_consolidated'. Use 'open' with "
413 "'use_consolidated=False' to bypass consolidated metadata."
414 )
--> 415 return await open_group(*args, use_consolidated=use_consolidated, **kwargs)
File ~/micromamba/envs/po-cookbook-dev/lib/python3.14/site-packages/zarr/api/asynchronous.py:866, in open_group(store, mode, cache_attrs, synchronizer, path, chunk_store, storage_options, zarr_version, zarr_format, meta_array, attributes, use_consolidated)
864 try:
865 if mode in _READ_MODES:
--> 866 return await AsyncGroup.open(
867 store_path, zarr_format=zarr_format, use_consolidated=use_consolidated
868 )
869 except (KeyError, FileNotFoundError):
870 pass
File ~/micromamba/envs/po-cookbook-dev/lib/python3.14/site-packages/zarr/core/group.py:570, in AsyncGroup.open(cls, store, zarr_format, use_consolidated)
563 raise FileNotFoundError(store_path)
564 elif zarr_format is None:
565 (
566 zarr_json_bytes,
567 zgroup_bytes,
568 zattrs_bytes,
569 maybe_consolidated_metadata_bytes,
--> 570 ) = await asyncio.gather(
571 (store_path / ZARR_JSON).get(),
572 (store_path / ZGROUP_JSON).get(),
573 (store_path / ZATTRS_JSON).get(),
574 (store_path / str(consolidated_key)).get(),
575 )
576 if zarr_json_bytes is not None and zgroup_bytes is not None:
577 # warn and favor v3
578 msg = f"Both zarr.json (Zarr format 3) and .zgroup (Zarr format 2) metadata objects exist at {store_path}. Zarr format 3 will be used."
File ~/micromamba/envs/po-cookbook-dev/lib/python3.14/site-packages/zarr/storage/_common.py:168, in StorePath.get(self, prototype, byte_range)
166 if prototype is None:
167 prototype = default_buffer_prototype()
--> 168 return await self.store.get(self.path, prototype=prototype, byte_range=byte_range)
File ~/micromamba/envs/po-cookbook-dev/lib/python3.14/site-packages/zarr/storage/_fsspec.py:289, in FsspecStore.get(self, key, prototype, byte_range)
287 try:
288 if byte_range is None:
--> 289 value = prototype.buffer.from_bytes(await self.fs._cat_file(path))
290 elif isinstance(byte_range, RangeByteRequest):
291 value = prototype.buffer.from_bytes(
292 await self.fs._cat_file(
293 path,
(...) 296 )
297 )
File ~/micromamba/envs/po-cookbook-dev/lib/python3.14/site-packages/gcsfs/core.py:1120, in GCSFileSystem._cat_file(self, path, start, end, **kwargs)
1118 else:
1119 head = {}
-> 1120 headers, out = await self._call("GET", u2, headers=head)
1121 return out
File ~/micromamba/envs/po-cookbook-dev/lib/python3.14/site-packages/gcsfs/core.py:483, in GCSFileSystem._call(self, method, path, json_out, info_out, *args, **kwargs)
479 async def _call(
480 self, method, path, *args, json_out=False, info_out=False, **kwargs
481 ):
482 logger.debug(f"{method.upper()}: {path}, {args}, {kwargs.get('headers')}")
--> 483 status, headers, info, contents = await self._request(
484 method, path, *args, **kwargs
485 )
486 if json_out:
487 return json.loads(contents)
File ~/micromamba/envs/po-cookbook-dev/lib/python3.14/site-packages/decorator.py:224, in decorate.<locals>.fun(*args, **kw)
222 if not kwsyntax:
223 args, kw = fix(args, kw, sig)
--> 224 return await caller(func, *(extras + args), **kw)
File ~/micromamba/envs/po-cookbook-dev/lib/python3.14/site-packages/gcsfs/retry.py:135, in retry_request(func, retries, *args, **kwargs)
133 if retry > 0:
134 await asyncio.sleep(min(random.random() + 2 ** (retry - 1), 32))
--> 135 return await func(*args, **kwargs)
136 except (
137 HttpError,
138 requests.exceptions.RequestException,
(...) 141 aiohttp.client_exceptions.ClientError,
142 ) as e:
143 if (
144 isinstance(e, HttpError)
145 and e.code == 400
146 and "requester pays" in e.message
147 ):
File ~/micromamba/envs/po-cookbook-dev/lib/python3.14/site-packages/gcsfs/core.py:476, in GCSFileSystem._request(self, method, path, headers, json, data, *args, **kwargs)
473 info = r.request_info # for debug only
474 contents = await r.read()
--> 476 validate_response(status, contents, path, args)
477 return status, headers, info, contents
File ~/micromamba/envs/po-cookbook-dev/lib/python3.14/site-packages/gcsfs/retry.py:120, in validate_response(status, content, path, args)
118 raise requests.exceptions.ProxyError()
119 elif "invalid" in str(msg):
--> 120 raise ValueError(f"Bad Request: {path}\n{msg}")
121 elif error and not isinstance(error, str):
122 raise HttpError(error)
ValueError: Bad Request: https://storage.googleapis.com/download/storage/v1/b/pangeo-cesm-mom6/o/.zgroup?alt=media
User project specified in the request is invalid.
Visualize SST Data with Holoviews and Datashader¶
The cells below show how to interactively explore the dataset.
# Wrap the SST variable in a HoloViews Dataset keyed on time, geolon and geolat.
sst_ds = hv.Dataset(ds['SST'], kdims=['time', 'geolon', 'geolat'])
# View it as QuadMesh elements over (geolon, geolat); `time` is the key
# dimension left over, and dynamic=True is passed so HoloViews handles it
# dynamically (presumably one frame rendered per selected time -- confirm).
sst = sst_ds.to(hv.QuadMesh, kdims=["geolon", "geolat"], dynamic=True)
%opts RGB [width=900 height=600]
datashade(sst, precompute=True, cmap=plt.cm.RdBu_r)
Visualize SSS Data with Holoviews and Datashader¶
# Wrap the SSS variable in a HoloViews Dataset keyed on time, geolon and geolat.
sss_ds = hv.Dataset(ds['SSS'], kdims=['time', 'geolon', 'geolat'])
# Build the QuadMesh view from the SSS dataset. The original built it from
# `sst_ds`, so the "SSS" plot silently displayed SST instead.
sss = sss_ds.to(hv.QuadMesh, kdims=["geolon", "geolat"], dynamic=True)
%opts RGB [width=900 height=600]
datashade(sss, precompute=True, cmap=plt.cm.Spectral_r)
Create and Connect to Dask Distributed Cluster¶
This will launch a cluster of virtual machines in the cloud.
from dask.distributed import Client
from dask_gateway import GatewayCluster
# Request a new Dask Gateway cluster using the gateway's default configuration.
cluster = GatewayCluster()
# Enable adaptive scaling, bounded between 1 and 10 (workers, per the
# dask-gateway API -- the cluster grows and shrinks with load inside that range).
cluster.adapt(minimum=1, maximum=10)
cluster
👆 Don’t forget to click this link to get the cluster dashboard
# Connect a distributed Client to the gateway cluster created above so that
# subsequent dask computations are submitted to it.
client = Client(cluster)
client
Data reduction¶
Here we make a data reduction by taking the time mean of SST and SSS. This demonstrates how the cluster distributes the reads from storage.
SST_mean = ds.SST.mean(dim=('time'))
SST_meanSSS_mean = ds.SSS.mean(dim=('time'))
SSS_mean%time SST_mean.load()# plot mean SST
qm = hv.QuadMesh((ds.geolon.values, ds.geolat.values, SST_mean))
datashade(qm, precompute=True, cmap=plt.cm.RdBu_r)%time SSS_mean.load()# plot mean SSS
qm = hv.QuadMesh((ds.geolon.values, ds.geolat.values, SSS_mean))
datashade(qm, precompute=True, cmap=plt.cm.Spectral_r)