This notebook shows how to load and analyze ocean data from an out-of-the-box MOM6/CESM G-case simulation (coupled ocean and sea ice).
NOTE: MOM6/CESM is not ready to be used for research.
%matplotlib inline
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import holoviews as hv
import datashader
from holoviews.operation.datashader import regrid, shade, datashade
hv.extension('bokeh', width=100)
Load MOM6/CESM Data
The data are stored in Zarr format in Google Cloud Storage, a layout optimized for parallel, distributed reads from within the cloud environment.
The full data catalog lives in the pangeo-data/pangeo-datastore repository on GitHub; the ocean.yaml catalog from it is loaded below.
import intake
cat = intake.open_catalog("https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/ocean.yaml")
ds = cat["cesm_mom6_example"].to_dask()
ds
Running this cell currently fails inside gcsfs with:

ValueError: Bad Request: https://storage.googleapis.com/download/storage/v1/b/pangeo-cesm-mom6/o/.zmetadata?alt=media
User project specified in the request is invalid.

The read of the consolidated Zarr metadata (.zmetadata) in the pangeo-cesm-mom6 bucket is rejected, which suggests the bucket now requires a valid Google Cloud billing project (requester-pays access) rather than allowing anonymous reads.
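A possible workaround, assuming the bucket is indeed requester-pays and you have a Google Cloud project that can be billed for the reads ("your-gcp-project" below is a placeholder, not a real project), is to bypass intake and open the Zarr store directly, passing storage options through to gcsfs:

import xarray as xr

# Hypothetical workaround: open the store directly and bill the reads to
# your own GCP project via gcsfs's requester_pays support.
ds = xr.open_zarr(
    "gs://pangeo-cesm-mom6",
    consolidated=True,
    storage_options={"requester_pays": True, "project": "your-gcp-project"},
)
ds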
Visualize SST Data with Holoviews and Datashader
The cells below show how to interactively explore the dataset.
# Wrap SST in a holoviews Dataset and build a lazily evaluated QuadMesh
# over the model's curvilinear grid (geolon/geolat)
sst_ds = hv.Dataset(ds['SST'], kdims=['time', 'geolon', 'geolat'])
sst = sst_ds.to(hv.QuadMesh, kdims=["geolon", "geolat"], dynamic=True)
%opts RGB [width=900 height=600]
datashade(sst, precompute=True, cmap=plt.cm.RdBu_r)
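As an aside, rasterize (from the same holoviews.operation.datashader module, not used in the original notebook) is an alternative to datashade that keeps the aggregated values as numbers rather than pre-shaded RGB pixels, so the bokeh backend can attach a colorbar and hover read-outs; a minimal sketch:

from holoviews.operation.datashader import rasterize

# Alternative to datashade: rasterize keeps the SST values, so bokeh can
# render a colorbar and hover tooltips on the aggregated image.
rasterize(sst, precompute=True).opts(cmap='RdBu_r', colorbar=True, width=900, height=600)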
Visualize SSS Data with Holoviews and Datashader
# Same pattern for sea surface salinity
sss_ds = hv.Dataset(ds['SSS'], kdims=['time', 'geolon', 'geolat'])
sss = sss_ds.to(hv.QuadMesh, kdims=["geolon", "geolat"], dynamic=True)
%opts RGB [width=900 height=600]
datashade(sss, precompute=True, cmap=plt.cm.Spectral_r)
Create and Connect to Dask Distributed Cluster
This will launch a cluster of virtual machines in the cloud.
from dask.distributed import Client
from dask_gateway import GatewayCluster
cluster = GatewayCluster()
cluster.adapt(minimum=1, maximum=10)
cluster
👆 Don’t forget to click this link to get the cluster dashboard
client = Client(cluster)
client
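If you are not running on a cloud deployment that provides Dask Gateway, a minimal local substitute (a sketch, sized for a typical laptop) is:

from dask.distributed import Client, LocalCluster

# Fallback: run the same workflow on a local cluster instead of cloud
# virtual machines; adjust n_workers to your machine.
cluster = LocalCluster(n_workers=4)
client = Client(cluster)
client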
Data reduction
Here we perform a data reduction by taking the time mean of SST and SSS. This demonstrates how the cluster distributes the reads from storage.
SST_mean = ds.SST.mean(dim='time')
SST_mean
SSS_mean = ds.SSS.mean(dim='time')
SSS_mean
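As a side note, the two lazy means can also be evaluated in a single call with dask.compute, letting the scheduler pipeline the storage reads for both variables (sst_mean_c and sss_mean_c are hypothetical names, kept separate so the %time cells below still operate on the lazy versions):

import dask

# Evaluate both reductions in one graph execution.
sst_mean_c, sss_mean_c = dask.compute(SST_mean, SSS_mean)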
%time SST_mean.load()
# plot mean SST
qm = hv.QuadMesh((ds.geolon.values, ds.geolat.values, SST_mean))
datashade(qm, precompute=True, cmap=plt.cm.RdBu_r)
%time SSS_mean.load()
# plot mean SSS
qm = hv.QuadMesh((ds.geolon.values, ds.geolat.values, SSS_mean))
datashade(qm, precompute=True, cmap=plt.cm.Spectral_r)