This notebook shows how to load and analyze ocean data from an out-of-the-box MOM6/CESM G-case simulation (coupled ocean/sea-ice configuration).
NOTE: MOM6/CESM is not ready to be used for research.
%matplotlib inline
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import holoviews as hv
import datashader
from holoviews.operation.datashader import regrid, shade, datashade
hv.extension('bokeh', width=100)
Load MOM6/CESM Data
This data is stored in Zarr format in Google Cloud Storage, a layout optimized for parallel, distributed reads from within the cloud environment.
The full data catalog lives in the pangeo-data/pangeo-datastore GitHub repository; its ocean.yaml catalog file is loaded below.
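Intake catalogs are iterable, so you can list the available entries before picking one. A small sketch, opening the same catalog as the cell below:
import intake

cat = intake.open_catalog("https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/ocean.yaml")
list(cat)  # names of the datasets this catalog exposes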
import intake
cat = intake.open_catalog("https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/ocean.yaml")
ds = cat["cesm_mom6_example"].to_dask()
ds
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[2], line 3
      1 import intake
      2 cat = intake.open_catalog("https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/ocean.yaml")
----> 3 ds = cat["cesm_mom6_example"].to_dask()
      4 ds

[traceback through intake, xarray, zarr, and gcsfs internals elided]

ValueError: Bad Request: https://storage.googleapis.com/download/storage/v1/b/pangeo-cesm-mom6/o/zarr.json?alt=media
User project specified in the request is invalid.
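The error indicates that the gs://pangeo-cesm-mom6 bucket rejects anonymous reads (it appears to require requester-pays billing), so the catalog entry cannot be loaded as-is. A sketch of a possible workaround, assuming you have a Google Cloud project you can bill ("your-gcp-project" below is a placeholder) and that the store sits at the bucket root, as the URL in the error suggests:
import xarray as xr

# Hypothetical workaround: open the Zarr store directly with requester-pays
# billing. "your-gcp-project" is a placeholder; the gs://pangeo-cesm-mom6
# path is inferred from the error message above.
ds = xr.open_zarr(
    "gs://pangeo-cesm-mom6",
    consolidated=True,
    storage_options={"requester_pays": True, "project": "your-gcp-project"},
)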
Visualize SST Data with Holoviews and Datashader
The cells below show how to interactively explore the dataset.
# Wrap SST in a HoloViews Dataset, then build a lazy QuadMesh per time step
sst_ds = hv.Dataset(ds['SST'], kdims=['time', 'geolon', 'geolat'])
sst = sst_ds.to(hv.QuadMesh, kdims=["geolon", "geolat"], dynamic=True)
%opts RGB [width=900 height=600]
datashade(sst, precompute=True, cmap=plt.cm.RdBu_r)
Visualize SSS Data with Holoviews and Datashader
# Same pattern for SSS
sss_ds = hv.Dataset(ds['SSS'], kdims=['time', 'geolon', 'geolat'])
sss = sss_ds.to(hv.QuadMesh, kdims=["geolon", "geolat"], dynamic=True)
%opts RGB [width=900 height=600]
datashade(sss, precompute=True, cmap=plt.cm.Spectral_r)
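Note that datashade returns a pre-colored RGB image, so the Bokeh colorbar and hover values are lost. A sketch of an alternative using rasterize (not among the imports above), which aggregates with Datashader but leaves the colormapping to Bokeh:
from holoviews.operation.datashader import rasterize

# rasterize aggregates the QuadMesh into an Image but lets Bokeh apply the
# colormap, so a colorbar and hover values remain available
rasterize(sst, precompute=True).opts(
    cmap=plt.cm.RdBu_r, width=900, height=600, colorbar=True
)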
Create and Connect to Dask Distributed Cluster
This will launch a cluster of virtual machines in the cloud.
from dask.distributed import Client
from dask_gateway import GatewayCluster
cluster = GatewayCluster()
cluster.adapt(minimum=1, maximum=10)
cluster
👆 Don’t forget to click this link to get the cluster dashboard
client = Client(cluster)
client
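If you are not running on a hub that provides Dask Gateway, a local cluster is a workable stand-in. A sketch; this schedules work on your own machine's cores rather than launching cloud VMs:
from dask.distributed import Client, LocalCluster

# Local fallback: no Gateway available, use this machine's cores
cluster = LocalCluster()
client = Client(cluster)
client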
Data reduction
Here we reduce the data by taking the time mean of SST and SSS. This demonstrates how the cluster distributes the reads from storage.
SST_mean = ds.SST.mean(dim='time')
SST_mean
SSS_mean = ds.SSS.mean(dim='time')
SSS_mean
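A plain mean weights every time record equally. If the output is monthly, a more careful reduction weights each record by the length of its month; a sketch using xarray's weighted reductions, assuming the time coordinate decodes to datetime-like values (which may not hold for this dataset):
# Hypothetical refinement: weight each monthly record by its number of days;
# requires a decoded datetime/cftime "time" coordinate
weights = ds.time.dt.days_in_month
SST_wmean = ds.SST.weighted(weights).mean(dim='time')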
%time SST_mean.load()
# plot mean SST
qm = hv.QuadMesh((ds.geolon.values, ds.geolat.values, SST_mean))
datashade(qm, precompute=True, cmap=plt.cm.RdBu_r)
%time SSS_mean.load()
# plot mean SSS
qm = hv.QuadMesh((ds.geolon.values, ds.geolat.values, SSS_mean))
datashade(qm, precompute=True, cmap=plt.cm.Spectral_r)