MOM6/CESM Ocean Model Analysis
This notebook shows how to load and analyze ocean data from an out-of-the-box MOM6/CESM G-case simulation (coupled ocean/sea ice).
NOTE: MOM6/CESM is not ready to be used for research.
%matplotlib inline
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import holoviews as hv
import datashader
from holoviews.operation.datashader import regrid, shade, datashade
hv.extension('bokeh', width=100)
Load MOM6/CESM Data
This data is stored in xarray-zarr format in Google Cloud Storage. This format is optimized for parallel distributed reads from within the cloud environment.
The full data catalog is located here: https://catalog.pangeo.io/browse/master/ocean/
import intake
# Open the Pangeo ocean intake catalog from its YAML definition on GitHub.
cat = intake.open_catalog("https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/ocean.yaml")
# Open the "cesm_mom6_example" catalog entry as `ds`, which the later cells read from.
# NOTE(review): the recorded run fails on this line with
# "ZarrArraySource.__init__() got an unexpected keyword argument 'consolidated'";
# presumably the environment resolves the entry to intake's plain zarr source
# rather than the xarray-aware driver -- confirm the installed plugins.
ds = cat["cesm_mom6_example"].to_dask()
ds
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[2], line 3
1 import intake
2 cat = intake.open_catalog("https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/ocean.yaml")
----> 3 ds = cat["cesm_mom6_example"].to_dask()
4 ds
File ~/miniconda3/envs/po-cookbook-dev/lib/python3.10/site-packages/intake/catalog/base.py:472, in Catalog.__getitem__(self, key)
463 """Return a catalog entry by name.
464
465 Can also use attribute syntax, like ``cat.entry_name``, or
(...)
468 cat['name1', 'name2']
469 """
470 if not isinstance(key, list) and key in self:
471 # triggers reload_on_change
--> 472 s = self._get_entry(key)
473 if s.container == "catalog":
474 s.name = key
File ~/miniconda3/envs/po-cookbook-dev/lib/python3.10/site-packages/intake/catalog/utils.py:43, in reload_on_change.<locals>.wrapper(self, *args, **kwargs)
40 @functools.wraps(f)
41 def wrapper(self, *args, **kwargs):
42 self.reload()
---> 43 return f(self, *args, **kwargs)
File ~/miniconda3/envs/po-cookbook-dev/lib/python3.10/site-packages/intake/catalog/base.py:355, in Catalog._get_entry(self, name)
353 ups = [up for name, up in self.user_parameters.items() if name not in up_names]
354 entry._user_parameters = ups + (entry._user_parameters or [])
--> 355 return entry()
File ~/miniconda3/envs/po-cookbook-dev/lib/python3.10/site-packages/intake/catalog/entry.py:60, in CatalogEntry.__call__(self, persist, **kwargs)
58 def __call__(self, persist=None, **kwargs):
59 """Instantiate DataSource with given user arguments"""
---> 60 s = self.get(**kwargs)
61 s._entry = self
62 s._passed_kwargs = list(kwargs)
File ~/miniconda3/envs/po-cookbook-dev/lib/python3.10/site-packages/intake/catalog/local.py:313, in LocalCatalogEntry.get(self, **user_parameters)
310 return self._default_source
312 plugin, open_args = self._create_open_args(user_parameters)
--> 313 data_source = plugin(**open_args)
314 data_source.catalog_object = self._catalog
315 data_source.name = self.name
TypeError: ZarrArraySource.__init__() got an unexpected keyword argument 'consolidated'
Visualize SST Data with Holoviews and Datashader
The cells below show how to interactively explore the dataset.
# Wrap the SST variable in a HoloViews Dataset with time, geolon and geolat as key dimensions.
sst_ds = hv.Dataset(ds['SST'], kdims=['time', 'geolon', 'geolat'])
# Turn it into a dynamic QuadMesh over (geolon, geolat); time is the remaining key dimension.
sst = sst_ds.to(hv.QuadMesh, kdims=["geolon", "geolat"], dynamic=True)
# Set the size of the RGB output produced by datashade.
%opts RGB [width=900 height=600]
# Rasterise and colour the mesh with the red/blue diverging colormap.
datashade(sst, precompute=True, cmap=plt.cm.RdBu_r)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[3], line 1
----> 1 sst_ds = hv.Dataset(ds['SST'], kdims=['time', 'geolon', 'geolat'])
2 sst = sst_ds.to(hv.QuadMesh, kdims=["geolon", "geolat"], dynamic=True)
3 get_ipython().run_line_magic('opts', 'RGB [width=900 height=600]')
NameError: name 'ds' is not defined
Visualize SSS Data with Holoviews and Datashader
sss_ds = hv.Dataset(ds['SSS'], kdims=['time', 'geolon', 'geolat'])
sss = sst_ds.to(hv.QuadMesh, kdims=["geolon", "geolat"], dynamic=True)
%opts RGB [width=900 height=600]
datashade(sss, precompute=True, cmap=plt.cm.Spectral_r)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[4], line 1
----> 1 sss_ds = hv.Dataset(ds['SSS'], kdims=['time', 'geolon', 'geolat'])
2 sss = sst_ds.to(hv.QuadMesh, kdims=["geolon", "geolat"], dynamic=True)
3 get_ipython().run_line_magic('opts', 'RGB [width=900 height=600]')
NameError: name 'ds' is not defined
Create and Connect to Dask Distributed Cluster
This will launch a cluster of virtual machines in the cloud.
from dask.distributed import Client
from dask_gateway import GatewayCluster
# Start a cluster through Dask Gateway. No address is passed here, so it must be
# available in the dask configuration ("gateway.address"); otherwise this raises
# ValueError: "No dask-gateway address provided or found in configuration".
cluster = GatewayCluster()
# Enable adaptive scaling with a minimum of 1 and a maximum of 10.
cluster.adapt(minimum=1, maximum=10)
cluster
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[5], line 3
1 from dask.distributed import Client
2 from dask_gateway import GatewayCluster
----> 3 cluster = GatewayCluster()
4 cluster.adapt(minimum=1, maximum=10)
5 cluster
File ~/miniconda3/envs/po-cookbook-dev/lib/python3.10/site-packages/dask_gateway/client.py:816, in GatewayCluster.__init__(self, address, proxy_address, public_address, auth, cluster_options, shutdown_on_close, asynchronous, loop, **kwargs)
804 def __init__(
805 self,
806 address=None,
(...)
814 **kwargs,
815 ):
--> 816 self._init_internal(
817 address=address,
818 proxy_address=proxy_address,
819 public_address=public_address,
820 auth=auth,
821 cluster_options=cluster_options,
822 cluster_kwargs=kwargs,
823 shutdown_on_close=shutdown_on_close,
824 asynchronous=asynchronous,
825 loop=loop,
826 )
File ~/miniconda3/envs/po-cookbook-dev/lib/python3.10/site-packages/dask_gateway/client.py:889, in GatewayCluster._init_internal(self, address, proxy_address, public_address, auth, cluster_options, cluster_kwargs, shutdown_on_close, asynchronous, loop, name)
885 self.shutdown_on_close = shutdown_on_close
887 self._instances.add(self)
--> 889 self.gateway = Gateway(
890 address=address,
891 proxy_address=proxy_address,
892 public_address=public_address,
893 auth=auth,
894 asynchronous=asynchronous,
895 loop=loop,
896 )
898 # Internals
899 self.scheduler_info = {}
File ~/miniconda3/envs/po-cookbook-dev/lib/python3.10/site-packages/dask_gateway/client.py:282, in Gateway.__init__(self, address, proxy_address, public_address, auth, asynchronous, loop)
280 address = format_template(dask.config.get("gateway.address"))
281 if address is None:
--> 282 raise ValueError(
283 "No dask-gateway address provided or found in configuration"
284 )
285 address = address.rstrip("/")
287 if public_address is None:
ValueError: No dask-gateway address provided or found in configuration
👆 Don’t forget to click this link to get the cluster dashboard
# Connect a Dask client to the gateway cluster.
client = Client(cluster)
client
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[6], line 1
----> 1 client = Client(cluster)
2 client
NameError: name 'cluster' is not defined
Data reduction
Here we make a data reduction by taking the time mean of SST and SSS. This demonstrates how the cluster distributes the reads from storage.
# Average SST over the time dimension.
SST_mean = ds["SST"].mean(dim="time")
SST_mean
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[7], line 1
----> 1 SST_mean = ds.SST.mean(dim=('time'))
2 SST_mean
NameError: name 'ds' is not defined
# Average SSS over the time dimension.
SSS_mean = ds["SSS"].mean(dim="time")
SSS_mean
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[8], line 1
----> 1 SSS_mean = ds.SSS.mean(dim=('time'))
2 SSS_mean
NameError: name 'ds' is not defined
# Time how long loading the time-mean SST takes.
%time SST_mean.load()
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
File <timed eval>:1
NameError: name 'SST_mean' is not defined
# Plot the time-mean SST.
# Build a QuadMesh from the geolon/geolat coordinate arrays and the mean field.
qm = hv.QuadMesh((ds.geolon.values, ds.geolat.values, SST_mean))
# Rasterise and colour the mesh with the red/blue diverging colormap.
datashade(qm, precompute=True, cmap=plt.cm.RdBu_r)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[10], line 2
1 # plot mean SST
----> 2 qm = hv.QuadMesh((ds.geolon.values, ds.geolat.values, SST_mean))
3 datashade(qm, precompute=True, cmap=plt.cm.RdBu_r)
NameError: name 'ds' is not defined
# Time how long loading the time-mean SSS takes.
%time SSS_mean.load()
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
File <timed eval>:1
NameError: name 'SSS_mean' is not defined
# Plot the time-mean SSS.
# Build a QuadMesh from the geolon/geolat coordinate arrays and the mean field.
qm = hv.QuadMesh((ds.geolon.values, ds.geolat.values, SSS_mean))
# Rasterise and colour the mesh with the reversed Spectral colormap.
datashade(qm, precompute=True, cmap=plt.cm.Spectral_r)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[12], line 2
1 # plot mean SSS
----> 2 qm = hv.QuadMesh((ds.geolon.values, ds.geolat.values, SSS_mean))
3 datashade(qm, precompute=True, cmap=plt.cm.Spectral_r)
NameError: name 'ds' is not defined