MOM6/CESM Ocean Model Analysis

This notebook shows how to load and analyze ocean data from an out-of-the-box MOM6/CESM G-case simulation (coupled ocean/sea ice).

NOTE: MOM6/CESM is not ready to be used for research.

%matplotlib inline

import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import holoviews as hv
import datashader
from holoviews.operation.datashader import regrid, shade, datashade

# Activate the bokeh plotting backend for HoloViews in this notebook.
# NOTE(review): width=100 presumably sets the notebook width to 100% of the
# browser window -- confirm against the HoloViews extension docs.
hv.extension('bokeh', width=100)

Load MOM6/CESM Data

This data is stored in xarray-zarr format in Google Cloud Storage. This format is optimized for parallel distributed reads from within the cloud environment.

The full data catalog is located here: https://catalog.pangeo.io/browse/master/ocean/

import intake
# Open the Pangeo ocean intake catalog straight from GitHub.
cat = intake.open_catalog("https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/ocean.yaml")
# Instantiate the "cesm_mom6_example" entry and request it as a dask-backed
# object; `ds` is used by every later cell in this notebook.
# NOTE(review): the recorded output below shows this line failing with
# "ZarrArraySource.__init__() got an unexpected keyword argument
# 'consolidated'" -- looks like a mismatch between the catalog entry and the
# installed intake driver; confirm and pin/upgrade as needed.
ds = cat["cesm_mom6_example"].to_dask()
# Display the dataset summary as the cell output.
ds
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[2], line 3
      1 import intake
      2 cat = intake.open_catalog("https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/ocean.yaml")
----> 3 ds = cat["cesm_mom6_example"].to_dask()
      4 ds

File ~/miniconda3/envs/po-cookbook-dev/lib/python3.10/site-packages/intake/catalog/base.py:472, in Catalog.__getitem__(self, key)
    463 """Return a catalog entry by name.
    464 
    465 Can also use attribute syntax, like ``cat.entry_name``, or
   (...)
    468 cat['name1', 'name2']
    469 """
    470 if not isinstance(key, list) and key in self:
    471     # triggers reload_on_change
--> 472     s = self._get_entry(key)
    473     if s.container == "catalog":
    474         s.name = key

File ~/miniconda3/envs/po-cookbook-dev/lib/python3.10/site-packages/intake/catalog/utils.py:43, in reload_on_change.<locals>.wrapper(self, *args, **kwargs)
     40 @functools.wraps(f)
     41 def wrapper(self, *args, **kwargs):
     42     self.reload()
---> 43     return f(self, *args, **kwargs)

File ~/miniconda3/envs/po-cookbook-dev/lib/python3.10/site-packages/intake/catalog/base.py:355, in Catalog._get_entry(self, name)
    353 ups = [up for name, up in self.user_parameters.items() if name not in up_names]
    354 entry._user_parameters = ups + (entry._user_parameters or [])
--> 355 return entry()

File ~/miniconda3/envs/po-cookbook-dev/lib/python3.10/site-packages/intake/catalog/entry.py:60, in CatalogEntry.__call__(self, persist, **kwargs)
     58 def __call__(self, persist=None, **kwargs):
     59     """Instantiate DataSource with given user arguments"""
---> 60     s = self.get(**kwargs)
     61     s._entry = self
     62     s._passed_kwargs = list(kwargs)

File ~/miniconda3/envs/po-cookbook-dev/lib/python3.10/site-packages/intake/catalog/local.py:313, in LocalCatalogEntry.get(self, **user_parameters)
    310     return self._default_source
    312 plugin, open_args = self._create_open_args(user_parameters)
--> 313 data_source = plugin(**open_args)
    314 data_source.catalog_object = self._catalog
    315 data_source.name = self.name

TypeError: ZarrArraySource.__init__() got an unexpected keyword argument 'consolidated'

Visualize SST Data with Holoviews and Datashader

The cells below show how to interactively explore the dataset.

# Wrap the SST variable in a HoloViews Dataset keyed on time and the
# geolon/geolat coordinates.
sst_ds = hv.Dataset(ds['SST'], kdims=['time', 'geolon', 'geolat'])
# Build QuadMesh elements over (geolon, geolat), leaving `time` as the
# remaining key dimension.
# NOTE(review): dynamic=True presumably makes this lazily evaluated per time
# step -- confirm against the HoloViews Dataset.to docs.
sst = sst_ds.to(hv.QuadMesh, kdims=["geolon", "geolat"], dynamic=True)
# Set the size of the RGB output that datashade produces.
%opts RGB [width=900 height=600] 
# Rasterize/shade the mesh with datashader using the reversed red-blue
# matplotlib colormap.
datashade(sst, precompute=True, cmap=plt.cm.RdBu_r)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[3], line 1
----> 1 sst_ds = hv.Dataset(ds['SST'], kdims=['time', 'geolon', 'geolat'])
      2 sst = sst_ds.to(hv.QuadMesh, kdims=["geolon", "geolat"], dynamic=True)
      3 get_ipython().run_line_magic('opts', 'RGB [width=900 height=600]')

NameError: name 'ds' is not defined

Visualize SSS Data with Holoviews and Datashader

sss_ds = hv.Dataset(ds['SSS'], kdims=['time', 'geolon', 'geolat'])
sss = sst_ds.to(hv.QuadMesh, kdims=["geolon", "geolat"], dynamic=True)
%opts RGB [width=900 height=600] 
datashade(sss, precompute=True, cmap=plt.cm.Spectral_r)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[4], line 1
----> 1 sss_ds = hv.Dataset(ds['SSS'], kdims=['time', 'geolon', 'geolat'])
      2 sss = sst_ds.to(hv.QuadMesh, kdims=["geolon", "geolat"], dynamic=True)
      3 get_ipython().run_line_magic('opts', 'RGB [width=900 height=600]')

NameError: name 'ds' is not defined

Create and Connect to Dask Distributed Cluster

This will launch a cluster of virtual machines in the cloud.

from dask.distributed import Client
from dask_gateway import GatewayCluster
# Request a new cluster through Dask Gateway using the default configuration.
# NOTE(review): the recorded output below shows this raising
# "ValueError: No dask-gateway address provided or found in configuration",
# so this cell needs an environment where a gateway address is configured.
cluster = GatewayCluster()
# Enable adaptive scaling between the given minimum and maximum.
cluster.adapt(minimum=1, maximum=10)
# Display the cluster as the cell output.
cluster
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[5], line 3
      1 from dask.distributed import Client
      2 from dask_gateway import GatewayCluster
----> 3 cluster = GatewayCluster()
      4 cluster.adapt(minimum=1, maximum=10)
      5 cluster

File ~/miniconda3/envs/po-cookbook-dev/lib/python3.10/site-packages/dask_gateway/client.py:816, in GatewayCluster.__init__(self, address, proxy_address, public_address, auth, cluster_options, shutdown_on_close, asynchronous, loop, **kwargs)
    804 def __init__(
    805     self,
    806     address=None,
   (...)
    814     **kwargs,
    815 ):
--> 816     self._init_internal(
    817         address=address,
    818         proxy_address=proxy_address,
    819         public_address=public_address,
    820         auth=auth,
    821         cluster_options=cluster_options,
    822         cluster_kwargs=kwargs,
    823         shutdown_on_close=shutdown_on_close,
    824         asynchronous=asynchronous,
    825         loop=loop,
    826     )

File ~/miniconda3/envs/po-cookbook-dev/lib/python3.10/site-packages/dask_gateway/client.py:889, in GatewayCluster._init_internal(self, address, proxy_address, public_address, auth, cluster_options, cluster_kwargs, shutdown_on_close, asynchronous, loop, name)
    885 self.shutdown_on_close = shutdown_on_close
    887 self._instances.add(self)
--> 889 self.gateway = Gateway(
    890     address=address,
    891     proxy_address=proxy_address,
    892     public_address=public_address,
    893     auth=auth,
    894     asynchronous=asynchronous,
    895     loop=loop,
    896 )
    898 # Internals
    899 self.scheduler_info = {}

File ~/miniconda3/envs/po-cookbook-dev/lib/python3.10/site-packages/dask_gateway/client.py:282, in Gateway.__init__(self, address, proxy_address, public_address, auth, asynchronous, loop)
    280     address = format_template(dask.config.get("gateway.address"))
    281 if address is None:
--> 282     raise ValueError(
    283         "No dask-gateway address provided or found in configuration"
    284     )
    285 address = address.rstrip("/")
    287 if public_address is None:

ValueError: No dask-gateway address provided or found in configuration

👆 Don’t forget to click this link to get the cluster dashboard

# Connect a dask.distributed Client to the cluster created in the previous
# cell.
client = Client(cluster)
# Display the client as the cell output.
client
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[6], line 1
----> 1 client = Client(cluster)
      2 client

NameError: name 'cluster' is not defined

Data reduction

Here we make a data reduction by taking the time mean of SST and SSS. This demonstrates how the cluster distributes the reads from storage.

# Average SST over the `time` dimension; the result is loaded explicitly
# with .load() in a later cell.
SST_mean = ds["SST"].mean(dim="time")
SST_mean
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[7], line 1
----> 1 SST_mean = ds.SST.mean(dim=('time'))
      2 SST_mean

NameError: name 'ds' is not defined
# Average SSS over the `time` dimension; the result is loaded explicitly
# with .load() in a later cell.
SSS_mean = ds["SSS"].mean(dim="time")
SSS_mean
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[8], line 1
----> 1 SSS_mean = ds.SSS.mean(dim=('time'))
      2 SSS_mean

NameError: name 'ds' is not defined
# Load the time-mean SST into memory and report how long it takes.
%time SST_mean.load()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
File <timed eval>:1

NameError: name 'SST_mean' is not defined
# Plot the time-mean SST: build a QuadMesh from the geolon/geolat coordinate
# arrays and the mean field, then shade it with datashader using the
# reversed red-blue matplotlib colormap.
qm = hv.QuadMesh((ds.geolon.values, ds.geolat.values, SST_mean))
datashade(qm, precompute=True, cmap=plt.cm.RdBu_r)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[10], line 2
      1 # plot mean SST
----> 2 qm = hv.QuadMesh((ds.geolon.values, ds.geolat.values, SST_mean))
      3 datashade(qm, precompute=True, cmap=plt.cm.RdBu_r)

NameError: name 'ds' is not defined
# Load the time-mean SSS into memory and report how long it takes.
%time SSS_mean.load()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
File <timed eval>:1

NameError: name 'SSS_mean' is not defined
# Plot the time-mean SSS: build a QuadMesh from the geolon/geolat coordinate
# arrays and the mean field, then shade it with datashader using the
# reversed Spectral matplotlib colormap.
# NOTE: this rebinds `qm`, replacing the SST mesh from the earlier cell.
qm = hv.QuadMesh((ds.geolon.values, ds.geolat.values, SSS_mean))
datashade(qm, precompute=True, cmap=plt.cm.Spectral_r)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[12], line 2
      1 # plot mean SSS
----> 2 qm = hv.QuadMesh((ds.geolon.values, ds.geolat.values, SSS_mean))
      3 datashade(qm, precompute=True, cmap=plt.cm.Spectral_r)

NameError: name 'ds' is not defined