Overview¶
This notebook uses techniques similar to those in the Equilibrium Climate Sensitivity (ECS) notebook; please refer to that notebook for a more detailed walkthrough.
Prerequisites¶
| Concepts | Importance | Notes |
| --- | --- | --- |
| Understanding of NetCDF | Helpful | Familiarity with metadata structure |
| Seaborn | Helpful | |
- Time to learn: 10 minutes
Imports¶
from matplotlib import pyplot as plt
import xarray as xr
import numpy as np
import dask
from dask.diagnostics import progress
from tqdm.autonotebook import tqdm
import intake
import fsspec
import seaborn as sns
%matplotlib inline
col = intake.open_esm_datastore("https://storage.googleapis.com/cmip6/pangeo-cmip6.json")
col
[eid for eid in col.df['experiment_id'].unique() if 'ssp' in eid]
['ssp585',
'ssp245',
'ssp370SST-lowCH4',
'ssp370-lowNTCF',
'ssp370SST-lowNTCF',
'ssp370SST-ssp126Lu',
'ssp370SST',
'ssp370pdSST',
'ssp119',
'ssp370',
'esm-ssp585-ssp126Lu',
'ssp126-ssp370Lu',
'ssp370-ssp126Lu',
'ssp126',
'esm-ssp585',
'ssp245-GHG',
'ssp245-nat',
'ssp460',
'ssp434',
'ssp534-over',
'ssp245-stratO3',
'ssp245-aer',
'ssp245-cov-modgreen',
'ssp245-cov-fossil',
'ssp245-cov-strgreen',
'ssp245-covid',
'ssp585-bgc']
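Each of these experiment labels is backed by some number of stores in the catalog. To see roughly how much data each one has, you can count entries per experiment_id in the catalog's underlying pandas DataFrame (a quick sketch; ssp_counts is an illustrative name, and col.df is the DataFrame behind the datastore):
ssp_counts = (col.df[col.df['experiment_id'].str.contains('ssp')]
              .groupby('experiment_id').size()
              .sort_values(ascending=False))
ssp_counts.head()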
There is currently a significant amount of data for these three runs:
expts = ['historical', 'ssp245', 'ssp585']
query = dict(
experiment_id=expts,
table_id='Amon',
variable_id=['tas'],
member_id = 'r1i1p1f1',
)
col_subset = col.search(require_all_on=["source_id"], **query)
col_subset.df.groupby("source_id")[
["experiment_id", "variable_id", "table_id"]
].nunique()
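Because the search used require_all_on=["source_id"], only models that provide all three experiments remain in the subset. A quick sanity check (a sketch; counts is an illustrative name):
counts = col_subset.df.groupby('source_id')['experiment_id'].nunique()
assert (counts == len(expts)).all()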
def drop_all_bounds(ds):
    """Drop bounds coordinates like 'time_bounds' or 'lat_bnds',
    which can differ between datasets and break alignment."""
    drop_vars = [vname for vname in ds.coords
                 if ('_bounds' in vname) or ('_bnds' in vname)]
    return ds.drop_vars(drop_vars)
def open_dset(df):
    """Open a single Zarr store (one catalog row) as an xarray Dataset."""
    assert len(df) == 1
    ds = xr.open_zarr(fsspec.get_mapper(df.zstore.values[0]), consolidated=True)
    return drop_all_bounds(ds)

def open_delayed(df):
    """Defer the open so many stores can be opened lazily in parallel."""
    return dask.delayed(open_dset)(df)
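Before computing everything lazily, it can help to open a single dataset eagerly and inspect it (an illustrative check; it needs access to the Google Cloud CMIP6 store, and sample_ds is an assumed name):
# .iloc[[0]] keeps a one-row DataFrame, so the assertion in open_dset holds.
sample_ds = open_dset(col_subset.df.iloc[[0]])
sample_ds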
from collections import defaultdict
dsets = defaultdict(dict)
for group, df in col_subset.df.groupby(by=['source_id', 'experiment_id']):
dsets[group[0]][group[1]] = open_delayed(df)
dsets_ = dask.compute(dict(dsets))[0]
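Opening one Zarr store per model and experiment can take a while; an optional, equivalent form of the line above wraps the compute in a Dask progress bar (using the progress module imported earlier). A sketch:
# Optional: same compute, but reporting progress of the delayed opens.
with progress.ProgressBar():
    dsets_ = dask.compute(dict(dsets))[0]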
Calculate global means:
def get_lat_name(ds):
for lat_name in ['lat', 'latitude']:
if lat_name in ds.coords:
return lat_name
raise RuntimeError("Couldn't find a latitude coordinate")
def global_mean(ds):
lat = ds[get_lat_name(ds)]
weight = np.cos(np.deg2rad(lat))
weight /= weight.mean()
other_dims = set(ds.dims) - {'time'}
return (ds * weight).mean(other_dims)
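The cosine-of-latitude weight makes each grid cell count in proportion to its area, since meridians converge toward the poles; dividing by weight.mean() normalizes the weights so the weighted mean stays in the original units. A quick self-check on synthetic data (a sketch; the coordinate values are illustrative): a field that is 1 everywhere should have a global mean of exactly 1.
lat = np.linspace(-89.5, 89.5, 180)
lon = np.linspace(0.5, 359.5, 360)
ones = xr.Dataset(
    {'tas': (('time', 'lat', 'lon'), np.ones((2, 180, 360)))},
    coords={'time': [0, 1], 'lat': lat, 'lon': lon})
assert np.allclose(global_mean(ones)['tas'], 1.0)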
expt_da = xr.DataArray(expts, dims='experiment_id', name='experiment_id',
coords={'experiment_id': expts})
dsets_aligned = {}
for k, v in tqdm(dsets_.items()):
expt_dsets = v.values()
if any([d is None for d in expt_dsets]):
print(f"Missing experiment for {k}")
continue
for ds in expt_dsets:
ds.coords['year'] = ds.time.dt.year
# workaround for
# https://github.com/pydata/xarray/issues/2237#issuecomment-620961663
    dsets_ann_mean = [v[expt].pipe(global_mean)
                             .swap_dims({'time': 'year'})
                             .drop_vars('time')
                             .coarsen(year=12).mean()
                      for expt in expts]

    # concatenate the per-experiment annual means along a new
    # experiment_id dimension, aligned on year
    dsets_aligned[k] = xr.concat(dsets_ann_mean, join='outer',
                                 dim=expt_da)
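The swap_dims/coarsen chain converts the monthly series to annual means while sidestepping the xarray resampling issue linked in the comment; it assumes each dataset holds whole years of exactly 12 consecutive months. With a recent xarray, the same annual mean can be written more directly for any one of the monthly datasets (a sketch, not a drop-in replacement for the loop above; ds stands for a single dataset):
# Direct alternative: annual mean via a groupby on the time coordinate.
annual = global_mean(ds).groupby('time.year').mean('time')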
dsets_aligned_ = dask.compute(dsets_aligned)[0]
source_ids = list(dsets_aligned_.keys())
source_da = xr.DataArray(source_ids, dims='source_id', name='source_id',
coords={'source_id': source_ids})
big_ds = xr.concat([ds.reset_coords(drop=True)
for ds in dsets_aligned_.values()],
dim=source_da)
big_ds
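big_ds now holds tas along (source_id, experiment_id, year), so individual time series are easy to pull out, for example (an illustrative selection; any entry of source_ids works):
# One model's SSP5-8.5 global-mean temperature trajectory.
big_ds['tas'].sel(source_id=source_ids[0], experiment_id='ssp585')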
df_all = big_ds.sel(year=slice(1900, 2100)).to_dataframe().reset_index()
df_all.head()
# errorbar="sd" shades one standard deviation across models
# (seaborn >= 0.12; older versions use the deprecated ci="sd").
sns.relplot(data=df_all,
            x="year", y="tas", hue='experiment_id',
            kind="line", errorbar="sd", aspect=2);
Summary¶
In this notebook, we accessed data for the historical, SSP245, and SSP585 runs from a collection of CMIP6 models and plotted the multi-model mean global-average surface air temperature for each experiment.
What’s next?¶
We will use CMIP6 data to analyze precipitation intensity under a warming climate.
Resources and references¶
- Original notebook in the Pangeo Gallery by Henri Drake and Ryan Abernathey