Skip to article frontmatterSkip to article content

Global Mean Surface Temperature


Overview

This notebook uses similar techniques to the ECS notebook. Please refer to that notebook for details.

Prerequisites

| Concepts | Importance | Notes |
| --- | --- | --- |
| Understanding of NetCDF | Helpful | Familiarity with metadata structure |
| Seaborn | Helpful | |
  • Time to learn: 10 minutes

Imports

from matplotlib import pyplot as plt
import xarray as xr
import numpy as np
import dask
from dask.diagnostics import progress
from tqdm.autonotebook import tqdm
import intake
import fsspec
import seaborn as sns

%matplotlib inline
/tmp/ipykernel_4214/1335193511.py:6: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)
  from tqdm.autonotebook import tqdm
# Open the CMIP6 catalog as an intake-esm datastore; the JSON descriptor is
# served from Google Cloud Storage.
catalog_url = "https://storage.googleapis.com/cmip6/pangeo-cmip6.json"
col = intake.open_esm_datastore(catalog_url)
col
Loading...
# Show every experiment id in the catalog whose name contains 'ssp'.
experiment_ids = col.df['experiment_id'].unique()
[name for name in experiment_ids if 'ssp' in name]
['ssp585', 'ssp245', 'ssp370SST-lowCH4', 'ssp370-lowNTCF', 'ssp370SST-lowNTCF', 'ssp370SST-ssp126Lu', 'ssp370SST', 'ssp370pdSST', 'ssp119', 'ssp370', 'esm-ssp585-ssp126Lu', 'ssp126-ssp370Lu', 'ssp370-ssp126Lu', 'ssp126', 'esm-ssp585', 'ssp245-GHG', 'ssp245-nat', 'ssp460', 'ssp434', 'ssp534-over', 'ssp245-stratO3', 'ssp245-aer', 'ssp245-cov-modgreen', 'ssp245-cov-fossil', 'ssp245-cov-strgreen', 'ssp245-covid', 'ssp585-bgc']

There is currently a significant amount of data for these runs:

# Experiments compared in this notebook.
expts = ['historical', 'ssp245', 'ssp585']

# Catalog filter: the 'tas' variable from the 'Amon' table, restricted to a
# single ensemble member.
query = {
    'experiment_id': expts,
    'table_id': 'Amon',
    'variable_id': ['tas'],
    'member_id': 'r1i1p1f1',
}

# require_all_on asks the catalog to keep only those source_ids that match
# the query for all requested values (see the intake-esm search docs).
col_subset = col.search(require_all_on=["source_id"], **query)

# Per model, count how many distinct experiments/variables/tables matched.
by_model = col_subset.df.groupby("source_id")
by_model[["experiment_id", "variable_id", "table_id"]].nunique()
Loading...
def drop_all_bounds(ds):
    """Return *ds* with its bounds coordinates removed.

    A coordinate is treated as a bounds coordinate when its name contains
    ``_bounds`` or ``_bnds``.

    Parameters
    ----------
    ds
        Dataset-like object exposing ``coords`` (iterable of coordinate
        names) and ``drop_vars``.

    Returns
    -------
    Whatever ``ds.drop_vars`` returns for the selected names.
    """
    bounds_names = [
        vname for vname in ds.coords
        if '_bounds' in vname or '_bnds' in vname
    ]
    # Dataset.drop is deprecated in xarray; drop_vars is its replacement
    # for removing variables/coordinates by name.
    return ds.drop_vars(bounds_names)

def open_dset(df):
    """Open the zarr store referenced by a one-row catalog frame.

    ``df`` must contain exactly one row; its ``zstore`` value is turned into
    an fsspec mapper, opened with consolidated metadata, and returned with
    its bounds coordinates dropped.
    """
    assert len(df) == 1
    store_url = df.zstore.values[0]
    mapper = fsspec.get_mapper(store_url)
    dataset = xr.open_zarr(mapper, consolidated=True)
    return drop_all_bounds(dataset)

def open_delayed(df):
    """Return a dask.delayed call of ``open_dset(df)`` instead of running it now."""
    lazy_open = dask.delayed(open_dset)
    return lazy_open(df)

from collections import defaultdict
# Nested mapping: dsets[source_id][experiment_id] -> delayed dataset.
dsets = defaultdict(dict)

by_model_and_expt = col_subset.df.groupby(by=['source_id', 'experiment_id'])
for (source_id, experiment_id), df in by_model_and_expt:
    dsets[source_id][experiment_id] = open_delayed(df)

# Evaluate all delayed opens in a single dask.compute call; compute returns
# a tuple with one entry per argument, so unpack the single result.
(dsets_,) = dask.compute(dict(dsets))
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[6], line 1
----> 1 dsets_ = dask.compute(dict(dsets))[0]

File ~/micromamba/envs/cmip6-cookbook-dev/lib/python3.11/site-packages/dask/base.py:681, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
    678     expr = expr.optimize()
    679     keys = list(flatten(expr.__dask_keys__()))
--> 681     results = schedule(expr, keys, **kwargs)
    683 return repack(results)

Cell In[5], line 8, in open_dset(df)
      6 def open_dset(df):
      7     assert len(df) == 1
----> 8     ds = xr.open_zarr(fsspec.get_mapper(df.zstore.values[0]), consolidated=True)
      9     return drop_all_bounds(ds)

File ~/micromamba/envs/cmip6-cookbook-dev/lib/python3.11/site-packages/fsspec/mapping.py:249, in get_mapper(url, check, create, missing_exceptions, alternate_root, **kwargs)
    218 """Create key-value interface for given URL and options
    219 
    220 The URL will be of the form "protocol://location" and point to the root
   (...)    246 ``FSMap`` instance, the dict-like key-value store.
    247 """
    248 # Removing protocol here - could defer to each open() on the backend
--> 249 fs, urlpath = url_to_fs(url, **kwargs)
    250 root = alternate_root if alternate_root is not None else urlpath
    251 return FSMap(root, fs, check, create, missing_exceptions=missing_exceptions)

File ~/micromamba/envs/cmip6-cookbook-dev/lib/python3.11/site-packages/fsspec/core.py:415, in url_to_fs(url, **kwargs)
    413     inkwargs["fo"] = urls
    414 urlpath, protocol, _ = chain[0]
--> 415 fs = filesystem(protocol, **inkwargs)
    416 return fs, urlpath

File ~/micromamba/envs/cmip6-cookbook-dev/lib/python3.11/site-packages/fsspec/registry.py:322, in filesystem(protocol, **storage_options)
    315     warnings.warn(
    316         "The 'arrow_hdfs' protocol has been deprecated and will be "
    317         "removed in the future. Specify it as 'hdfs'.",
    318         DeprecationWarning,
    319     )
    321 cls = get_filesystem_class(protocol)
--> 322 return cls(**storage_options)

File ~/micromamba/envs/cmip6-cookbook-dev/lib/python3.11/site-packages/fsspec/spec.py:81, in _Cached.__call__(cls, *args, **kwargs)
     79     return cls._cache[token]
     80 else:
---> 81     obj = super().__call__(*args, **kwargs)
     82     # Setting _fs_token here causes some static linters to complain.
     83     obj._fs_token_ = token

File ~/micromamba/envs/cmip6-cookbook-dev/lib/python3.11/site-packages/gcsfs/core.py:269, in GCSFileSystem.__init__(self, project, access, token, block_size, consistency, cache_timeout, secure_serialize, check_connection, requests_timeout, requester_pays, asynchronous, loop, callback_timeout, **kwargs)
    267 self.callback_timeout = callback_timeout
    268 if not asynchronous:
--> 269     self._session = sync(
    270         self.loop, get_client, callback_timeout=self.callback_timeout
    271     )
    272     weakref.finalize(self, sync, self.loop, self.session.close)
    273 else:

File ~/micromamba/envs/cmip6-cookbook-dev/lib/python3.11/site-packages/fsspec/asyn.py:103, in sync(loop, func, timeout, *args, **kwargs)
    101     raise FSTimeoutError from return_result
    102 elif isinstance(return_result, BaseException):
--> 103     raise return_result
    104 else:
    105     return return_result

File ~/micromamba/envs/cmip6-cookbook-dev/lib/python3.11/site-packages/fsspec/asyn.py:56, in _runner(event, coro, result, timeout)
     54     coro = asyncio.wait_for(coro, timeout=timeout)
     55 try:
---> 56     result[0] = await coro
     57 except Exception as ex:
     58     result[0] = ex

File ~/micromamba/envs/cmip6-cookbook-dev/lib/python3.11/site-packages/fsspec/implementations/http.py:33, in get_client(**kwargs)
     32 async def get_client(**kwargs):
---> 33     return aiohttp.ClientSession(**kwargs)

TypeError: ClientSession.__init__() got an unexpected keyword argument 'callback_timeout'

Calculate global means:

def get_lat_name(ds):
    """Return the name of the latitude coordinate of *ds*.

    ``'lat'`` is tried first, then ``'latitude'``.

    Raises
    ------
    RuntimeError
        If neither name is present in ``ds.coords``.
    """
    candidates = ('lat', 'latitude')
    found = next((name for name in candidates if name in ds.coords), None)
    if found is None:
        raise RuntimeError("Couldn't find a latitude coordinate")
    return found

def global_mean(ds):
    """Average *ds* over every dimension except ``'time'``, weighted by cos(latitude).

    The weights are the cosine of the latitude coordinate (taken to be in
    degrees, since it is passed through ``np.deg2rad``), rescaled so that
    their mean is 1 before multiplying the data.
    """
    lat_name = get_lat_name(ds)
    cos_lat = np.cos(np.deg2rad(ds[lat_name]))
    weight = cos_lat / cos_lat.mean()
    non_time_dims = set(ds.dims) - {'time'}
    weighted = ds * weight
    return weighted.mean(non_time_dims)
# Coordinate that labels the experiment axis created by xr.concat below.
expt_da = xr.DataArray(expts, dims='experiment_id', name='experiment_id',
                       coords={'experiment_id': expts})

# source_id -> annual, globally averaged dataset with an experiment_id axis.
dsets_aligned = {}

for k, v in tqdm(dsets_.items()):
    expt_dsets = v.values()
    # Skip models for which one of the experiments failed to load.
    if any(d is None for d in expt_dsets):
        print(f"Missing experiment for {k}")
        continue

    # Attach the calendar year of each time step as an extra coordinate.
    for ds in expt_dsets:
        ds.coords['year'] = ds.time.dt.year

    # workaround for
    # https://github.com/pydata/xarray/issues/2237#issuecomment-620961663
    # Average in blocks of 12 steps along 'year' (assumes 12 monthly values
    # per year -- TODO confirm for every model). drop_vars replaces the
    # deprecated .drop().
    dsets_ann_mean = [v[expt].pipe(global_mean)
                             .swap_dims({'time': 'year'})
                             .drop_vars('time')
                             .coarsen(year=12).mean()
                      for expt in expts]

    # Stack the experiments along 'experiment_id'; the outer join keeps the
    # union of the years covered by the individual experiments.
    dsets_aligned[k] = xr.concat(dsets_ann_mean, join='outer',
                                 dim=expt_da)

# Evaluate the lazy computations for all models in one dask.compute call.
dsets_aligned_ = dask.compute(dsets_aligned)[0]
# Coordinate that labels the model axis of the combined dataset.
source_ids = list(dsets_aligned_)
source_da = xr.DataArray(
    source_ids,
    dims='source_id',
    name='source_id',
    coords={'source_id': source_ids},
)

# Drop non-index coordinates from each model before stacking them along
# 'source_id'.
per_model = [ds.reset_coords(drop=True) for ds in dsets_aligned_.values()]
big_ds = xr.concat(per_model, dim=source_da)

big_ds
# Flatten the years 1900-2100 into a tidy table for plotting.
selected_years = big_ds.sel(year=slice(1900, 2100))
df_all = selected_years.to_dataframe().reset_index()
df_all.head()
# One line per experiment; values sharing the same year are aggregated and
# the band shows their standard deviation. `errorbar="sd"` replaces the
# `ci="sd"` spelling that seaborn deprecated in v0.12.
sns.relplot(data=df_all,
            x="year", y="tas", hue='experiment_id',
            kind="line", errorbar="sd", aspect=2);

Summary

In this notebook, we accessed data for historical, SSP245, and SSP585 runs from a collection of CMIP6 models and plotted the multimodel-mean global average surface air temperature for each run.

What’s next?

We will use CMIP6 data to analyze precipitation intensity under a warming climate.

Resources and references