# Libraries required for this tutorial...
# import dask
from datetime import datetime
import numpy as np
import xarray as xr
import xwrf
import matplotlib.pyplot as plt
import matplotlib.pyplot as pl
# Plotting wrfstat variables...
from distributed import Client
client = Client("tcp://127.0.0.1:44455")
path_shcu_root = "/data/project/ARM_Summer_School_2024_Data/lasso_tutorial/ShCu/untar" # on Jupyter
case_date = datetime(2019, 4, 4)
sim_id = [6,7,8]
/home/runner/micromamba/envs/lasso-those-clouds-cookbook-dev/lib/python3.13/site-packages/xwrf/__init__.py:5: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
from pkg_resources import DistributionNotFound, get_distribution
---------------------------------------------------------------------------
ConnectionRefusedError Traceback (most recent call last)
ConnectionRefusedError: [Errno 111] Connection refused
The above exception was the direct cause of the following exception:
CommClosedError Traceback (most recent call last)
File ~/micromamba/envs/lasso-those-clouds-cookbook-dev/lib/python3.13/site-packages/distributed/comm/core.py:342, in connect(addr, timeout, deserialize, handshake_overrides, **connection_args)
341 try:
--> 342 comm = await wait_for(
343 connector.connect(loc, deserialize=deserialize, **connection_args),
344 timeout=min(intermediate_cap, time_left()),
345 )
346 break
File ~/micromamba/envs/lasso-those-clouds-cookbook-dev/lib/python3.13/site-packages/distributed/utils.py:1923, in wait_for(fut, timeout)
1922 async with asyncio.timeout(timeout):
-> 1923 return await fut
File ~/micromamba/envs/lasso-those-clouds-cookbook-dev/lib/python3.13/site-packages/distributed/comm/tcp.py:560, in BaseTCPConnector.connect(self, address, deserialize, **connection_args)
558 except StreamClosedError as e:
559 # The socket connect() call failed
--> 560 convert_stream_closed_error(self, e)
561 except SSLCertVerificationError as err:
File ~/micromamba/envs/lasso-those-clouds-cookbook-dev/lib/python3.13/site-packages/distributed/comm/tcp.py:143, in convert_stream_closed_error(obj, exc)
142 raise FatalCommClosedError(f"in {obj}: {exc.__class__.__name__}: {exc}")
--> 143 raise CommClosedError(f"in {obj}: {exc.__class__.__name__}: {exc}") from exc
CommClosedError: in <distributed.comm.tcp.TCPConnector object at 0x7f35ede6af90>: ConnectionRefusedError: [Errno 111] Connection refused
The above exception was the direct cause of the following exception:
OSError Traceback (most recent call last)
Cell In[1], line 13
11 # Plotting wrfstat variables...
12 from distributed import Client
---> 13 client = Client("tcp://127.0.0.1:44455")
14 path_shcu_root = "/data/project/ARM_Summer_School_2024_Data/lasso_tutorial/ShCu/untar" # on Jupyter
16 case_date = datetime(2019, 4, 4)
File ~/micromamba/envs/lasso-those-clouds-cookbook-dev/lib/python3.13/site-packages/distributed/client.py:1203, in Client.__init__(self, address, loop, timeout, set_as_default, scheduler_file, security, asynchronous, name, heartbeat_interval, serializers, deserializers, extensions, direct_to_workers, connection_limit, **kwargs)
1200 preload_argv = dask.config.get("distributed.client.preload-argv")
1201 self.preloads = preloading.process_preloads(self, preload, preload_argv)
-> 1203 self.start(timeout=timeout)
1204 Client._instances.add(self)
1206 from distributed.recreate_tasks import ReplayTaskClient
File ~/micromamba/envs/lasso-those-clouds-cookbook-dev/lib/python3.13/site-packages/distributed/client.py:1403, in Client.start(self, **kwargs)
1401 self._started = asyncio.ensure_future(self._start(**kwargs))
1402 else:
-> 1403 sync(self.loop, self._start, **kwargs)
File ~/micromamba/envs/lasso-those-clouds-cookbook-dev/lib/python3.13/site-packages/distributed/utils.py:452, in sync(loop, func, callback_timeout, *args, **kwargs)
449 wait(10)
451 if error is not None:
--> 452 raise error
453 else:
454 return result
File ~/micromamba/envs/lasso-those-clouds-cookbook-dev/lib/python3.13/site-packages/distributed/utils.py:426, in sync.<locals>.f()
424 awaitable = wait_for(awaitable, timeout)
425 future = asyncio.ensure_future(awaitable)
--> 426 result = yield future
427 except Exception as exception:
428 error = exception
File ~/micromamba/envs/lasso-those-clouds-cookbook-dev/lib/python3.13/site-packages/tornado/gen.py:769, in Runner.run(self)
767 try:
768 try:
--> 769 value = future.result()
770 except Exception as e:
771 # Save the exception for later. It's important that
772 # gen.throw() not be called inside this try/except block
773 # because that makes sys.exc_info behave unexpectedly.
774 exc: Optional[Exception] = e
File ~/micromamba/envs/lasso-those-clouds-cookbook-dev/lib/python3.13/site-packages/distributed/client.py:1481, in Client._start(self, timeout, **kwargs)
1478 self.scheduler_comm = None
1480 try:
-> 1481 await self._ensure_connected(timeout=timeout)
1482 except (OSError, ImportError):
1483 await self._close()
File ~/micromamba/envs/lasso-those-clouds-cookbook-dev/lib/python3.13/site-packages/distributed/client.py:1549, in Client._ensure_connected(self, timeout)
1546 self._connecting_to_scheduler = True
1548 try:
-> 1549 comm = await connect(
1550 self.scheduler.address, timeout=timeout, **self.connection_args
1551 )
1552 comm.name = "Client->Scheduler"
1553 if timeout is not None:
File ~/micromamba/envs/lasso-those-clouds-cookbook-dev/lib/python3.13/site-packages/distributed/comm/core.py:368, in connect(addr, timeout, deserialize, handshake_overrides, **connection_args)
366 await asyncio.sleep(backoff)
367 else:
--> 368 raise OSError(
369 f"Timed out trying to connect to {addr} after {timeout} s"
370 ) from active_exception
372 local_info = {
373 **comm.handshake_info(),
374 **(handshake_overrides or {}),
375 }
376 await comm.write(local_info)
OSError: Timed out trying to connect to tcp://127.0.0.1:44455 after 30 s
ds_stat_1 = xr.open_dataset(f"{path_shcu_root}/{case_date:%Y%m%d}/sim{sim_id[0]:04d}/raw_model/wrfstat_d01_{case_date:%Y-%m-%d_12:00:00}.nc")
ds_stat_2 = xr.open_dataset(f"{path_shcu_root}/{case_date:%Y%m%d}/sim{sim_id[1]:04d}/raw_model/wrfstat_d01_{case_date:%Y-%m-%d_12:00:00}.nc")
ds_stat_3 = xr.open_dataset(f"{path_shcu_root}/{case_date:%Y%m%d}/sim{sim_id[2]:04d}/raw_model/wrfstat_d01_{case_date:%Y-%m-%d_12:00:00}.nc")
ds_stat_1 = ds_stat_1.assign_coords(height=(ds_stat_1["CSP_Z"]))
ds_stat_2 = ds_stat_2.assign_coords(height=(ds_stat_2["CSP_Z"]))
ds_stat_3 = ds_stat_3.assign_coords(height=(ds_stat_3["CSP_Z"]))
ds_stat_3["Time"] = ds_stat_3["XTIME"]
ds_stat_2["Time"] = ds_stat_2["XTIME"]
ds_stat_1["Time"] = ds_stat_1["XTIME"]
# Note the extra details required by open_mfdataset to connect the files together in time.
ds_xwrf_1 = xr.open_mfdataset(f"{path_shcu_root}/{case_date:%Y%m%d}/sim{sim_id[0]:04d}/raw_model/wrfout_d01_*.nc", combine="nested", concat_dim="Time").xwrf.postprocess()
ds_xwrf_2 = xr.open_mfdataset(f"{path_shcu_root}/{case_date:%Y%m%d}/sim{sim_id[1]:04d}/raw_model/wrfout_d01_*.nc", combine="nested", concat_dim="Time").xwrf.postprocess()
ds_xwrf_3 = xr.open_mfdataset(f"{path_shcu_root}/{case_date:%Y%m%d}/sim{sim_id[2]:04d}/raw_model/wrfout_d01_*.nc", combine="nested", concat_dim="Time").xwrf.postprocess()
ds_xwrf_3["Time"] = ds_xwrf_3["XTIME"]
ds_xwrf_2["Time"] = ds_xwrf_2["XTIME"]
ds_xwrf_1["Time"] = ds_xwrf_1["XTIME"]
Advection Input to LES simulations with different forcings scale and corresponding LES output thermodynamic and Cloud time-height profiles¶
Humidity and Heat coming into the LES domain due to large-scale forcings is dinstinctly different for increasing forcing scales. These also correspond to very different intensities in cloud water content and other cloud structure properties.
import numpy as np
import matplotlib.ticker as tkr
pl.rcParams['xtick.labelsize'] = 16
pl.rcParams['ytick.labelsize'] = 16
pl.rcParams['axes.labelsize'] = 16
pl.rcParams['axes.titlesize'] = 16
pl.rcParams['legend.fontsize'] = 16
def plot_contour(var_name,label_name,min_level,max_level):
fig,ax = pl.subplots(1,3,figsize=(27,5))
pl.subplot(131)
ds_stat_1[var_name].plot.contourf(x='Time',y='height',levels=np.linspace(min_level,max_level,10),add_colorbar=False,cmap='coolwarm')
pl.ylim([0,5000])
pl.title(label_name[0])
pl.subplot(132)
ds_stat_2[var_name].plot.contourf(x='Time',y='height',levels=np.linspace(min_level,max_level,10),add_colorbar=False,cmap='coolwarm')
pl.title(label_name[1])
pl.ylabel('')
pl.ylim([0,5000])
pl.subplot(133)
p1=ds_stat_3[var_name].plot.contourf(x='Time',y='height',levels=np.linspace(min_level,max_level,10),add_colorbar=False,cmap='coolwarm')
pl.ylabel('')
pl.ylim([0,5000])
fig.subplots_adjust(right=0.8)
cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
cb2 = fig.colorbar(p1,format=tkr.FormatStrFormatter('%.3g'),cax=cbar_ax)
pl.title(label_name[2])
plot_contour('CSP_THDT_LSHOR',['','TH Advection Horizontal',''],-1e-4,1e-4)
plot_contour('CSP_TH',['','TH - LES',''],280,310)
plot_contour('CSP_QVDT_LSHOR',['','QV Advection Horizontal',''],0,2e-7)
plot_contour('CSP_QV',['','QV - LES',''],0,0.011)
plot_contour('CSP_QL',['','QL',''],0,5e-5)
pl.figure(figsize=(7,5))
ds_stat_1['CST_PRECT'].plot.line(label='75 km')
ds_stat_2['CST_PRECT'].plot.line(label='150 km')
ds_stat_2['CST_PRECT'].plot.line(label='300 km')
Which of the forcing scales produces clouds close to what was observed from satellie images (GOES)?¶
sim_id=6
ds_cogs_1 = xr.open_dataset(f"{path_shcu_root}/{case_date:%Y%m%d}/sim{sim_id:04d}/obs_model/sgplassocogsdiagobsmod{sim_id}C1.m1.{case_date:%Y%m%d}.120000.nc")
sim_id=7
ds_cogs_2 = xr.open_dataset(f"{path_shcu_root}/{case_date:%Y%m%d}/sim{sim_id:04d}/obs_model/sgplassocogsdiagobsmod{sim_id}C1.m1.{case_date:%Y%m%d}.120000.nc")
sim_id=8
ds_cogs_3 = xr.open_dataset(f"{path_shcu_root}/{case_date:%Y%m%d}/sim{sim_id:04d}/obs_model/sgplassocogsdiagobsmod{sim_id}C1.m1.{case_date:%Y%m%d}.120000.nc")
fig, ax = plt.subplots(ncols=1,figsize=(7,5))
ds_cogs_1["low_cloud_fraction_cogs"].isel(source_type=0).plot(ax=ax, marker="o", label="COGS",color='k')
ds_cogs_1["low_cloud_fraction_cogs"].isel(source_type=1).plot(ax=ax, marker="o", label="Sim6", color='b')
ds_cogs_2["low_cloud_fraction_cogs"].isel(source_type=1).plot(ax=ax, marker="o", label="Sim7", color= 'r')
ds_cogs_3["low_cloud_fraction_cogs"].isel(source_type=1).plot(ax=ax, marker="o", label="Sim8", color = 'g')
# If you plan to share with frieds, do a little clean-up beyond the default labelling from xarray...
ax.legend()
ax.set_title("COGS vs. WRF Cloud Fraction")
ax.set_xlabel("Time (UTC)")
ax.set_ylabel("Cloud Fraction")
plt.show()
ds_may_1=xr.open_dataset('/data/project/ARM_Summer_School_2024_Data/lasso_tutorial/ShCu/untar/20190506/sim0006/obs_model/sgplassocogsdiagobsmod6C1.m1.20190506.120000.nc',decode_times=True)
ds_may_2=xr.open_dataset('/data/project/ARM_Summer_School_2024_Data/lasso_tutorial/ShCu/untar/20190506/sim0007/obs_model/sgplassocogsdiagobsmod7C1.m1.20190506.120000.nc',decode_times=True)
ds_may_3=xr.open_dataset('/data/project/ARM_Summer_School_2024_Data/lasso_tutorial/ShCu/untar/20190506/sim0008/obs_model/sgplassocogsdiagobsmod8C1.m1.20190506.120000.nc',decode_times=True)
fig, ax = plt.subplots(ncols=1,figsize=(7,5))
ds_may_1["low_cloud_fraction_cogs"].isel(source_type=0).plot(ax=ax, marker="o", label="COGS",color='k')
ds_may_1["low_cloud_fraction_cogs"].isel(source_type=1).plot(ax=ax, marker="o", label="Sim6", color='b')
ds_may_2["low_cloud_fraction_cogs"].isel(source_type=1).plot(ax=ax, marker="o", label="Sim7", color= 'r')
ds_may_3["low_cloud_fraction_cogs"].isel(source_type=1).plot(ax=ax, marker="o", label="Sim8", color = 'g')
# If you plan to share with frieds, do a little clean-up beyond the default labelling from xarray...
ax.legend()
ax.set_title("COGS vs. WRF Cloud Fraction")
ax.set_xlabel("Time (UTC)")
ax.set_ylabel("Cloud Fraction")
plt.show()
Are these differences we notice in Cloud structure statistics reprsentative of what’s going on at the 3-d level?¶
To check this we track each individual cloud cells (cluster of all adjacent cloudy cells) defined by x,y dependent cloud water path (ql_path) > 0.005
#### Field Plots #####
import os
import numpy as np
import matplotlib.pyplot as pl
from matplotlib import cm
import math
from scipy.stats import norm
import xarray as xr
import netCDF4 as nc
import sys
sys.setrecursionlimit(1000000)
##########################################################################################
class cell:
def __init__(self, id):
self.id = id
self.value = [[],[]]
self.location = [[],[]]
self.nelements = 0
self.nelements_local = 0
def add_elements(self, i, j, var_values):
self.location[0].append(i)
self.location[1].append(j)
self.value[0].append(var_values)
self.nelements = self.nelements + 1
self.nelements_local = self.nelements_local + 1
def __del__(self):
return
##########################################################################################
def find_boolean(variable, threshold_criteria): #variable is f(i, j, t): --> outputs boolean -1 (unsatisfied) 0 (satisfied)
boolean = np.zeros(( len(variable[:,0]), len(variable[0,:])))
boolean = -1
boolean = np.where(variable[:,:]>threshold_criteria,0,-1)
return boolean;
########################################################################################
def identify_elements_in_cell(i,j,new_cell): #input the ijk at which boolean is satisfied along with boolean and new cell created
global booli;
new_cell.add_elements(i,j,cell_variable[i,j])
booli[i,j] = -1
ii=i-1; jj=j; #look west
if ii<0:
ii = nx-1
if (booli[ii,jj] == 0):
identify_elements_in_cell(ii,jj,new_cell)
ii=i+1; jj=j; #look east
if ii>nx-1:
ii = 0
if (booli[ii,jj] == 0):
identify_elements_in_cell(ii,jj,new_cell)
ii=i; jj=j+1; #look north
if jj>ny-1:
jj = 0
if (booli[ii,jj] == 0):
identify_elements_in_cell(ii,jj,new_cell)
ii=i; jj=j-1; #look south
if jj<0:
jj = ny-1
if (booli[ii,jj] == 0):
identify_elements_in_cell(ii,jj,new_cell)
#################################################################################################
def create_new_cell(variable,bool): # input the boolean and the variable, output is the cells tracked (i,j,t) based on boolean
cell_number = 0;
global booli,cell_variable,nx,ny;
nx = len(variable[:,0])
ny = len(variable[0,:])
booli=bool;cell_variable = variable;
variable_cells = []
for j in range(0,ny):
for i in range(0,nx):
if booli[i,j]==0:
new_cell=cell(cell_number)
identify_elements_in_cell(i,j,new_cell)
if new_cell.nelements>=nminelems:
variable_cells.append(new_cell)
variable_cells[cell_number].id = cell_number
cell_number = cell_number + 1
else:
del new_cell;
return variable_cells, cell_number;
#################################################################################################
def run_tracking(tracked_variable,param_threshold):
global nx,ny;
nx = len(tracked_variable[:,0]); ny = len(tracked_variable[0,:]);
bool = find_boolean(tracked_variable,param_threshold)
[cells,cell_number] = create_new_cell(tracked_variable,bool);
return cells,cell_number;
####################################################################################################
def find_cell_centers(cells,ncells):
centers=np.zeros((ncells,2))
max_cloudsize=0;
for i in range(0,ncells):
centers[i,0]=np.mean(cells[i].location[0])
centers[i,1]=np.mean(cells[i].location[1])
if cells[i].nelements>max_cloudsize:
max_cloudsize=cells[i].nelements
return centers,max_cloudsize;
####################################################################################################
def find_nearest_neighbor(centers,nx,ny):
distance=np.zeros(len(centers[:,0]))
nn_distance=np.zeros(len(centers[:,0]))
for i in range(len(centers[:,0])):
for j in range(len(centers[:,0])):
if i==j:
distance[j]=100;
else:
xdist=min(abs(centers[i,0]+nx-centers[j,0]),abs(centers[i,0]-nx-centers[j,0]),abs(centers[i,0]-centers[j,0]))
ydist=min(abs(centers[i,1]+ny-centers[j,1]),abs(centers[i,1]-ny-centers[j,1]),abs(centers[i,1]-centers[j,1]))
distance[j]=math.sqrt(xdist**2+ydist**2)
nn_distance[i]=min(distance)
return nn_distance;
####################################################################################################
def retrieve_variable(variable_name,netcdf_path): #open corresponding netcdf file and read data
all_data=xr.open_dataset(netcdf_path,decode_times=False)
var=all_data[variable_name].values
variable=var
x=all_data['x'].values
y=all_data['y'].values
t=all_data['Time'].values
return variable, x, y, t;
##########################################################################################
#[w_cross,x,y,t]=retrieve_variable(variable_name='w',netcdf_path='/fs/ess/PFS0220/eurec4a/case_1060lagtraj_feb2_withw/w_cross.nc')
def get_iorg(xr_data,variable_name,param_threshold,start_ind,slice_len):
variable=xr_data[variable_name].values
i_org=np.zeros(xr_data.Time.size)
Max_Clouds=np.zeros(xr_data.Time.size)
for i in range(start_ind,xr_data.Time.size,slice_len):
if i%60==0:
print(i)
[clouds,ncells]=run_tracking(tracked_variable=variable[i,:,:],param_threshold=param_threshold)
if ncells<2:
i_org[i]=0;
Max_Clouds[i]=0;
else:
[centers,Max_Clouds[i]]=find_cell_centers(clouds,ncells)
nn_distance=find_nearest_neighbor(centers,nx,ny)
centers=centers.astype(int)
nn_sorted = np.sort(nn_distance);nn_sorted=nn_sorted/max(nn_sorted)
p = 1. * np.arange(len(nn_distance)) / (len(nn_distance) - 1)
p_ran=1-np.exp(-1*(math.pi)*nn_sorted*nn_sorted)
i_org[i]=np.trapz(p,x=p_ran)
xr_data['i_org']=xr.DataArray(i_org,dims=['Time'])
xr_data['Max_Clouds']=xr.DataArray(Max_Clouds,dims=['Time'])
return xr_data;
#########################################################################################
nminelems=10
slice_len=1;
ds_xwrf_1=get_iorg(ds_xwrf_1,'qc_path',0.005,start_ind=0,slice_len=slice_len)
ds_xwrf_2=get_iorg(ds_xwrf_2,'qc_path',0.005,start_ind=0,slice_len=slice_len)
ds_xwrf_3=get_iorg(ds_xwrf_3,'qc_path',0.005,start_ind=0,slice_len=slice_len)
Organization Index (Randomness (=0) or Organization (=1) of tracked individual cloud cells) and Size of Cloud Cell with maximum area¶
pl.figure(figsize=(7,5))
(0.1*ds_xwrf_1['Max_Clouds']**(1/2)).plot.line(label='75km')
(0.1*ds_xwrf_2['Max_Clouds']**(1/2)).plot.line(label='150km')
(0.1*ds_xwrf_3['Max_Clouds']**(1/2)).plot.line(label='300km')
pl.ylabel('Cloud Size (km)')
pl.title('Size of the Largest Cloud Cell')
pl.legend()
pl.figure(figsize=(7,5))
ds_xwrf_1.i_org.rolling(Time=6, center=True).mean().dropna("Time").plot.line(label='75km')
ds_xwrf_2.i_org.rolling(Time=6, center=True).mean().dropna("Time").plot.line(label='150km')
ds_xwrf_3.i_org.rolling(Time=6, center=True).mean().dropna("Time").plot.line(label='300km')
pl.ylabel(r'Organization Index ($I_{org}$)')
pl.title('Extent of Cloud Organization')
pl.legend()
ds_stat_1.CSP_Z.isel(bottom_top=75).values