Source determination through NOAA HYSPLIT trajectory model

HYSPLIT models simulate the dispersion and trajectory of substances transported and dispersed through our atmosphere, over local to global scales.

A common application is a back trajectory analysis to determine the origin of air masses and establish source-receptor relationships.

# import libraries
import xarray as xr   # for data storage and processing
import numpy as np    # for data storage and processig
import pandas as pd   # for data storage and processig
import act
from datetime import datetime  # for formating date and time
import matplotlib.pyplot as plt

# define the function to read the HYSPLIT model output file
def read_hysplit(filename, base_year=2000):
    """
    Reads an input HYSPLIT trajectory for plotting in ACT.
    Parameters
    ----------
    filename: str
        The input file name.
    base_year: int
        The first year of the century in which the data are contained.
    Returns
    -------
    ds: xarray Dataset
        The ACT dataset containing the HYSPLIT trajectories
    """
    ds = xr.Dataset({})
    num_lines = 0
    with open(filename) as filebuf:
        num_grids = int(filebuf.readline().split()[0])
        num_lines += 1
        grid_times = []
        grid_names = []
        forecast_hours = np.zeros(num_grids)
        for i in range(num_grids):
            data = filebuf.readline().split()
            num_lines += 1
            grid_names.append(data[0])
            grid_times.append(
                datetime(
                    year=(int(data[1]) + base_year),
                    month=int(data[2]),
                    day=int(data[3]),
                    hour=int(data[4]),
                )
            )
            forecast_hours[i] = int(data[5])
        ds["grid_forecast_hour"] = xr.DataArray(forecast_hours, dims=["num_grids"])
        ds["grid_forecast_hour"].attrs["standard_name"] = "Grid forecast hour"
        ds["grid_forecast_hour"].attrs["units"] = "Hour [UTC]"
        ds["grid_times"] = xr.DataArray(np.array(grid_times), dims=["num_grids"])
        data_line = filebuf.readline().split()
        num_lines += 1
        ds.attrs["trajectory_direction"] = data_line[1]
        ds.attrs["vertical_motion_calculation_method"] = data_line[2]
        num_traj = int(data_line[0])
        traj_times = []
        start_lats = np.zeros(num_traj)
        start_lons = np.zeros(num_traj)
        start_alt = np.zeros(num_traj)
        for i in range(num_traj):
            data = filebuf.readline().split()
            num_lines += 1
            traj_times.append(
                datetime(
                    year=(base_year + int(data[0])),
                    month=int(data[1]),
                    day=int(data[2]),
                    hour=int(data[3]),
                )
            )
            start_lats[i] = float(data[4])
            start_lons[i] = float(data[5])
            start_alt[i] = float(data[6])
        ds["start_latitude"] = xr.DataArray(start_lats, dims=["num_trajectories"])
        ds["start_latitude"].attrs["long_name"] = "Trajectory start latitude"
        ds["start_latitude"].attrs["units"] = "degree"
        ds["start_longitude"] = xr.DataArray(start_lats, dims=["num_trajectories"])
        ds["start_longitude"].attrs["long_name"] = "Trajectory start longitude"
        ds["start_longitude"].attrs["units"] = "degree"
        ds["start_altitude"] = xr.DataArray(start_alt, dims=["num_trajectories"])
        ds["start_altitude"].attrs["long_name"] = "Trajectory start altitude"
        ds["start_altitude"].attrs["units"] = "degree"
        data = filebuf.readline().split()
        num_lines += 1
        var_list = [
            "trajectory_number",
            "grid_number",
            "year",
            "month",
            "day",
            "hour",
            "minute",
            "forecast_hour",
            "age",
            "lat",
            "lon",
            "alt",
        ]
        for variable in data[1:]:
            var_list.append(variable)
        input_df = pd.read_csv(
            filebuf, sep=r'\s+', index_col=False, names=var_list, skiprows=1
        )  # noqa W605
        input_df['year'] = base_year + input_df['year']
        input_df['year'] = input_df['year'].astype(int)
        input_df['month'] = input_df['month'].astype(int)
        input_df['day'] = input_df['day'].astype(int)
        input_df['hour'] = input_df['hour'].astype(int)
        input_df['minute'] = input_df['minute'].astype(int)
        input_df['time'] = pd.to_datetime(
            input_df[["year", "month", "day", "hour", "minute"]], format='%y%m%d%H%M'
        )
        input_df = input_df.set_index("time")
        del input_df["year"]
        del input_df["month"]
        del input_df["day"]
        del input_df["hour"]
        del input_df["minute"]
        ds = ds.merge(input_df.to_xarray())
        ds.attrs['datastream'] = 'hysplit'
        ds["trajectory_number"].attrs["standard_name"] = "Trajectory number"
        ds["trajectory_number"].attrs["units"] = "1"
        ds["grid_number"].attrs["standard_name"] = "Grid number"
        ds["grid_number"].attrs["units"] = "1"
        ds["age"].attrs["standard_name"] = "Grid number"
        ds["age"].attrs["units"] = "1"
        ds["lat"].attrs["standard_name"] = "Latitude"
        ds["lat"].attrs["units"] = "degree"
        ds["lon"].attrs["standard_name"] = "Longitude"
        ds["lon"].attrs["units"] = "degree"
        ds["alt"].attrs["standard_name"] = "Altitude"
        ds["alt"].attrs["units"] = "meter"
    return ds

Plotting HYSPLIT back trajectory and compare with NASA firemap

Case 1: Jan 8th, 2022

# Load the data
filename = 'guc_010822.txt'
ds = read_hysplit(filename)

# Use the GeographicPlotDisplay object to make the plot
disp = act.plotting.GeographicPlotDisplay(ds)
ax = disp.geoplot('PRESSURE', cartopy_feature=['STATES', 'OCEAN', 'LAND'], s = 1)
ax.set_xlim([-159, -67])
ax.set_ylim([20, 77])
plt.show()

../_images/1a9ff84293d96dbd5e7a6b1fab87a76fa119e46d792b20199842ee72c33f8d0b.png

Fire Map Link

Click here to view the interactive map

Case 2: Jan 25th, 2022

# Load the data
filename = 'guc_012522.txt'
ds = read_hysplit(filename)

# Use the GeographicPlotDisplay object to make the plot
disp = act.plotting.GeographicPlotDisplay(ds)
ax = disp.geoplot('PRESSURE', cartopy_feature=['STATES', 'OCEAN', 'LAND'], s = 1)
ax.set_xlim([-159, -67])
ax.set_ylim([20, 77])
plt.show()

../_images/9540c0324cbaffbd192b97241a37a47b7acda00f149c8b97a558c5951b48e188.png

Case 3: Apr 3rd, 2023

# Load the data
filename = 'guc_040323.txt'
ds = read_hysplit(filename)

# Use the GeographicPlotDisplay object to make the plot
disp = act.plotting.GeographicPlotDisplay(ds)
ax = disp.geoplot('PRESSURE', cartopy_feature=['STATES', 'OCEAN', 'LAND'], s = 1)
ax.set_xlim([-159, -67])
ax.set_ylim([20, 77])
plt.show()

../_images/b6e03a3bb9a4279da6028ffc102acc16244a1d92528dd7225f9d02bf01b0ebfd.png

from IPython.display import display, HTML

# Adjust the path to your HTML file accordingly
file_path = "ARM_smps_data.html"

# Read the content of the HTML file
with open(file_path, 'r') as file:
    html_content = file.read()

# Display the HTML content
display(HTML(html_content))

ARM_SAIL_project

load libraries

library(ncdf4) # package for netcdf manipulation
library(ggplot2) # package for plotting
library(data.table) #read data into datatble or creat datatble
library(patchwork) # arrange plot_layout
library(janitor) #clean data table names

## 
## Attaching package: 'janitor'

## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

library(tidyr) # to use function pivot_longer
library(dplyr) ## to use function group_by

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:data.table':
## 
##     between, first, last

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(viridis)

## Loading required package: viridisLite

##E Batch mode processing all .nc files

# Define a function to read .nc files and extract cpc variables
smps_read_nc_file <- function(file_path) {
  #S4_alt <- 2764 # the height of S4 site above sea level
  nc_data <- nc_open(file_path)
  smps.totalconc <- ncvar_get(nc_data, "total_N_conc")
  smps.alt <- ncvar_get(nc_data, "alt")
  date_data <- ncvar_get(nc_data, "base_time")
  time_data <- ncvar_get(nc_data, "time_offset")
  # Convert time data to POSIXct (datetime) objects
  smps.datetime <- as.POSIXct(time_data, origin = date_data, tz = "UTC")
  nc_close(nc_data)
  return(list(smps.datetime = smps.datetime, smps.totalconc = smps.totalconc, smps.alt = smps.alt))
}


# Define a function to create the plot
smps_create_plot <- function(smps.datetime, smps.totalconc, smps.alt, file_name) {
  df <- data.frame(smps.datetime = smps.datetime, smps.totalconc = smps.totalconc, smps.alt = smps.alt)
  ggplot(df, aes(x = smps.datetime, y = smps.totalconc)) +
    geom_line(size = 2)+
    scale_y_log10()+
    labs(x = "Datetime", y = "SMPS total num conc (#/cc)",  title = paste("SMPS Total Concentration vs. Date -", file_name)) +
    theme_minimal()
}

filepath <- "/Users/laiz097/Library/CloudStorage/OneDrive-PNNL/Documents/2024/ARM_summer_school_2024/SAIL_group_project/SMPS_data"
file_list <- list.files(path = filepath, pattern = ".nc", full.names=TRUE)


# Read each .nc file
nc_data_list <- lapply(file_list, nc_open)

# Loop through each file, read data, and create plots
for (file in file_list) {
  data <- smps_read_nc_file(file)
  plot <- smps_create_plot(data$smps.datetime, data$smps.totalconc, data$smps.alt, basename(file))
  print(plot)
}

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.