Example 7: Analysis Involving Multiple Tracers

In this example, we benchmark simulation results from PyTracerLab against equivalent results obtained from TracerLPM. We consider the tracer input data given in Example 1 of the TracerLPM documentation (Jurgens et al., 2012).

import PyTracerLab.model as ism
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
from matplotlib.colors import Normalize
import matplotlib as mpl
from datetime import datetime
import seaborn as sns
import pandas as pd

1. Load Observation Data

# load input series
# this would be the tracer concentration in precipitation or recharge in a
# practical problem
file_name = "TracerLPM_benchmark_input_multitracer.csv"
data = np.genfromtxt(
    file_name,
    delimiter=";",
    dtype=["<U7", float, float, float, float, float],
    encoding="utf-8",
    skip_header=1
)

timestamps = np.array([datetime.strptime(row[0], r"%Y-%m") for row in data])
# we only keep 3H and SF6
input_series = np.array([[row[1], row[2], row[5]] for row in data], dtype=float)

# load observation series
# this would be the measured tracer concentration in groudnwater in a
# practical problem
file_name = "TracerLPM_benchmark_observations_multitracer.csv"
data = np.genfromtxt(
    file_name,
    delimiter=";",
    dtype=["<U7", float, float, float, float, float],
    encoding="utf-8",
    skip_header=1
)

timestamps = np.array([datetime.strptime(row[0], r"%Y-%m") for row in data])
# we only keep 3H and SF6
obs_series = np.array([[row[1], row[2], row[5]] for row in data], dtype=float)
### plot input series, output series, and observations

# get observation timesteps
timesteps = [t.year + t.month / 12.0 for t in timestamps]

# create figure
fig, ax = plt.subplots(1, 1)
# plot input series
# this input is valid for both tracers
ax.plot(
    timesteps,
    input_series[:, 0],
    label="Tracer 1 & 2 Input",
    c="lightgrey"
)

# plot observations
ax.scatter(
    timesteps,
    obs_series[:, 0],
    label="Observations Tracer 1",
    color="coral",
    marker="x",
    zorder=10
)
ax.scatter(
    timesteps,
    obs_series[:, 1],
    label="Observations Tracer 2",
    color="darkred",
    marker="x",
    zorder=10
)

ax.set_xlabel("Time [months]")
ax.set_ylabel("Tracer concentration [-]")
ax.legend(ncol=1, fontsize=9)
ax.set_yscale("log")
plt.show()
../_images/95d245c76dc23e57e78787b8ba4413e724c870545cba352fad73cbae20deafb4.png

2. Model Setup

t_half = 12.32 # tritium, in years
lambda_1 = np.log(2.0) / t_half

t_half = 1e10 # SF6 (which does not decay)
lambda_2 = np.log(2.0) / t_half

### define model (we do not use the same structure / units as the true model)
# time step is 6 months / 0.5 years
# because we use half life in years, we have to use a time step of 0.5
# if we were to give the half life in months, we would use a time step of 6
m = ism.Model(
    dt=0.5,
    lambda_=[lambda_1, lambda_1, lambda_2],
    input_series=input_series,
    target_series=obs_series,
    production=[False, True, False],
    steady_state_input=[3., 3., 0.], # this is the true value
    n_warmup_half_lives=10,
    n_warmup_steps=5000
)

# add a dispersion model unit
# define the initial model parameters for inference
dm_mtt_init = 10
dm_dp_init = .8
m.add_unit(
    ism.DMUnit(mtt=dm_mtt_init, DP=dm_dp_init),
    fraction=1.,
    bounds=[(1.0, 1000.), (0.01, 10.0)],
    prefix="dm"
)

3. Model Simulation for Different Mean Travel Times

# Define range of mean travel times to consider
# mtt_range = np.linspace(1, 250, 150)
# mtt_range = np.arange(1., 250., 10)
mtt_range = np.logspace(0., 2.35, 91)
mtt_range = np.logspace(0., 2.7, 91)

# Create empty array of results
results = np.zeros((3, len(mtt_range), len(input_series)))

# Iterate over mean travel times
for i, mtt in enumerate(mtt_range):
    m.set_param(key="dm.mtt", value=mtt)
    sim = m.simulate()
    results[0, i, :] = sim[:, 0].flatten()
    results[1, i, :] = sim[:, 1].flatten()
    results[2, i, :] = sim[:, 2].flatten()
# load data simulated with TracerLPM
benchmark_data = pd.read_csv("tracer_tracer_sim_TracerLPM.csv", index_col=0, sep=";", decimal=",", usecols=["TT", "3H", "3He(trit)", "SF6"])
benchmark_data.head()
3H 3He(trit) SF6
TT
1 3.942932 0.233460 5.304668
2 3.903212 0.512449 5.148497
3 3.861798 0.959914 4.934477
4 3.858701 1.649138 4.729547
5 3.885202 2.544494 4.536607
def add_mtt_colored_line_plot(
    ax,
    results,
    mtt_range,
    obs_series,
    timestamps,
    obs_indices,
    idx_select,
    tracer1_col=1,
    tracer2_col=2,
    *,
    cmap="turbo",
    line_lw=4.0,
    outline=True,
    outline_lw=5.5,
    outline_alpha=0.6,
    stepper=10,
    percent_tracer_free=(0.75, 0.5, 0.25),
):
    # extract curve for selected observation date
    x = results[tracer1_col, :, obs_indices[idx_select]]
    y = results[tracer2_col, :, obs_indices[idx_select]]

    # multicolored line segments
    pts = np.column_stack([x, y])
    segs = np.stack([pts[:-1], pts[1:]], axis=1)
    mtt_seg = 0.5 * (mtt_range[:-1] + mtt_range[1:])

    lc = LineCollection(segs, cmap=cmap, linewidths=line_lw, zorder=10)
    lc.set_array(mtt_seg)  # norm will be set outside for shared colorbar
    ax.add_collection(lc)

    if outline:
        ax.plot(x[5:-1], y[5:-1], c="k", lw=outline_lw, alpha=outline_alpha, zorder=9)

    # observation
    ax.scatter(
        obs_series[obs_indices[idx_select], tracer1_col],
        obs_series[obs_indices[idx_select], tracer2_col],
        c="r",
        edgecolor="k",
        marker="X",
        zorder=20000,
        s=200,
        label="Observation",
    )

    # origin
    ax.scatter(
        [0.0],
        [0.0],
        c="k",
        edgecolor="k",
        marker="o",
        zorder=20,
        s=10,
        # label="Origin (Tracer-Free Water)",
    )

    # binary mixing rays
    points_idx = list(range(len(mtt_range)))[::stepper]
    points_idx = [0, -1, 30, 40, 47, 53, 59, 65, 72]
    for i in points_idx:
        label = "Binary Mixing" if i == points_idx[0] else None
        ax.plot(
            [0.0, results[tracer1_col, i, obs_indices[idx_select]]],
            [0.0, results[tracer2_col, i, obs_indices[idx_select]]],
            c="k",
            lw=1.0,
            alpha=0.3,
            ls="--",
            label=label,
        )

    # dilution factors
    for p in percent_tracer_free:
        ax.plot(
            x * p,
            y * p,
            c="k",
            lw=1.5,
            alpha=0.8 * p,
            label=f"{(1 - p) * 100:.0f}% Tracer-Free Water",
        )

    # ax.set_xlabel("Tracer 1")
    # ax.set_ylabel("Tracer 2")
    # ax.set_title(
    #     f"{title_prefix}Date of Observation: {timestamps[obs_indices[idx_select]].date()}"
    # )

    return lc  # return mappable for shared colorbar

fig, ax = plt.subplots(1, 2, figsize=(8, 4), constrained_layout=True, sharey=True)
axs = np.ravel(ax)

# Get indices of time series that have observations (shared across all)
obs_indices = np.arange(len(timestamps))[~np.isnan(obs_series).any(axis=1)]
idx_select = 0

# shared normalization (same color mapping across all subplots)
norm = Normalize(vmin=np.nanmin(mtt_range), vmax=np.nanmax(mtt_range))

tracer1_cols = [0, 1]
tracer2_cols = [2, 2]
mappables = []
for i, ax in enumerate(axs):
    lc = add_mtt_colored_line_plot(
        ax,
        results,
        mtt_range,
        obs_series,
        timestamps,
        obs_indices,
        idx_select,
        tracer1_col=tracer1_cols[i],
        tracer2_col=tracer2_cols[i],
        outline=True,
        outline_lw=11.,
        line_lw=10.
    )

    # plot benchmark data
    ax.plot(
        benchmark_data.iloc[:, tracer1_cols[i]],
        benchmark_data.iloc[:, tracer2_cols[i]],
        c="k",
        zorder=1000,
        ls="-",
        lw=2.
    )
    ax.plot(
        benchmark_data.iloc[:, tracer1_cols[i]],
        benchmark_data.iloc[:, tracer2_cols[i]],
        c="w",
        zorder=1000,
        ls=":",
        lw=2.
    )

    lc.set_norm(norm)  # enforce shared normalization
    mappables.append(lc)
    ax.set_xlim(0.)
    ax.set_ylim(0.)

# shared colorbar (use any of the LineCollections as the mappable)
cbar = fig.colorbar(
    mappables[0],
    ax=axs,
    label=r"Mean travel time $ [a] $",
    fraction=0.035,
    pad=0.02,
    extend="max"
)
cbar.ax.invert_yaxis()

# legend: usually best to show it only once to avoid clutter
handles, labels = axs[0].get_legend_handles_labels()
fig.legend(handles, labels, loc=(.62, .7), ncol=1, frameon=True)

axs[0].set_xlabel(r"Tritium - $ ^3\mathrm{H} \; [TU] $")
axs[0].set_ylabel(r"$ \mathrm{SF}_6 \; [pptv] $")
axs[0].grid(True, alpha=0.3)
axs[1].set_xlabel(r"Tritiogenic Helium - $ ^3\mathrm{He} \; [TU] $")
axs[1].grid(True, alpha=0.3)

plt.savefig("tracer_tracer_plots_benchmark.png", dpi=600)
../_images/90c5e87bde0cef4ffb9e759d732eba1a6dbf23b7562dd0b87bfbc1e27ad01333.png

4. Model Inversion with MCMC

# sample from parameter posterior
# create a solver
solver = ism.Solver(m)
# run MCMC
res = solver.dream_sample(
    n_samples=2000, # effective samples after burn-in and thinning
    burn_in=2000,
    thin=1,
    sigma=[1., 1., 0.5],
    random_state=42,
    return_sim=True,
    set_model_state=False,
    n_diff_pairs=1
)
print(res["gelman_rubin"])
{'dm.DP': 1.0293068701006136, 'dm.mtt': 1.0158922235193089}
res["posterior_map"]
{'dm.DP': 0.7907680280412412, 'dm.mtt': 78.20677860796225}
# plot chains to inspect convergence
fig, ax = plt.subplots(2, 1, figsize=(6, 6), sharex=True)

for i in range(res["samples_chain"].shape[2]):
    for j in range(res["samples_chain"].shape[0]):
        ax[i].plot(res["samples_chain"][j, :, i], c="k", lw=.5, alpha=0.4)
    ax[i].set_ylabel(m.param_keys(free_only=True)[i])
ax[-1].set_xlabel("Steps")
Text(0.5, 0, 'Steps')
../_images/28ffae543c65f92a7c9af4407c774b2626937d3c1f482333694654c86ea7dc19.png
# make dict from sample array
sample_df = pd.DataFrame({
    "DP": res["samples"][:, 0],
    "MTT": res["samples"][:, 1],
})

cmap = mpl.colormaps["Blues"]
colors = cmap(np.linspace(0, 1, 32))
c_blue = colors[22]

bins = 25

p = sns.jointplot(sample_df, x="DP", y="MTT", kind="kde", height=3.5, color="k", #cmap="Blues",
              fill=False, levels=7, linewidths=.5, thresh=0.02, zorder=100,
              marginal_kws={"color": c_blue, "fill": True})
p.plot_joint(sns.histplot, cmap="Blues", zorder=10, bins=bins)
p.set_axis_labels(r"Dispersion Parameter $ [-] $", r"Mean Travel Time $ [a] $")

ax = plt.gca()
ax.scatter([res["posterior_map"]["dm.DP"]], [res["posterior_map"]["dm.mtt"]], s=120,
           color="r", lw=.5, zorder=1000000, marker="o", edgecolor="k")
ax.scatter([0.8], [75.3], s=35, color="w", lw=.5, zorder=1000000, marker="v", edgecolor="k")

plt.savefig("joint_posterior_benchmark_2.png", dpi=600)
../_images/647a36adc9a009f07045db8730c24a24d83dcb714051370ac89b750158f26511.png
# Get indices of time series that have observations (shared across all)
obs_indices = np.arange(len(timestamps))[~np.isnan(obs_series).any(axis=1)]
idx_select = 0

# Get posterior simulations
sim_data = res["sims"][:, :, obs_indices[idx_select], :]
sim_data = sim_data.reshape((-1, sim_data.shape[-1]))
obs_data = obs_series[obs_indices[idx_select], :]

fig, ax = plt.subplots(1, 1, figsize=(3.5, 3.5))

ax.axhline(0, ls=":", c="k", lw=1.)
p = sns.violinplot(
    data=sim_data - obs_data, ax=ax, palette="Pastel1")
ax.set_xticklabels([r"$ ^3\mathrm{H} \; [TU] $", r"$ ^3\mathrm{He} \; [TU] $", r"$ \mathrm{SF}_6 \; [pptv] $"])
ax.set_ylabel(r"Sim. $ - $ Obs. $  [TU] $ or $ [pptv] $")

plt.savefig("errors_benchmark.png", dpi=600)
C:\Users\MRudolph\AppData\Local\Temp\ipykernel_10532\330341518.py:15: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([r"$ ^3\mathrm{H} \; [TU] $", r"$ ^3\mathrm{He} \; [TU] $", r"$ \mathrm{SF}_6 \; [pptv] $"])
../_images/f237a4ab558816081962da5dc5e38ae6a6b0970bcd49652ab84f8d8398357c13.png