Bayesian fitting and forecasting¶

In this notebook, we show how to make a Bayesian fit to estimate \(M\), \(\tau\), and production using pymc’s No-U-Turn Sampler.

This starts out very similar to the deterministic forecast.

[1]:

# If necessary, install pymc
# %pip install pymc arviz

[2]:

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pymc as pm
import seaborn as sns
from pymc.distributions.dist_math import SplineWrapper
from scipy.integrate import cumulative_trapezoid
from scipy.interpolate import UnivariateSpline, interp1d
from scipy.misc import derivative

from bluebonnet.flow import IdealReservoir

[3]:

# Get ideal gas recovery
t_end = 6
time = np.linspace(0, np.sqrt(t_end), 10_000) ** 2
res_ideal = IdealReservoir(50, 1000, 9000, None)
res_ideal.simulate(time)
rf_ideal = res_ideal.recovery_factor()
rf_func = interp1d(time, rf_ideal, bounds_error=False, fill_value=(0, rf_ideal[-1]))
rf_func2 = UnivariateSpline(time, rf_ideal, s=0)

[4]:

# Create test production to fit to
time_on_production = np.linspace(0.001, 2, 200)
tau_test = 2
M_test = 300
true_prod = M_test * derivative(rf_func, time_on_production / tau_test, dx=0.01)
production = true_prod * np.random.default_rng(42).normal(1, 0.1, size=len(time_on_production))
cum_production = cumulative_trapezoid(production, time_on_production, initial=0)

fig, ax = plt.subplots()
ax.plot(time_on_production, production, color="peru", label="Rate")
ax.plot(time_on_production, cum_production, color="steelblue", label="Cumulative")
ax.plot(
    time_on_production,
    M_test * rf_func2(time_on_production / tau_test),
    color="tab:green",
    label="Interpolated cumulative",
)
ax.set(
    xlabel="Time on production",
    ylabel="Rate or cumulative prod",
    xlim=(0, None),
    ylim=(0, None),
)
ax.legend()

/tmp/ipykernel_2642286/2562813304.py:5: DeprecationWarning: scipy.misc.derivative is deprecated in SciPy v1.10.0; and will be completely removed in SciPy v1.12.0. You may consider using findiff: https://github.com/maroba/findiff or numdifftools: https://github.com/pbrod/numdifftools
  true_prod = M_test * derivative(rf_func, time_on_production / tau_test, dx=0.01)

[4]:

<matplotlib.legend.Legend at 0x76dbead24fb0>

Introduce Bayesian inference¶

Here is the new part, where you specify the pymc model.

[5]:

with pm.Model() as model:
    M = pm.LogNormal("M", mu=np.log(cum_production[-1]))
    tau = pm.LogNormal("tau", mu=1)
    sigma = pm.HalfCauchy("sigma", 20)
    Np = pm.Deterministic("Np", M * SplineWrapper(rf_func2)(time_on_production / tau))
    cum = pm.Normal("cum", Np, sigma, observed=cum_production)
    idata = pm.sample(return_inferencedata=True)
    posterior_predictive = pm.sample_posterior_predictive(idata)

Sampling ... ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 0:00:00 / 0:00:00

Now we can plot the posterior estimates of M and tau:

[6]:

az.plot_trace(idata, ["M", "tau", "sigma"])
plt.tight_layout()

And compare to the data:

[7]:

p = idata.posterior
rng = np.random.default_rng(405)


fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(4 * 2 * 1.65, 4))
ax1.plot(
    time_on_production,
    az.extract(idata, num_samples=400)["Np"],
    alpha=0.1,
    color="peru",
)
ax1.plot(time_on_production, cum_production, color="steelblue", label="Cumulative")

ax1.set(
    xlim=(0, None),
    ylim=(0, None),
    xlabel="Time on production",
    ylabel="Cumulative production",
    xscale="squareroot",
    title="Posterior production curves",
)
ax1.legend()


ax2.plot(
    time_on_production,
    az.extract(posterior_predictive, group="posterior_predictive", num_samples=100)["cum"],
    color="steelblue",
)
ax2.plot(time_on_production, cum_production, color="peru")

ax2.set(
    xlim=(0, None),
    ylim=(0, None),
    xlabel="Time on production",
    ylabel="Cumulative production",
    xscale="squareroot",
    title="Posterior predictions",
)
sns.despine()