Interactive Example - ML Method: Mean of a Gaussian
(inspired by this tutorial)
In this section we have seen how the likelihood function is defined:

$$ L(\vec{\theta} \,|\, \vec{x}) = \prod_{i=1}^{N} f(x_i; \vec{\theta}) $$
We then noticed how, for convenience, in HEP we use the negative log-likelihood,

$$ \mathrm{NLL}(\vec{\theta} \,|\, \vec{x}) = -\ln L(\vec{\theta} \,|\, \vec{x}) = -\sum_{i=1}^{N} \ln f(x_i; \vec{\theta}), $$

for which we find the minimum.
We then saw how the uncertainty of an ML estimator at \(\pm 1 \sigma\) can be found by checking where the NLL increases from its minimum by 0.5.
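In formulas (restating the rule above, with \(\hat{\mu}\) denoting the ML estimate and \(\sigma_{\hat{\mu}}\) its uncertainty), the interval endpoints satisfy

$$ \mathrm{NLL}(\hat{\mu} \pm \sigma_{\hat{\mu}}) - \mathrm{NLL}(\hat{\mu}) = \frac{1}{2}, $$

which is exactly the horizontal line at 0.5 drawn in the plots below.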
Here we will apply these simple concepts to a basic example: the estimation of the mean of a Gaussian distribution.
from dataclasses import dataclass
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import interactive
import ipywidgets as widgets
The likelihood and the NLL can be written as follows:
@dataclass
class Likelihood:
    function: callable
    data: np.ndarray

    def __call__(self, *params):
        return np.prod(self.function(self.data, *params))


class NLL(Likelihood):
    def __call__(self, *params):
        return -np.sum(np.log(self.function(self.data, *params)))
where in the signature of __call__ every reference to the data has been purposely avoided, to emphasize that the likelihood is a function of the parameters only (a quick numerical check of these classes follows the Gaussian definition below).
Our Gaussian probability density function can be written as follows:
def gaussian(x, mu, sigma):
    return (
        1 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-((x - mu) ** 2) / (2 * sigma ** 2))
    )
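As a quick sanity check (not part of the original notebook; the toy dataset and the parameter values below are arbitrary choices for illustration), we can verify that the NLL object really returns minus the logarithm of the likelihood when both are evaluated with the Gaussian defined above:

toy_data = np.array([-0.2, 0.1, 0.3])  # arbitrary toy dataset
toy_lkl = Likelihood(gaussian, toy_data)
toy_nll = NLL(gaussian, toy_data)
print(toy_lkl(0.0, 1.0))   # product of the Gaussian densities at mu=0, sigma=1
print(toy_nll(0.0, 1.0))   # sum of the negative log-densities at the same point
print(np.isclose(toy_nll(0.0, 1.0), -np.log(toy_lkl(0.0, 1.0))))  # should print True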
true_mu = np.random.uniform(-0.1, 0.1)
true_sigma = np.random.uniform(0.1, 1.0)
print(f"true mean: {true_mu:.3f}")
print(f"true standard deviation: {true_sigma:.3f}")
true mean: 0.072
true standard deviation: 0.262
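For this simple case the minimum of the NLL can also be found analytically (a standard result, useful as a cross-check of the numerical scan below): setting the derivative of the NLL with respect to \(\mu\) to zero gives the sample mean, and the 0.5 rule reproduces the familiar uncertainty of the mean,

$$ \hat{\mu} = \frac{1}{N}\sum_{i=1}^{N} x_i, \qquad \mathrm{NLL}(\mu) - \mathrm{NLL}(\hat{\mu}) = \frac{N(\mu - \hat{\mu})^2}{2\sigma^2} = \frac{1}{2} \;\Rightarrow\; \sigma_{\hat{\mu}} = \frac{\sigma}{\sqrt{N}}. $$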
uncs = []
def draw(n_samples):
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 8))
    axes[0, 0].clear()
    axes[1, 0].clear()
    lkl_label = r"$L\left(\mu | \vec{x}\right)$"
    nll_label = r"$NLL\left(\mu | \vec{x}\right)$"
    fig.suptitle(f"{n_samples} samples")

    # draw the samples and build the likelihood and the NLL
    samples = np.random.normal(true_mu, true_sigma, n_samples)
    lkl = Likelihood(gaussian, samples)
    nll = NLL(gaussian, samples)

    # scan both as a function of mu, with sigma fixed to its true value
    n_mu = 1000
    mus = np.linspace(-0.5, 0.5, n_mu)
    sigma = true_sigma
    lkl_scan = np.array([lkl(mu, sigma) for mu in mus])
    nll_scan = np.array([nll(mu, sigma) for mu in mus])

    # best estimate: the mu that minimizes the NLL
    idx_min_nll_scan = np.argmin(nll_scan)
    mu_best = mus[idx_min_nll_scan]
    nll_scan -= nll_scan[idx_min_nll_scan]  # move minimum to 0

    # +-1 sigma interval: where the shifted NLL crosses 0.5
    arr_min_nll_ = np.ones(n_mu) * 0.5
    low_idx, high_idx = np.argwhere(np.diff(np.sign(nll_scan - arr_min_nll_))).flatten()
    unc = np.abs(mus[high_idx] - mus[low_idx])  # full width of the +-1 sigma interval
    mu_low = abs(mu_best - mus[low_idx])        # distances from the best fit
    mu_high = abs(mu_best - mus[high_idx])      # to the two crossings
    uncs.append((n_samples, unc))

    # top left: likelihood scan and the observed samples
    axes[0, 0].plot(mus, lkl_scan, label=lkl_label)
    axes[0, 0].set_xlim(-0.5, 0.5)
    axes[0, 0].plot(samples, np.zeros_like(samples), "k|", label=r"$\vec{x}$")
    axes[0, 0].set_ylabel(lkl_label)
    axes[0, 0].legend()
    axes[0, 1].axis('off')

    # bottom left: NLL scan with best-fit and true mu, plus the 0.5 line
    axes[1, 0].plot(mus, nll_scan, label=nll_label)
    axes[1, 0].set_xlim(-0.5, 0.5)
    axes[1, 0].set_ylim(0, 3)
    axes[1, 0].set_ylabel(nll_label)
    axes[1, 0].set_xlabel(r"$\mu$")
    axes[1, 0].axvline(mu_best, color="k", linestyle="--", label=r"$\mu_{best}$")
    axes[1, 0].axvline(true_mu, color="r", linestyle="--", label=r"$\mu_{true}$")
    axes[1, 0].axhline(0.5, color="k", linestyle="--", linewidth=0.5)
    axes[1, 0].legend()

    # bottom right: uncertainties accumulated over the slider history vs number of samples
    axes[1, 1].plot([u[0] for u in uncs], [u[1] for u in uncs], 'ro')
    axes[1, 1].set_xlim(0, 200)
    axes[1, 1].set_ylim(0, 0.5)
    axes[1, 1].set_ylabel("Uncertainty")
    axes[1, 1].set_xlabel(r"$N_{samples}$")
    plt.show();
slider_plot = interactive(draw, n_samples=widgets.IntSlider(min=1, max=200, step=1, value=50))
display(slider_plot)
From the plot on the bottom right (and from the way the NLL narrows), you can see that the uncertainty on the estimated parameter decreases as the number of samples grows (i.e., more statistics means a more precise estimate).
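If you prefer a non-interactive cross-check (a minimal sketch, not part of the widget above; the sample sizes and the wider scan range are arbitrary choices), you can repeat the same scan for a few fixed sample sizes and compare the width of the interval with the analytic expectation \(2\sigma/\sqrt{N}\) for the full \(\pm 1 \sigma\) width:

for n in (10, 50, 200):
    samples = np.random.normal(true_mu, true_sigma, n)
    nll = NLL(gaussian, samples)
    mus = np.linspace(-1.5, 1.5, 3000)                   # wider range than the widget, to be safe for small N
    scan = np.array([nll(mu, true_sigma) for mu in mus])
    scan -= scan.min()                                   # shift the minimum to 0
    crossings = mus[np.argwhere(np.diff(np.sign(scan - 0.5))).flatten()]
    width = crossings[-1] - crossings[0]                 # full width of the +-1 sigma interval
    print(f"N={n:4d}  scan width: {width:.3f}  expected 2*sigma/sqrt(N): {2 * true_sigma / np.sqrt(n):.3f}")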