Interactive Example - ML Method: Mean of a Gaussian

(inspired by this tutorial)

In this section we have seen how the likelihood function is defined:

\[L(\theta)=f(x_1|\theta)\cdot f(x_2|\theta)\cdots f(x_{N_{evts}}|\theta)=\prod_{i = 1}^{N_{evts}} f(x_i|\theta).\]

We then noticed how, for convenience, in HEP we use the negative log-likelihood:

\[NLL(\theta) = - \sum_{i = 1}^{N_{evts}} \ln f(x_i|\theta)\]

for which we find the minimum.
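
For the Gaussian case treated below, where \(\sigma\) is taken as known, the minimum can also be found analytically, which provides a useful check on the numerical scan: dropping the terms that do not depend on \(\mu\),

\[NLL(\mu) = \sum_{i = 1}^{N_{evts}} \frac{(x_i - \mu)^2}{2\sigma^2} + \text{const}, \qquad \frac{\partial NLL}{\partial \mu} = 0 \;\Rightarrow\; \hat{\mu} = \frac{1}{N_{evts}} \sum_{i = 1}^{N_{evts}} x_i,\]

i.e. the ML estimator of the mean is simply the sample mean.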

We then saw how the \(\pm 1 \sigma\) uncertainty of a ML estimator can be found by checking where the NLL increases by 0.5 above its minimum.
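
For the parabolic NLL written above this rule can also be worked out in closed form (only as a check of the numerical scan done below):

\[NLL(\mu) - NLL(\hat{\mu}) = \frac{N_{evts}\,(\mu - \hat{\mu})^2}{2\sigma^2} = \frac{1}{2} \;\Rightarrow\; \mu = \hat{\mu} \pm \frac{\sigma}{\sqrt{N_{evts}}},\]

so the expected \(\pm 1 \sigma\) uncertainty shrinks like \(1/\sqrt{N_{evts}}\).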

Here we will apply these concepts to a simple case: the estimation of the mean of a Gaussian distribution.
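
Throughout, the model \(f(x|\theta)\) is the normal density with parameters \(\theta = (\mu, \sigma)\):

\[f(x|\mu, \sigma) = \frac{1}{\sigma\sqrt{2\pi}}\, e^{-\frac{(x - \mu)^2}{2\sigma^2}}.\]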

from dataclasses import dataclass
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import interactive
import ipywidgets as widgets

The likelihood and the NLL can be written as follows:

@dataclass
class Likelihood:
    function: callable
    data: np.ndarray

    def __call__(self, *params):
        return np.prod(self.function(self.data, *params))


class NLL(Likelihood):
    def __call__(self, *params):
        return -np.sum(np.log(self.function(self.data, *params)))

where, in the signature of __call__, any explicit reference to the data has been purposely avoided to emphasize that the likelihood is a function of the parameters only.

Our Gaussian probability density function can be written as follows:

def gaussian(x, mu, sigma):
    return (
        1 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-((x - mu) ** 2) / (2 * sigma ** 2))
    )
true_mu = np.random.uniform(-0.1, 0.1)
true_sigma = np.random.uniform(0.1, 1.0)

print(f"true mean: {true_mu:.3f}")
print(f"true standard deviation: {true_sigma:.3f}")
true mean: -0.092
true standard deviation: 0.549
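
Before building the interactive scan we can run a quick sanity check of the Likelihood and NLL classes (a minimal sketch, not part of the original notebook; the sample size of 500 and the shift of 0.3 are arbitrary): the NLL evaluated at the true mean should be smaller than at a displaced mean.

# Illustrative sanity check: the NLL should be smaller at the true mean
# than at a mean shifted away from it.
check_samples = np.random.normal(true_mu, true_sigma, 500)
check_nll = NLL(gaussian, check_samples)

print(f"NLL at true mu:    {check_nll(true_mu, true_sigma):.1f}")
print(f"NLL at shifted mu: {check_nll(true_mu + 0.3, true_sigma):.1f}")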
uncs = []  # (n_samples, uncertainty) pairs accumulated across widget updates
def draw(n_samples):
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 8))

    lkl_label = r"$L\left(\mu | \vec{x}\right)$"
    nll_label = r"$NLL\left(\mu | \vec{x}\right)$"

    fig.suptitle(f"{n_samples} samples")

    samples = np.random.normal(true_mu, true_sigma, n_samples)
    lkl = Likelihood(gaussian, samples)
    nll = NLL(gaussian, samples)

    n_mu  = 1000
    mus = np.linspace(-0.5, 0.5, n_mu)
    sigma = true_sigma

    # scan the likelihood and the NLL over the grid of mu values (sigma fixed to its true value)
    lkl_scan = np.array([lkl(mu, sigma) for mu in mus])
    nll_scan = np.array([nll(mu, sigma) for mu in mus])
    idx_min_nll_scan = np.argmin(nll_scan)
    mu_best = mus[idx_min_nll_scan]
    nll_scan -= nll_scan[idx_min_nll_scan]  # shift the minimum to 0
    # the +/- 1 sigma interval is delimited by the two crossings of the shifted NLL with 0.5
    crossings = np.argwhere(np.diff(np.sign(nll_scan - 0.5))).flatten()
    if len(crossings) >= 2:  # both crossings must fall inside the scanned mu range
        low_idx, high_idx = crossings[0], crossings[-1]
        unc = 0.5 * np.abs(mus[high_idx] - mus[low_idx])  # half-width ~ +/- 1 sigma uncertainty
        uncs.append((n_samples, unc))

    axes[0, 0].plot(mus, lkl_scan, label=lkl_label)
    axes[0, 0].set_xlim(-0.5, 0.5)
    axes[0, 0].plot(samples, np.zeros_like(samples), "k|", label=r"$\vec{x}$")
    axes[0, 0].set_ylabel(lkl_label)
    axes[0, 0].legend()
    
    axes[0, 1].axis('off')
    
    axes[1, 0].plot(mus, nll_scan, label=nll_label)
    axes[1, 0].set_xlim(-0.5, 0.5)
    axes[1, 0].set_ylim(0, 3)
    axes[1, 0].set_ylabel(nll_label)
    axes[1, 0].set_xlabel(r"$\mu$")
    axes[1, 0].axvline(mu_best, color="k", linestyle="--", label=r"$\mu_{best}$")
    axes[1, 0].axvline(true_mu, color="r", linestyle="--", label=r"$\mu_{true}$")
    axes[1, 0].axhline(0.5, color="k", linestyle="--", linewidth=0.5)
    axes[1, 0].legend()

    axes[1, 1].plot([u[0] for u in uncs], [u[1] for u in uncs], 'ro')
    axes[1, 1].set_xlim(0, 200)
    axes[1, 1].set_ylim(0, 0.5)
    axes[1, 1].set_ylabel("Uncertainty")
    axes[1, 1].set_xlabel(r"$N_{samples}$")

    plt.show();
slider_plot = interactive(draw, n_samples=widgets.IntSlider(min=1, max=200, step=1, value=50))
display(slider_plot)

From the plot on the bottom right (and from how the NLL curve gets narrower) you can see that the uncertainty on the estimated parameter decreases as the number of samples grows (i.e., more statistics implies a more precise estimation).
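
This is the \(1/\sqrt{N_{evts}}\) behaviour derived above. As an optional cross-check (a minimal sketch, not part of the original notebook; it assumes the slider has been moved a few times so that uncs contains several points), the scanned uncertainties can be overlaid with the analytic expectation \(\sigma/\sqrt{N}\):

# Illustrative cross-check: overlay the scanned uncertainties with sigma / sqrt(N)
if uncs:
    ns = np.linspace(1, 200, 200)
    plt.plot([u[0] for u in uncs], [u[1] for u in uncs], "ro", label="NLL scan")
    plt.plot(ns, true_sigma / np.sqrt(ns), "b--", label=r"$\sigma/\sqrt{N}$")
    plt.xlabel(r"$N_{samples}$")
    plt.ylabel("Uncertainty")
    plt.legend()
    plt.show()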