# Source code for eval_model
"""Evaluation metrics for comparing model output to observed data.
This module provides functions to extract model predictions and compute
common regression-quality metrics (RMSE, MAE, R-squared) between the
simulated counts and ground-truth observations. The ``return_func_zoo``
dictionary maps string keys to these metric functions so that callers can
select a metric by name at runtime.
"""
import math
import numpy as np
from models.states import STATES
from sklearn.metrics import mean_squared_error, mean_absolute_error
[docs]
def detected_active_counts(model, fit_column):
    """Return one simulated output column of a model as a NumPy array.

    The model's output DataFrame is fetched, the requested column is
    selected, and its first row (position 0) is dropped before conversion.

    Args:
        model: An already-run model object exposing ``get_df()``, which
            returns a pandas DataFrame containing a column named
            ``fit_column``.
        fit_column (str): Name of the DataFrame column to extract (e.g.
            ``'I_d'``).

    Returns:
        numpy.ndarray: Values of ``fit_column`` from the second row onward.
    """
    column = model.get_df()[fit_column]
    # Drop the leading (initialisation) row; only later rows are compared
    # against observations.
    return column[1:].to_numpy()
[docs]
def model_rmse(model, y_true, fit_column='I_d'):
    """Return the root-mean-squared error of the model against observations.

    Args:
        model: An already-run model object accepted by
            ``detected_active_counts``.
        y_true (numpy.ndarray): Observed values, one per entry of the
            extracted model series.
        fit_column (str): Model DataFrame column compared against
            ``y_true``. Defaults to ``'I_d'``.

    Returns:
        float: Square root of the mean squared error between ``y_true`` and
        the model series.
    """
    predicted = detected_active_counts(model, fit_column)
    mse = mean_squared_error(y_true, predicted)
    return math.sqrt(mse)
[docs]
def model_mae(model, y_true, fit_column='I_d'):
    """Return the mean absolute error of the model against observations.

    Args:
        model: An already-run model object accepted by
            ``detected_active_counts``.
        y_true (numpy.ndarray): Observed values, one per entry of the
            extracted model series.
        fit_column (str): Model DataFrame column compared against
            ``y_true``. Defaults to ``'I_d'``.

    Returns:
        float: Mean absolute error between ``y_true`` and the model series.
    """
    predicted = detected_active_counts(model, fit_column)
    mae = mean_absolute_error(y_true, predicted)
    return mae
[docs]
def model_r_squared(model, y_true, fit_column='I_d'):
    """Compute a negated coefficient of determination (R²) for use as a loss.

    The value is negated so that minimising it corresponds to maximising the
    conventional R² fit quality. A small epsilon (``np.finfo(np.float32).eps``)
    is added to the total sum of squares to avoid division by zero when
    ``y_true`` is constant; in that case any non-zero residual yields a very
    large loss.

    Args:
        model: An already-run model object accepted by
            ``detected_active_counts``.
        y_true (array-like): Observed (ground-truth) values with the same
            shape as the model output series.
        fit_column (str): Column in the model DataFrame to compare against
            ``y_true``. Defaults to ``'I_d'``.

    Returns:
        float: Negated R² score, i.e. ``-(1 - RSS/TSS)``, which equals
        ``RSS/TSS - 1``. A perfect fit returns ``-1.0`` (the minimum); a
        model no better than predicting the mean of ``y_true`` returns
        roughly ``0.0``; worse fits return increasingly large positive
        values.

    Raises:
        ValueError: If ``y_true`` and the model series differ in shape.
    """
    predicted = np.asarray(
        detected_active_counts(model, fit_column), dtype=float
    )
    observed = np.asarray(y_true, dtype=float)

    # Without this check NumPy would silently broadcast a length-1 or
    # column-shaped input and produce a meaningless score.
    if observed.shape != predicted.shape:
        raise ValueError(
            "y_true has shape {} but the model series for {!r} has shape {}"
            .format(observed.shape, fit_column, predicted.shape)
        )

    # Epsilon keeps the ratio finite when the observations are constant.
    tss = np.sum((observed - observed.mean()) ** 2) + np.finfo(np.float32).eps
    rss = np.sum((observed - predicted) ** 2)
    return -(1 - rss / tss)
# Registry of loss functions selectable by name. Every entry takes
# ``(model, y_true, fit_column='I_d')`` and returns a value where lower
# means a better fit (R² is negated for that reason).
return_func_zoo = {
    'rmse': model_rmse,
    'mae': model_mae,
    'r2': model_r_squared,
}