Source code for eval_model

"""Evaluation metrics for comparing model output to observed data.

This module provides functions to extract model predictions and compute
common regression-quality metrics (RMSE, MAE, R-squared) between the
simulated counts and ground-truth observations. The ``return_func_zoo``
dictionary maps string keys to these metric functions so that callers can
select a metric by name at runtime.
"""

import math
import numpy as np

from models.states import STATES
from sklearn.metrics import mean_squared_error, mean_absolute_error


[docs] def detected_active_counts(model, fit_column): """Extract the time-series of simulated counts for a given model column. Reads the model's output DataFrame and returns all rows after the first (index 0) as a NumPy array, matching the convention used by the optimisation routines. Args: model: A ``ModelM`` instance that has already been run. Must expose a ``get_df()`` method returning a pandas DataFrame with at least one column named ``fit_column``. fit_column (str): Name of the DataFrame column to extract (e.g. ``'I_d'`` for detected infectious individuals). Returns: numpy.ndarray: 1-D array of simulated counts, one entry per simulated day (the day-0 initialisation row is excluded). """ counts = model.get_df() counts = counts[fit_column][1:].to_numpy() return counts
[docs] def model_rmse(model, y_true, fit_column='I_d'): """Compute the root-mean-squared error between model output and observations. Args: model: A ``ModelM`` instance that has already been run. y_true (numpy.ndarray): Array of observed (ground-truth) values with the same length as the model output series. fit_column (str): Column in the model DataFrame to compare against ``y_true``. Defaults to ``'I_d'``. Returns: float: RMSE between the model-predicted counts and ``y_true``. """ infected_count = detected_active_counts(model, fit_column) return math.sqrt(mean_squared_error(y_true, infected_count))
[docs] def model_mae(model, y_true, fit_column='I_d'): """Compute the mean absolute error between model output and observations. Args: model: A ``ModelM`` instance that has already been run. y_true (numpy.ndarray): Array of observed (ground-truth) values with the same length as the model output series. fit_column (str): Column in the model DataFrame to compare against ``y_true``. Defaults to ``'I_d'``. Returns: float: MAE between the model-predicted counts and ``y_true``. """ infected_count = detected_active_counts(model, fit_column) return mean_absolute_error(y_true, infected_count)
[docs] def model_r_squared(model, y_true, fit_column='I_d'): """Compute a negated coefficient of determination (R²) for use as a loss. The value is negated so that minimising it corresponds to maximising the conventional R² fit quality. A small epsilon (``np.finfo(np.float32).eps``) is added to the total sum of squares to avoid division by zero when ``y_true`` is constant. Args: model: A ``ModelM`` instance that has already been run. y_true (numpy.ndarray): Array of observed (ground-truth) values with the same length as the model output series. fit_column (str): Column in the model DataFrame to compare against ``y_true``. Defaults to ``'I_d'``. Returns: float: Negated R² score, i.e. ``-(1 - RSS/TSS)``. A perfect fit returns ``0.0``; worse fits return increasingly negative values. """ infected_count = detected_active_counts(model, fit_column) y_mean = np.mean(y_true) tss = np.sum((y_true - y_mean) ** 2) + np.finfo(np.float32).eps rss = np.sum((y_true - infected_count) ** 2) return -(1 - rss / tss)
return_func_zoo = { 'rmse': model_rmse, 'mae': model_mae, 'r2': model_r_squared }