Source code for epysurv.simulation.seasonal_noise

from dataclasses import dataclass
from typing import Optional, Sequence

import numpy as np
import pandas as pd
import rpy2.robjects.packages as rpackages
from epysurv.simulation.base import BaseSimulation
from epysurv.simulation.utils import add_date_time_index_to_frame, r_list_to_frame
from rpy2 import robjects
from scipy.stats import nbinom, poisson

surveillance = rpackages.importr("surveillance")


[docs]@dataclass class SeasonalNoisePoisson(BaseSimulation): r"""Simulation of an endemic time series based on a Poisson distribution. The mean of the Poisson distribution is modelled as: :math:`\mu(t) = \exp{(A\sin{(frequency \cdot \omega \cdot (t + \phi))} + \alpha + \beta \cdot t + K \cdot state)}` with :math:`\omega = \pi / 52`, :math:`A` being the amplitude, :math:`\beta` the trend parameter, :math:`t` the current week, and :math:`\theta` the seasonal move. Parameters ---------- amplitude Amplitude of the sine. Determines the range of simulated cases. alpha Parameter to move simulation along the y-axis (negative values are not allowed) with `alpha` >= `amplitude`. frequency Factor in oscillation term. Is multiplied with the annual term :math:`\omega` and the current time point. seasonal_move A term added to each time point :math:`t` to move the curve along the x-axis. seed Seed for the random number generation. trend Controls the influence of the current week on :math:`\mu`. References ---------- http://surveillance.r-forge.r-project.org/ """ alpha: float = 1.0 amplitude: float = 1.0 frequency: int = 1 seasonal_move: int = 0 seed: Optional[int] = None trend: float = 0.0
[docs] def simulate( self, length: int, state_weight: Optional[float] = None, state: Optional[Sequence[int]] = None, ) -> pd.DataFrame: r""" Simulate outbreaks. Parameters ---------- length Number of weeks to model. ``length`` is ignored if ``state`` is given. In this case the length of ``state`` is used. state Use a state chain to define the status at this time point (outbreak or not). If not given, a Markov chain is generated automatically. state_weight Additional weight for an outbreak which influences the distribution parameter :math:`\mu`. Returns ------- A ``DataFrame`` of an endemic time series where each row contains the case counts of this week. It also contains the mean case count value based on the underlying sinus model. """ if self.seed: base = robjects.packages.importr("base") base.set_seed(self.seed) simulated = surveillance.sim_seasonalNoise( A=self.amplitude, alpha=self.alpha, beta=self.trend, phi=self.seasonal_move, length=length, frequency=self.frequency, state=robjects.NULL if state is None else robjects.IntVector(state), K=robjects.NULL if state_weight is None else state_weight, ) simulated = r_list_to_frame(simulated, ["mu", "seasonalBackground"]) simulated = simulated.pipe(add_date_time_index_to_frame).rename( columns={"mu": "mean", "seasonalBackground": "n_cases"} ) return simulated
[docs]@dataclass class SeasonalNoiseNegativeBinomial(BaseSimulation): r"""A time series simulation that generates case counts based on a negative binomial model. The model is described by a mean :math:`\mu`, variance :math:`\phi \cdot \mu`, and a linear predictor including trend and seasonality determined by Fourier terms. :math:`\mu` of the model depends on the current week and is defined as follows: :math:`\mu(t) = \exp \left\{ \theta + \beta t + \sum_{j=1}^{m} \left\{ \gamma_{1} \cos (\frac{2\pi j t}{52}) + \gamma_{2} \sin (\frac{2\pi j t}{52}) \right\} \right\}` where :math:`t` is the current week, :math:`m` the seasonality length, :math:`\beta` equals to the trend parameter, :math:`\gamma` is a seasonality parameter, and :math:`\theta` is the baseline frequency of the cases. The simulation is then run using :math:`\mu` and the dispersion parameter :math:`\phi` to specify the negative binomial model we draw case counts from. Parameters ---------- baseline_frequency Baseline frequency of cases. dispersion Regulates the overdispersion compared to the Poisson distribution (:math:`\phi \cdot \mu`). seasonality_cos Seasonality parameter to model :math:`\cos` of the Fourier term. seasonality_sin seasonality parameter to model :math:`\sin` of the Fourier term. seasonality_length Models the annual-wise seasonality. 0 equals to no seasonality, 1 to annual seasonality, 2 to biannual seasonality and so forth. seed A seed for the random number generation. trend Controls the influence of the current week on :math:`\mu`. References ---------- .. [1] Noufaily, A., Enki, D.G., Farrington, C.P., Garthwaite, P., Andrews, N.J., Charlett, A. (2012): An improved algorithm for outbreak detection in multiple surveillance systems. Statistics in Medicine, 32 (7), 1206-1222. """ baseline_frequency: float = 1.5 dispersion: float = 1.0 seasonality_cos: float = 0.2 seasonality_sin: float = -0.4 seasonality_length: int = 1 seed: Optional[int] = None trend: float = 0.003 def _seasonality(self, week: int): """A Fourier-based seasonality term to model the season-depended case counts. Parameters ---------- week The week to model the season-based case count. """ years = np.arange(1, self.seasonality_length + 1) return np.sum( self.seasonality_cos * np.cos((2 * np.pi * years * week) / 52) + self.seasonality_sin * np.sin((2 * np.pi * years * week) / 52) )
[docs] def simulate(self, length: int) -> pd.DataFrame: r"""Simulate outbreaks. Parameter --------- length Number of weeks to model. Returns ------- A ``DataFrame`` of an endemic time series where each row contains the case counts ot this week. """ if self.seed: np.random.seed(self.seed) mu_s = [ np.exp( self.baseline_frequency + self.trend * week + self._seasonality(week) ) for week in range(length) ] if self.dispersion == 1: cases = [poisson.rvs(mu, size=1)[0] for mu in mu_s] else: cases = [] for mu in mu_s: r = np.float(mu / (self.dispersion - 1)) p = r / (r + mu) cases.append(nbinom.rvs(r, p, size=1)[0]) return ( pd.DataFrame({"n_cases": cases}) .pipe(add_date_time_index_to_frame) .assign(timestep=list(range(1, length + 1))) )