Source code for epysurv.models.timepoint.glr

"""Count data regression charts for the monitoring of surveillance time series.

Method as proposed by Höhle and Paul (2008).
The implementation is described in Salmon et al. (2016).
"""
from dataclasses import dataclass
from typing import Tuple, Union

from rpy2 import robjects
from rpy2.robjects import r
from rpy2.robjects.packages import importr

from ._base import STSBasedAlgorithm

surveillance = importr("surveillance")


[docs]@dataclass class GLRNegativeBinomial(STSBasedAlgorithm): """ Generalized likelihood ratio algorithm using negative binomial distribution. Attributes ---------- alpha The (known) dispersion parameter of the negative binomial distribution, i.e. the parametrization of the negative binomial is such that the variance is mean + alpha ∗ mean2. Note: This parametrization is the inverse of the shape parametrization used in R – for example in dnbinom and glr.nb. Hence, if alpha=0 then the negative binomial distribution boils down to the Poisson distribution and a call of algo.glrnb is equivalent to a call to algo.glrpois. If alpha=NULL the parameter is calculated as part of the in-control estimation. However, the parameter is estimated only once from the first fit. Subsequent fittings are only for the parameters of the linear predictor with alpha fixed. glr_test_threshold Threshold in the GLR test, i.e. cγ. m Number of time instances back in time in the window-limited approach, i.e. the last value considered is max(1, n − m). To always look back until the first observation use -1. change A string specifying the type of the alternative. The two choices are "intercept" and "epi". direction Specifying the direction of testing in GLR scheme. - ("inc",) only increases in x are considered in the GLR-statistic - ("dec",) only decreases are regarded - ("inc", "dec") both increases and decreases are regarded. upperbound_statistic A string specifying the type of upperbound-statistic that is returned. - "cases" for the number of cases that would have been necessary to produce an alarm - "value" for the GLR-statistic x_max Maximum value to try for x to see if this is the upperbound number of cases before sounding an alarm (Default: 1e4). This only applies only when ``upperbound_statistic == "cases"``. References ---------- .. [1] Höhle, M. and Paul, M. (2008): Count data regression charts for the monitoring of surveillance time series. Computational Statistics and Data Analysis, 52 (9), 4357-4368. .. [2] Salmon, M., Schumacher, D. and Höhle, M. (2016): Monitoring count time series in R: Aberration detection in public health surveillance. Journal of Statistical Software, 70 (10), 1-35. doi: 10.18637/jss.v070.i10 """ alpha: float = 0 glr_test_threshold: int = 5 m: int = -1 change: str = "intercept" direction: Union[Tuple[str, str], Tuple[str]] = ("inc", "dec") upperbound_statistic: str = "cases" x_max: float = 1e4 def _call_surveillance_algo(self, sts, detection_range): control = r.list( **{ "range": detection_range, "c.ARL": self.glr_test_threshold, "m0": robjects.NULL, "alpha": self.alpha, # Mtilde is set to 1, since that is the only valid value for "epi" and "intercept" "Mtilde": 1, "M": self.m, "change": self.change, "theta": robjects.NULL, "dir": r.c(*self.direction), "ret": self.upperbound_statistic, "xMax": self.x_max, } ) surv = surveillance.glrnb(sts, control=control) return surv
[docs]@dataclass class GLRPoisson(STSBasedAlgorithm): """Generalized likelihood ratio algorithm using Poisson distribution. Attributes ---------- glr_test_threshold Threshold in the GLR test, i.e. cγ. m Number of time instances back in time in the window-limited approach, i.e. the last value considered is max(1, n − m). To always look back until the first observation use -1. change A string specifying the type of the alternative. The two choices are "intercept" and "epi". direction Specifying the direction of testing in GLR scheme. - ("inc",) only increases in x are considered in the GLR-statistic - ("dec",) only decreases are regarded - ("inc", "dec") both increases and decreases are regarded. upperbound_statistic a string specifying the type of upperbound-statistic that is returned. With "cases" the number of cases that would have been necessary to produce an alarm or with "value" the GLR-statistic is computed. References ---------- .. [1] Höhle, M. and Paul, M. (2008): Count data regression charts for the monitoring of surveillance time series. Computational Statistics and Data Analysis, 52 (9), 4357-4368. .. [2] Salmon, M., Schumacher, D. and Höhle, M. (2016): Monitoring count time series in R: Aberration detection in public health surveillance. Journal of Statistical Software, 70 (10), 1-35. doi: 10.18637/jss.v070.i10 """ glr_test_threshold: int = 5 """threshold in the GLR test, i.e. cγ.""" m: int = -1 """number of time instances back in time in the window-limited approach, i.e. the last value considered is max 1, n − M. To always look back until the first observation use M=-1.""" change: str = "intercept" """a string specifying the type of the alternative. Currently the two choices are intercept and epi. See the SFB Discussion Paper 500 for details""" direction: Union[Tuple[str, str], Tuple[str]] = ("inc", "dec") """Specifying the direction of testing in GLR scheme. With "inc" only increases in x are considered in the GLR-statistic, with "dec" decreases are regarded.""" upperbound_statistic: str = "cases" """a string specifying the type of upperbound-statistic that is returned. With "cases" the number of cases that would have been necessary to produce an alarm or with "value" the GLR-statistic is computed (see below)""" def _call_surveillance_algo(self, sts, detection_range): control = r.list( **{ "range": detection_range, "c.ARL": self.glr_test_threshold, "m0": robjects.NULL, # Mtilde is set to 1, since that is the only valid value for "epi" and "intercept" "Mtilde": 1, "M": self.m, "change": self.change, # Role of theta: If NULL then the GLR scheme is used. If not NULL the prespecified value for κ or λ is used in a recursive LR scheme, which is faster.""" "theta": robjects.NULL, "dir": r.c(*self.direction), "ret": self.upperbound_statistic, } ) surv = surveillance.glrpois(sts, control=control) return surv