class DisposalRate(DevelopmentBase):
    """
    Calculates the bottom of a fitted full_triangle_ using the Disposal Rate
    method described by Friedland.

    Observed disposal rates are computed as the cumulative triangle divided by
    the ultimate supplied through ``sample_weight``; fitted rates are then
    estimated from them and used to run the triangle off.

    Parameters
    ----------
    n_periods: integer, optional (default = -1)
        number of origin periods to be used in the ldf average calculation. For
        all origin periods, set n_periods = -1
    average: str or list of str (default = 'volume')
        Averaging method used when fitting the disposal rates. It is stored on
        the estimator and is not read directly in this class; accepted values
        are assumed to match ``Development`` (e.g. 'volume', 'simple') --
        confirm against ``DevelopmentBase.fit``.
    drop: tuple or list of tuples
        Drops specific origin/development combination(s). See order of operations
        below when combined with multiple drop parameters.
    drop_high: bool, int, list of bools, or list of ints (default = None)
        Drops highest (by rank) link ratio(s) from LDF calculation
        If a boolean variable is passed, drop_high is set to 1, dropping only the
        highest value. Protected by ``preserve``.
        See order of operations below when combined with multiple drop parameters.
    drop_low: bool, int, list of bools, or list of ints (default = None)
        Drops lowest (by rank) link ratio(s) from LDF calculation
        If a boolean variable is passed, drop_low is set to 1, dropping only the
        lowest value. Protected by ``preserve``.
        See order of operations below when combined with multiple drop parameters.
    drop_above: float or list of floats (default = numpy.inf)
        Drops all link ratio(s) above the given parameter from the LDF calculation.
        Protected by ``preserve``.
        See order of operations below when combined with multiple drop parameters.
    drop_below: float or list of floats (default = 0.00)
        Drops all link ratio(s) below the given parameter from the LDF calculation.
        Protected by ``preserve``.
        See order of operations below when combined with multiple drop parameters.
    preserve: int (default = 1)
        The minimum number of link ratio(s) required for LDF calculation.
        See order of operations below when combined with multiple drop parameters.
    drop_valuation: str or list of str (default = None)
        Drops specific valuation periods. str must be date convertible.
        See order of operations below when combined with multiple drop parameters.

    .. note ::

        (Order of Drop Operations)

        When multiple drop parameters are used together, the weights are built
        in this order (steps 4 and 5 are reversed from `Development`):

        1. ``n_periods`` — limit to the most recent origin periods.
        2. ``drop`` — remove specific origin/development cells.
        3. ``drop_valuation`` — remove entire valuation diagonal in the triangle.
        4. ``drop_above`` / ``drop_below`` — remove link ratios outside a range
           (protected by ``preserve``: if too few ratios would remain, this
           step is skipped).
        5. ``drop_high`` / ``drop_low`` — remove highest/lowest link ratios by rank
           (eligible factors from ``n_periods`` are used; protected by
           ``preserve``: if too few ratios would remain, this step is skipped).
        6. Calculate the loss development factors using ``average`` method.

    Attributes
    ----------
    disposal_rate_tri: Triangle
        actual disposal rates by origin and development

    disposal_: Triangle
        fitted disposal rates, with a final development column (labelled 9999)
        equal to 1

    incr_disposal_: Triangle
        incremental of disposal_

    w_: array
        weights produced by ``TriangleWeight`` from the drop parameters

    Examples
    --------
    A usage sketch (not executed as a doctest). ``sample_weight`` is required
    and carries the ultimate for each origin period.

    .. code-block:: python

        import chainladder as cl

        tri = cl.load_sample("friedland_gl_insurer")["Closed Claim Counts"]
        ult = ...  # Triangle holding one ultimate per origin period
        dr = cl.DisposalRate(n_periods=5, average="simple").fit_transform(
            X=tri, sample_weight=ult
        )
        dr.disposal_       # fitted cumulative disposal rates
        dr.incr_disposal_  # incremental disposal rates

    """

    def __init__(
        self,
        n_periods: int = -1,
        average: str | list[str] = 'volume',
        drop: tuple | list[tuple] | None = None,
        drop_high: bool | int | list[bool] | list[int] | None = None,
        drop_low: bool | int | list[bool] | list[int] | None = None,
        preserve: int = 1,
        drop_valuation: str | list[str] | None = None,
        drop_above: float = np.inf,
        drop_below: float = 0.00,
    ):
        self.n_periods = n_periods
        self.average = average
        self.drop_high = drop_high
        self.drop_low = drop_low
        self.preserve = preserve
        self.drop_valuation = drop_valuation
        self.drop_above = drop_above
        self.drop_below = drop_below
        self.drop = drop

    def fit(
        self,
        X: Triangle,
        y: None = None,
        sample_weight: Triangle | None = None,
    ):
        """
        Estimate disposal rates for a given Triangle and ultimate.

        Parameters
        ----------
        X : Triangle
            Triangle to which the Disposal Rate method is applied
        y : None
            Ignored
        sample_weight : Triangle
            Ultimate

        Returns
        -------
        self : object
            Returns the instance itself.

        Raises
        ------
        ValueError
            If ``sample_weight`` is not supplied.
        """
        if sample_weight is None:
            raise ValueError("sample_weight is required.")
        # Sparse inputs are moved to the numpy backend; everything else is
        # copied so the caller's triangles are not the objects worked on.
        if X.array_backend == "sparse":
            X = X.set_backend("numpy").incr_to_cum()
        else:
            X = X.copy().incr_to_cum()
        if sample_weight.array_backend == "sparse":
            ult = sample_weight.set_backend("numpy")
        else:
            ult = sample_weight.copy()
        self.xp = X.get_array_module()
        # Observed disposal rate = cumulative value / ultimate.
        self.disposal_rate_tri = X / ult.values
        tw = TriangleWeight(
            n_periods=self.n_periods,
            drop_high=self.drop_high,
            drop_low=self.drop_low,
            drop_above=self.drop_above,
            drop_below=self.drop_below,
            drop_valuation=self.drop_valuation,
            preserve=self.preserve,
            drop=self.drop,
        )
        # Weights already attached to X are applied before the drop rules.
        if hasattr(X, "w_"):
            weight_basis = self.disposal_rate_tri * X.w_
        else:
            weight_basis = self.disposal_rate_tri
        self.w_ = tw.fit(X=weight_basis).w_.values
        # The base-class fit populates ``params_`` (its slope is read below).
        super().fit(ult.values, X.values, self.w_)
        self.disposal_ = self._param_property(
            self.disposal_rate_tri,
            self.params_.slope_[..., 0][..., None, :],
        )
        # Append a closing development column (labelled 9999) of ult / ult.
        closing = (ult / ult).iloc[:, :, 0, :].rename("development", [9999])
        self.disposal_ = concat((self.disposal_, closing), axis=3)
        self.disposal_.is_cumulative = True
        # is_pattern is switched off for the cum_to_incr() call and restored
        # afterwards -- presumably so the conversion takes differences rather
        # than ratios; confirm against Triangle.cum_to_incr.
        self.disposal_.is_pattern = False
        self.incr_disposal_ = self.disposal_.cum_to_incr()
        self.incr_disposal_.is_pattern = True
        self.disposal_.is_pattern = True
        return self

    def transform(
        self,
        X: Triangle,
        sample_weight: Triangle | None = None,
    ) -> Triangle:
        """
        Attach the fitted disposal rates to a copy of X and derive ``ldf_``
        from the disposal-rate run-off.

        Parameters
        ----------
        X: Triangle
            The triangle to be transformed

        sample_weight: Triangle
            Ultimate

        Returns
        -------
        X_new: Triangle
            Deep copy of X carrying ``disposal_rate_tri``, ``disposal_``,
            ``incr_disposal_``, ``ultimate_`` and ``ldf_``.

        Raises
        ------
        ValueError
            If ``sample_weight`` is not supplied.
        """
        if sample_weight is None:
            raise ValueError("sample_weight is required.")
        X_new = copy.deepcopy(X)
        X_new.disposal_rate_tri = self.disposal_rate_tri
        X_new.disposal_ = self.disposal_
        X_new.incr_disposal_ = self.incr_disposal_
        X_new.ultimate_ = sample_weight.latest_diagonal
        # One minus the fitted disposal rate aligned to the observed triangle.
        ibnr_pct = 1 - X_new.disposal_.align_pattern(X_new.disposal_rate_tri)
        run_off = X_new.incr_disposal_ / ibnr_pct * X_new.ibnr_
        # Keep only cells valued after the triangle's valuation date.
        run_off = run_off[run_off.valuation > X_new.valuation_date]
        X_new.ldf_ = (X_new.cum_to_incr() + run_off).incr_to_cum().age_to_age
        return X_new

    def fit_transform(self, X, y=None, sample_weight=None):
        """
        Fit the disposal rates on X and return the transformed triangle.

        Parameters
        ----------
        X : Triangle
            Loss data to which the model will be applied.

        y : None
            Ignored

        sample_weight : Triangle, default=None
            Ultimate

        Returns
        -------
        X_new: Triangle
            Result of ``transform`` applied to X with the same ``sample_weight``.
        """
        return self.fit(X, y, sample_weight).transform(X, sample_weight=sample_weight)
def test_disposal():
    """Check DisposalRate output against the expected figures hard-coded below.

    Comparisons are two-sided: a plain ``actual - expected <= tol`` check
    passes for any under-estimate (including all zeros) and so cannot detect
    a regression in that direction.
    """
    tri = cl.load_sample('friedland_gl_insurer')['Closed Claim Counts']
    ult_tri = cl.Triangle(
        data={
            'Closed Claim Counts': [873, 720, 626, 629, 588, 553, 438, 609],
            'ay': [2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008],
            'dev': [2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008],
        },
        origin='ay',
        development='dev',
        columns='Closed Claim Counts',
        cumulative=True,
    )
    dr = cl.DisposalRate(
        n_periods=5, average='simple', drop_high=1, drop_low=1
    ).fit_transform(X=tri, sample_weight=ult_tri)

    # Fitted cumulative disposal rates, same 0.001 tolerance as before.
    expected_disposal = np.array(
        [.200, .433, .585, .710, .791, .862, .882, .912, 1.000]
    )
    actual_disposal = dr.disposal_.round(3).values.flatten()
    np.testing.assert_allclose(
        actual_disposal, expected_disposal, rtol=0, atol=0.001
    )

    # Projected incremental counts, i.e. the cells added below the diagonal.
    lhs = (dr.full_triangle_.cum_to_incr() - tri.cum_to_incr()).round(0).values.flatten()
    rhs = np.array([
        77.,
        24., 70.,
        12., 18., 54.,
        46., 13., 19., 57.,
        52., 45., 13., 19., 56.,
        76., 49., 43., 12., 18., 54.,
        67., 55., 36., 31., 9., 13., 39.,
        140., 91., 75., 49., 42., 12., 18., 53.
    ])
    np.testing.assert_allclose(lhs[~np.isnan(lhs)], rhs, rtol=0, atol=1)
@staticmethod
def _param_property(
    X: "Triangle",
    params: np.ndarray
) -> "Triangle":
    """
    Wrap an array of estimated parameters in a Triangle.

    The result is built from the slice of ``X`` at its earliest origin, with
    its values replaced by ``params``.

    Parameters
    ----------
    X: Triangle
        The Triangle to wrap the parameters with

    params: np.ndarray
        The parameters to be wrapped; assigned to ``values`` as-is, with no
        shape check here.

    Returns
    -------
    Triangle
        The wrapped parameters, flagged as an additive, non-cumulative
        pattern valued at ``options.ULT_VAL``.

    """
    # Imported at call time, as in the original implementation.
    from chainladder import options

    # NOTE(review): relies on module-level ``pd``; this helper was moved here
    # from incremental.py -- confirm this module imports pandas.
    obj: "Triangle" = X[X.origin == X.origin.min()]
    obj.values = params
    obj.valuation_date = pd.to_datetime(options.ULT_VAL)
    obj.is_pattern = True
    obj.is_additive = True
    obj.is_cumulative = False
    # Virtual columns of the source triangle are cleared on the result.
    obj.virtual_columns.columns = {}
    obj._set_slicers()
    return obj
b/chainladder/tests/test_public_api.py @@ -56,6 +56,7 @@ "ParallelogramOLF", "Trend", "TrendConstant", + "DisposalRate", # tails "TailBase", "TailConstant",