Skip to content

Commit

Permalink
Shape estimators, weighted GP likelihood
Browse files Browse the repository at this point in the history
  • Loading branch information
lbelzile committed Jul 18, 2024
1 parent 93eb431 commit 132743e
Show file tree
Hide file tree
Showing 17 changed files with 720 additions and 34 deletions.
3 changes: 1 addition & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ Package: mev
Type: Package
Title: Modelling of Extreme Values
Version: 1.17
Authors@R: c(person(given="Leo", family="Belzile", role = c("aut", "cre"), email = "[email protected]", comment = c(ORCID = "0000-0002-9135-014X")), person(given="Jennifer L.", family="Wadsworth", role=c("aut")), person(given="Paul J.", family="Northrop", role=c("aut"), comment = c(ORCID = "0000-0002-1992-4882")), person(given="Scott D.", family="Grimshaw", role=c("aut"), comment = c(ORCID = "0000-0002-6326-9360
")), person(given="Jin", family="Zhang", role=c("ctb")), person(given="Michael A.", family="Stephens", role=c("ctb")), person(given="Art B.", family="Owen", role=c("ctb")), person(given="Raphael", family="Huser", role=c("aut"), comment = c(ORCID = "0000-0002-1228-2071")))
Authors@R: c(person(given="Leo", family="Belzile", role = c("aut", "cre"), email = "[email protected]", comment = c(ORCID = "0000-0002-9135-014X")), person(given="Jennifer L.", family="Wadsworth", role=c("aut")), person(given="Paul J.", family="Northrop", role=c("aut"), comment = c(ORCID = "0000-0002-1992-4882")), person(given="Scott D.", family="Grimshaw", role=c("aut"), comment = c(ORCID = "0000-0002-6326-9360")), person(given="Jin", family="Zhang", role=c("ctb")), person(given="Michael A.", family="Stephens", role=c("ctb")), person(given="Art B.", family="Owen", role=c("ctb")), person(given="Raphael", family="Huser", role=c("aut"), comment = c(ORCID = "0000-0002-1228-2071")))
Description: Various tools for the analysis of univariate, multivariate and functional extremes. Exact simulation from max-stable processes [Dombry, Engelke and Oesting (2016) <doi:10.1093/biomet/asw008>, R-Pareto processes for various parametric models, including Brown-Resnick (Wadsworth and Tawn, 2014, <doi:10.1093/biomet/ast042>) and Extremal Student (Thibaud and Opitz, 2015, <doi:10.1093/biomet/asv045>). Threshold selection methods, including Wadsworth (2016) <doi:10.1080/00401706.2014.998345>, and Northrop and Coleman (2014) <doi:10.1007/s10687-014-0183-z>. Multivariate extreme diagnostics. Estimation and likelihoods for univariate extremes, e.g., Coles (2001) <doi:10.1007/978-1-4471-3675-0>.
License: GPL-3
URL: https://lbelzile.github.io/mev/, https://github.com/lbelzile/mev/
Expand Down
9 changes: 9 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ export(.gpd_2D_fit)
export(Lambda2cov)
export(NC.diag)
export(PickandsXU)
export(Stein_weights)
export(W.diag)
export(angextrapo)
export(angmeas)
Expand Down Expand Up @@ -84,6 +85,8 @@ export(fit.gev)
export(fit.gpd)
export(fit.pp)
export(fit.rlarg)
export(fit.shape)
export(fit.wgpd)
export(gev.Fscore)
export(gev.Nyr)
export(gev.Vfun)
Expand Down Expand Up @@ -192,10 +195,16 @@ export(rparpcshr)
export(rrlarg)
export(schlather.vario)
export(scoreindep)
export(shape.genquant)
export(shape.hill)
export(shape.moment)
export(shape.pickands)
export(shape.pickandsxu)
export(smith.penult)
export(smith.penult.fn)
export(spline.corr)
export(spunif)
export(stein_gp_lik)
export(taildep)
export(tem.corr)
export(tstab.egp)
Expand Down
11 changes: 11 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
# mev 1.18 (Release date ???)

## New:

- Stein's weighted generalized Pareto estimator with `fit.wgpd`
- Multiple shape estimators (Hill, Pickands, moment estimator, generalized quantile) via `fit.shape` and specific routines `shape.moment`, `shape.pickands`, `shape.pickandsxu`, `shape.genquant` and `shape.hill`.

## Changes:

- Function `PickandsXU` is deprecated and replaced with `shape.pickandsxu`.

# mev 1.17 (Release date 2024-07-09)

## Fixes:
Expand Down
153 changes: 153 additions & 0 deletions R/Stein.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
#' Stein's weighted generalized Pareto likelihood
#'
#' Weighted composite log likelihood for generalized Pareto threshold
#' exceedances, with one weight per order statistic (largest first).
#'
#' @param pars vector of length 2 containing the scale and shape parameters
#' @param xdat vector of exceedances, in decreasing order if \code{sorted=TRUE}
#' @param weights vector of weights, matched to the order statistics (largest first)
#' @param ... additional arguments; \code{sorted} (logical) indicates that \code{xdat} is already sorted in decreasing order
#' @return the weighted log likelihood, or \code{-1e10} for parameters outside the support
#' @keywords internal
#' @export
#' @references Stein, M.L. (2023). A weighted composite log-likelihood approach to parametric estimation of the extreme quantiles of a distribution. \emph{Extremes} \bold{26}, 469-507 <doi:10.1007/s10687-023-00466-w>
stein_gp_lik <- function(pars, xdat, weights = rep(1, length(xdat)), ...){
 xdat <- as.numeric(xdat)
 pars <- as.numeric(pars) # strip names
 stopifnot(length(xdat) == length(weights))
 # Extract parameters
 scale <- pars[1]
 shape <- pars[2]
 # Get order statistics
 n <- length(xdat)
 args <- list(...)
 sorted <- isTRUE(args$sorted)
 if(!sorted){
   # Weights are rank-based, so align data with order statistics
   xdat <- sort(xdat, decreasing = TRUE)
 }
 if(xdat[n] <= 0){
  stop("Invalid vector of threshold exceedances \"xdat\".")
 }
 # Penalize parameters outside the support: scale must be positive,
 # shape > -1, and for negative shape the largest exceedance must lie
 # strictly inside [0, -scale/shape) to keep log1p finite.
 if(scale <= 0 || shape < -1 ||
    (shape < 0 && 1 + shape * xdat[1] / scale <= 0)){
   return(-1e10)
 }
 if(!isTRUE(all.equal(shape, 0, tol = 1e-5))){
  lp <- log1p(shape*xdat/scale)
 -sum(weights)*log(scale) -
    sum(weights * (seq_len(n)/shape + 1) * lp) +
    sum(weights[-n] * seq_len(n-1) * lp[-1])/shape
 } else {
  # Exponential limit as shape -> 0: the composite likelihood reduces to
  # that of the weighted normalized spacings i*(x_(i) - x_(i+1)), with
  # x_(n+1) = 0. For unit weights this is -n*log(scale) - sum(xdat)/scale.
  spacings <- -diff(c(xdat, 0))
  -sum(weights)*log(scale) -
    sum(weights * seq_len(n) * spacings)/scale
 }
}

#' Stein's vector of weights
#'
#' Computes a vector of decreasing weights for exceedances, with the
#' largest exceedance receiving the largest weight.
#'
#' @param n integer, sample size
#' @param gamma positive scalar controlling the power decay of the weights
#' @return a vector of positive weights of length \code{n}
#' @export
#' @keywords internal
#' @references Stein, M.L. (2023). A weighted composite log-likelihood approach to parametric estimation of the extreme quantiles of a distribution. \emph{Extremes} \bold{26}, 469-507 <doi:10.1007/s10687-023-00466-w>
Stein_weights <- function(n, gamma = 1){
  stopifnot(length(gamma) == 1, gamma > 0)
  n <- as.integer(n)
  stopifnot(length(n) == 1L, n > 0)
  # Empirical fractions 0/n, 1/n, ..., (n-1)/n
  frac <- (seq_len(n) - 1L) / n
  (gamma + 1) / gamma * (1 - frac^gamma)
}

#' Maximum likelihood estimation for weighted generalized Pareto distribution
#'
#' Weighted maximum likelihood estimation, with user-specified vector of weights.
#'
#' @export
#' @param xdat vector of observations
#' @param threshold numeric, value of the threshold
#' @param weightfun function whose first argument is the length of the weight vector; if the call fails, equal weights are used
#' @param ... additional arguments passed to \code{weightfun}
#' @return a list with components
#' \itemize{
#' \item \code{estimate} a vector containing the \code{scale} and \code{shape} parameters.
#' \item \code{std.err} a vector containing the standard errors, or \code{NA} if the information matrix is not invertible.
#' \item \code{vcov} the variance covariance matrix, obtained as the numerical inverse of the observed information matrix, or \code{NULL} if the latter cannot be computed.
#' \item \code{threshold} the threshold.
#' \item \code{nllh} the negative weighted log-likelihood evaluated at the parameter \code{estimate}.
#' \item \code{nat} number of points lying above the threshold.
#' \item \code{pat} proportion of points lying above the threshold.
#' \item \code{convergence} logical indicator of convergence.
#' \item \code{weights} vector of weights for exceedances.
#' \item \code{exceedances} excesses over the threshold, sorted in decreasing order.
#' }
#' @references Stein, M.L. (2023). A weighted composite log-likelihood approach to parametric estimation of the extreme quantiles of a distribution. \emph{Extremes} \bold{26}, 469-507 <doi:10.1007/s10687-023-00466-w>
fit.wgpd <- function(xdat, threshold = 0, weightfun = Stein_weights, ...){
 xdat <- as.numeric(xdat[is.finite(xdat)])
 ntot <- length(xdat)
 # Extract exceedances
 xdat <- xdat[xdat > threshold] - threshold
 # Need at least two exceedances for sd() below to be finite
 if(length(xdat) < 2L){
   stop("Insufficient number of threshold exceedances.")
 }
 stopifnot(is.function(weightfun))
 # Fall back to equal weights if the weight function call fails
 weights <- try(weightfun(length(xdat), ...), silent = TRUE)
 if(inherits(weights, "try-error")){
  weights <- rep(1, length(xdat))
 }
 stopifnot(length(xdat) == length(weights))
 # Scale data for numerical stability of the optimization
 sc <- sd(xdat)
 # Sort data once to avoid doing this repeatedly in the optimization
 xdat <- sort(xdat, decreasing = TRUE)
 exc <- xdat
 xdat <- exc/sc
 # Inequality constraints: positive scale, shape in (-1, ...], and for
 # negative shape the largest exceedance must be inside the support
 hin <- function(par, maxdat = NULL, thresh = 0, ...) {
   stopifnot(`Argument "maxdat" is missing, with no default value.` = !is.null(maxdat))
   c(par[1], par[2], if(par[2] < 0){
     thresh - par[1]/par[2] - maxdat
   } else {
     1e-05
   })
 }
 ineqLB <- c(0, -1, 0)
 ineqUB <- c(Inf, 10, Inf)
 LB <- c(0, -1)
 UB <- c(Inf, 2)
 start <- c(1, 0.1)
 opt <- Rsolnp::solnp(pars = start,
              fun = function(pars, ...){
                -stein_gp_lik(pars = pars, ...)},
              ineqfun = hin,
              ineqLB = ineqLB,
              ineqUB = ineqUB,
              LB = LB,
              UB = UB,
              xdat = xdat,
              maxdat = xdat[1],
              weights = weights,
              sorted = TRUE,
              control = list(trace = 0))
 # Scale back parameters to the original data scale
 mle <- opt$pars * c(sc, 1)
 se <- "fail"
 # Drop the rows/columns of the Hessian for the inequality constraints
 cov <- try(solve(opt$hessian[-(1:length(ineqLB)),-(1:length(ineqLB))]),
            silent = TRUE)
 if(!inherits(cov, "try-error")){
 se <- try(sqrt(diag(cov)),
           silent = TRUE)
 }
 if(!is.character(se)){
   if(isTRUE(all(is.finite(se))) & isTRUE(all(se > 0))){
     # Standard errors on the original data scale
     se <- se * c(sc, 1)
   }
 }
 if(is.character(se)){
   se <- rep(NA, 2)
   cov <- NULL
 }
return(list(
  estimate = mle,
  param = mle,
  std.err = se,
  vcov = cov,
  threshold = threshold,
  # Undo the data rescaling in the optimized objective value
  nllh = -(-opt$values[length(opt$values)] - sum(weights)*log(sc)),
  convergence = isTRUE(opt$convergence == 0),
  exceedances = xdat * sc,
  nat = length(xdat),
  pat = length(xdat)/ntot,
  weights = weights
 ))
}

Loading

0 comments on commit 132743e

Please sign in to comment.