En este script se realiza un análisis de frecuencia para una serie de máximos anuales, ya sea de precipitaicones máximas o caudales instantáneos máximos. Las funciones de distribución consideradas son: Normal, Log Normal, Pearson III, Log Pearson III, Gamma y Gumbel.

Se cargan las librerías necesarias

library(tidyverse)
library(lubridate)
library(dplyr)
library(fitdistrplus)
library(PearsonDS)

Cargar mediciones diarias (debe ser un dataframe de dos columnas: Fecha y Valor)

load("Archivos R/Coltauco.Rda")

head(Coltauco)

## # A tibble: 6 x 2
##   Fecha      valor
##   <date>     <dbl>
## 1 1978-10-01     0
## 2 1978-10-02     0
## 3 1978-10-03     0
## 4 1978-10-04     0
## 5 1978-10-05     0
## 6 1978-10-06     0

Generar serie de máximos anuales

Con los paquetes lubridate, dplyr y tidiverse se construye la serie de máximos anuales en función de la serie de mediciones diarias entregada.

Coltauco <- Coltauco %>% 
  group_by(Fecha = floor_date(Fecha, "year")) %>% 
  summarise(PMax = max(valor, na.rm = TRUE))

Coltauco$Fecha <- year(Coltauco$Fecha)

head(Coltauco)

## # A tibble: 6 x 2
##   Fecha  PMax
##   <dbl> <dbl>
## 1  1978  39  
## 2  1979 100  
## 3  1980 129  
## 4  1981  55  
## 5  1982 129  
## 6  1983  72.5

plot(Coltauco, type = "h", xlab = "Año", ylab = "Precipitación máxima diaria [mm]")

Con el comando plotdist() se pueden ver las distribuciones empíricas de la serie de datos.

plotdist(Coltauco$PMax, histo=TRUE, demp=TRUE)

Ajuste de funciones de distribución

Las distribuciones se ajustan con el paquete fitdistrplus. Algunas de ellas, como Gumbel y Pearson III, requieren que se definan las funciones de densidad de forma previa a realizar el ajuste. Para las de tipo Pearson III se emplea el paquete PearsonDS, que entrega las definiciones de las funciones qpearsonIII(), dpearsonIII() y ppearsonIII().

Distribución Gumbel

dgumbel <- function(x, a, b) 1/b*exp((a-x)/b)*exp(-exp((a-x)/b))
pgumbel <- function(q, a, b) exp(-exp((a-q)/b))
qgumbel <- function(p, a, b) a-b*log(-log(p))

Gumbel <- fitdist(Coltauco$PMax, "gumbel", start = list(a=1, b=1))

Distribución Pearson III

Se definen las funciones de densidad, distribución y cuantiles

dPIII <- function(x, shape, location, scale) dpearsonIII(x, shape, location, scale, log=FALSE)
pPIII <- function(q, shape, location, scale) ppearsonIII(q, shape, location, scale, lower.tail = TRUE, log.p = FALSE)
qPIII <- function(p, shape, location, scale) qpearsonIII(p, shape, location, scale, lower.tail = TRUE, log.p = FALSE)

Funciones generadoras de momentos (Para ajuste por método de los momentos)

mPIII <- function(order, shape, location, scale) # compute the empirical first 3 moments of the PIII distribution
  {
  if(order == 1) return(location + shape * scale)
  if(order == 2) return(scale * scale * shape)
  if(order == 3) return(2 / sqrt(shape) * sign(scale))
  }

ePIII <- function(x, order) # compute (centered) empirical centered moments of the data
  {
  if(order == 1) return(mean(x))
  if(order == 2) return(var(x))
  if(order == 3) return(e1071::skewness(x, type = 2))
  }

Estadígrafos para Pearson III

m <- mean(Coltauco$PMax)
v <- var(Coltauco$PMax)
s <- sd(Coltauco$PMax)
g <- e1071::skewness(Coltauco$PMax, type=2)

Se generan los parámetros iniciales

# Correct the sample skew for bias using the recommendation of 
# Bobee, B. and R. Robitaille (1977). "The use of the Pearson Type 3 and Log Pearson Type 3 distributions revisited." 
# Water Resources Reseach 13(2): 427-443, as used by Kite

my_shape <- (2 / g) ^ 2
my_scale <- sqrt(v / my_shape) * sign(g) # modified as recommended by Carl Schwarz
my_location <- m - my_scale * my_shape

start <- list(shape = my_shape, location = my_location, scale = my_scale)

Finalmente, se ajusta la distribución Pearson III

PIII <- fitdist(Coltauco$PMax, "PIII", start = start, method = "mme", order = 1:3, memp = ePIII, control = list(maxit = 1000))

Distribución Log Pearson III

El proceso es idéntico al seguido para la distribución Pearson III. Sólo se utiliza el logaritmo de los datos.

dlPIII <- function(x, shape, location, scale) dpearsonIII(log(x), shape, location, scale, log=FALSE)/x
plPIII <- function(q, shape, location, scale) ppearsonIII(log(q), shape, location, scale, lower.tail = TRUE, log.p = FALSE)
qlPIII <- function(p, shape, location, scale) exp(qpearsonIII(p, shape, location, scale, lower.tail = TRUE, log.p = FALSE))

Funciones generadores de momentos (logarítmicas):

mlPIII <- function(order, shape, location, scale)
  {
  # compute the empirical first 3 moments of the PIII distribution
  if(order == 1) return(location + shape * scale)
  if(order == 2) return(scale * scale * shape)
  if(order == 3) return(2 / sqrt(shape) * sign(scale))
  }

elPIII <- function(x, order)
  {
  # compute (centered) empirical centered moments of the data
  if(order == 1) return(mean(log(x)))
  if(order == 2) return(var(log(x)))
  if(order == 3) return(e1071::skewness(log(x), type = 2))
  }

Estadígrafos para Log Pearson III

m <- mean(log(Coltauco$PMax))
v <- var(log(Coltauco$PMax))
s <- sd(log(Coltauco$PMax))
g <- e1071::skewness(log(Coltauco$PMax), type=2)

Se generan los parámetros iniciales para la estimación

# Correct the sample skew for bias using the recommendation of 
# Bobee, B. and R. Robitaille (1977). "The use of the Pearson Type 3 and Log Pearson Type 3 distributions revisited." 
# Water Resources Reseach 13(2): 427-443, as used by Kite

my_shape <- (2 / g) ^ 2
my_scale <- sqrt(v / my_shape) * sign(g)
my_location <- m - my_scale * my_shape

start <- list(shape = my_shape, location = my_location, scale = my_scale)

Finalmente, se ajusta la distribución Log Pearson III

LPIII <- fitdist(Coltauco$PMax, "lPIII", start = start, method = "mme", order = 1:3, memp = elPIII, control = list(maxit = 1000))

Distribuciones Gamma, Normal y Log Normal

Gamma <- fitdist(Coltauco$PMax, distr = "gamma", method = "mme")
Normal <- fitdist(Coltauco$PMax, distr = "norm", method = "mme")
LogNormal <- fitdist(Coltauco$PMax, distr = "lnorm", method = "mme")

Probabilidades de ocurrencia teóricas

A continuación, se agregan las probabilidades de ocurrencia teóricas de las distribuciones ajustadas.

Poc <- data.frame(
  "P" = sort(Coltauco$PMax, decreasing = TRUE),
  "Pexc" = (1:nrow(Coltauco))/(nrow(Coltauco)+1)
  )

Poc$Pocu <- 1 - Poc$Pexc

Poc$Normal <- pnorm(Poc$P, Normal$estimate[1], Normal$estimate[2])
Poc$LogNormal <- pnorm(log(Poc$P), LogNormal$estimate[1], LogNormal$estimate[2])
Poc$Gumbel <- pgumbel(Poc$P, Gumbel$estimate[1], Gumbel$estimate[2])
Poc$Gamma <- pgamma(Poc$P, Gamma$estimate[1], Gamma$estimate[2])
Poc$PIII <- pPIII(Poc$P, PIII$estimate[1], PIII$estimate[2], PIII$estimate[3])
Poc$LPIII <- plPIII(Poc$P, LPIII$estimate[1], LPIII$estimate[2], LPIII$estimate[3])

head(Poc)

##       P       Pexc      Pocu    Normal LogNormal    Gumbel     Gamma      PIII     LPIII
## 1 161.0 0.02325581 0.9767442 0.9791197 0.9584695 0.9515162 0.9616247 0.9689365 0.9506394
## 2 157.2 0.04651163 0.9534884 0.9733565 0.9530180 0.9453323 0.9555171 0.9626401 0.9440352
## 3 152.5 0.06976744 0.9302326 0.9644478 0.9452783 0.9366191 0.9467450 0.9533474 0.9347380
## 4 148.3 0.09302326 0.9069767 0.9545459 0.9372979 0.9277097 0.9376163 0.9434372 0.9252500
## 5 140.0 0.11627907 0.8837209 0.9285363 0.9180069 0.9064475 0.9153754 0.9186123 0.9027161
## 6 129.0 0.13953488 0.8604651 0.8781770 0.8833493 0.8690613 0.8754008 0.8727945 0.8635405

Evaluación de ajustes

Gráficos de densidad

denscomp(list(Gamma, Gumbel, Normal, LogNormal, PIII, LPIII), addlegend = TRUE, 
         legendtext = c("Gamma", "Gumbel", "Normal","LogNormal","Pearson III", "LogPearson III"))

Gráficos de densidad acumulada

cdfcomp(list(Gamma, Gumbel, Normal, LogNormal, PIII, LPIII), addlegend = TRUE, 
         legendtext = c("Gamma", "Gumbel", "Normal","LogNormal","Pearson III", "LogPearson III"))

par(mfrow=c(3,2))

cdfcomp(Normal, addlegend = FALSE, main = "Normal")
cdfcomp(LogNormal, addlegend = FALSE, main = "Log Normal")
cdfcomp(Gumbel, addlegend = FALSE, main = "Gumbel")
cdfcomp(Gamma, addlegend = FALSE, main = "Gamma")
cdfcomp(PIII, addlegend = FALSE, main = "Pearson III")
cdfcomp(LPIII, addlegend = FALSE, main = "Log Pearson III")

Se construyen los gráficos Q-Q para evaluar visualmente la calidad de los ajustes.

par(mfrow=c(3,2))

qqcomp(Normal, addlegend = FALSE, main = "Normal")
qqcomp(LogNormal, addlegend = FALSE, main = "Log Normal")
qqcomp(Gumbel, addlegend = FALSE, main = "Gumbel")
qqcomp(Gamma, addlegend = FALSE, main = "Gamma")
qqcomp(PIII, addlegend = FALSE, main = "Pearson III")
qqcomp(LPIII, addlegend = FALSE, main = "Log Pearson III")

Gráficos P-P

par(mfrow=c(3,2))

ppcomp(Normal, addlegend = FALSE, main = "Normal")
ppcomp(LogNormal, addlegend = FALSE, main = "Log Normal")
ppcomp(Gumbel, addlegend = FALSE, main = "Gumbel")
ppcomp(Gamma, addlegend = FALSE, main = "Gamma")
ppcomp(PIII, addlegend = FALSE, main = "Pearson III")
ppcomp(LPIII, addlegend = FALSE, main = "Log Pearson III")

Pruebas de bondad de ajuste

Mediante el comando gofstat() del paquete fitdistrplus se pueden ejecutar diversas pruebas de bondad de ajuste: Kolmogorov - Smirnov, Cramer - von Misses y Anderson - Darling.

GOF <- gofstat(list(Normal,LogNormal,Gumbel,Gamma, PIII, LPIII), 
               fitnames = c("Normal", "LogNormal", "Gumbel", "Gamma", "Pearson III", "LogPearson III"))

print(GOF)

## Goodness-of-fit statistics
##                                 Normal  LogNormal     Gumbel      Gamma Pearson III LogPearson III
## Kolmogorov-Smirnov statistic 0.1054531 0.09911765 0.07303247 0.08278532  0.09211181     0.06594981
## Cramer-von Mises statistic   0.0673508 0.11862008 0.05821080 0.05914570  0.04489579     0.04734267
## Anderson-Darling statistic   0.5071199 0.91324383 0.40406760 0.42461276  0.35993877     0.33290625
## 
## Goodness-of-fit criteria
##                                  Normal LogNormal   Gumbel    Gamma Pearson III LogPearson III
## Akaike's Information Criterion 426.0131  425.1907 424.0954 422.7658    426.4427       424.7974
## Bayesian Information Criterion 429.4884  428.6660 427.5708 426.2412    431.6557       430.0104

La ventaja de usar gofstat() es que, además de ejecutar las pruebas mencionadas anteriormente, también realiza la prueba Chi cuadrado, entregando la tabla generada, los valores de Chi y, además, los p-value.

GOF$chisq

##         Normal      LogNormal         Gumbel          Gamma    Pearson III LogPearson III 
##       4.261763       7.451795       2.917793       3.355632       3.091655       2.477523

knitr::kable(GOF$chisqpvalue)

	x
Normal	0.5123764
LogNormal	0.1891488
Gumbel	0.7126581
Gamma	0.6453358
Pearson III	0.5426060
LogPearson III	0.6486652

Se puede usar el comando ks.test() para realizar una prueba de Kolmogorov - Smirnov entre las probabilidades de ocurrencia empíricas y las teóricas de cada modelo. Los resultados de esta prueba pueden servir como comparación con los obtenidos mediante gofstat(), aunque nunca darán iguales.

ksnorm <- ks.test(Poc$Pocu, Poc$Normal)
kslnorm <- ks.test(Poc$Pocu, Poc$LogNormal)
ksgumbel <- ks.test(Poc$Pocu, Poc$Gumbel)
ksgamma <- ks.test(Poc$Pocu, Poc$Gamma)
kspiii <- ks.test(Poc$Pocu, Poc$PIII)
kslpiii <- ks.test(Poc$Pocu, Poc$LPIII)

KS <- data.frame(c(ksnorm$statistic, 
                   kslnorm$statistic, 
                   ksgumbel$statistic, 
                   ksgamma$statistic, 
                   kspiii$statistic, 
                   kslpiii$statistic),
                 c(ksnorm$p.value,
                   kslnorm$p.value,
                   ksgumbel$p.value,
                   ksgamma$p.value,
                   kspiii$p.value,
                   kslpiii$p.value))

colnames(KS) <- c("D", "P-Value")
rownames(KS) <- c("Normal", "LogNormal", "Gumbel", "Gamma", "Pearson III", "LogPearson III")

KS <- round(KS, 3)
head(KS)

##                    D P-Value
## Normal         0.119   0.927
## LogNormal      0.095   0.991
## Gumbel         0.071   1.000
## Gamma          0.095   0.991
## Pearson III    0.095   0.991
## LogPearson III 0.071   1.000

Magnitudes de retorno

En el siguiente gráfico se muestran las magnitudes de retorno para periodos de retorno de 2 a 500 años.

T <- 2:500
Pexc <- 1/T

Magnitudes <- data.frame(Pexc,
                         qnorm(1 - Pexc, Normal$estimate[1], Normal$estimate[2]),
                         exp(qnorm(1 - Pexc, LogNormal$estimate[1], LogNormal$estimate[2])),
                         qgumbel(1 - Pexc, Gumbel$estimate[1], Gumbel$estimate[2]),
                         qgamma(1 - Pexc, Gamma$estimate[1], Gamma$estimate[2]),
                         qPIII(1 - Pexc, PIII$estimate[1], PIII$estimate[2], PIII$estimate[3]),
                         qlPIII(1 - Pexc, LPIII$estimate[1], LPIII$estimate[2], LPIII$estimate[3]))

colnames(Magnitudes) <- c("Pexc", "Normal", "LogNormal", "Gumbel", "Gamma", "Pearson III", "LogPearson III")

matplot(Magnitudes[,2:7], type = c("l"), col = 1:6, ylab = "Precipitación Máxima Diaria [mm]", xlab = "T [años]")
legend("bottomright", 
       legend = c("Normal", "LogNormal", "Gumbel", "Gamma", "Pearson III", "LogPearson III"), 
       col=1:6, pch=15, cex = 0.8)

La siguiente tabla muestra las magnitudes de retorno para T = 2,5,10,25,50,100,200 y 500 años.

T <- c(2,5,10,25,50,100,200,500)

Resumen <- data.frame("T" = T,
                      qnorm(1 - 1/T, Normal$estimate[1], Normal$estimate[2]),
                      exp(qnorm(1 - 1/T, LogNormal$estimate[1], LogNormal$estimate[2])),
                      qgumbel(1 - 1/T, Gumbel$estimate[1], Gumbel$estimate[2]),
                      qgamma(1 - 1/T, Gamma$estimate[1], Gamma$estimate[2]),
                      qPIII(1 - 1/T, PIII$estimate[1], PIII$estimate[2], PIII$estimate[3]),
                      qlPIII(1 - 1/T, LPIII$estimate[1], LPIII$estimate[2], LPIII$estimate[3]))

colnames(Resumen) <- c("T", "Normal", "LogNormal", "Gumbel", "Gamma", "Pearson III", "LogPearson III")

knitr::kable(Resumen)

T	Normal	LogNormal	Gumbel	Gamma	Pearson III	LogPearson III
2	86.11429	79.19257	79.76671	80.93763	83.82689	79.84329
5	117.07135	111.76729	114.70512	114.58633	116.59381	115.97200
10	133.25316	133.82249	137.83739	135.32479	135.05367	139.12019
25	150.50918	162.15699	167.06508	159.93802	155.77063	167.28955
50	161.65663	183.57631	188.74784	177.24764	169.72868	187.47163
100	171.68355	205.24922	210.27051	193.78264	182.67394	206.98416
200	180.86014	227.31932	231.71464	209.72891	194.84837	225.98007
500	191.98073	257.26901	260.00610	230.10988	210.02449	250.46034

Análisis estacionario de frecuencias

Fernando Carreño

07-02-2022