library(sp)
library(spacetime)
library(gstat)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(lattice)
library(minpack.lm)
library(SpatFD)
library(tidyr)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
data(DE_RB_2005)
We select a specific date for the purely spatial (cross-sectional) analysis and define the coordinate reference system.
crs_utm32 <- CRS("+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs")
proj4string(DE_RB_2005) <- crs_utm32
## Warning in CPL_crs_from_input(x): GDAL Message 1: +init=epsg:XXXX syntax is
## deprecated. It might return a CRS with a non-EPSG compliant axis order.
## Warning in `proj4string<-`(`*tmp*`, value = value): A new CRS was assigned to an object with an existing CRS:
## +init=epsg:32632 +proj=utm +zone=32 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0
## without reprojecting.
## For reprojection, use function spTransform
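The warning only states that assigning a CRS does not reproject the coordinates; that is intentional here, since the data are already stored in UTM zone 32N. If an actual reprojection were ever needed, a minimal sketch with spTransform (to a hypothetical geographic CRS, not used in this analysis) would be:
# Hypothetical sketch: reproject the station locations to geographic WGS84.
# Shown only because the warning mentions spTransform; not needed for this analysis.
crs_wgs84 <- CRS("+proj=longlat +datum=WGS84 +no_defs")
stations_ll <- spTransform(DE_RB_2005@sp, crs_wgs84)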
fecha_target <- as.POSIXct("2005-10-09", tz = "UTC")
pm10_1hora <- DE_RB_2005[, fecha_target]
pm10_1hora <- pm10_1hora[!is.na(pm10_1hora$PM10), ]
pm10_1hora$Log_PM10 <- log(pm10_1hora$PM10)
head(pm10_1hora@data)
## PM10 station_altitude station_european_code country_iso_code
## 19390 40.000 8 DESH001 DE
## 19391 54.000 3 DENI063 DE
## 19392 23.708 700 DEBY109 DE
## 19394 33.125 35 DEBE056 DE
## 19395 35.083 50 DEBE032 DE
## 19396 26.792 343 DEHE046 DE
## station_start_date station_end_date type_of_station station_type_of_area
## 19390 1979-07-31 Background rural
## 19391 1999-02-11 Background rural
## 19392 2003-04-17 Background rural
## 19394 1994-02-01 Background rural
## 19395 1986-10-01 Background rural
## 19396 1999-05-11 Background rural
## street_type annual_mean_PM10 Log_PM10
## 19390 Unknown 20.94724 3.688879
## 19391 21.77014 3.988984
## 19392 16.51371 3.165813
## 19394 23.25704 3.500288
## 19395 21.71967 3.557717
## 19396 17.06436 3.288103
We check whether there is a deterministic trend based on the coordinates (X, Y).
df_hora <- as.data.frame(pm10_1hora)
coords <- coordinates(pm10_1hora)
df_hora$x <- coords[,1]
df_hora$y <- coords[,2]
mod_media <- lm(PM10 ~ x + y, data = df_hora)
summary(mod_media)
##
## Call:
## lm(formula = PM10 ~ x + y, data = df_hora)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.9025 -6.7190 -0.3182 4.7641 19.7393
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.083e+02 3.173e+01 -3.413 0.001147 **
## x 6.938e-06 6.555e-06 1.058 0.294064
## y 2.340e-05 5.819e-06 4.021 0.000162 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.083 on 61 degrees of freedom
## Multiple R-squared: 0.2803, Adjusted R-squared: 0.2567
## F-statistic: 11.88 on 2 and 61 DF, p-value: 4.397e-05
df_hora$resid <- residuals(mod_media)
pm10_res <- pm10_1hora
pm10_res$resid <- df_hora$resid
plot(mod_media)
spplot(pm10_res, "resid", main = "Mapa de Residuales (PM10)")
## 1.2 Empirical Semivariograms

We compute the semivariance of the residuals.
vg_res_emp <- variogram(resid ~ 1, pm10_res)
plot(vg_res_emp, main = "Semivariograma empírico de los residuos (PM10)", pch=19)
## 1.3 Visual Estimation ("by eye")

We visually try several theoretical models to find initial seeds.
models_list <- c("Exp", "Sph", "Gau", "Mat", "Wav")
variograms_visual <- list()
par(mfrow=c(2,3))
for (mod in models_list) {
v_model <- vgm(psill = 40, model = mod, range = 50000, nugget = 20)
variograms_visual[[mod]] <- v_model
plot(vg_res_emp$dist, vg_res_emp$gamma, main=paste("Modelo:", mod),
xlab="Distancia", ylab="Semivarianza", ylim=c(0,80))
lines(variogramLine(v_model, maxdist=max(vg_res_emp$dist)), col="blue")
}
par(mfrow=c(1,1))
models <- c("Exp", "Sph", "Gau", "Mat", "Lin", "Pow",
"Bes", "Pen", "Cir", "Wav", "Nug")
variograms <- list()
for (mod in models) {
if (mod == "Nug") {
variograms[[mod]] <- vgm(
psill = 40,
model = "Nug",
range = 0,
nugget = 0
)
} else {
variograms[[mod]] <- vgm(
psill = 40,
model = mod,
range = 50000,
nugget = 20
)
}
}
Visually, the Wave and Matern models seem to capture the oscillation and the rise. Following the original code, we take Wave as the base model.

## 1.4 Estimation with Automatic Methods (MCO, MCP, MV)
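For reference, the Wave (hole-effect) semivariogram used from here on, with nugget \(\tau^2\), partial sill \(\sigma^2\) and range parameter \(a\), can be written as
\[
\gamma(h) = \tau^2 + \sigma^2\left(1 - \frac{\sin(h/a)}{h/a}\right), \qquad h > 0,
\]
which is the same form coded later in the wave_model function.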
ini_model <- variograms_visual[['Wav']]
fit_mcp <- fit.variogram(vg_res_emp, model = ini_model, fit.method = 1)  # weighted LS, weights N_j
fit_mco <- fit.variogram(vg_res_emp, model = ini_model, fit.method = 6)  # ordinary LS, no weights
# fit.variogram() offers no true maximum likelihood; the OLS fit is reused as a proxy for MV
fit_mv <- fit.variogram(vg_res_emp, model = ini_model, fit.method = 6)
fit_cl <- fit.variogram(vg_res_emp, model = ini_model, fit.method = 7)  # default weights N_j/h_j^2, used as a CL proxy
print(list(MCP = fit_mcp, MCO = fit_mco, CL_proxy = fit_cl))
## $MCP
## model psill range
## 1 Nug 33.46270 0
## 2 Wav 25.05858 50000
##
## $MCO
## model psill range
## 1 Nug 26.91390 0
## 2 Wav 29.20752 50000
##
## $CL_proxy
## model psill range
## 1 Nug 22.06978 0
## 2 Wav 28.28698 50000
plot(vg_res_emp, fit_mcp, main = "Ajuste Óptimo (MCP - gstat)")
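Note that fit.variogram() works on the binned empirical variogram and does not implement maximum likelihood, which is why fit_mv above simply reuses the OLS fit. A hedged sketch of a restricted maximum likelihood fit on the data themselves, using gstat's fit.variogram.reml() and treating the result only as a rough ML-type reference, could be:
# Sketch (assumption): REML fit of the Wave model directly on the residuals.
# df_hora already contains the residuals and the x/y coordinates.
fit_reml <- fit.variogram.reml(resid ~ 1, locations = ~ x + y,
data = df_hora, model = ini_model)
fit_reml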
### Cross-Validation
cv_mco <- krige.cv(resid ~ 1, pm10_res, model = fit_mco)
cv_mcp <- krige.cv(resid ~ 1, pm10_res, model = fit_mcp)
cv_mv <- krige.cv(resid ~ 1, pm10_res, model = fit_mv)
cv_cl <- krige.cv(resid ~ 1, pm10_res, model = fit_cl)
Helper function to compute error statistics:
calc_stats <- function(cv) {
err <- cv$observed - cv$var1.pred
ME <- mean(err, na.rm = TRUE)
MSE <- mean(err^2, na.rm = TRUE)
RMSE <- sqrt(MSE)
data.frame(ME = ME, MSE = MSE, RMSE = RMSE)
}
resumen <- rbind(
MCO = calc_stats(cv_mco),
MCP = calc_stats(cv_mcp),
MV = calc_stats(cv_mv),
CL = calc_stats(cv_cl)
)
resumen
## ME MSE RMSE
## MCO 0.012690176 54.39495 7.375293
## MCP 0.004806923 55.45499 7.446811
## MV 0.012690176 54.39495 7.375293
## CL 0.017429725 54.02478 7.350155
ordenar_modelos <- function(tabla_stats) {
tabla_stats[order(
tabla_stats$RMSE,
abs(tabla_stats$ME)
), ]
}
resumen_ordenado <- ordenar_modelos(resumen)
resumen_ordenado
## ME MSE RMSE
## CL 0.017429725 54.02478 7.350155
## MCO 0.012690176 54.39495 7.375293
## MV 0.012690176 54.39495 7.375293
## MCP 0.004806923 55.45499 7.446811
grd <- spsample(pm10_res, type = "regular", n = 5000)
gridded(grd) <- TRUE
proj4string(grd) <- crs_utm32
krig_res <- krige(resid ~ 1, pm10_res, grd, model = fit_cl)
## [using ordinary kriging]
spplot(krig_res["var1.pred"], main = "Predicción Kriging (Residuos PM10)")
spplot(krig_res["var1.var"], main = "Varianza del Error Kriging (PM10)")
### Analysis of Variable 2: Log(PM10)
df_hora$log_pm10 <- pm10_1hora$Log_PM10
mod_media_log <- lm(log_pm10 ~ x + y, data = df_hora)
pm10_res$resid_log <- residuals(mod_media_log)
vg_log_emp <- variogram(resid_log ~ 1, pm10_res)
fit_log_mcp <- fit.variogram(vg_log_emp, model = vgm(psill=0.5, "Exp", range=50000, nugget=0.1))
plot(vg_log_emp, fit_log_mcp, main = "Semivariograma Log(PM10)")
krig_log <- krige(resid_log ~ 1, pm10_res, grd, model = fit_log_mcp)
## [using ordinary kriging]
spplot(krig_log["var1.pred"], main = "Predicción Log_PM10 (Residuos)")
spplot(krig_log["var1.var"], main = "Varianza del Error Kriging (PM10)")
In this section we build the variogram "by hand" and fit nonlinear models using nls and optim, outside gstat.

### Building the robust semivariogram
coords <- as.data.frame(coordinates(pm10_res))
names(coords) <- c("x", "y")
coords$ID <- 1:nrow(coords)
D <-as.matrix(dist(coords[, c("x", "y")], method='euclidean'))
df_dist <- as.data.frame(as.table(as.matrix(D)))
df_dist <- df_dist[df_dist$Freq >0,]
names(df_dist) <- c("i", "j", "h")
pm10_res_sub <- pm10_res
pm10_res_sub@data <- pm10_res_sub@data[, c("station_european_code", "PM10", "resid")]
df_dist <- df_dist %>%
left_join(pm10_res_sub@data, by= c("i"="station_european_code"), suffix = c("", "_i")) %>%
left_join(pm10_res_sub@data, by= c("j"="station_european_code"), suffix = c("", "_j"))
df_dist$diff_values <- abs(df_dist$resid - df_dist$resid_j)
df_dist$gamma <- 0.5*(df_dist$resid - df_dist$resid_j)^2
df_dist$bin <- cut(df_dist$h, breaks = 20)
Computation of the robust (Cressie/Hawkins) estimator.
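For reference, the Cressie-Hawkins robust estimator is commonly written as
\[
\hat{\gamma}(h) = \frac{1}{2}\,
\frac{\left(\frac{1}{N(h)}\sum_{N(h)} \lvert Z(s_i) - Z(s_j)\rvert^{1/2}\right)^{4}}
{0.457 + \frac{0.494}{N(h)} + \frac{0.045}{N(h)^{2}}}.
\]
The code below follows this structure, although it scales the numerator as \((m_h/2)^4\) rather than \(m_h^4/2\), with \(m_h\) the mean of the square-rooted absolute differences in each bin.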
vg_emp_robust.1 <- df_dist %>%
group_by(bin) %>%
summarise(
h_med = mean(h, na.rm = TRUE),
m_h = mean((diff_values)^(1/2), na.rm = TRUE),
np = n()
) %>%
mutate( gamma_robust = ( (m_h / 2)^4 ) / (0.457 + 0.494 / np + 0.045 / (np^2)) )
plot(vg_emp_robust.1$h_med, vg_emp_robust.1$gamma_robust,
main = "Semivariograma empírico Robusto Manual", pch = 16)
vg_fit <- vg_emp_robust.1 %>% filter(!is.na(gamma_robust), gamma_robust >= 0 )
vg_fit$h_med_scaled <- vg_fit$h_med/1000
vg_fit <- vg_fit[is.finite(vg_fit$gamma_robust) & is.finite(vg_fit$h_med_scaled), ]
vg_fit <- subset(vg_fit, !(h_med_scaled > 750 & gamma_robust < 3))
h <- vg_fit$h_med_scaled
gamma <- vg_fit$gamma_robust
np <- vg_fit$np
potencial <- function(h, nugget, sill, lambda){
ifelse(h == 0, 0, nugget + sill * abs(h)^lambda)
}
obj_potencial <- function(par, h, gamma, w){
nugget <- par[1]; sill <- par[2]; lambda <- par[3]
gamma_model <- potencial(h, nugget, sill, lambda)
sum(w * (gamma - gamma_model)^2)
}
start_par <- c(nugget = min(gamma), sill = max(gamma) - min(gamma), lambda = 0.8)
fit_opt <- optim(
par = start_par,
fn = obj_potencial,
h = h, gamma = gamma, w = np,
method = "L-BFGS-B",
lower = c(0, 0, 0.01), upper = c(Inf, Inf, 1.99)
)
fit_opt$par
## nugget sill lambda
## 0.0000000 1.9339682 0.2641522
h_seq <- seq(min(h), max(h), length.out = 200)
pars_opt <- fit_opt$par
gamma_pred_opt <- pars_opt["nugget"] + pars_opt["sill"] * (h_seq ^ pars_opt["lambda"])
plot(
h, gamma,
pch = 16,
xlab = "Distancia (h)",
ylab = "Semivarianza gamma(h)",
main = "Ajuste Modelo Potencial (optim)"
)
lines(
h_seq, gamma_pred_opt,
col = "blue",
lwd = 2
)
fit_nlsLM <- nlsLM(
gamma_robust ~ nugget + sill * (h_med_scaled^lambda),
data = vg_fit,
start = list(nugget=1, sill=5, lambda=0.5),
lower = c(0, 0, 0.001), upper = c(20, 50, 1.5)
)
coef(fit_nlsLM)
## nugget sill lambda
## 0.0000000 4.8830785 0.1069787
h_seq <- seq(min(h), max(h), length.out = 200)
pars_lm <- coef(fit_nlsLM)
gamma_pred <- pars_lm["nugget"] + pars_lm["sill"] * (h_seq ^ pars_lm["lambda"])
plot(h, gamma, pch = 16, main = "Ajuste Modelo Potencial (nlsLM)")
lines(h_seq, gamma_pred, col = "red", lwd = 2)
### Fitting the Rational Quadratic Model (optim vs nls)
cuadratico_racional <-function(h, nugget, sill, a){
ifelse(h==0, 0, nugget + sill*(19*(abs(h)/a)^2/(1+19*(abs(h)/a)^2)))
}
obj_cuadratico_racional <-function(par, h, gamma, w){
gamma_model <- cuadratico_racional(h, par[1], par[2], par[3])
sum(w * (gamma - gamma_model)^2)
}
fit_opt_cr <- optim(
par = c(0.1, 10, 1),
fn = obj_cuadratico_racional,
h=h, gamma=gamma, w=np,
method = "L-BFGS-B", lower = c(0, 0, 0.1)
)
fit_opt_cr$par
## [1] 4.245127 6.179790 701.622342
h_seq <- seq(min(h), max(h), length.out = 300)
pars_opt <- fit_opt_cr$par
gamma_pred_opt <- cuadratico_racional(
h_seq,
nugget = pars_opt[1],
sill = pars_opt[2],
a = pars_opt[3]
)
plot(
h, gamma,
pch = 16,
xlab = "Distancia (h)",
ylab = "Semivarianza gamma(h)",
main = "Modelo Cuadrático-Racional (optim)"
)
lines(h_seq, gamma_pred_opt, col = "blue", lwd = 2)
### Weight matrix that varies with each iteration (IRLS)
lambda_fix <- 0.8
x <- abs(h)^lambda_fix
y <- gamma
X <- cbind(1, x)
beta <- c(1, 1)
tol <- 1e-6
for (k in 1:50) {
res <- as.numeric(y - X %*% beta)
w <- 1 / (abs(res) + 1e-6)
W <- diag(w)
beta_new <- solve(t(X) %*% W %*% X) %*% (t(X) %*% W %*% y)
if (max(abs(beta_new - beta)) < tol) break
beta <- beta_new
}
beta
## [,1]
## 6.14205119
## x 0.02453045
h_seq <- seq(min(h), max(h), length.out = 300)
x_seq <- abs(h_seq)^lambda_fix
gamma_pred_irls <- beta[1] + beta[2] * x_seq
plot(
h, gamma,
pch = 16,
xlab = "Distancia (h)",
ylab = "Semivarianza gamma(h)",
main = "Ajuste Potencial (λ = 0.8) usando IRLS (Robusto)"
)
lines(h_seq, gamma_pred_irls, col = "darkgreen", lwd = 2)
### Fixed weight matrix
lambda_fix <- 0.8
x <- abs(h)^lambda_fix
y <- gamma
X <- cbind(1, x)
w_fix <- np
W_fix <- diag(w_fix)
beta_Wfix <- solve(t(X) %*% W_fix %*% X) %*% (t(X) %*% W_fix %*% y)
beta_Wfix
## [,1]
## 5.56952888
## x 0.03093432
h_seq <- seq(min(h), max(h), length.out = 300)
x_seq <- abs(h_seq)^lambda_fix
gamma_pred_Wfix <- as.numeric(beta_Wfix[1] + beta_Wfix[2] * x_seq)
plot(
h, gamma,
pch = 16,
xlab = "Distancia (h)",
ylab = "Semivarianza gamma(h)",
main = "Modelo potencial (λ fijo) - MCO ponderado con W fija"
)
lines(h_seq, gamma_pred_Wfix, lwd = 2)
Here:
\[
W(\theta) = \mathrm{Diag}\bigl(\mathrm{Var}\bigl(2\hat{\gamma}(h_k)\bigr)\bigr)
= \mathrm{Diag}\!\left(\frac{2\bigl(2\gamma(h_k \mid \theta)\bigr)^2}{N(h_k)}\right)
\]
start_par <- c(
nugget = min(gamma),
sill = max(gamma) - min(gamma),
a = median(h)
)
obj_MCP <- function(par, h, gamma_emp, N_h) {
gamma_mod <- cuadratico_racional(
h,
nugget = par["nugget"],
sill = par["sill"],
a = par["a"]
)
W_diag <- 4 * gamma_mod^2 / N_h
w <- 1/ W_diag
sum( w * (gamma_emp - gamma_mod)^2 )
}
fit_MCP <- optim(
par = start_par,
fn = obj_MCP,
h = h,
gamma_emp = gamma,
N_h = np,
method = "L-BFGS-B",
lower = c(0, 0, 0.01)
)
h_seq <- seq(min(h), max(h), length.out = 300)
pars_mcp <- fit_MCP$par
gamma_mcp <- cuadratico_racional(
h_seq,
nugget = pars_mcp["nugget"],
sill = pars_mcp["sill"],
a = pars_mcp["a"]
)
plot(
h, gamma,
pch = 16,
xlab = "Distancia (h)",
ylab = "Semivarianza gamma(h)",
main = "Ajuste Variograma - Modelo Cuadrático-Racional (MCP)"
)
lines(
h_seq, gamma_mcp,
col = "blue",
lwd = 2
)
In this section we formalize, symbolically, the objective functions used to estimate the parameters of the Wave semivariogram model, and we show how to program them explicitly in R with the optim function.

Let \(\gamma(h_j)\) be the empirical semivariogram in bin \(j\), \(\gamma_\theta(h_j)\) the theoretical Wave semivariogram with parameters \(\theta = (\tau^2, \sigma^2, a)\) (nugget, partial sill and range), and \(w_j\) a weight for each bin.
Ordinary Least Squares (MCO)
\[
Q_{\text{MCO}}(\theta) =
\sum_{j=1}^{K} \left[\gamma(h_j) - \gamma_\theta(h_j)\right]^2
\]
Weighted Least Squares (MCP)
\[
Q_{\text{MCP}}(\theta) =
\sum_{j=1}^{K} w_j
\left[\gamma(h_j) - \gamma_\theta(h_j)\right]^2,
\] with \(w_j = N(h_j)/\gamma_\theta(h_j)^2\), as used (up to a constant factor) in the obj_MCP implementations of this document.
Maximum Likelihood (MV)

Assuming the data vector \(Z\) is Gaussian, \(Z \sim N(X\beta, \Sigma_\theta)\), the log-likelihood (ignoring constants) is
\[
\ell(\theta, \beta) =
-\frac{1}{2}\log|\Sigma_\theta|
-\frac{1}{2}(Z - X\beta)^\top \Sigma_\theta^{-1}(Z - X\beta).
\] The criterion to minimize is \[
Q_{\text{MV}}(\theta, \beta) = -\ell(\theta,\beta).
\]
Composite Likelihood (CL)

Considering pairs \((Z_i, Z_j)\), the composite criterion (under normality) can be written, for the differences \(U_{ij} = Z_i - Z_j\), as
\[
Q_{\text{CL}}(\theta) =
\sum_{i<j}
\left[
\log\gamma_\theta(h_{ij}) +
\frac{U_{ij}^2}{4\,\gamma_\theta(h_{ij})}
\right],
\] where \(h_{ij}\) is the distance between locations \(i\) and \(j\). This criterion uses \(\gamma_\theta(h)\) directly as the variance of the difference \(Z_i - Z_j\).
Below are the R implementations of these criteria for the Wave model, using the same semivariogram data and residuals employed in the previous section.
dat_fit <- vg_emp_robust.1
idx_ok <- is.finite(dat_fit$h_med) &
is.finite(dat_fit$gamma_robust) &
is.finite(dat_fit$np) &
dat_fit$np > 0
dat_fit <- dat_fit[idx_ok, ]
h_obs <- dat_fit$h_med
gamma_obs <- dat_fit$gamma_robust
N_obs <- dat_fit$np
wave_model <- function(h, par) {
nugget <- par[1]; sill <- par[2]; a <- par[3]
val <- ifelse(h < 1e-6, nugget, nugget + sill * (1 - sin(h / a) / (h / a)))
return(val)
}
obj_MCO <- function(par, h, gamma_emp) {
gamma_teo <- wave_model(h, par)
sum((gamma_emp - gamma_teo)^2)
}
obj_MCP <- function(par, h, gamma_emp, N) {
gamma_teo <- wave_model(h, par)
weights <- N / (gamma_teo^2 + 1e-6)
sum(weights * (gamma_emp - gamma_teo)^2)
}
start_par <- c(nugget = 10, sill = 40, range = 15000)
res_MCO <- optim(par = start_par, fn = obj_MCO, h = h_obs, gamma_emp = gamma_obs,
method = "L-BFGS-B", lower = c(0,0,100))
res_MCP <- optim(par = start_par, fn = obj_MCP, h = h_obs, gamma_emp = gamma_obs, N = N_obs,
method = "L-BFGS-B", lower = c(0,0,100))
cov_wave <- function(h, par) {
nugget <- par[1]; sill <- par[2]; a <- par[3]
ifelse(
h < 1e-6,
nugget + sill,
sill * (sin(h / a) / (h / a))
)
}
cov_matrix <- function(coords, par) {
D <- as.matrix(dist(coords))
cov_wave(D, par)
}
coords_ml <- as.data.frame(coordinates(pm10_res))
names(coords_ml) <- c("x", "y")
coords_ml_mat <- as.matrix(coords_ml[, c("x", "y")])
Z_ml <- pm10_res$resid
X_ml <- model.matrix(~ 1, data = coords_ml)
obj_ML <- function(par, Z, X, coords) {
beta <- par[1]
theta <- par[-1]
C <- cov_matrix(coords, theta)
C <- C + diag(1e-6, nrow(C))
mu <- as.vector(X %*% beta)
U <- Z - mu
sign_det <- determinant(C, logarithm = TRUE)$sign
log_det <- as.numeric(determinant(C, logarithm = TRUE)$modulus)
if (sign_det <= 0) return(1e10)
quad <- as.numeric(t(U) %*% solve(C, U))
0.5 * (log_det + quad)
}
start_par_ML <- c(beta0 = 0, nugget = 5, sill = 40, range = 15000)
fit_ML_wave <- optim(
par = start_par_ML,
fn = obj_ML,
Z = Z_ml,
X = X_ml,
coords = coords_ml_mat,
method = "L-BFGS-B",
lower = c(-Inf, 0, 0, 100)
)
fit_ML_wave$par
## beta0 nugget sill range
## -0.4623638 8.0048486 56.5128291 15242.4977071
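The objective above searches over beta and the covariance parameters jointly. As a hedged refinement sketch (an illustration, not the fit reported above), beta can be profiled out with its GLS estimate \(\hat{\beta}(\theta) = (X^\top \Sigma_\theta^{-1} X)^{-1} X^\top \Sigma_\theta^{-1} Z\), so that optim only searches over the covariance parameters:
# Sketch (assumption): profile likelihood for the Wave model,
# replacing beta by its GLS estimate at each value of theta.
obj_ML_profile <- function(theta, Z, X, coords) {
  C <- cov_matrix(coords, theta) + diag(1e-6, length(Z))
  dC <- determinant(C, logarithm = TRUE)
  if (dC$sign <= 0) return(1e10)                   # guard against non positive definite C
  Ci_X <- solve(C, X)
  beta_hat <- solve(t(X) %*% Ci_X, t(Ci_X) %*% Z)  # GLS estimate of beta
  U <- Z - as.vector(X %*% beta_hat)
  0.5 * (as.numeric(dC$modulus) + as.numeric(t(U) %*% solve(C, U)))
}
fit_ML_profile <- optim(
  par = c(nugget = 5, sill = 40, range = 15000),
  fn = obj_ML_profile, Z = Z_ml, X = X_ml, coords = coords_ml_mat,
  method = "L-BFGS-B", lower = c(0, 0, 100)
)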
D_full <- as.matrix(dist(coords_ml_mat))
h_ij <- D_full[upper.tri(D_full)]
Z_mat <- outer(Z_ml, Z_ml, "-")
U_ij <- Z_mat[upper.tri(Z_mat)]
obj_CL <- function(par, h, U) {
gamma_val <- wave_model(h, par) + 1e-9
term <- log(gamma_val) + (U^2) / (4 * gamma_val)
sum(term)
}
start_par_CL <- c(nugget = 5, sill = 40, range = 15000)
fit_CL_wave <- optim(
par = start_par_CL,
fn = obj_CL,
h = h_ij,
U = U_ij,
method = "L-BFGS-B",
lower = c(0, 0, 100)
)
fit_CL_wave$par
## nugget sill range
## 11.90988 19.77599 14999.86976
vg_res_emp <- variogram(
resid ~ 1,
data = pm10_res,
cressie = TRUE
)
vgm_ini <- vgm(psill = 40, model = "Wav", range = 50000, nugget =20)
fit_mcp <- fit.variogram(vg_res_emp, model = vgm_ini, fit.method = 1)
fit_mco <- fit.variogram(vg_res_emp, model = vgm_ini, fit.method = 6)
fit_mv <- fit.variogram(vg_res_emp, model = vgm_ini, fit.method = 6)  # OLS reused as MV proxy, as before
fit_cl <- fit.variogram(vg_res_emp, model = vgm_ini, fit.method = 7)
## Extract parameters from the gstat fits (Nugget + Wav)
extrae_par <- function(vgm_obj) {
nugget <- vgm_obj$psill[vgm_obj$model == "Nug"]
sill <- vgm_obj$psill[vgm_obj$model == "Wav"]
range <- vgm_obj$range[vgm_obj$model == "Wav"]
c(nugget = nugget, sill = sill, range = range)
}
par_mco_gstat <- extrae_par(fit_mco)
par_mcp_gstat <- extrae_par(fit_mcp)
par_mv_gstat <- extrae_par(fit_mv)
par_cl_gstat <- extrae_par(fit_cl)
par_mco_man <- res_MCO$par
par_mcp_man <- res_MCP$par
tabla_comp <- data.frame(
Parametro = c("Nugget", "Sill", "Range"),
MCO_manual = par_mco_man,
MCP_manual = par_mcp_man,
MCO_gstat = par_mco_gstat,
MCP_gstat = par_mcp_gstat,
MV_gstat = par_mv_gstat,
CL_gstat = par_cl_gstat,
row.names = NULL
)
tabla_comp
## Parametro MCO_manual MCP_manual MCO_gstat MCP_gstat MV_gstat
## 1 Nugget 2.479598 0.000000 11.13333 14.43615 11.13333
## 2 Sill 5.860104 8.971897 45.08646 44.16479 45.08646
## 3 Range 15129.554991 14682.336591 49999.99999 49999.99999 49999.99999
## CL_gstat
## 1 8.625997
## 2 41.484674
## 3 49999.999992
pm10_ST <- DE_RB_2005[,"2005-09-20::2005-10-19"]
df_st <- as.data.frame(pm10_ST)
df_st$time_num <- as.numeric(df_st$time - min(df_st$time)) / 86400
fit_lm_st <- lm(PM10 ~ coords.x1 + coords.x2 + poly(time_num, 2), data = df_st)
summary(fit_lm_st)
##
## Call:
## lm(formula = PM10 ~ coords.x1 + coords.x2 + poly(time_num, 2),
## data = df_st)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.816 -10.515 -0.202 8.409 53.666
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.271e+01 8.207e+00 -1.548 0.1218
## coords.x1 4.115e-06 1.770e-06 2.325 0.0202 *
## coords.x2 5.912e-06 1.482e-06 3.988 6.91e-05 ***
## poly(time_num, 2)1 1.038e+02 1.233e+01 8.417 < 2e-16 ***
## poly(time_num, 2)2 -7.139e+01 1.233e+01 -5.789 8.25e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.33 on 1951 degrees of freedom
## Multiple R-squared: 0.06356, Adjusted R-squared: 0.06164
## F-statistic: 33.11 on 4 and 1951 DF, p-value: < 2.2e-16
df_st$resid <- residuals(fit_lm_st)
pm10_ST@data$resid <- df_st$resid
vg_sp_res <- variogram(
resid ~ 1,
locations = ~ coords.x1 + coords.x2,
data = df_st,
cutoff = 750*1000,
width = 20000
)
plot(
vg_sp_res,
main = "Variograma espacial empírico de residuos",
xlab = "Distancia (m)",
ylab = "Semivarianza"
)
class(vg_sp_res)
## [1] "gstatVariogram" "data.frame"
estacion_1 <- df_st$station_european_code[1]
df_1st <- df_st[df_st$station_european_code == estacion_1, ]
df_1st$time_x <- df_1st$time_num
df_1st$time_y <- 0
coordinates(df_1st) <- ~ time_x + time_y
vg_tm_res <- variogram(
resid ~ 1,
data = df_1st,
cutoff = max(df_1st$time_num)/2,
width = 1
)
plot(
vg_tm_res,
main = "Variograma temporal empírico de residuos",
xlab = "Lag (días)",
ylab = "Semivarianza"
)
## 4.4 Empirical space-time variogram
vg_st_emp <- variogramST(
resid ~ 1,
data = pm10_ST,
tlags = 0:10,
assumeRegular = FALSE
)
plot(vg_st_emp, map = FALSE)
vgm_sp_ini <- vgm(
psill = 40,
model = "Exp",
range = 100000,
nugget = 5
)
vgm_sp_fit <- fit.variogram(vg_sp_res, vgm_sp_ini)
vgm_tm_ini <- vgm(psill = max(vg_tm_res$gamma, na.rm = TRUE),
model = "Exp",
range = 3,
nugget = min(vg_tm_res$gamma, na.rm = TRUE))
vgm_tm_fit <- fit.variogram(vg_tm_res, vgm_tm_ini)
vgm_tm_fit
## model psill range
## 1 Nug 0.0000 0.000000
## 2 Exp 371.4906 4.344402
st_model_sep <- vgmST(
"separable",
space = vgm_sp_fit,
time = vgm_tm_fit,
sill = 1
)
st_model_sep
## space component:
## model psill range
## 1 Nug 141.52067 0.0
## 2 Exp 15.93542 302455.8
## time component:
## model psill range
## 1 Nug 0.0000 0.000000
## 2 Exp 371.4906 4.344402
## sill: 1
st_model_fit <- fit.StVariogram(
vg_st_emp,
st_model_sep,
method = "L-BFGS-B"
)
st_model_fit
## space component:
## model psill range
## 1 Nug 0 0.0
## 2 Exp 1 302455.8
## time component:
## model psill range
## 1 Nug 0 0.00000
## 2 Exp 1 3.63701
## sill: 210.635811568567
plot(vg_st_emp, st_model_fit, wireframe = TRUE, all = TRUE)
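The separable structure is only one of the space-time families offered by gstat. As a hedged sketch (reusing the marginal fits above; the parameter k is an assumed starting value), a product-sum model could be fitted and compared in the same way:
# Sketch (assumption): alternative product-sum space-time model.
st_model_ps <- vgmST("productSum",
space = vgm_sp_fit,
time = vgm_tm_fit,
k = 1)
st_ps_fit <- fit.StVariogram(vg_st_emp, st_model_ps, method = "L-BFGS-B")
# fit.StVariogram attaches the weighted MSE of each fit, useful for a rough comparison
attr(st_model_fit, "MSE")
attr(st_ps_fit, "MSE")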
grd_sp <- spsample(pm10_ST@sp, n = 200, type = "regular")
gridded(grd_sp) <- TRUE
proj4string(grd_sp) <- proj4string(pm10_ST@sp)
t_all <- sort(unique(time(pm10_ST)))
nt <- length(t_all)
t_pred <- t_all[pmax(1, nt - 2):nt]
t_pred
## [1] "2005-10-17 GMT" "2005-10-18 GMT" "2005-10-19 GMT"
length(t_pred)
## [1] 3
class(t_pred)
## [1] "POSIXct" "POSIXt"
grd_ST <- STF(grd_sp, t_pred)
head(grd_ST)
## An object of class "STF"
## Slot "sp":
## Object of class SpatialPixels
## Grid topology:
## cellcentre.offset cellsize cells.dim
## x1 337119.2 48693.01 12
## x2 5305250.3 48693.01 17
## SpatialPoints:
## x1 x2
## 1 337119.2 5305250
## 2 385812.2 5305250
## 3 434505.3 5305250
## 4 483198.3 5305250
## 5 531891.3 5305250
## 6 580584.3 5305250
## Coordinate Reference System (CRS) arguments: +proj=utm +zone=32
## +datum=WGS84 +units=m +no_defs
##
## Slot "time":
## timeIndex
## 2005-10-17 1
## 2005-10-18 2
## 2005-10-19 3
##
## Slot "endTime":
## [1] "2005-10-18 GMT" "2005-10-19 GMT" "2005-10-20 GMT"
r_sp <- vgm_sp_fit$range[vgm_sp_fit$model != "Nug"][1]
r_tm <- vgm_tm_fit$range[vgm_tm_fit$model != "Nug"][1]
stAni <- r_sp / r_tm
stAni
## [1] 69619.66
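Here the anisotropy is taken as the ratio of the fitted spatial and temporal ranges. As an alternative sketch, gstat's estiStAni() estimates the spatio-temporal anisotropy directly from the empirical ST variogram (the search interval below, in metres, is an assumption):
# Sketch (assumption): anisotropy estimated from the empirical ST variogram.
stAni_emp <- estiStAni(vg_st_emp, interval = c(10000, 200000))
stAni_emp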
krig_st_res <- krigeST(
resid ~ 1,
data = pm10_ST,
newdata = grd_ST,
modelList = st_model_fit,
nmax = 50,
stAni = stAni,
computeVar = TRUE
)
names(krig_st_res@data)
## [1] "var1.pred" "var1.var"
stplot(krig_st_res, main = "Predicción ST PM10 (Residuos) - Últimos días")
data(DE_RB_2005)
crs_utm32 <- CRS("+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs")
proj4string(DE_RB_2005) <- crs_utm32
## Warning in `proj4string<-`(`*tmp*`, value = value): A new CRS was assigned to an object with an existing CRS:
## +init=epsg:32632 +proj=utm +zone=32 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0
## without reprojecting.
## For reprojection, use function spTransform
df <- as.data.frame(DE_RB_2005)
df_pm <- df %>%
dplyr::select(time, station_european_code, PM10)
df_pm_2005 <- df_pm %>%
dplyr::filter(
time >= as.POSIXct("2005-01-01 00:00:00"),
time <= as.POSIXct("2005-12-31 23:59:59")
)
dd_wide <- df_pm_2005 %>%
tidyr::pivot_wider(
names_from = station_european_code,
values_from = PM10
) %>%
dplyr::arrange(time)
head(dd_wide)
## # A tibble: 6 × 70
## time DESH001 DENI063 DEBY109 DEUB038 DEBE056 DEBE032 DEHE046
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2005-01-02 00:00:00 13.9 14.4 4.38 22.8 13.5 11.4 11.0
## 2 2005-01-03 00:00:00 16.6 13.3 7.25 17.7 9.04 9.05 14.8
## 3 2005-01-04 00:00:00 16.0 18.7 5 24.3 16.8 14.6 8.67
## 4 2005-01-05 00:00:00 21.2 20.5 6.75 27.2 15.8 13.3 7.29
## 5 2005-01-06 00:00:00 23.2 23.0 4.83 30.8 17.0 14 11.7
## 6 2005-01-07 00:00:00 13.7 16.6 5.42 15.8 13.6 10.5 5.58
## # ℹ 62 more variables: DESL019 <dbl>, DENW081 <dbl>, DESH008 <dbl>,
## # DESN049 <dbl>, DESN076 <dbl>, DETH026 <dbl>, DEBW004 <dbl>, DEUB039 <dbl>,
## # DEHE028 <dbl>, DEMV017 <dbl>, DEMV004 <dbl>, DEBB053 <dbl>, DENW063 <dbl>,
## # DETH061 <dbl>, DERP014 <dbl>, DENI031 <dbl>, DEUB035 <dbl>, DEMV012 <dbl>,
## # DEUB031 <dbl>, DEBB065 <dbl>, DEBY013 <dbl>, DEUB033 <dbl>, DEBY047 <dbl>,
## # DENW065 <dbl>, DEUB030 <dbl>, DETH027 <dbl>, DEBY049 <dbl>, DEBW103 <dbl>,
## # DENI058 <dbl>, DERP017 <dbl>, DESN051 <dbl>, DEHE043 <dbl>, …
dim(dd_wide)
## [1] 364 70
Z <- as.matrix(dd_wide[, -1])
rownames(Z) <- dd_wide$time
Z_df <- as.data.frame(Z)
Z_df <- as.data.frame(lapply(Z_df, as.numeric))
Z_df_clean <- Z_df
for(i in seq_len(ncol(Z_df_clean))) {
Z_df_clean[, i] <- zoo::na.approx(Z_df_clean[, i], na.rm = FALSE)
}
Z_df_clean <- Z_df_clean[complete.cases(Z_df_clean), ]
dim(Z_df_clean)
## [1] 7 69
head(Z_df_clean[, 1:5])
## DESH001 DENI063 DEBY109 DEUB038 DEBE056
## 283 40.667 40.125 25.208 38.417 50.625
## 284 47.208 47.250 22.417 37.125 49.542
## 285 51.625 50.375 30.000 39.708 57.167
## 286 55.958 50.292 42.833 41.042 54.458
## 287 12.947 21.042 34.375 15.087 36.667
## 288 8.792 13.208 37.000 6.455 11.950
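Linear interpolation followed by complete.cases() keeps only 7 common days out of 364, so the functional sample is very short. A hedged alternative sketch (not the approach used here) would drop the stations with the most gaps and extrapolate the series end points, which would typically retain more days:
# Sketch (assumption): keep more days by dropping very incomplete stations.
# rule = 2 makes na.approx extrapolate at the series end points.
frac_na <- colMeans(is.na(Z_df))
Z_alt <- Z_df[, frac_na < 0.25, drop = FALSE]
Z_alt <- as.data.frame(lapply(Z_alt, zoo::na.approx, na.rm = FALSE, rule = 2))
Z_alt <- Z_alt[complete.cases(Z_alt), ]
dim(Z_alt)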
coords_info <- as.data.frame(DE_RB_2005) %>%
dplyr::select(station_european_code, coords.x1, coords.x2) %>%
dplyr::distinct()
estaciones_keep <- colnames(Z_df_clean)
coords_use <- coords_info %>%
dplyr::filter(station_european_code %in% estaciones_keep) %>%
dplyr::arrange(match(station_european_code, estaciones_keep))
coords_mat <- as.matrix(coords_use[, c("coords.x1", "coords.x2")])
ncol(Z_df_clean)
## [1] 69
nrow(coords_use)
## [1] 69
PM10_DE_real <- SpatFD(
data = Z_df_clean,
coords = coords_mat,
basis = "Bsplines",
nbasis = 20,
norder = 4,
lambda = 1e-4,
nharm = 3,
name = "PM10_DE_real"
)
plot(PM10_DE_real$PM10_DE_real$data_fd,
xlab = "Índice de tiempo",
ylab = "PM10",
main = "Curvas funcionales observadas - PM10 (todas las estaciones)")
## [1] "done"
set.seed(123)
idx10 <- sample(ncol(Z_df_clean), 10)
coefs10 <- PM10_DE_real$PM10_DE_real$data_fd$coefs[, idx10, drop = FALSE]
fd10 <- fda::fd(coefs10,
PM10_DE_real$PM10_DE_real$data_fd$basis,
PM10_DE_real$PM10_DE_real$data_fd$fdnames)
plot(fd10,
xlab = "Índice de tiempo",
ylab = "PM10",
main = "PM10 - 10 curvas funcionales reales")
## [1] "done"
range_x <- range(coords_use$coords.x1)
range_y <- range(coords_use$coords.x2)
xs <- seq(range_x[1], range_x[2], length.out = 40)
ys <- seq(range_y[1], range_y[2], length.out = 40)
newcoorden <- expand.grid(X = xs, Y = ys)
newcoords <- as.matrix(newcoorden)
coords <- PM10_DE_real$PM10_DE_real$coords
scores <- PM10_DE_real$PM10_DE_real$fpca$scores
nharm <- ncol(scores)
dim(coords)
## [1] 69 2
dim(scores)
## [1] 69 3
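The initial models below, with a different sill for each harmonic, look like seeds for the variograms of the FPCA scores. A sketch of how those empirical variograms could be inspected with gstat (using the coords and scores objects extracted above):
# Sketch (assumption): empirical variogram of each FPCA score.
scores_df <- data.frame(coords, scores)
names(scores_df) <- c("x", "y", paste0("score", 1:nharm))
coordinates(scores_df) <- ~ x + y
proj4string(scores_df) <- crs_utm32
plot(variogram(score1 ~ 1, scores_df), main = "Empirical variogram - score 1")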
modelos_ini <- list(
vgm(psill = 2199288.58, model = "Wav", range = 1484.57, nugget = 0),
vgm(psill = 62640.74, model = "Mat", range = 1979.43, nugget = 0, kappa = 0.68),
vgm(psill = 37098.25, model = "Exp", range = 6433.16, nugget = 0)
)
KS_PM10_scores <- KS_scores_lambdas(
PM10_DE_real,
newcoords = newcoords,
method = "scores",
model = modelos_ini
)
## Using first variable by default
## Using fill.all = TRUE by default
## [using simple kriging]
## [using simple kriging]
## [using simple kriging]
#KS_PM10_lambda <- KS_scores_lambdas(
#PM10_DE_real,
#newcoords = newcoords,
#method = "lambda",
#model = modelos_ini
#)
curvas_scores <- recons_fd(KS_PM10_scores, name = "PM10_DE_real")
#curvas_lambda <- recons_fd(KS_PM10_lambda, name = "PM10_DE_real")
# If you only have a few new points in newcoords
plot(curvas_scores,
xlab = "Índice de tiempo",
ylab = "PM10",
main = "Kriging funcional - método scores")
## [1] "done"
# plot(curvas_lambda,
# xlab = "Índice de tiempo",
# ylab = "PM10",
# main = "Kriging funcional - método lambda")
# par(mfrow = c(1, 1))