Introducción Este documento realiza un análisis de regresión espacial mediante máxima verosimilitud (MLE). Implementaremos una función personalizada para optimizar los parámetros del modelo espacial y compararemos los resultados con el paquete spatialreg.
Definición de la función de log-verosimilitud
sml_ll <- function(theta, y, X, W, gradient = TRUE, hessian = TRUE){
K <- ncol(X)
N <- nrow(X)
betas <- theta[1:K]
rho <- theta[K + 1]
sig.sq <- theta[K + 2]
A <- diag(N) - rho * W
Ay <- A %*% y
Xb <- X %*% betas
res <- Ay - Xb
detA <- det(A)
ll <- -0.5 * N * log(2 * pi * sig.sq) - 0.5 * crossprod(res) / sig.sq + log(detA)
if (gradient){
C <- W %*% solve(A)
grad.betas <- (1 / sig.sq) * t(X) %*% res
grad.rho <- - sum(diag(C)) + (1 / sig.sq) * t(res) %*% W %*% y
grad.sig.sq <- (1 / (2 * sig.sq ^2)) * (t(res) %*% res - N * sig.sq)
attr(ll, 'gradient') <- c(grad.betas, grad.rho, grad.sig.sq)
}
if (hessian){
H <- matrix(NA, nrow = (K + 2), ncol = (K + 2))
h_bb <- - (1 / sig.sq) * t(X) %*% X
h_bs <- - (1 / sig.sq ^ 2) * t(X) %*% res
h_br <- - (1 / sig.sq) * t(X) %*% W %*% y
h_ss <- (N / (2 * sig.sq ^ 2)) - (1 / sig.sq ^ 3) * t(res) %*% res
h_sr <- - t(res) %*% W %*% y / sig.sq ^ 2
h_rr <- - sum(diag(C %*% C)) - (1 / sig.sq) * (t(y) %*% t(W) %*% W %*% y)
H[1:K, 1:K] <- h_bb
H[1:K, K + 1] <- h_bs
H[1:K, K + 2] <- h_br
H[K + 1, 1:K] <- t(h_bs)
H[K + 1, K + 1] <- h_ss
H[K + 1, K + 2] <- h_sr
H[K + 2, 1:K] <- t(h_br)
H[K + 2, K + 1] <- h_sr
H[K + 2, K + 2] <- h_rr
attr(ll, 'hessian') <- H
}
return(ll)
}
slm_ml <- function(formula, data, listw, gradient = TRUE, hessian = TRUE, ...){
require("maxLik")
require("spdep")
callT <- match.call(expand.dots = TRUE)
mf <- callT
m <- match(c("formula", "data"), names(mf), 0L)
mf <- mf[c(1L, m)]
mf[[1L]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
y <- model.response(mf)
X <- model.matrix(formula, mf)
W <- listw2mat(listw)
K <- ncol(X)
b_hat <- coef(lm(y ~ X - 1))
start <- c(b_hat, 0, 1)
names(start) <- c(colnames(X), "rho", "sig.sq")
sym <- all(W == t(W))
omega <- eigen(W, only.values = TRUE, symmetric = sym)
lambda_space <- if (is.complex(omega$values)) 1 / range(Re(omega$values)) else 1 / range(omega$values)
A <- rbind(c(rep(0, K), 1, 0),
c(rep(0, K), -1, 0),
c(rep(0, K), 0, 1))
B <- c(-1L * (lambda_space[1] + sqrt(.Machine$double.eps)),
lambda_space[2] - sqrt(.Machine$double.eps),
-1L* sqrt(.Machine$double.eps))
callT$constraints <- list(ineqA = A, ineqB = B)
if (is.null(callT$method)) callT$method <- 'bfgs'
if (is.null(callT$iterlim)) callT$iterlim <- 100000
opt <- callT
m <- match(c('method', 'print.level', 'iterlim', 'control'), names(opt), 0L)
opt <- opt[c(1L, m)]
opt$start <- start
opt[[1]] <- as.name('maxLik')
opt$logLik <- as.name('sml_ll')
opt$gradient <- gradient
opt$hessian <- hessian
opt[c('y', 'W', 'X')] <- list(as.name('y'), as.name('W'), as.name('X'))
out <- eval(opt, sys.frame(which = length(sys.calls())))
return(out)
}
library(spdep)
## Cargando paquete requerido: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Cargando paquete requerido: sf
## Linking to GEOS 3.12.2, GDAL 3.9.3, PROJ 9.4.1; sf_use_s2() is TRUE
data(oldcol, package = "spdep")
listw <- nb2listw(COL.nb, style = "W")
test1 <- slm_ml(CRIME ~ INC + HOVAL, data = COL.OLD, listw = listw)
## Cargando paquete requerido: maxLik
## Cargando paquete requerido: miscTools
##
## Please cite the 'maxLik' package as:
## Henningsen, Arne and Toomet, Ott (2011). maxLik: A package for maximum likelihood estimation in R. Computational Statistics 26(3), 443-458. DOI 10.1007/s00180-010-0217-1.
##
## If you have questions, suggestions, or comments regarding the 'maxLik' package, please use a forum or 'tracker' at maxLik's R-Forge site:
## https://r-forge.r-project.org/projects/maxlik/
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
summary(test1)
## --------------------------------------------
## Maximum Likelihood estimation
## BFGS maximization, 134 iterations
## Return code 0: successful convergence
## Log-Likelihood: -182.3917
## 5 free parameters
## Estimates:
## Estimate Std. error t value Pr(> t)
## (Intercept) 44.8815 7.8317 5.731 1e-08 ***
## INC -1.0263 0.3268 -3.140 0.00169 **
## HOVAL -0.2655 0.0878 -3.024 0.00249 **
## rho 0.4339 19.1395 0.023 0.98191
## sig.sq 94.5811 0.1230 768.906 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## --------------------------------------------
library(spatialreg)
## Cargando paquete requerido: Matrix
##
## Adjuntando el paquete: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
## set.coresOption, set.mcOption, set.VerboseOption,
## set.ZeroPolicyOption
sreg <- lagsarlm(CRIME ~ INC + HOVAL, data = COL.OLD, listw = listw)
summary(sreg)
##
## Call:lagsarlm(formula = CRIME ~ INC + HOVAL, data = COL.OLD, listw = listw)
##
## Residuals:
## Min 1Q Median 3Q Max
## -37.68585 -5.35636 0.05421 6.02013 23.20555
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 45.079250 7.177347 6.2808 3.369e-10
## INC -1.031616 0.305143 -3.3808 0.0007229
## HOVAL -0.265926 0.088499 -3.0049 0.0026570
##
## Rho: 0.43102, LR test value: 9.9736, p-value: 0.001588
## Asymptotic standard error: 0.11768
## z-value: 3.6626, p-value: 0.00024962
## Wald statistic: 13.415, p-value: 0.00024962
##
## Log likelihood: -182.3904 for lag model
## ML residual variance (sigma squared): 95.494, (sigma: 9.7721)
## Number of observations: 49
## Number of parameters estimated: 5
## AIC: 374.78, (AIC for lm: 382.75)
## LM test for residual autocorrelation
## test value: 0.31954, p-value: 0.57188