SLM by MLE - Spatial Regression

Introducción Este documento realiza un análisis de regresión espacial mediante máxima verosimilitud (MLE). Implementaremos una función personalizada para optimizar los parámetros del modelo espacial y compararemos los resultados con el paquete spatialreg.

Definición de la función de log-verosimilitud

sml_ll <- function(theta, y, X, W, gradient = TRUE, hessian = TRUE){
  K <- ncol(X)
  N <- nrow(X)
  
  betas  <- theta[1:K]
  rho    <- theta[K + 1]
  sig.sq <- theta[K + 2]
  
  A   <- diag(N) -  rho * W
  Ay  <- A %*% y
  Xb  <- X %*% betas
  res <- Ay - Xb
  
  detA <- det(A)
  ll   <- -0.5 * N * log(2 * pi * sig.sq) - 0.5 * crossprod(res) / sig.sq + log(detA)
  
  if (gradient){
    C           <-  W %*% solve(A)
    grad.betas  <- (1 / sig.sq) * t(X) %*% res
    grad.rho    <- - sum(diag(C)) + (1 / sig.sq) * t(res) %*% W %*% y
    grad.sig.sq <- (1 / (2 * sig.sq ^2)) * (t(res) %*% res - N * sig.sq)
    attr(ll, 'gradient') <- c(grad.betas, grad.rho, grad.sig.sq)
  }
  
  if (hessian){
    H    <- matrix(NA, nrow = (K + 2), ncol = (K + 2))
    h_bb <- - (1 / sig.sq) * t(X) %*% X
    h_bs <- - (1 / sig.sq ^ 2) * t(X) %*% res
    h_br <- - (1 / sig.sq) * t(X) %*% W %*% y 
    h_ss <- (N / (2 * sig.sq ^ 2)) - (1 / sig.sq ^ 3) * t(res) %*% res
    h_sr <-  - t(res) %*% W %*% y / sig.sq ^ 2
    h_rr <- - sum(diag(C %*% C)) - (1 / sig.sq) * (t(y) %*% t(W) %*% W %*% y)
    H[1:K, 1:K]     <- h_bb
    H[1:K, K + 1]   <- h_bs
    H[1:K, K + 2]   <- h_br
    H[K + 1, 1:K]   <- t(h_bs)
    H[K + 1, K + 1] <- h_ss
    H[K + 1, K + 2] <- h_sr
    H[K + 2, 1:K]   <- t(h_br)
    H[K + 2, K + 1] <- h_sr
    H[K + 2, K + 2] <- h_rr
    attr(ll, 'hessian') <- H
  }
  return(ll)
}

slm_ml <- function(formula, data, listw, gradient = TRUE, hessian  = TRUE, ...){
  require("maxLik")
  require("spdep")
  
  callT <- match.call(expand.dots = TRUE)
  mf <- callT
  m  <- match(c("formula", "data"), names(mf), 0L)
  mf <- mf[c(1L, m)]
  mf[[1L]] <- as.name("model.frame")
  mf <- eval(mf, parent.frame())
  
  y  <- model.response(mf)
  X  <- model.matrix(formula, mf)
  W  <- listw2mat(listw)
  K  <- ncol(X)
  
  b_hat <- coef(lm(y ~ X - 1))
  start <- c(b_hat, 0, 1)
  names(start) <- c(colnames(X), "rho", "sig.sq")
  
  sym          <- all(W == t(W))
  omega        <- eigen(W, only.values = TRUE, symmetric = sym)
  lambda_space <- if (is.complex(omega$values)) 1 / range(Re(omega$values)) else 1 / range(omega$values)
  
  A <- rbind(c(rep(0, K), 1, 0),
             c(rep(0, K), -1, 0), 
             c(rep(0, K), 0, 1))
  B <- c(-1L * (lambda_space[1] + sqrt(.Machine$double.eps)), 
         lambda_space[2] - sqrt(.Machine$double.eps), 
         -1L* sqrt(.Machine$double.eps))
  callT$constraints <- list(ineqA = A, ineqB = B)
  
  if (is.null(callT$method))  callT$method  <- 'bfgs'
  if (is.null(callT$iterlim)) callT$iterlim <- 100000
  opt <- callT
  m <- match(c('method', 'print.level', 'iterlim', 'control'), names(opt), 0L)
  opt <- opt[c(1L, m)]
  opt$start     <- start
  opt[[1]]      <- as.name('maxLik')
  opt$logLik    <- as.name('sml_ll')
  opt$gradient  <- gradient
  opt$hessian   <- hessian
  opt[c('y', 'W', 'X')] <- list(as.name('y'), as.name('W'), as.name('X'))
  
  out <- eval(opt, sys.frame(which = length(sys.calls())))
  return(out)
}

library(spdep)

## Cargando paquete requerido: spData

## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`

## Cargando paquete requerido: sf

## Linking to GEOS 3.12.2, GDAL 3.9.3, PROJ 9.4.1; sf_use_s2() is TRUE

data(oldcol, package = "spdep")
listw <- nb2listw(COL.nb, style = "W")

test1 <- slm_ml(CRIME ~ INC + HOVAL, data = COL.OLD, listw = listw)

## Cargando paquete requerido: maxLik

## Cargando paquete requerido: miscTools

## 
## Please cite the 'maxLik' package as:
## Henningsen, Arne and Toomet, Ott (2011). maxLik: A package for maximum likelihood estimation in R. Computational Statistics 26(3), 443-458. DOI 10.1007/s00180-010-0217-1.
## 
## If you have questions, suggestions, or comments regarding the 'maxLik' package, please use a forum or 'tracker' at maxLik's R-Forge site:
## https://r-forge.r-project.org/projects/maxlik/

## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs
## Warning in log(detA): Se han producido NaNs

summary(test1)

## --------------------------------------------
## Maximum Likelihood estimation
## BFGS maximization, 134 iterations
## Return code 0: successful convergence 
## Log-Likelihood: -182.3917 
## 5  free parameters
## Estimates:
##             Estimate Std. error t value Pr(> t)    
## (Intercept)  44.8815     7.8317   5.731   1e-08 ***
## INC          -1.0263     0.3268  -3.140 0.00169 ** 
## HOVAL        -0.2655     0.0878  -3.024 0.00249 ** 
## rho           0.4339    19.1395   0.023 0.98191    
## sig.sq       94.5811     0.1230 768.906 < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## --------------------------------------------

library(spatialreg)

## Cargando paquete requerido: Matrix

## 
## Adjuntando el paquete: 'spatialreg'

## The following objects are masked from 'package:spdep':
## 
##     get.ClusterOption, get.coresOption, get.mcOption,
##     get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
##     set.coresOption, set.mcOption, set.VerboseOption,
##     set.ZeroPolicyOption

sreg <- lagsarlm(CRIME ~ INC + HOVAL, data = COL.OLD, listw = listw)
summary(sreg)

## 
## Call:lagsarlm(formula = CRIME ~ INC + HOVAL, data = COL.OLD, listw = listw)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -37.68585  -5.35636   0.05421   6.02013  23.20555 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##              Estimate Std. Error z value  Pr(>|z|)
## (Intercept) 45.079250   7.177347  6.2808 3.369e-10
## INC         -1.031616   0.305143 -3.3808 0.0007229
## HOVAL       -0.265926   0.088499 -3.0049 0.0026570
## 
## Rho: 0.43102, LR test value: 9.9736, p-value: 0.001588
## Asymptotic standard error: 0.11768
##     z-value: 3.6626, p-value: 0.00024962
## Wald statistic: 13.415, p-value: 0.00024962
## 
## Log likelihood: -182.3904 for lag model
## ML residual variance (sigma squared): 95.494, (sigma: 9.7721)
## Number of observations: 49 
## Number of parameters estimated: 5 
## AIC: 374.78, (AIC for lm: 382.75)
## LM test for residual autocorrelation
## test value: 0.31954, p-value: 0.57188

SLM by MLE - Spatial Regression

Jhonier Rangel

2025-01-20