# ACTIVIDAD 3 – Análisis de Regresión Espacial Local
# ----------------------------------------------------
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: sf
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
## Loading required package: Matrix
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
## set.coresOption, set.mcOption, set.VerboseOption,
## set.ZeroPolicyOption
library(sf)
library(tmap)
## Breaking News: tmap 3.x is retiring. Please test v4, e.g. with
## remotes::install_github('r-tmap/tmap')
## Loading required package: sp
## NOTE: This package does not constitute approval of GWR
## as a method of spatial analysis; see example(gwr)
library(gstat)
library(ggplot2)
# 1. Cargar la base de datos
base <- read_csv("/Users/gabrielmedina/Downloads/regional_price_dataset.csv")
## Rows: 1491 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): zona
## dbl (2): precio, cantidad
## date (1): fecha
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# 2. Preparar los datos
base <- base %>%
group_by(zona) %>%
summarise(precio = mean(precio, na.rm = TRUE), cantidad = sum(cantidad, na.rm = TRUE)) %>%
ungroup()
zonas_coord <- data.frame(
zona = c("Centro", "Norte", "Sur"),
lon = c(-99.1332, -100.3161, -93.1131),
lat = c(19.4326, 25.6866, 16.7539)
)
base <- left_join(base, zonas_coord, by = "zona") %>%
filter(!is.na(precio) & !is.na(cantidad) & !is.na(lon) & !is.na(lat))
# Convertir a objeto sf
sf_data <- st_as_sf(base, coords = c("lon", "lat"), crs = 4326)
# 3. Matrices de conectividad
coords <- st_coordinates(sf_data)
# Contigüedad simulada usando distancias mínimas entre 3 puntos (k=2)
k_nb <- knn2nb(knearneigh(coords, k = 2))
## Warning in knearneigh(coords, k = 2): k greater than one-third of the number of
## data points
k_lw <- nb2listw(k_nb, style = "W")
# Matriz de distancia (hasta 2000 km por ejemplo)
d_nb <- dnearneigh(coords, 0, 2000)
d_lw <- nb2listw(d_nb, style = "W")
# 4. Modelo de regresión global no espacial
modelo_lm <- lm(precio ~ cantidad, data = base)
summary(modelo_lm)
## Warning in summary.lm(modelo_lm): essentially perfect fit: summary may be
## unreliable
##
## Call:
## lm(formula = precio ~ cantidad, data = base)
##
## Residuals:
## 1 2 3
## 6.5e-16 -9.5e-16 3.0e-16
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.155e+01 6.436e-15 1.795e+15 3.55e-16 ***
## cantidad 7.325e-20 6.598e-20 1.110e+00 0.467
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.19e-15 on 1 degrees of freedom
## Multiple R-squared: 0.6904, Adjusted R-squared: 0.3808
## F-statistic: 2.23 on 1 and 1 DF, p-value: 0.3757
# 5. Modelo de regresión espacial global (SAR)
modelo_sar <- lagsarlm(precio ~ cantidad, data = base, listw = k_lw, zero.policy = TRUE)
## Warning in lagsarlm(precio ~ cantidad, data = base, listw = k_lw, zero.policy = TRUE): inversion of asymptotic covariance matrix failed for tol.solve = 2.22044604925031e-16
## reciprocal condition number = 4.44822e-62 - using numerical Hessian.
##
## Call:lagsarlm(formula = precio ~ cantidad, data = base, listw = k_lw,
## zero.policy = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0681e-23 -3.6539e-24 3.3728e-24 5.3403e-24 7.3078e-24
##
## Type: lag
## Coefficients: (numerical Hessian approximate standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.9651e-07 NaN NaN NaN
## cantidad 8.2352e-28 NaN NaN NaN
##
## Rho: 1, LR test value: 70.223, p-value: < 2.22e-16
## Approximate (numerical Hessian) standard error: NaN
## z-value: NaN, p-value: NA
## Wald statistic: NaN, p-value: NA
##
## Log likelihood: 135.5983 for lag model
## ML residual variance (sigma squared): 5.9618e-47, (sigma: 7.7213e-24)
## Number of observations: 3
## Number of parameters estimated: 4
## AIC: -263.2, (AIC for lm: -194.97)
# 6. Autocorrelación espacial en los residuales (controlando NAs)
resid_lm <- residuals(modelo_lm)
resid_sar <- residuals(modelo_sar)
# Reemplazar NA por 0 si es necesario para evitar errores de Moran
resid_lm[is.na(resid_lm)] <- 0
resid_sar[is.na(resid_sar)] <- 0
# 7. Modelo GWR
gwr_coords <- as.matrix(base[, c("lon", "lat")])
base_sp <- base
coordinates(base_sp) <- ~lon+lat
proj4string(base_sp) <- CRS("+proj=longlat +datum=WGS84")
bw <- gwr.sel(precio ~ cantidad, data = base_sp, coords = gwr_coords)
## Warning in gwr.sel(precio ~ cantidad, data = base_sp, coords = gwr_coords):
## data is Spatial* object, ignoring coords argument
## Bandwidth: 474.1029 CV score: 4.102077e-29
## Bandwidth: 766.3487 CV score: 6.626432e-29
## Bandwidth: 293.485 CV score: 3.470988e-29
## Bandwidth: 223.1893 CV score: 6.310887e-30
## Bandwidth: 138.4119 CV score: NA
## Warning in optimize(gwr.cv.f, lower = beta1, upper = beta2, maximum = FALSE, :
## NA/Inf replaced by maximum positive value
## Bandwidth: 190.8072 CV score: 1.577722e-29
## Bandwidth: 228.5514 CV score: 1.893266e-29
## Bandwidth: 209.0831 CV score: 3.155444e-29
## Bandwidth: 217.8012 CV score: 1.893266e-29
## Bandwidth: 221.1313 CV score: 1.293732e-28
## Bandwidth: 225.2375 CV score: 3.155444e-29
## Bandwidth: 222.4032 CV score: 2.524355e-29
## Bandwidth: 223.9717 CV score: 3.155444e-30
## Bandwidth: 224.4552 CV score: 5.995343e-29
## Bandwidth: 223.6728 CV score: 4.102077e-29
## Bandwidth: 224.1563 CV score: 6.310887e-30
## Bandwidth: 223.8575 CV score: 6.626432e-29
## Bandwidth: 224.0422 CV score: 3.470988e-29
## Bandwidth: 223.9281 CV score: 1.262177e-29
## Bandwidth: 223.9986 CV score: 6.310887e-30
## Bandwidth: 223.955 CV score: 3.786532e-29
## Bandwidth: 223.9819 CV score: 1.893266e-29
## Bandwidth: 223.9653 CV score: 2.524355e-29
## Bandwidth: 223.9756 CV score: 1.262177e-29
## Bandwidth: 223.9692 CV score: 2.524355e-29
## Bandwidth: 223.9732 CV score: 5.04871e-29
## Bandwidth: 223.9707 CV score: 1.893266e-29
## Bandwidth: 223.9722 CV score: 1.893266e-29
## Bandwidth: 223.9713 CV score: 3.155444e-30
## Bandwidth: 223.9715 CV score: 5.995343e-29
## Bandwidth: 223.9711 CV score: 6.626432e-29
## Bandwidth: 223.9712 CV score: 3.155444e-30
## Bandwidth: 223.9712 CV score: 1.893266e-29
## Bandwidth: 223.9712 CV score: 3.155444e-30
gwr_model <- gwr(precio ~ cantidad, data = base_sp, coords = gwr_coords, bandwidth = bw, hatmatrix=TRUE)
## Warning in gwr(precio ~ cantidad, data = base_sp, coords = gwr_coords,
## bandwidth = bw, : data is Spatial* object, ignoring coords argument
## $v1
## [1] 2.996923
##
## $v2
## [1] 2.993878
##
## $delta1
## [1] 3.165426e-05
##
## $delta2
## [1] 1.001992e-09
##
## $sigma2
## [1] 7.396717e-15
##
## $sigma2.b
## [1] 7.804587e-20
##
## $AICb
## [1] -135.4866
##
## $AICh
## [1] -120.4804
##
## $AICc
## [1] -568190.1
##
## $edf
## [1] 3.165426e-05
##
## $rss
## [1] 2.341376e-19
##
## $nu1
## [1] 2.993878
##
## $odelta2
## [1] 5.009961e-10
##
## $n
## [1] 3
# 8. Visualización
base$gwr_coef <- gwr_model$SDF$"cantidad"