# ACTIVIDAD 3 – Análisis de Regresión Espacial Local
# ----------------------------------------------------
library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(spdep)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: sf
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(spatialreg)
## Loading required package: Matrix
## 
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
## 
##     get.ClusterOption, get.coresOption, get.mcOption,
##     get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
##     set.coresOption, set.mcOption, set.VerboseOption,
##     set.ZeroPolicyOption
library(sf)
library(tmap)
## Breaking News: tmap 3.x is retiring. Please test v4, e.g. with
## remotes::install_github('r-tmap/tmap')
library(spgwr)
## Loading required package: sp
## NOTE: This package does not constitute approval of GWR
## as a method of spatial analysis; see example(gwr)
library(gstat)
library(ggplot2)

# 1. Cargar la base de datos
base <- read_csv("/Users/gabrielmedina/Downloads/regional_price_dataset.csv")
## Rows: 1491 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): zona
## dbl  (2): precio, cantidad
## date (1): fecha
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# 2. Preparar los datos
base <- base %>% 
  group_by(zona) %>% 
  summarise(precio = mean(precio, na.rm = TRUE), cantidad = sum(cantidad, na.rm = TRUE)) %>% 
  ungroup()

zonas_coord <- data.frame(
  zona = c("Centro", "Norte", "Sur"),
  lon = c(-99.1332, -100.3161, -93.1131),
  lat = c(19.4326, 25.6866, 16.7539)
)

base <- left_join(base, zonas_coord, by = "zona") %>% 
  filter(!is.na(precio) & !is.na(cantidad) & !is.na(lon) & !is.na(lat))

# Convertir a objeto sf
sf_data <- st_as_sf(base, coords = c("lon", "lat"), crs = 4326)

# 3. Matrices de conectividad
coords <- st_coordinates(sf_data)

# Contigüedad simulada usando distancias mínimas entre 3 puntos (k=2)
k_nb <- knn2nb(knearneigh(coords, k = 2))
## Warning in knearneigh(coords, k = 2): k greater than one-third of the number of
## data points
k_lw <- nb2listw(k_nb, style = "W")

# Matriz de distancia (hasta 2000 km por ejemplo)
d_nb <- dnearneigh(coords, 0, 2000)
d_lw <- nb2listw(d_nb, style = "W")

# 4. Modelo de regresión global no espacial
modelo_lm <- lm(precio ~ cantidad, data = base)
summary(modelo_lm)
## Warning in summary.lm(modelo_lm): essentially perfect fit: summary may be
## unreliable
## 
## Call:
## lm(formula = precio ~ cantidad, data = base)
## 
## Residuals:
##        1        2        3 
##  6.5e-16 -9.5e-16  3.0e-16 
## 
## Coefficients:
##              Estimate Std. Error   t value Pr(>|t|)    
## (Intercept) 1.155e+01  6.436e-15 1.795e+15 3.55e-16 ***
## cantidad    7.325e-20  6.598e-20 1.110e+00    0.467    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.19e-15 on 1 degrees of freedom
## Multiple R-squared:  0.6904, Adjusted R-squared:  0.3808 
## F-statistic:  2.23 on 1 and 1 DF,  p-value: 0.3757
# 5. Modelo de regresión espacial global (SAR)
modelo_sar <- lagsarlm(precio ~ cantidad, data = base, listw = k_lw, zero.policy = TRUE)
## Warning in lagsarlm(precio ~ cantidad, data = base, listw = k_lw, zero.policy = TRUE): inversion of asymptotic covariance matrix failed for tol.solve = 2.22044604925031e-16 
##   reciprocal condition number = 4.44822e-62 - using numerical Hessian.
summary(modelo_sar)
## 
## Call:lagsarlm(formula = precio ~ cantidad, data = base, listw = k_lw, 
##     zero.policy = TRUE)
## 
## Residuals:
##         Min          1Q      Median          3Q         Max 
## -1.0681e-23 -3.6539e-24  3.3728e-24  5.3403e-24  7.3078e-24 
## 
## Type: lag 
## Coefficients: (numerical Hessian approximate standard errors) 
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.9651e-07        NaN     NaN      NaN
## cantidad    8.2352e-28        NaN     NaN      NaN
## 
## Rho: 1, LR test value: 70.223, p-value: < 2.22e-16
## Approximate (numerical Hessian) standard error: NaN
##     z-value: NaN, p-value: NA
## Wald statistic: NaN, p-value: NA
## 
## Log likelihood: 135.5983 for lag model
## ML residual variance (sigma squared): 5.9618e-47, (sigma: 7.7213e-24)
## Number of observations: 3 
## Number of parameters estimated: 4 
## AIC: -263.2, (AIC for lm: -194.97)
# 6. Autocorrelación espacial en los residuales (controlando NAs)
resid_lm <- residuals(modelo_lm)
resid_sar <- residuals(modelo_sar)

# Reemplazar NA por 0 si es necesario para evitar errores de Moran
resid_lm[is.na(resid_lm)] <- 0
resid_sar[is.na(resid_sar)] <- 0



# 7. Modelo GWR
gwr_coords <- as.matrix(base[, c("lon", "lat")])
base_sp <- base
coordinates(base_sp) <- ~lon+lat
proj4string(base_sp) <- CRS("+proj=longlat +datum=WGS84")

bw <- gwr.sel(precio ~ cantidad, data = base_sp, coords = gwr_coords)
## Warning in gwr.sel(precio ~ cantidad, data = base_sp, coords = gwr_coords):
## data is Spatial* object, ignoring coords argument
## Bandwidth: 474.1029 CV score: 4.102077e-29 
## Bandwidth: 766.3487 CV score: 6.626432e-29 
## Bandwidth: 293.485 CV score: 3.470988e-29 
## Bandwidth: 223.1893 CV score: 6.310887e-30 
## Bandwidth: 138.4119 CV score: NA
## Warning in optimize(gwr.cv.f, lower = beta1, upper = beta2, maximum = FALSE, :
## NA/Inf replaced by maximum positive value
## Bandwidth: 190.8072 CV score: 1.577722e-29 
## Bandwidth: 228.5514 CV score: 1.893266e-29 
## Bandwidth: 209.0831 CV score: 3.155444e-29 
## Bandwidth: 217.8012 CV score: 1.893266e-29 
## Bandwidth: 221.1313 CV score: 1.293732e-28 
## Bandwidth: 225.2375 CV score: 3.155444e-29 
## Bandwidth: 222.4032 CV score: 2.524355e-29 
## Bandwidth: 223.9717 CV score: 3.155444e-30 
## Bandwidth: 224.4552 CV score: 5.995343e-29 
## Bandwidth: 223.6728 CV score: 4.102077e-29 
## Bandwidth: 224.1563 CV score: 6.310887e-30 
## Bandwidth: 223.8575 CV score: 6.626432e-29 
## Bandwidth: 224.0422 CV score: 3.470988e-29 
## Bandwidth: 223.9281 CV score: 1.262177e-29 
## Bandwidth: 223.9986 CV score: 6.310887e-30 
## Bandwidth: 223.955 CV score: 3.786532e-29 
## Bandwidth: 223.9819 CV score: 1.893266e-29 
## Bandwidth: 223.9653 CV score: 2.524355e-29 
## Bandwidth: 223.9756 CV score: 1.262177e-29 
## Bandwidth: 223.9692 CV score: 2.524355e-29 
## Bandwidth: 223.9732 CV score: 5.04871e-29 
## Bandwidth: 223.9707 CV score: 1.893266e-29 
## Bandwidth: 223.9722 CV score: 1.893266e-29 
## Bandwidth: 223.9713 CV score: 3.155444e-30 
## Bandwidth: 223.9715 CV score: 5.995343e-29 
## Bandwidth: 223.9711 CV score: 6.626432e-29 
## Bandwidth: 223.9712 CV score: 3.155444e-30 
## Bandwidth: 223.9712 CV score: 1.893266e-29 
## Bandwidth: 223.9712 CV score: 3.155444e-30
gwr_model <- gwr(precio ~ cantidad, data = base_sp, coords = gwr_coords, bandwidth = bw, hatmatrix=TRUE)
## Warning in gwr(precio ~ cantidad, data = base_sp, coords = gwr_coords,
## bandwidth = bw, : data is Spatial* object, ignoring coords argument
gwr_model$results
## $v1
## [1] 2.996923
## 
## $v2
## [1] 2.993878
## 
## $delta1
## [1] 3.165426e-05
## 
## $delta2
## [1] 1.001992e-09
## 
## $sigma2
## [1] 7.396717e-15
## 
## $sigma2.b
## [1] 7.804587e-20
## 
## $AICb
## [1] -135.4866
## 
## $AICh
## [1] -120.4804
## 
## $AICc
## [1] -568190.1
## 
## $edf
## [1] 3.165426e-05
## 
## $rss
## [1] 2.341376e-19
## 
## $nu1
## [1] 2.993878
## 
## $odelta2
## [1] 5.009961e-10
## 
## $n
## [1] 3
# 8. Visualización
base$gwr_coef <- gwr_model$SDF$"cantidad"