#Carga de datos
library(readxl)
datosqgiz <- read_excel("XPABLO_raw.xlsx")
attach(datosqgiz)
#Mapear la materia organica, con ggplot
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
datosqgiz |>
ggplot(aes(x=Lat, y=Long, color=MO))+
geom_point()+
labs(x="Longitud", y="Latitud", title = "Datos espaciales de Materia Organica ")+
theme_classic()

#Ahora se hace un modelo de regresión multiple, para la variable MO
modelo.lm <- lm(MO ~ Ca + Mg )
summary(modelo.lm)
##
## Call:
## lm(formula = MO ~ Ca + Mg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.10451 -0.43644 -0.05345 0.47432 2.08162
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.450337 0.078924 18.376 <2e-16 ***
## Ca 0.089591 0.009465 9.466 <2e-16 ***
## Mg -0.002043 0.031346 -0.065 0.948
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6212 on 400 degrees of freedom
## Multiple R-squared: 0.221, Adjusted R-squared: 0.2171
## F-statistic: 56.74 on 2 and 400 DF, p-value: < 2.2e-16
#Y se extraen los residuos de este modelo
res <-residuals(modelo.lm)
hist(res)

shapiro.test(res)
##
## Shapiro-Wilk normality test
##
## data: res
## W = 0.99226, p-value = 0.03472
#Ahora se hacen otras pruebas para verificar si se cuimple la normalidad, ya que con shapiro no se pudo
library(nortest)
ad.test(res);cvm.test(res);lillie.test(res)
##
## Anderson-Darling normality test
##
## data: res
## A = 0.97011, p-value = 0.01444
##
## Cramer-von Mises normality test
##
## data: res
## W = 0.15644, p-value = 0.01949
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: res
## D = 0.04024, p-value = 0.1164
#Ahora que los datos son "normales", ahora se hace la
boxplot(res)

#Ahora se verifica la dependencia espacial de los residuos, mediante una matriz de los residuos, teniendo en cuena la longitud y latitud:
dist = as.matrix(dist(cbind(Long, Lat)))
dist_inv <- 1/dist
diag(dist_inv)<- 0
dist_inv[1:5,1:5]
## 1 2 3 4 5
## 1 0.00000 158.90672 158.9067 112.3640 79.45336
## 2 158.90672 0.00000 112.3640 158.9067 71.06525
## 3 158.90672 112.36402 0.0000 158.9067 158.90672
## 4 112.36402 158.90672 158.9067 0.0000 112.36402
## 5 79.45336 71.06525 158.9067 112.3640 0.00000
#Ahora se hace el indice de moran, el cual sirve para verificar autocorrelación espacial:
library(ape)
##
## Attaching package: 'ape'
## The following object is masked from 'package:dplyr':
##
## where
Moran.I (res, dist_inv)
## $observed
## [1] 0.02244296
##
## $expected
## [1] -0.002487562
##
## $sd
## [1] 0.004260633
##
## $p.value
## [1] 4.875545e-09
#Ahora en vez de mapear la materia organica, ahora se ampean los residuales del modelo:
datosqgiz |>
ggplot(aes(x=Lat, y=Long, color=res))+
geom_point()+
labs(x="Longitud", y="Latitud", title = "Datos espaciales de los residuos")+
theme_classic()

#Existen diversos modelos espaciales, y de gran interés saber cual me sirve más para ajustar la materia organica
library(spatialreg)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loading required package: sf
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1; sf_use_s2() is TRUE
## Registered S3 method overwritten by 'spdep':
## method from
## plot.mst ape
library(spdep)
##
## Attaching package: 'spdep'
## The following objects are masked from 'package:spatialreg':
##
## get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
## set.coresOption, set.mcOption, set.VerboseOption,
## set.ZeroPolicyOption
library(sp)
#Ahora se construye la matriz de pesos espaciales
XYdata=data.frame(Lat,Long)
contnb=dnearneigh(coordinates(XYdata),0,380000,longlat = T)
dlist <- nbdists(contnb, XYdata) #Distance
dlist <- lapply(dlist, function(x) 1/x) #inverse distance
Wve=nb2listw(contnb,glist=dlist,style = "W") #W matriz-standarized
#Ahora se corre un modelo espacial con la matriz anterior, se usa el sacsar
mod_esp=sacsarlm(MO~Ca+Mg,listw = Wve)
summary(mod_esp)
##
## Call:sacsarlm(formula = MO ~ Ca + Mg, listw = Wve)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.99262 -0.40366 -0.02078 0.45603 2.03462
##
## Type: sac
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.0132573 1.4928887 0.0089 0.9929
## Ca 0.0852854 0.0110719 7.7028 1.332e-14
## Mg 0.0010619 0.0338908 0.0313 0.9750
##
## Rho: 0.66767
## Asymptotic standard error: 0.72032
## z-value: 0.92691, p-value: 0.35397
## Lambda: 0.70367
## Asymptotic standard error: 0.68067
## z-value: 1.0338, p-value: 0.30123
##
## LR test value: 13.13, p-value: 0.0014091
##
## Log likelihood: -371.8637 for sac model
## ML residual variance (sigma squared): 0.36747, (sigma: 0.6062)
## Number of observations: 403
## Number of parameters estimated: 6
## AIC: 755.73, (AIC for lm: 764.86)
res_esp <- residuals(mod_esp)
hist(res_esp)

Moran.I(res_esp, dist_inv)
## $observed
## [1] 0.005040485
##
## $expected
## [1] -0.002487562
##
## $sd
## [1] 0.004260315
##
## $p.value
## [1] 0.07722546
#Ahora se buscan los valores ajustados y predichos
Valores_predichos <- mod_esp$fitted.values
#Ahora se mapean en gg plot:
datosqgiz |>
ggplot(aes(x=Lat, y=Long, color=Valores_predichos))+
geom_point()+
labs(x="Longitud", y="Latitud", title = "Datos espaciales de los valores predichos")+
theme_classic()

#Como el magnesio no sirve, ahora solo se hace con el calcio:
mod_espca=sacsarlm(MO~Ca,listw = Wve)
summary(mod_espca)
##
## Call:sacsarlm(formula = MO ~ Ca, listw = Wve)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.994648 -0.403813 -0.021071 0.455428 2.034905
##
## Type: sac
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.0146609 1.4817690 0.0099 0.9921
## Ca 0.0854420 0.0096792 8.8274 <2e-16
##
## Rho: 0.66737
## Asymptotic standard error: 0.71638
## z-value: 0.93159, p-value: 0.35155
## Lambda: 0.70386
## Asymptotic standard error: 0.6764
## z-value: 1.0406, p-value: 0.29806
##
## LR test value: 13.133, p-value: 0.0014068
##
## Log likelihood: -371.8642 for sac model
## ML residual variance (sigma squared): 0.36748, (sigma: 0.6062)
## Number of observations: 403
## Number of parameters estimated: 5
## AIC: 753.73, (AIC for lm: 762.86)
res_espca <- residuals(mod_espca)
hist(res_espca)

Moran.I(res_espca, dist_inv)
## $observed
## [1] 0.005047208
##
## $expected
## [1] -0.002487562
##
## $sd
## [1] 0.00426031
##
## $p.value
## [1] 0.07696118
#Ahora que pasa si hago el modelo con todas las variables explicativas:
colnames(datosqgiz)
## [1] "id" "Long" "Lat" "z" "MO" "Ca" "Mg" "K" "Na" "CICE"
## [11] "CE" "Fe" "Cu" "Zn"
mod_all <- sacsarlm(MO ~ Ca + I(Ca**2) + z + K + Na,listw = Wve)
summary(mod_all)
##
## Call:sacsarlm(formula = MO ~ Ca + I(Ca^2) + z + K + Na, listw = Wve)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.581684 -0.384202 -0.060031 0.417791 1.950593
##
## Type: sac
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.28019999 1.49616122 -0.1873 0.8514
## Ca 0.12428195 0.03125885 3.9759 7.011e-05
## I(Ca^2) -0.00316672 0.00155496 -2.0365 0.0417
## z 0.00043946 0.00285613 0.1539 0.8777
## K 1.05861460 0.24210268 4.3726 1.228e-05
## Na 0.03565224 0.24589010 0.1450 0.8847
##
## Rho: 0.63124
## Asymptotic standard error: 0.67663
## z-value: 0.93291, p-value: 0.35087
## Lambda: 0.72372
## Asymptotic standard error: 0.5585
## z-value: 1.2958, p-value: 0.19503
##
## LR test value: 12.434, p-value: 0.001995
##
## Log likelihood: -358.1374 for sac model
## ML residual variance (sigma squared): 0.34333, (sigma: 0.58595)
## Number of observations: 403
## Number of parameters estimated: 9
## AIC: 734.27, (AIC for lm: 742.71)
res_all <- residuals(mod_all)
hist(res_all)

shapiro.test(res_all) #se toma un 0,01
##
## Shapiro-Wilk normality test
##
## data: res_all
## W = 0.98683, p-value = 0.001032
Moran.I(res_all, dist_inv)
## $observed
## [1] 0.005423791
##
## $expected
## [1] -0.002487562
##
## $sd
## [1] 0.004260256
##
## $p.value
## [1] 0.06330921
#Ahora se hace la correlación entre el valor observado y los valores predichos o ajustados:
a <- cor(MO, mod_all$fitted.values) #Coeficiente de determinación
cor(MO, mod_all$fitted.values)**2 #Coeficiente de determinación R2
## [1] 0.3018563
#La variabilad explicada es muy baja
#Se hace
plot(MO,mod_all$fitted.values,pch=19)

#De tarea ahora se selcionan otras cuatro variables explicativas, y se hace la pauta para los otros modelos.