library(spData)
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(readxl)
library(ggplot2)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## v purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(spdep)
## Loading required package: sp
## Loading required package: sf
## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE
library(ape)
## Registered S3 method overwritten by 'ape':
## method from
## plot.mst spdep
library(sp)
library(MVA)
## Loading required package: HSAUR2
## Loading required package: tools
library(normtest)
library(nortest)
library(spatialreg)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
## set.coresOption, set.mcOption, set.VerboseOption,
## set.ZeroPolicyOption
datat <- read_excel("C:/Users/faile/Downloads/XPABLO (2).xlsx",
sheet = "tabla_1")
# Plot
datat %>%
ggplot(aes(x=Long,y=Lat,color=Ca))+
geom_point(size=3)+
scale_color_continuous(type="viridis")+
ggtitle("Calcio")
# Variables explicativas
X<- datat[,-c(1,2,3,6)]
# Coordenadas
XYdata=as.matrix(datat[,2:3])
# Construyendo la matriz de pesos
Ca.d=as.matrix(dist(XYdata, diag=T, upper=T))
1/(max(Ca.d)*2/3)
## [1] 4.511031
Ca.d.inv <-as.matrix(1/Ca.d)
diag(Ca.d.inv) <- 0
W=as.matrix(Ca.d.inv)
SUMAS=apply(W,1,sum)
We=W/SUMAS
#apply(We,1,sum)
# Moran Index
Moran_Ca=Moran.I(datat$Ca, We);Moran_Ca
## $observed
## [1] 0.08097882
##
## $expected
## [1] -0.002487562
##
## $sd
## [1] 0.004258728
##
## $p.value
## [1] 0
# Rezago MO
Ca_lag = datat$Ca%*%We
plot(x= Ca_lag,y= datat$Ca,asp=1)
abline(lm(datat$Ca~as.vector(Ca_lag)))
mod_1= lm(datat$Ca~as.vector(Ca_lag))
summary(mod_1)
##
## Call:
## lm(formula = datat$Ca ~ as.vector(Ca_lag))
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.0086 -2.2390 -0.3777 1.5156 12.7199
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.8196 1.3561 -2.079 0.0382 *
## as.vector(Ca_lag) 1.3523 0.1681 8.045 9.86e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.433 on 401 degrees of freedom
## Multiple R-squared: 0.139, Adjusted R-squared: 0.1368
## F-statistic: 64.72 on 1 and 401 DF, p-value: 9.86e-15
#Angulo real
atan(1.3523)*180/pi
## [1] 53.51778
u = mod_1$residuals
shapiro.test(u)
##
## Shapiro-Wilk normality test
##
## data: u
## W = 0.95882, p-value = 3.403e-09
Moran.I(u, We)
## $observed
## [1] 0.0728868
##
## $expected
## [1] -0.002487562
##
## $sd
## [1] 0.004256182
##
## $p.value
## [1] 0
u_lag = We%*%u
plot(x= u_lag,y= u)
abline(lm(u_lag~u))
mod_1= lm(u_lag~u)
summary(mod_1)
##
## Call:
## lm(formula = u_lag ~ u)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.80487 -0.37402 -0.07393 0.34199 1.22027
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.09426 0.02278 -4.139 4.26e-05 ***
## u 0.07289 0.00665 10.960 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4572 on 401 degrees of freedom
## Multiple R-squared: 0.2305, Adjusted R-squared: 0.2286
## F-statistic: 120.1 on 1 and 401 DF, p-value: < 2.2e-16
epsi =mod_1$residuals
shapiro.test(epsi)
##
## Shapiro-Wilk normality test
##
## data: epsi
## W = 0.9586, p-value = 3.166e-09
Moran.I(epsi,We)
## $observed
## [1] 0.2771621
##
## $expected
## [1] -0.002487562
##
## $sd
## [1] 0.004265512
##
## $p.value
## [1] 0
contnb=dnearneigh(coordinates(XYdata)
,0,380000,longlat = F)
contnb
## Neighbour list object:
## Number of regions: 403
## Number of nonzero links: 162006
## Percentage nonzero weights: 99.75186
## Average number of links: 402
dlist <- nbdists(contnb, XYdata)
dlist <- lapply(dlist, function(x) 1/x)
Wve=nb2listw(contnb,glist=dlist,style = "W")
#Modelo autoregresivo puro SAR
map=spautolm(Ca~1,data=datat,listw=Wve)
summary(map)
##
## Call: spautolm(formula = Ca ~ 1, data = datat, listw = Wve)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.44226 -2.27552 -0.18596 1.87723 11.05241
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.0623 4.5096 1.566 0.1173
##
## Lambda: 0.96208 LR test value: 51.922 p-value: 5.7765e-13
## Numerical Hessian standard error of lambda: 0.037642
##
## Log likelihood: -1072.116
## ML residual variance (sigma squared): 11.788, (sigma: 3.4333)
## Number of observations: 403
## Number of parameters estimated: 3
## AIC: 2150.2
res_2=map$fit$residuals
shapiro.test(res_2)
##
## Shapiro-Wilk normality test
##
## data: res_2
## W = 0.97765, p-value = 7.105e-06
Moran.I(res_2,We)
## $observed
## [1] 0.05123807
##
## $expected
## [1] -0.002487562
##
## $sd
## [1] 0.004258707
##
## $p.value
## [1] 0
# SARAR
map2 = sacsarlm(Ca~1,data=datat,listw = Wve)
## Warning in sacsarlm(Ca ~ 1, data = datat, listw = Wve): inversion of asymptotic covariance matrix failed for tol.solve = 2.22044604925031e-16
## número de condición recíproco = 1.85034e-18 - using numerical Hessian.
## Warning in sqrt(diag(fdHess)[-c(1, 2)]): Se han producido NaNs
summary(map2)
##
## Call:sacsarlm(formula = Ca ~ 1, data = datat, listw = Wve)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.5819 -2.2124 -0.2028 1.8375 10.8525
##
## Type: sac
## Coefficients: (numerical Hessian approximate standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.21342 NaN NaN NaN
##
## Rho: 0.93969
## Approximate (numerical Hessian) standard error: 0.04942
## z-value: 19.014, p-value: < 2.22e-16
## Lambda: 0.93969
## Approximate (numerical Hessian) standard error: 0.034885
## z-value: 26.937, p-value: < 2.22e-16
##
## LR test value: 82.629, p-value: < 2.22e-16
##
## Log likelihood: -1056.762 for sac model
## ML residual variance (sigma squared): 10.804, (sigma: 3.287)
## Number of observations: 403
## Number of parameters estimated: 4
## AIC: 2121.5, (AIC for lm: 2200.2)
shapiro.test(map2$residuals)
##
## Shapiro-Wilk normality test
##
## data: map2$residuals
## W = 0.9794, p-value = 1.685e-05
Moran.I(map2$residuals,We)
## $observed
## [1] 0.0339781
##
## $expected
## [1] -0.002487562
##
## $sd
## [1] 0.004258585
##
## $p.value
## [1] 0
# Modelo 3 (Cual es?)
mser1=errorsarlm(formula=Ca~MO+Mg+CICE+Zn,data=datat,listw=Wve)
summary(mser1)
##
## Call:errorsarlm(formula = Ca ~ MO + Mg + CICE + Zn, data = datat,
## listw = Wve)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.43619 -0.29998 0.15726 0.57771 3.31148
##
## Type: error
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.232119 0.764161 -0.3038 0.7613132
## MO 0.570776 0.122581 4.6563 3.219e-06
## Mg -0.346261 0.094154 -3.6776 0.0002354
## CICE 0.764131 0.024392 31.3276 < 2.2e-16
## Zn -0.184899 0.034758 -5.3196 1.040e-07
##
## Lambda: 0.90422, LR test value: 18.953, p-value: 1.3396e-05
## Asymptotic standard error: 0.06646
## z-value: 13.605, p-value: < 2.22e-16
## Wald statistic: 185.11, p-value: < 2.22e-16
##
## Log likelihood: -709.1093 for error model
## ML residual variance (sigma squared): 1.955, (sigma: 1.3982)
## Number of observations: 403
## Number of parameters estimated: 7
## AIC: 1432.2, (AIC for lm: 1449.2)
shapiro.test(mser1$residuals)
##
## Shapiro-Wilk normality test
##
## data: mser1$residuals
## W = 0.68182, p-value < 2.2e-16
Moran.I(mser1$residuals,We) # Hay dependencia espacial en residuales, no sirve el modelo
## $observed
## [1] 0.01829508
##
## $expected
## [1] -0.002487562
##
## $sd
## [1] 0.004033867
##
## $p.value
## [1] 2.576685e-07
#Modelo 4 (Cual es?)
meer1=lagsarlm(formula=Ca~MO+CICE+z+Mg+Zn,data=datat,listw=Wve)
summary(meer1)
##
## Call:lagsarlm(formula = Ca ~ MO + CICE + z + Mg + Zn, data = datat,
## listw = Wve)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.37071 -0.35594 0.14732 0.59973 3.30121
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.1459855 1.0129166 -7.0549 1.728e-12
## MO 0.5606368 0.1169964 4.7919 1.652e-06
## CICE 0.7347772 0.0237913 30.8843 < 2.2e-16
## z 0.0181623 0.0046117 3.9383 8.206e-05
## Mg -0.3326466 0.0839108 -3.9643 7.362e-05
## Zn -0.1831379 0.0333618 -5.4895 4.032e-08
##
## Rho: 0.73161, LR test value: 26.436, p-value: 2.7243e-07
## Asymptotic standard error: 0.11005
## z-value: 6.6477, p-value: 2.9767e-11
## Wald statistic: 44.192, p-value: 2.9767e-11
##
## Log likelihood: -702.1659 for lag model
## ML residual variance (sigma squared): 1.8995, (sigma: 1.3782)
## Number of observations: 403
## Number of parameters estimated: 8
## AIC: 1420.3, (AIC for lm: 1444.8)
## LM test for residual autocorrelation
## test value: 9.9698, p-value: 0.0015913
shapiro.test(meer1$residuals)
##
## Shapiro-Wilk normality test
##
## data: meer1$residuals
## W = 0.69107, p-value < 2.2e-16
Moran.I(meer1$residuals,We) # Hay dependencia espacial en residuales, no sirve el modelo
## $observed
## [1] 0.01446512
##
## $expected
## [1] -0.002487562
##
## $sd
## [1] 0.004028952
##
## $p.value
## [1] 2.579662e-05
# Modelo 5
meer1b=lagsarlm(formula=Ca~MO+CICE+Fe+Zn,data=datat,listw=Wve,type="mixed")
summary(meer1b)
##
## Call:lagsarlm(formula = Ca ~ MO + CICE + Fe + Zn, data = datat, listw = Wve,
## type = "mixed")
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.88186 -0.45810 0.13034 0.71325 3.64858
##
## Type: mixed
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.26128810 3.11646587 -1.3673 0.171517
## MO 0.61838488 0.12946431 4.7765 1.784e-06
## CICE 0.68140811 0.02001721 34.0411 < 2.2e-16
## Fe -0.00095408 0.00033636 -2.8365 0.004561
## Zn -0.11870685 0.03950613 -3.0048 0.002658
## lag.MO 1.19225991 1.31358589 0.9076 0.364070
## lag.CICE -0.37013754 0.15079525 -2.4546 0.014105
## lag.Fe 0.00956749 0.00297945 3.2112 0.001322
## lag.Zn -1.21674149 0.38825141 -3.1339 0.001725
##
## Rho: 0.89183, LR test value: 17.796, p-value: 2.4594e-05
## Asymptotic standard error: 0.074556
## z-value: 11.962, p-value: < 2.22e-16
## Wald statistic: 143.08, p-value: < 2.22e-16
##
## Log likelihood: -699.8818 for mixed model
## ML residual variance (sigma squared): 1.8688, (sigma: 1.367)
## Number of observations: 403
## Number of parameters estimated: 11
## AIC: 1421.8, (AIC for lm: 1437.6)
## LM test for residual autocorrelation
## test value: 21.551, p-value: 3.4452e-06
shapiro.test(meer1b$residuals)
##
## Shapiro-Wilk normality test
##
## data: meer1b$residuals
## W = 0.7686, p-value < 2.2e-16
Moran.I(meer1b$residuals,We) # No hay dependencia espacial
## $observed
## [1] 0.01659438
##
## $expected
## [1] -0.002487562
##
## $sd
## [1] 0.004102123
##
## $p.value
## [1] 3.291733e-06
# Modelo 6
SARAR1=sacsarlm(formula=Ca~MO+CICE+Fe+Mg+Zn,data=datat,listw=Wve)
summary(SARAR1)
##
## Call:sacsarlm(formula = Ca ~ MO + CICE + Fe + Mg + Zn, data = datat,
## listw = Wve)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.83238 -0.31392 0.11695 0.59105 3.45025
##
## Type: sac
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.74724015 1.38849783 -4.1392 3.486e-05
## MO 0.62133388 0.12512819 4.9656 6.850e-07
## CICE 0.72001234 0.02529229 28.4677 < 2.2e-16
## Fe -0.00071240 0.00032241 -2.2096 0.0271327
## Mg -0.29966194 0.09156317 -3.2727 0.0010651
## Zn -0.14178332 0.03802924 -3.7283 0.0001928
##
## Rho: 0.73129
## Asymptotic standard error: 0.16077
## z-value: 4.5487, p-value: 5.3984e-06
## Lambda: 0.89574
## Asymptotic standard error: 0.097151
## z-value: 9.2201, p-value: < 2.22e-16
##
## LR test value: 38.588, p-value: 4.1763e-09
##
## Log likelihood: -698.4658 for sac model
## ML residual variance (sigma squared): 1.8457, (sigma: 1.3586)
## Number of observations: 403
## Number of parameters estimated: 9
## AIC: 1414.9, (AIC for lm: 1449.5)
shapiro.test(SARAR1$residuals)
##
## Shapiro-Wilk normality test
##
## data: SARAR1$residuals
## W = 0.69194, p-value < 2.2e-16
Moran.I(SARAR1$residuals,We)
## $observed
## [1] 0.01403893
##
## $expected
## [1] -0.002487562
##
## $sd
## [1] 0.004042508
##
## $p.value
## [1] 4.347752e-05
# Hay dependencia espacial en residuales, no sirve el modelo
# Modelo 7
SARAR1_b=sacsarlm(formula=Ca~MO+CICE+Fe+Zn,data=datat,listw=Wve,type = "mixed")
summary(SARAR1_b)
##
## Call:sacsarlm(formula = Ca ~ MO + CICE + Fe + Zn, data = datat, listw = Wve,
## type = "mixed")
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.90330 -0.44997 0.13976 0.66460 3.61750
##
## Type: sacmixed
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.53555314 3.80153144 -1.1931 0.232836
## MO 0.63530068 0.12751050 4.9823 6.282e-07
## CICE 0.67488964 0.01984157 34.0139 < 2.2e-16
## Fe -0.00096377 0.00033222 -2.9010 0.003720
## Zn -0.12283094 0.03906063 -3.1446 0.001663
## lag.MO 0.65783859 1.57273103 0.4183 0.675744
## lag.CICE -0.14175959 0.89728005 -0.1580 0.874466
## lag.Fe 0.00953078 0.00466375 2.0436 0.040994
## lag.Zn -1.27622154 0.73441538 -1.7377 0.082257
##
## Rho: 0.78978
## Asymptotic standard error: 1.0196
## z-value: 0.77456, p-value: 0.4386
## Lambda: 0.82342
## Asymptotic standard error: 0.87612
## z-value: 0.93985, p-value: 0.3473
##
## LR test value: 68.726, p-value: 7.4618e-13
##
## Log likelihood: -696.1394 for sacmixed model
## ML residual variance (sigma squared): 1.8273, (sigma: 1.3518)
## Number of observations: 403
## Number of parameters estimated: 12
## AIC: 1416.3, (AIC for lm: 1473)
shapiro.test(SARAR1_b$residuals)
##
## Shapiro-Wilk normality test
##
## data: SARAR1_b$residuals
## W = 0.75798, p-value < 2.2e-16
Moran.I(SARAR1_b$residuals,We) # No hay dependencia, los parametros no sirven, el modelo no sirve
## $observed
## [1] 0.009169024
##
## $expected
## [1] -0.002487562
##
## $sd
## [1] 0.004093329
##
## $p.value
## [1] 0.004403592
pred=SARAR1_b$fitted.values
plot(pred,datat$MO)
# AIC: mientras menor el valor, mejor el modelo
AIC(SARAR1)
## [1] 1414.932
AIC(meer1)
## [1] 1420.332
#Modelo 8: Regresion multiple
esp=lm(Ca~MO+CICE+Mg+Zn,data = datat)
summary(esp)
##
## Call:
## lm(formula = Ca ~ MO + CICE + Mg + Zn, data = datat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.6218 -0.3114 0.1991 0.6312 3.2238
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.28071 0.25882 -1.085 0.279
## MO 0.60690 0.12237 4.960 1.05e-06 ***
## CICE 0.78276 0.02334 33.538 < 2e-16 ***
## Mg -0.43589 0.08731 -4.992 8.94e-07 ***
## Zn -0.19343 0.03487 -5.548 5.29e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.448 on 398 degrees of freedom
## Multiple R-squared: 0.8479, Adjusted R-squared: 0.8464
## F-statistic: 554.7 on 4 and 398 DF, p-value: < 2.2e-16
shapiro.test(esp$residuals)
##
## Shapiro-Wilk normality test
##
## data: esp$residuals
## W = 0.67826, p-value < 2.2e-16
Moran.I(esp$residuals,We) # Hay dependencia esp
## $observed
## [1] 0.03554183
##
## $expected
## [1] -0.002487562
##
## $sd
## [1] 0.004046137
##
## $p.value
## [1] 0
AIC(map)
## [1] 2150.231
AIC(map2)
## [1] 2121.524
AIC(mser1)
## [1] 1432.219
AIC(meer1)
## [1] 1420.332
AIC(meer1b)
## [1] 1421.764
AIC(SARAR1)
## [1] 1414.932
AIC(SARAR1_b)
## [1] 1416.279
AIC(esp)
## [1] 1449.172
El mejor modelo de los analizados fue el SARAR1, sin embargo, ninguno cumpio los supuestos de normalidad y dependencia espacial