library(spData)
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(readxl)
library(ggplot2)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(spdep)
## Loading required package: sp
## Loading required package: sf
## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE
library(ape)
## Registered S3 method overwritten by 'ape':
##   method   from 
##   plot.mst spdep
library(sp)
library(MVA)
## Loading required package: HSAUR2
## Loading required package: tools
library(normtest)
library(nortest)
library(spatialreg)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
## 
##     get.ClusterOption, get.coresOption, get.mcOption,
##     get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
##     set.coresOption, set.mcOption, set.VerboseOption,
##     set.ZeroPolicyOption
datat <- read_excel("C:/Users/faile/Downloads/XPABLO (2).xlsx", 
                    sheet = "tabla_1")
# Plot
datat %>% 
  ggplot(aes(x=Long,y=Lat,color=Ca))+
  geom_point(size=3)+
  scale_color_continuous(type="viridis")+
  ggtitle("Calcio")

# Variables explicativas
X<- datat[,-c(1,2,3,6)]

# Coordenadas
XYdata=as.matrix(datat[,2:3])

# Construyendo la matriz de pesos 

Ca.d=as.matrix(dist(XYdata, diag=T, upper=T))
1/(max(Ca.d)*2/3)
## [1] 4.511031
Ca.d.inv <-as.matrix(1/Ca.d)
diag(Ca.d.inv) <- 0
W=as.matrix(Ca.d.inv)
SUMAS=apply(W,1,sum)
We=W/SUMAS
#apply(We,1,sum)

# Moran Index 
Moran_Ca=Moran.I(datat$Ca, We);Moran_Ca
## $observed
## [1] 0.08097882
## 
## $expected
## [1] -0.002487562
## 
## $sd
## [1] 0.004258728
## 
## $p.value
## [1] 0
# Rezago MO

Ca_lag = datat$Ca%*%We

plot(x= Ca_lag,y= datat$Ca,asp=1)
abline(lm(datat$Ca~as.vector(Ca_lag)))

mod_1= lm(datat$Ca~as.vector(Ca_lag))
summary(mod_1)
## 
## Call:
## lm(formula = datat$Ca ~ as.vector(Ca_lag))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.0086 -2.2390 -0.3777  1.5156 12.7199 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -2.8196     1.3561  -2.079   0.0382 *  
## as.vector(Ca_lag)   1.3523     0.1681   8.045 9.86e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.433 on 401 degrees of freedom
## Multiple R-squared:  0.139,  Adjusted R-squared:  0.1368 
## F-statistic: 64.72 on 1 and 401 DF,  p-value: 9.86e-15
#Angulo real 
atan(1.3523)*180/pi
## [1] 53.51778
u = mod_1$residuals
shapiro.test(u)
## 
##  Shapiro-Wilk normality test
## 
## data:  u
## W = 0.95882, p-value = 3.403e-09
Moran.I(u, We)
## $observed
## [1] 0.0728868
## 
## $expected
## [1] -0.002487562
## 
## $sd
## [1] 0.004256182
## 
## $p.value
## [1] 0
u_lag = We%*%u
plot(x= u_lag,y= u)
abline(lm(u_lag~u))

mod_1= lm(u_lag~u)
summary(mod_1)
## 
## Call:
## lm(formula = u_lag ~ u)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.80487 -0.37402 -0.07393  0.34199  1.22027 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.09426    0.02278  -4.139 4.26e-05 ***
## u            0.07289    0.00665  10.960  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4572 on 401 degrees of freedom
## Multiple R-squared:  0.2305, Adjusted R-squared:  0.2286 
## F-statistic: 120.1 on 1 and 401 DF,  p-value: < 2.2e-16
epsi =mod_1$residuals 
shapiro.test(epsi)
## 
##  Shapiro-Wilk normality test
## 
## data:  epsi
## W = 0.9586, p-value = 3.166e-09
Moran.I(epsi,We)
## $observed
## [1] 0.2771621
## 
## $expected
## [1] -0.002487562
## 
## $sd
## [1] 0.004265512
## 
## $p.value
## [1] 0
contnb=dnearneigh(coordinates(XYdata)
                  ,0,380000,longlat = F)
contnb
## Neighbour list object:
## Number of regions: 403 
## Number of nonzero links: 162006 
## Percentage nonzero weights: 99.75186 
## Average number of links: 402
dlist <- nbdists(contnb, XYdata)
dlist <- lapply(dlist, function(x) 1/x)
Wve=nb2listw(contnb,glist=dlist,style = "W")
#Modelo autoregresivo puro SAR

map=spautolm(Ca~1,data=datat,listw=Wve)
summary(map)
## 
## Call: spautolm(formula = Ca ~ 1, data = datat, listw = Wve)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -7.44226 -2.27552 -0.18596  1.87723 11.05241 
## 
## Coefficients: 
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)   7.0623     4.5096   1.566   0.1173
## 
## Lambda: 0.96208 LR test value: 51.922 p-value: 5.7765e-13 
## Numerical Hessian standard error of lambda: 0.037642 
## 
## Log likelihood: -1072.116 
## ML residual variance (sigma squared): 11.788, (sigma: 3.4333)
## Number of observations: 403 
## Number of parameters estimated: 3 
## AIC: 2150.2
res_2=map$fit$residuals
shapiro.test(res_2)
## 
##  Shapiro-Wilk normality test
## 
## data:  res_2
## W = 0.97765, p-value = 7.105e-06
Moran.I(res_2,We)
## $observed
## [1] 0.05123807
## 
## $expected
## [1] -0.002487562
## 
## $sd
## [1] 0.004258707
## 
## $p.value
## [1] 0
# SARAR
map2 = sacsarlm(Ca~1,data=datat,listw = Wve)
## Warning in sacsarlm(Ca ~ 1, data = datat, listw = Wve): inversion of asymptotic covariance matrix failed for tol.solve = 2.22044604925031e-16 
##   número de condición recíproco = 1.85034e-18 - using numerical Hessian.
## Warning in sqrt(diag(fdHess)[-c(1, 2)]): Se han producido NaNs
summary(map2)
## 
## Call:sacsarlm(formula = Ca ~ 1, data = datat, listw = Wve)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.5819 -2.2124 -0.2028  1.8375 10.8525 
## 
## Type: sac 
## Coefficients: (numerical Hessian approximate standard errors) 
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.21342        NaN     NaN      NaN
## 
## Rho: 0.93969
## Approximate (numerical Hessian) standard error: 0.04942
##     z-value: 19.014, p-value: < 2.22e-16
## Lambda: 0.93969
## Approximate (numerical Hessian) standard error: 0.034885
##     z-value: 26.937, p-value: < 2.22e-16
## 
## LR test value: 82.629, p-value: < 2.22e-16
## 
## Log likelihood: -1056.762 for sac model
## ML residual variance (sigma squared): 10.804, (sigma: 3.287)
## Number of observations: 403 
## Number of parameters estimated: 4 
## AIC: 2121.5, (AIC for lm: 2200.2)
shapiro.test(map2$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  map2$residuals
## W = 0.9794, p-value = 1.685e-05
Moran.I(map2$residuals,We)
## $observed
## [1] 0.0339781
## 
## $expected
## [1] -0.002487562
## 
## $sd
## [1] 0.004258585
## 
## $p.value
## [1] 0
# Modelo 3 (Cual es?)

mser1=errorsarlm(formula=Ca~MO+Mg+CICE+Zn,data=datat,listw=Wve)
summary(mser1)
## 
## Call:errorsarlm(formula = Ca ~ MO + Mg + CICE + Zn, data = datat, 
##     listw = Wve)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -15.43619  -0.29998   0.15726   0.57771   3.31148 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##              Estimate Std. Error z value  Pr(>|z|)
## (Intercept) -0.232119   0.764161 -0.3038 0.7613132
## MO           0.570776   0.122581  4.6563 3.219e-06
## Mg          -0.346261   0.094154 -3.6776 0.0002354
## CICE         0.764131   0.024392 31.3276 < 2.2e-16
## Zn          -0.184899   0.034758 -5.3196 1.040e-07
## 
## Lambda: 0.90422, LR test value: 18.953, p-value: 1.3396e-05
## Asymptotic standard error: 0.06646
##     z-value: 13.605, p-value: < 2.22e-16
## Wald statistic: 185.11, p-value: < 2.22e-16
## 
## Log likelihood: -709.1093 for error model
## ML residual variance (sigma squared): 1.955, (sigma: 1.3982)
## Number of observations: 403 
## Number of parameters estimated: 7 
## AIC: 1432.2, (AIC for lm: 1449.2)
shapiro.test(mser1$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  mser1$residuals
## W = 0.68182, p-value < 2.2e-16
Moran.I(mser1$residuals,We) # Hay dependencia espacial en residuales, no sirve el modelo
## $observed
## [1] 0.01829508
## 
## $expected
## [1] -0.002487562
## 
## $sd
## [1] 0.004033867
## 
## $p.value
## [1] 2.576685e-07
#Modelo 4 (Cual es?)

meer1=lagsarlm(formula=Ca~MO+CICE+z+Mg+Zn,data=datat,listw=Wve)
summary(meer1)
## 
## Call:lagsarlm(formula = Ca ~ MO + CICE + z + Mg + Zn, data = datat, 
##     listw = Wve)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -15.37071  -0.35594   0.14732   0.59973   3.30121 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##               Estimate Std. Error z value  Pr(>|z|)
## (Intercept) -7.1459855  1.0129166 -7.0549 1.728e-12
## MO           0.5606368  0.1169964  4.7919 1.652e-06
## CICE         0.7347772  0.0237913 30.8843 < 2.2e-16
## z            0.0181623  0.0046117  3.9383 8.206e-05
## Mg          -0.3326466  0.0839108 -3.9643 7.362e-05
## Zn          -0.1831379  0.0333618 -5.4895 4.032e-08
## 
## Rho: 0.73161, LR test value: 26.436, p-value: 2.7243e-07
## Asymptotic standard error: 0.11005
##     z-value: 6.6477, p-value: 2.9767e-11
## Wald statistic: 44.192, p-value: 2.9767e-11
## 
## Log likelihood: -702.1659 for lag model
## ML residual variance (sigma squared): 1.8995, (sigma: 1.3782)
## Number of observations: 403 
## Number of parameters estimated: 8 
## AIC: 1420.3, (AIC for lm: 1444.8)
## LM test for residual autocorrelation
## test value: 9.9698, p-value: 0.0015913
shapiro.test(meer1$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  meer1$residuals
## W = 0.69107, p-value < 2.2e-16
Moran.I(meer1$residuals,We) # Hay dependencia espacial en residuales, no sirve el modelo
## $observed
## [1] 0.01446512
## 
## $expected
## [1] -0.002487562
## 
## $sd
## [1] 0.004028952
## 
## $p.value
## [1] 2.579662e-05
# Modelo 5

meer1b=lagsarlm(formula=Ca~MO+CICE+Fe+Zn,data=datat,listw=Wve,type="mixed")
summary(meer1b)
## 
## Call:lagsarlm(formula = Ca ~ MO + CICE + Fe + Zn, data = datat, listw = Wve, 
##     type = "mixed")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -13.88186  -0.45810   0.13034   0.71325   3.64858 
## 
## Type: mixed 
## Coefficients: (asymptotic standard errors) 
##                Estimate  Std. Error z value  Pr(>|z|)
## (Intercept) -4.26128810  3.11646587 -1.3673  0.171517
## MO           0.61838488  0.12946431  4.7765 1.784e-06
## CICE         0.68140811  0.02001721 34.0411 < 2.2e-16
## Fe          -0.00095408  0.00033636 -2.8365  0.004561
## Zn          -0.11870685  0.03950613 -3.0048  0.002658
## lag.MO       1.19225991  1.31358589  0.9076  0.364070
## lag.CICE    -0.37013754  0.15079525 -2.4546  0.014105
## lag.Fe       0.00956749  0.00297945  3.2112  0.001322
## lag.Zn      -1.21674149  0.38825141 -3.1339  0.001725
## 
## Rho: 0.89183, LR test value: 17.796, p-value: 2.4594e-05
## Asymptotic standard error: 0.074556
##     z-value: 11.962, p-value: < 2.22e-16
## Wald statistic: 143.08, p-value: < 2.22e-16
## 
## Log likelihood: -699.8818 for mixed model
## ML residual variance (sigma squared): 1.8688, (sigma: 1.367)
## Number of observations: 403 
## Number of parameters estimated: 11 
## AIC: 1421.8, (AIC for lm: 1437.6)
## LM test for residual autocorrelation
## test value: 21.551, p-value: 3.4452e-06
shapiro.test(meer1b$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  meer1b$residuals
## W = 0.7686, p-value < 2.2e-16
Moran.I(meer1b$residuals,We) # No hay dependencia espacial
## $observed
## [1] 0.01659438
## 
## $expected
## [1] -0.002487562
## 
## $sd
## [1] 0.004102123
## 
## $p.value
## [1] 3.291733e-06
# Modelo 6

SARAR1=sacsarlm(formula=Ca~MO+CICE+Fe+Mg+Zn,data=datat,listw=Wve)
summary(SARAR1)
## 
## Call:sacsarlm(formula = Ca ~ MO + CICE + Fe + Mg + Zn, data = datat, 
##     listw = Wve)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -14.83238  -0.31392   0.11695   0.59105   3.45025 
## 
## Type: sac 
## Coefficients: (asymptotic standard errors) 
##                Estimate  Std. Error z value  Pr(>|z|)
## (Intercept) -5.74724015  1.38849783 -4.1392 3.486e-05
## MO           0.62133388  0.12512819  4.9656 6.850e-07
## CICE         0.72001234  0.02529229 28.4677 < 2.2e-16
## Fe          -0.00071240  0.00032241 -2.2096 0.0271327
## Mg          -0.29966194  0.09156317 -3.2727 0.0010651
## Zn          -0.14178332  0.03802924 -3.7283 0.0001928
## 
## Rho: 0.73129
## Asymptotic standard error: 0.16077
##     z-value: 4.5487, p-value: 5.3984e-06
## Lambda: 0.89574
## Asymptotic standard error: 0.097151
##     z-value: 9.2201, p-value: < 2.22e-16
## 
## LR test value: 38.588, p-value: 4.1763e-09
## 
## Log likelihood: -698.4658 for sac model
## ML residual variance (sigma squared): 1.8457, (sigma: 1.3586)
## Number of observations: 403 
## Number of parameters estimated: 9 
## AIC: 1414.9, (AIC for lm: 1449.5)
shapiro.test(SARAR1$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  SARAR1$residuals
## W = 0.69194, p-value < 2.2e-16
Moran.I(SARAR1$residuals,We)
## $observed
## [1] 0.01403893
## 
## $expected
## [1] -0.002487562
## 
## $sd
## [1] 0.004042508
## 
## $p.value
## [1] 4.347752e-05
# Hay dependencia espacial en residuales, no sirve el modelo
# Modelo 7

SARAR1_b=sacsarlm(formula=Ca~MO+CICE+Fe+Zn,data=datat,listw=Wve,type = "mixed")
summary(SARAR1_b)
## 
## Call:sacsarlm(formula = Ca ~ MO + CICE + Fe + Zn, data = datat, listw = Wve, 
##     type = "mixed")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -13.90330  -0.44997   0.13976   0.66460   3.61750 
## 
## Type: sacmixed 
## Coefficients: (asymptotic standard errors) 
##                Estimate  Std. Error z value  Pr(>|z|)
## (Intercept) -4.53555314  3.80153144 -1.1931  0.232836
## MO           0.63530068  0.12751050  4.9823 6.282e-07
## CICE         0.67488964  0.01984157 34.0139 < 2.2e-16
## Fe          -0.00096377  0.00033222 -2.9010  0.003720
## Zn          -0.12283094  0.03906063 -3.1446  0.001663
## lag.MO       0.65783859  1.57273103  0.4183  0.675744
## lag.CICE    -0.14175959  0.89728005 -0.1580  0.874466
## lag.Fe       0.00953078  0.00466375  2.0436  0.040994
## lag.Zn      -1.27622154  0.73441538 -1.7377  0.082257
## 
## Rho: 0.78978
## Asymptotic standard error: 1.0196
##     z-value: 0.77456, p-value: 0.4386
## Lambda: 0.82342
## Asymptotic standard error: 0.87612
##     z-value: 0.93985, p-value: 0.3473
## 
## LR test value: 68.726, p-value: 7.4618e-13
## 
## Log likelihood: -696.1394 for sacmixed model
## ML residual variance (sigma squared): 1.8273, (sigma: 1.3518)
## Number of observations: 403 
## Number of parameters estimated: 12 
## AIC: 1416.3, (AIC for lm: 1473)
shapiro.test(SARAR1_b$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  SARAR1_b$residuals
## W = 0.75798, p-value < 2.2e-16
Moran.I(SARAR1_b$residuals,We) # No hay dependencia, los parametros no sirven, el modelo no sirve
## $observed
## [1] 0.009169024
## 
## $expected
## [1] -0.002487562
## 
## $sd
## [1] 0.004093329
## 
## $p.value
## [1] 0.004403592
pred=SARAR1_b$fitted.values
plot(pred,datat$MO)

# AIC: mientras menor el valor, mejor el modelo
AIC(SARAR1)
## [1] 1414.932
AIC(meer1)
## [1] 1420.332
#Modelo 8: Regresion multiple

esp=lm(Ca~MO+CICE+Mg+Zn,data = datat)
summary(esp)
## 
## Call:
## lm(formula = Ca ~ MO + CICE + Mg + Zn, data = datat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.6218  -0.3114   0.1991   0.6312   3.2238 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.28071    0.25882  -1.085    0.279    
## MO           0.60690    0.12237   4.960 1.05e-06 ***
## CICE         0.78276    0.02334  33.538  < 2e-16 ***
## Mg          -0.43589    0.08731  -4.992 8.94e-07 ***
## Zn          -0.19343    0.03487  -5.548 5.29e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.448 on 398 degrees of freedom
## Multiple R-squared:  0.8479, Adjusted R-squared:  0.8464 
## F-statistic: 554.7 on 4 and 398 DF,  p-value: < 2.2e-16
shapiro.test(esp$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  esp$residuals
## W = 0.67826, p-value < 2.2e-16
Moran.I(esp$residuals,We) # Hay dependencia esp 
## $observed
## [1] 0.03554183
## 
## $expected
## [1] -0.002487562
## 
## $sd
## [1] 0.004046137
## 
## $p.value
## [1] 0
AIC(map)
## [1] 2150.231
AIC(map2)
## [1] 2121.524
AIC(mser1)
## [1] 1432.219
AIC(meer1)
## [1] 1420.332
AIC(meer1b)
## [1] 1421.764
AIC(SARAR1)
## [1] 1414.932
AIC(SARAR1_b)
## [1] 1416.279
AIC(esp)
## [1] 1449.172

El mejor modelo de los analizados fue el SARAR1, sin embargo, ninguno cumpio los supuestos de normalidad y dependencia espacial