library(rworldmap)

## Loading required package: sp

## ### Welcome to rworldmap ###

## For a short introduction type :   vignette('rworldmap')

library(rworldxtra)
library(ggmap)

## Loading required package: ggplot2

## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.

## Please cite ggmap if you use it! See citation("ggmap") for details.

library(sf)

## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1

library(spdep)

## Loading required package: spData

## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`

library(ape)

## Registered S3 method overwritten by 'ape':
##   method   from 
##   plot.mst spdep

library(sp)
library(MVA)

## Loading required package: HSAUR2

## Loading required package: tools

library(Hmisc)

## Loading required package: lattice

## Loading required package: survival

## Loading required package: Formula

## 
## Attaching package: 'Hmisc'

## The following object is masked from 'package:ape':
## 
##     zoom

## The following objects are masked from 'package:base':
## 
##     format.pval, units

library(normtest)
library(nortest)
library(corrplot)

## corrplot 0.90 loaded

library(psych)

## 
## Attaching package: 'psych'

## The following object is masked from 'package:Hmisc':
## 
##     describe

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

library(crayon)

## 
## Attaching package: 'crayon'

## The following object is masked from 'package:psych':
## 
##     %+%

## The following object is masked from 'package:ggplot2':
## 
##     %+%

library(pastecs)
library(readxl)

Entrega de arroz inundado

Arroz_in <- read_excel("Arroz_in.xlsx")
View(Arroz_in)
## Data frame con Carbono organico
dfCOS=data.frame(Arroz_in)
X=as.matrix(data.frame(dfCOS[,4:15]))
## Para mirar el rango de las coordenadas 
range(dfCOS$Long);range(dfCOS$Lat)

## [1] -72.58261 -72.37495

## [1] 8.078605 8.380669

Analisis exploratorio

##Primer mapa de los puntos

mapa.puntos <- getMap(resolution = "high")
plot(mapa.puntos,xlim = c(-72.6, -72.3), ylim = c(8.0, 8.4), asp = 1)
points(dfCOS$Long, dfCOS$Lat, col = "darkred", cex = .6,pch=19)

##Segundo mapa de puntos

ggplot(dfCOS, aes(dfCOS$Long, dfCOS$Lat)) +  geom_point()

## Warning: Use of `dfCOS$Long` is discouraged. Use `Long` instead.

## Warning: Use of `dfCOS$Lat` is discouraged. Use `Lat` instead.

Matrices con variable de interes (Carbono organico del suelo).

###______matriz de predictores y respuesta
X=as.matrix(data.frame(dfCOS[,4:15]))
###___matriz de coordenadas
XY=as.matrix(dfCOS[,2:3])
##__matriz de distancias
COS.d=as.matrix(dist(XY, diag=T, upper=T))
##matriz inversa de distancias
COS.d.inv <-as.matrix(1/COS.d)
##asignando cero a la diag
diag(COS.d.inv) <- 0
##__la matriz de pesos basado en distancias
W=as.matrix(COS.d.inv)
##__verificando sumas por filas
SUMAS=apply(W,1,sum)
##__estandarizando la metriz
We=W/SUMAS
##verificando estandarización
apply(We,1,sum)

##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  81  82  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 401 402 403 
##   1   1   1

##Calculando indice de morant para ver si los datos tienen dependencia espacial.

Moran=list()
for(j in 1:12){
  Moran[j]=Moran.I(dfCOS[,j+3], COS.d.inv)$p.value
}
Moran

## [[1]]
## [1] 0
## 
## [[2]]
## [1] 0
## 
## [[3]]
## [1] 0
## 
## [[4]]
## [1] 0
## 
## [[5]]
## [1] 0
## 
## [[6]]
## [1] 0
## 
## [[7]]
## [1] 0
## 
## [[8]]
## [1] 4.081979e-11
## 
## [[9]]
## [1] 1.380351e-09
## 
## [[10]]
## [1] 0
## 
## [[11]]
## [1] 6.661338e-16
## 
## [[12]]
## [1] 0

#Realizando el indice de morant a los P valores, donde las variables con 0 muestran dependencia espacial. Por esto es necesario el uso de modelos que me solucionen dicha dependencia.
#Sin embargo, el indice de morant intereza evaluarlo a los residuales de los modelos para determinar si los modelos solucionan la dependencia espacial que se presenta en los valores de campo.

Ya que hay dependencia espacial no es adecuado utilizar modelos de regresion simple o cuadratica que no tengan en cuenta ese caracter espacial de los puntos. Por esto comienza un analisis de modelos espaciales.

Muestreo espacial: decidimos tomar 80% de datos y la variable Ca para variar el trabajo hecho en clase.

# Muestreo espacial 
library(clhs)
n = 0.80 *403
n = round(n,0)

df_nair = data.frame(x=dfCOS$Long,
                     y=dfCOS$Lat,
                     COS=dfCOS$cos)

res <- clhs(df_nair, size = n, 
            iter = 100, progress = FALSE,
            simple = TRUE)

df_nair = dfCOS[res,]
dfCOS = df_nair

##1. Modelo autorregresivo puro \[Y = \lambda W Y + \epsilon\]

XY=as.matrix(dfCOS[,2:3])
COS.d=as.matrix(dist(XY, diag=T, upper=T))
##matriz inversa de distancias
COS.d.inv <-as.matrix(1/COS.d)
##asignando cero a la diag
diag(COS.d.inv) <- 0
##__la matriz de pesos basado en distancias
W=as.matrix(COS.d.inv)
##__verificando sumas por filas
SUMAS=apply(W,1,sum)
##__estandarizando la metriz
We=W/SUMAS

# Nueva construccion de matriz de pesos estandarizada
range(dfCOS$Long);range(dfCOS$Lat)

## [1] -72.58261 -72.37495

## [1] 8.078605 8.380669

contnb=dnearneigh(coordinates(XY),0,380000,longlat = F)
dlist <- nbdists(contnb, XY)
dlist <- lapply(dlist, function(x) 1/x)
Wve=nb2listw(contnb,glist=dlist,style = "W")
#modelo autoregresivo puro
modelo.arp=spautolm(Ca~1,data=dfCOS,listw=Wve)
summary(modelo.arp)

## 
## Call: spautolm(formula = Ca ~ 1, data = dfCOS, listw = Wve)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -7.35509 -2.41944 -0.14151  1.94733 10.93396 
## 
## Coefficients: 
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)   7.9003     3.9243  2.0132   0.0441
## 
## Lambda: 0.95063 LR test value: 38.932 p-value: 4.3892e-10 
## Numerical Hessian standard error of lambda: 0.048843 
## 
## Log likelihood: -861.0571 
## ML residual variance (sigma squared): 12.088, (sigma: 3.4768)
## Number of observations: 322 
## Number of parameters estimated: 3 
## AIC: 1728.1

library(spatialreg)

# Este modelo asume independencia en los residuales por lo que no seria el indicado, entonces extraemos residuales y hacemos test de moran
res1 = modelo.arp$fit$residuals
Moran.I(res1, COS.d.inv)

## $observed
## [1] 0.04976567
## 
## $expected
## [1] -0.003115265
## 
## $sd
## [1] 0.005341869
## 
## $p.value
## [1] 0

# el test muestra que se rechaza la hipotesis de independencia.

##2. Modelo autorregresivo puro (dos veces con la misma variable) \[Y = \lambda W Y + u \\ u= \rho Wu + \epsilon\]

mod2 = sacsarlm(Ca~1,data=dfCOS,listw=Wve)
summary(mod2)

## 
## Call:sacsarlm(formula = Ca ~ 1, data = dfCOS, listw = Wve)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -7.72744 -2.37964 -0.05684  1.86195 10.56775 
## 
## Type: sac 
## Coefficients: (numerical Hessian approximate standard errors) 
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  0.41586    2.04910  0.2029   0.8392
## 
## Rho: 0.92087
## Approximate (numerical Hessian) standard error: 0.076777
##     z-value: 11.994, p-value: < 2.22e-16
## Lambda: 0.92087
## Approximate (numerical Hessian) standard error: 0.077519
##     z-value: 11.879, p-value: < 2.22e-16
## 
## LR test value: 61.526, p-value: 4.3632e-14
## 
## Log likelihood: -849.7601 for sac model
## ML residual variance (sigma squared): 11.137, (sigma: 3.3372)
## Number of observations: 322 
## Number of parameters estimated: 4 
## AIC: 1707.5, (AIC for lm: 1765)

# Este modelo si evaluala dependencia espacial de los datos, entonces extraemos residuales y hacemos test de moran.
res2 =mod2$residuals
Moran.I(res2, COS.d.inv)

## $observed
## [1] 0.03254388
## 
## $expected
## [1] -0.003115265
## 
## $sd
## [1] 0.005341968
## 
## $p.value
## [1] 2.467559e-11

# Mejora mucho pero sigue habiendo dependencia espacial en los residuales.

##3. Modelo en resago espacial (Spatial lag model) \[Y = \lambda W Y +X \beta + \epsilon \]

mser1=errorsarlm(formula=Ca~Mg+K+Cu+Zn,data=dfCOS,listw=Wve)
summary(mser1)

## 
## Call:errorsarlm(formula = Ca ~ Mg + K + Cu + Zn, data = dfCOS, listw = Wve)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -6.64543 -1.84944 -0.27934  1.18091 10.92161 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##              Estimate Std. Error z value  Pr(>|z|)
## (Intercept)  5.720815   4.639933  1.2330 0.2175937
## Mg           1.162087   0.212464  5.4696 4.512e-08
## K            5.682582   1.498637  3.7918 0.0001495
## Cu           0.302784   0.123928  2.4432 0.0145562
## Zn          -0.362425   0.097595 -3.7135 0.0002044
## 
## Lambda: 0.96501, LR test value: 54.254, p-value: 1.7619e-13
## Asymptotic standard error: 0.024591
##     z-value: 39.243, p-value: < 2.22e-16
## Wald statistic: 1540, p-value: < 2.22e-16
## 
## Log likelihood: -803.7155 for error model
## ML residual variance (sigma squared): 8.4473, (sigma: 2.9064)
## Number of observations: 322 
## Number of parameters estimated: 7 
## AIC: 1621.4, (AIC for lm: 1673.7)

#Analizando hay variables como CICE , Na, z y cos, por el p. value alto muestran no estar relacionadas al la variable Ca. sin embargo primeo decidimos correr el test de moran y luego retirarlos.

# Evaluando la dependencia espacial de los residuales
res3 =mser1$residuals
Moran.I(res3, COS.d.inv)

## $observed
## [1] 0.06616071
## 
## $expected
## [1] -0.003115265
## 
## $sd
## [1] 0.005330153
## 
## $p.value
## [1] 0

# La parte autorregresiva puede ser buena, sin embargo la dependencia espacial no se corrige aun y siguen teniendo problemas algunas variables.

##4. Modelo espacial del error (Spatial error model) \[Y = X \beta + u\\u = \lambda W u + \epsilon\]

#Analizando hay variables como CICE , cos y z, por el p. value alto muestran no estar relacionadas al la variable Ca. sin embargo, primeo decidimos correr el test de moran y luego retirarlos.

mod4=lagsarlm(formula=Ca~Mg+K+Na+Cu+Zn,data=dfCOS,listw=Wve)
summary(mod4)

## 
## Call:lagsarlm(formula = Ca ~ Mg + K + Na + Cu + Zn, data = dfCOS, 
##     listw = Wve)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -6.94716 -1.86713 -0.36893  1.35040 11.23687 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##              Estimate Std. Error z value  Pr(>|z|)
## (Intercept) -2.914038   0.577743 -5.0438 4.563e-07
## Mg           0.889207   0.216743  4.1026 4.086e-05
## K            5.861756   1.501030  3.9052 9.416e-05
## Na           1.255855   1.397166  0.8989 0.3687278
## Cu           0.227483   0.123991  1.8347 0.0665551
## Zn          -0.356818   0.098325 -3.6290 0.0002845
## 
## Rho: 0.95245, LR test value: 42.235, p-value: 8.0921e-11
## Asymptotic standard error: 0.033224
##     z-value: 28.668, p-value: < 2.22e-16
## Wald statistic: 821.83, p-value: < 2.22e-16
## 
## Log likelihood: -809.226 for lag model
## ML residual variance (sigma squared): 8.7589, (sigma: 2.9595)
## Number of observations: 322 
## Number of parameters estimated: 8 
## AIC: 1634.5, (AIC for lm: 1674.7)
## LM test for residual autocorrelation
## test value: 209.36, p-value: < 2.22e-16

# Evaluando la dependencia espacial de los residuales
res4 = mod4$residuals
Moran.I(res4, COS.d.inv)

## $observed
## [1] 0.07193423
## 
## $expected
## [1] -0.003115265
## 
## $sd
## [1] 0.005332103
## 
## $p.value
## [1] 0

#  La parte autorregresiva puede ser buena, sin embargo la dependencia espacial no se corrige aun y siguen teniendo problemas algunas variables.

###5. Modelo de autocorrelacion espacial (Spatial Autocorrelation Model)

\[Y = \lambda W Y +X \beta + u\\u =\rho W u +\epsilon \]

#Analizando si se eliminan variables como Na y Cu, ya que por el p. value alto muestran no estar relacionadas a la variable Ca. El modelo se comporta mejor.
mod5=sacsarlm(formula=Ca~cos+Mg+K+CICE+Zn,data=dfCOS,listw=Wve)
summary(mod5)

## 
## Call:sacsarlm(formula = Ca ~ cos + Mg + K + CICE + Zn, data = dfCOS, 
##     listw = Wve)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -8.32979 -0.21369  0.18160  0.52025  2.49741 
## 
## Type: sac 
## Coefficients: (asymptotic standard errors) 
##              Estimate Std. Error z value  Pr(>|z|)
## (Intercept) -5.530778   1.333094 -4.1488 3.342e-05
## cos          0.728221   0.200396  3.6339 0.0002792
## Mg          -0.424131   0.096136 -4.4118 1.025e-05
## K           -1.628296   0.644844 -2.5251 0.0115664
## CICE         0.802391   0.024816 32.3333 < 2.2e-16
## Zn          -0.089060   0.033133 -2.6879 0.0071894
## 
## Rho: 0.66911
## Asymptotic standard error: 0.14841
##     z-value: 4.5084, p-value: 6.5312e-06
## Lambda: 0.90842
## Asymptotic standard error: 0.07702
##     z-value: 11.795, p-value: < 2.22e-16
## 
## LR test value: 35.66, p-value: 1.805e-08
## 
## Log likelihood: -512.7704 for sac model
## ML residual variance (sigma squared): 1.3881, (sigma: 1.1782)
## Number of observations: 322 
## Number of parameters estimated: 9 
## AIC: 1043.5, (AIC for lm: 1075.2)

#Los P valores de Rho y lambda sin buenospor lo que se realiza test de moran para evaluar independencia espacial.
res5 = mod5$residuals
Moran.I(res5, COS.d.inv)

## $observed
## [1] 0.01701072
## 
## $expected
## [1] -0.003115265
## 
## $sd
## [1] 0.005191208
## 
## $p.value
## [1] 0.0001057799

#Con este modelo logramos un p valor mayor al 5% por lo que desaparecemos la dependencia espacial y seria correcto avanzar.

##6.Modelo espacial anidado (General Nesting Spatial Model)

\[Y = \lambda W Y +X \beta_1 +W X \beta_2+ u\\u =\rho W u +\epsilon\]

#Analizando si se eliminan variables como z, K y Mg, ya que por el p. value alto muestran no estar relacionadas a la variable Ca. El modelo se comporta mejor.
#Este modelo es el mismo que el anterior pero con resago
mod6=sacsarlm(formula=Ca~cos+K+Na+CICE+Cu+Zn,data=dfCOS,listw=Wve,type="mixed")
summary(mod6)

## 
## Call:sacsarlm(formula = Ca ~ cos + K + Na + CICE + Cu + Zn, data = dfCOS, 
##     listw = Wve, type = "mixed")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -7.35237 -0.35226  0.13034  0.51605  2.95287 
## 
## Type: sacmixed 
## Coefficients: (asymptotic standard errors) 
##               Estimate Std. Error z value  Pr(>|z|)
## (Intercept)  -3.781439   3.612548 -1.0468 0.2952144
## cos           0.841796   0.205314  4.1000 4.131e-05
## K            -2.112033   0.622933 -3.3905 0.0006977
## Na           -1.028719   0.557168 -1.8463 0.0648433
## CICE          0.752630   0.021688 34.7023 < 2.2e-16
## Cu           -0.028558   0.052557 -0.5434 0.5868810
## Zn           -0.075655   0.041131 -1.8394 0.0658609
## lag.cos       2.577857   2.862935  0.9004 0.3678944
## lag.K       -13.033910   9.642569 -1.3517 0.1764697
## lag.Na        1.672902   8.462292  0.1977 0.8432884
## lag.CICE      0.125832   0.973055  0.1293 0.8971073
## lag.Cu       -0.215618   0.799690 -0.2696 0.7874471
## lag.Zn       -0.605516   0.700011 -0.8650 0.3870334
## 
## Rho: 0.68845
## Asymptotic standard error: 0.94263
##     z-value: 0.73035, p-value: 0.46518
## Lambda: 0.80775
## Asymptotic standard error: 0.62356
##     z-value: 1.2954, p-value: 0.19519
## 
## LR test value: 55.678, p-value: 3.2592e-09
## 
## Log likelihood: -511.8763 for sacmixed model
## ML residual variance (sigma squared): 1.3869, (sigma: 1.1777)
## Number of observations: 322 
## Number of parameters estimated: 16 
## AIC: 1055.8, (AIC for lm: 1095.4)

# En este caso el Rho y lambda no sirven por lo que no es una opcion.

##7. Modelo espacial del error de durbin (Spatial Durbin Error Model)

# z, Mg y Na
mod7=lagsarlm(formula=Ca~cos+K+CICE+Cu+Zn,data=dfCOS,listw=Wve,type="mixed")
summary(mod7)

## 
## Call:lagsarlm(formula = Ca ~ cos + K + CICE + Cu + Zn, data = dfCOS, 
##     listw = Wve, type = "mixed")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -7.27002 -0.33237  0.14690  0.58092  3.25963 
## 
## Type: mixed 
## Coefficients: (asymptotic standard errors) 
##               Estimate Std. Error z value  Pr(>|z|)
## (Intercept)  -3.861675   2.903129 -1.3302 0.1834600
## cos           0.855133   0.209020  4.0912 4.292e-05
## K            -2.265104   0.634188 -3.5717 0.0003547
## CICE          0.750760   0.021733 34.5447 < 2.2e-16
## Cu           -0.042007   0.052904 -0.7940 0.4271883
## Zn           -0.074455   0.042020 -1.7719 0.0764142
## lag.cos       3.290864   2.299242  1.4313 0.1523493
## lag.K       -11.148810   7.415647 -1.5034 0.1327316
## lag.CICE     -0.182476   0.175791 -1.0380 0.2992589
## lag.Cu       -0.100283   0.565371 -0.1774 0.8592136
## lag.Zn       -0.479842   0.620464 -0.7734 0.4393099
## 
## Rho: 0.85923, LR test value: 12.788, p-value: 0.00034884
## Asymptotic standard error: 0.096033
##     z-value: 8.9472, p-value: < 2.22e-16
## Wald statistic: 80.053, p-value: < 2.22e-16
## 
## Log likelihood: -517.0148 for mixed model
## ML residual variance (sigma squared): 1.4367, (sigma: 1.1986)
## Number of observations: 322 
## Number of parameters estimated: 13 
## AIC: 1060, (AIC for lm: 1070.8)
## LM test for residual autocorrelation
## test value: 21.679, p-value: 3.2235e-06

res7 = mod7$residuals
Moran.I(res7, COS.d.inv) # No hay depedencia espacial

## $observed
## [1] 0.01956033
## 
## $expected
## [1] -0.003115265
## 
## $sd
## [1] 0.00525682
## 
## $p.value
## [1] 1.606486e-05

Pruebas de normalidad a el modelo de autocorrelacion espacial (mod5) y modelo espacial del error de durbin (mod7)

#prueba de normalidad shapiro test para los residuakes dek modelo 5 y 7
shapiro.test(res5)

## 
##  Shapiro-Wilk normality test
## 
## data:  res5
## W = 0.71508, p-value < 2.2e-16

shapiro.test(res7)

## 
##  Shapiro-Wilk normality test
## 
## data:  res7
## W = 0.82737, p-value < 2.2e-16

#prueba ad.test
ad.test(res5)

## 
##  Anderson-Darling normality test
## 
## data:  res5
## A = 21.292, p-value < 2.2e-16

ad.test(res7)

## 
##  Anderson-Darling normality test
## 
## data:  res7
## A = 12.28, p-value < 2.2e-16

# prueba sf.test
sf.test(res5)

## 
##  Shapiro-Francia normality test
## 
## data:  res5
## W = 0.70955, p-value < 2.2e-16

sf.test(res7)

## 
##  Shapiro-Francia normality test
## 
## data:  res7
## W = 0.82252, p-value = 4.192e-16

#prueba cvm.test
cvm.test(res5)

## Warning in cvm.test(res5): p-value is smaller than 7.37e-10, cannot be computed
## more accurately

## 
##  Cramer-von Mises normality test
## 
## data:  res5
## W = 3.9442, p-value = 7.37e-10

cvm.test(res7)

## Warning in cvm.test(res7): p-value is smaller than 7.37e-10, cannot be computed
## more accurately

## 
##  Cramer-von Mises normality test
## 
## data:  res7
## W = 2.2239, p-value = 7.37e-10

#ninguna prueba nos corrobora la normalidad de los residuales.

hist(res5)

hist(res7)

# prueba de grubbs para detectar datos atipicos
library(outliers)

## 
## Attaching package: 'outliers'

## The following object is masked from 'package:psych':
## 
##     outlier

outliers::grubbs.test(res5)

## 
##  Grubbs test for one outlier
## 
## data:  res5
## G.373 = 7.05897, U = 0.84429, p-value = 3.096e-11
## alternative hypothesis: lowest value -8.32978854775542 is an outlier

outliers::grubbs.test(res7) # El valor mas alto es un outlier

## 
##  Grubbs test for one outlier
## 
## data:  res7
## G.373 = 6.0558, U = 0.8854, p-value = 7.221e-08
## alternative hypothesis: lowest value -7.2700218063831 is an outlier

which.max(res5)

## 140 
##  10

which.max(res7) # Maximo

## 140 
##  10

#se ven algunos datos atipicos
plot(dfCOS$Ca,mod5$fitted.values)

plot(dfCOS$Ca,mod7$fitted.values)

Tratando de eliminar datos atipicos mod5

dfCOS2 = dfCOS 
atipicos =which(dfCOS$Ca>15)
dfCOS2 =dfCOS2[-atipicos,]

XY=as.matrix(dfCOS2[,2:3])
COS.d=as.matrix(dist(XY, diag=T, upper=T))
COS.d.inv <-as.matrix(1/COS.d)
diag(COS.d.inv) <- 0
W=as.matrix(COS.d.inv)
SUMAS=apply(W,1,sum)
We=W/SUMAS


# Nueva construccion de matriz de pesos estandarizada
contnb=dnearneigh(coordinates(XY),0,380000,longlat = F)
dlist <- nbdists(contnb, XY)
dlist <- lapply(dlist, function(x) 1/x)
Wve=nb2listw(contnb,glist=dlist,style = "W")

mod5b=sacsarlm(formula=Ca~cos+Mg+K+CICE+Zn,data=dfCOS2,listw=Wve)
summary(mod5)

## 
## Call:sacsarlm(formula = Ca ~ cos + Mg + K + CICE + Zn, data = dfCOS, 
##     listw = Wve)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -8.32979 -0.21369  0.18160  0.52025  2.49741 
## 
## Type: sac 
## Coefficients: (asymptotic standard errors) 
##              Estimate Std. Error z value  Pr(>|z|)
## (Intercept) -5.530778   1.333094 -4.1488 3.342e-05
## cos          0.728221   0.200396  3.6339 0.0002792
## Mg          -0.424131   0.096136 -4.4118 1.025e-05
## K           -1.628296   0.644844 -2.5251 0.0115664
## CICE         0.802391   0.024816 32.3333 < 2.2e-16
## Zn          -0.089060   0.033133 -2.6879 0.0071894
## 
## Rho: 0.66911
## Asymptotic standard error: 0.14841
##     z-value: 4.5084, p-value: 6.5312e-06
## Lambda: 0.90842
## Asymptotic standard error: 0.07702
##     z-value: 11.795, p-value: < 2.22e-16
## 
## LR test value: 35.66, p-value: 1.805e-08
## 
## Log likelihood: -512.7704 for sac model
## ML residual variance (sigma squared): 1.3881, (sigma: 1.1782)
## Number of observations: 322 
## Number of parameters estimated: 9 
## AIC: 1043.5, (AIC for lm: 1075.2)

res5b = mod5b$residuals
Moran.I(res5b, COS.d.inv) # Vuelve la depedencia espacial

## $observed
## [1] 0.01748661
## 
## $expected
## [1] -0.003311258
## 
## $sd
## [1] 0.005525781
## 
## $p.value
## [1] 0.0001673591

shapiro.test(res5b) # No son normales

## 
##  Shapiro-Wilk normality test
## 
## data:  res5b
## W = 0.82335, p-value < 2.2e-16

ad.test(res5b)

## 
##  Anderson-Darling normality test
## 
## data:  res5b
## A = 10.773, p-value < 2.2e-16

sf.test(res5b)

## 
##  Shapiro-Francia normality test
## 
## data:  res5b
## W = 0.81802, p-value = 8.758e-16

cvm.test(res5b)

## Warning in cvm.test(res5b): p-value is smaller than 7.37e-10, cannot be computed
## more accurately

## 
##  Cramer-von Mises normality test
## 
## data:  res5b
## W = 1.9052, p-value = 7.37e-10

#eliminando datos atipicos mod7

mod7b=lagsarlm(formula=Ca~cos+K+CICE+Cu+Zn,data=dfCOS2,listw=Wve,type="mixed")
summary(mod7b)

## 
## Call:lagsarlm(formula = Ca ~ cos + K + CICE + Cu + Zn, data = dfCOS2, 
##     listw = Wve, type = "mixed")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -6.43277 -0.37985  0.12452  0.59315  2.83522 
## 
## Type: mixed 
## Coefficients: (asymptotic standard errors) 
##                Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)  -6.7377794   2.7912753 -2.4139  0.015784
## cos           0.9019458   0.2028234  4.4470  8.71e-06
## K            -1.7687792   0.5798061 -3.0506  0.002284
## CICE          0.6785425   0.0226030 30.0201 < 2.2e-16
## Cu           -0.0080048   0.0486440 -0.1646  0.869292
## Zn           -0.0734409   0.0381739 -1.9239  0.054373
## lag.cos       3.2675566   2.1326601  1.5322  0.125485
## lag.K       -13.3458786   6.8330813 -1.9531  0.050804
## lag.CICE      0.3000402   0.2563756  1.1703  0.241874
## lag.Cu       -0.6590432   0.5425264 -1.2148  0.224455
## lag.Zn       -0.0265750   0.5413210 -0.0491  0.960845
## 
## Rho: 0.86242, LR test value: 14.257, p-value: 0.00015949
## Asymptotic standard error: 0.093799
##     z-value: 9.1944, p-value: < 2.22e-16
## Wald statistic: 84.536, p-value: < 2.22e-16
## 
## Log likelihood: -453.3873 for mixed model
## ML residual variance (sigma squared): 1.1537, (sigma: 1.0741)
## Number of observations: 303 
## Number of parameters estimated: 13 
## AIC: 932.77, (AIC for lm: 945.03)
## LM test for residual autocorrelation
## test value: 10.232, p-value: 0.0013804

res7b = mod7b$residuals
Moran.I(res7b, COS.d.inv) # Vuelve la depedencia espacial

## $observed
## [1] 0.01429806
## 
## $expected
## [1] -0.003311258
## 
## $sd
## [1] 0.005550195
## 
## $p.value
## [1] 0.001510081

shapiro.test(res7b) # No son normales

## 
##  Shapiro-Wilk normality test
## 
## data:  res7b
## W = 0.86523, p-value = 1.336e-15

ad.test(res7b)

## 
##  Anderson-Darling normality test
## 
## data:  res7b
## A = 7.2404, p-value < 2.2e-16

sf.test(res7b)

## 
##  Shapiro-Francia normality test
## 
## data:  res7b
## W = 0.86041, p-value = 7.096e-14

cvm.test(res7b)

## Warning in cvm.test(res7b): p-value is smaller than 7.37e-10, cannot be computed
## more accurately

## 
##  Cramer-von Mises normality test
## 
## data:  res7b
## W = 1.2007, p-value = 7.37e-10

#No logramos dar normalidad a ninguno de los dos modelos.sin embargo para hacer una prediccion tomaremos el modelo 5

library(ggplot2)
library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:crayon':
## 
##     style

## The following object is masked from 'package:Hmisc':
## 
##     subplot

## The following object is masked from 'package:ggmap':
## 
##     wind

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(viridis)

## Loading required package: viridisLite

dataq=dfCOS
dataq$ajustados =mod5$fitted.values 
q<-ggplot(dataq,aes(x=Long,y=Lat,color=ajustados))+
  geom_point(size = 2) +
  ggtitle("Predicciones del modelo 5")+
  labs(x="x",y="y")+
  scale_colour_gradient(low = "yellow", high = "red", na.value = NA)
ggplotly(q)

12-07