options(digits = 12)
library(rworldmap)
library(rworldxtra)
library(ggmap)
library(sf)                                                                     
library(spdep)
library(ape)
library(sp)
library(MVA)
library(Hmisc)
library(normtest)
library(nortest)
library(corrplot)
library(psych) 
library(crayon)
library(pastecs)
library(readxl)
XPABLO <- read_excel("XPABLO.xlsx")
dfCOS=data.frame(XPABLO)

X=as.matrix(data.frame(dfCOS[,4:15]))
###___matriz de coordenadas
# Muestreo espacial 
library(clhs)
## Warning: package 'clhs' was built under R version 4.0.5
n = 0.80 *403
n = round(n,0)

df_Luis = data.frame(x=dfCOS$Long,
                     y=dfCOS$Lat,
                     COS=dfCOS$cos)

res <- clhs(df_Luis, size = n, 
            iter = 100, progress = FALSE,
            simple = TRUE)

df_Luis = dfCOS[res,]
dfCOS = df_Luis
XY=as.matrix(dfCOS[,2:3])
COS.d=as.matrix(dist(XY, diag=T, upper=T))
##matriz inversa de distancias
COS.d.inv <-as.matrix(1/COS.d)
##asignando cero a la diag
diag(COS.d.inv) <- 0
##__la matriz de pesos basado en distancias
W=as.matrix(COS.d.inv)
##__verificando sumas por filas
SUMAS=apply(W,1,sum)
##__estandarizando la metriz
We=W/SUMAS
##verificando estandarizaci?n
##calculando ?ndice de Moran para variables

Primer modelo

# Nueva construccion de matriz de pesos estandarizada
contnb=dnearneigh(coordinates(XY),0,380000,longlat = F)
dlist <- nbdists(contnb, XY)
dlist <- lapply(dlist, function(x) 1/x)
Wve=nb2listw(contnb,glist=dlist,style = "W")
#modelo autoregresivo puro
modelo.arp=spautolm(cos~1,data=dfCOS,listw=Wve)
summary(modelo.arp)
## 
## Call: spautolm(formula = cos ~ 1, data = dfCOS, listw = Wve)
## 
## Residuals:
##            Min             1Q         Median             3Q            Max 
## -0.95167330215 -0.30242927281  0.00279215457  0.26292754728  1.41532621572 
## 
## Coefficients: 
##                Estimate  Std. Error z value   Pr(>|z|)
## (Intercept) 1.214208433 0.152317163 7.97158 1.5543e-15
## 
## Lambda: 0.85239667 LR test value: 11.1571569 p-value: 0.0008370811 
## Numerical Hessian standard error of lambda: 0.139347026 
## 
## Log likelihood: -166.323137198 
## ML residual variance (sigma squared): 0.162759321, (sigma: 0.403434408)
## Number of observations: 322 
## Number of parameters estimated: 3 
## AIC: 338.646274
library(spatialreg)

\[Y = \lambda W Y + \epsilon\]

res1 = modelo.arp$fit$residuals
Moran.I(res1, COS.d.inv)
## $observed
## [1] 0.0169415923986
## 
## $expected
## [1] -0.00311526479751
## 
## $sd
## [1] 0.005328566605
## 
## $p.value
## [1] 0.000167200247459

Segundo modelo

mod2 = sacsarlm(cos~1,data=dfCOS,listw=Wve)
summary(mod2)
## 
## Call:sacsarlm(formula = cos ~ 1, data = dfCOS, listw = Wve)
## 
## Residuals:
##            Min             1Q         Median             3Q            Max 
## -0.94271581997 -0.29821102915 -0.00782303031  0.25827689558  1.42174302489 
## 
## Type: sac 
## Coefficients: (numerical Hessian approximate standard errors) 
##                Estimate  Std. Error z value Pr(>|z|)
## (Intercept) 0.291026032 0.293519916  0.9915  0.32144
## 
## Rho: 0.749223279
## Approximate (numerical Hessian) standard error: 0.225576267
##     z-value: 3.32137458, p-value: 0.000895752314
## Lambda: 0.749223292
## Approximate (numerical Hessian) standard error: 0.224984641
##     z-value: 3.33010862, p-value: 0.000868121107
## 
## LR test value: 16.2976657, p-value: 0.000289072557
## 
## Log likelihood: -163.752882805 for sac model
## ML residual variance (sigma squared): 0.159651535, (sigma: 0.399564181)
## Number of observations: 322 
## Number of parameters estimated: 4 
## AIC: 335.505766, (AIC for lm: 347.803431)

\[Y = \lambda W Y + u \\ u= \rho Wu + \epsilon\]

res2 =mod2$residuals
Moran.I(res2, COS.d.inv)
## $observed
## [1] 0.00935126591104
## 
## $expected
## [1] -0.00311526479751
## 
## $sd
## [1] 0.00532822226694
## 
## $p.value
## [1] 0.0192983715461

Modelo 3

mser1=errorsarlm(formula=cos~Ca+Mg+K+Cu+Zn,data=dfCOS,listw=Wve)
summary(mser1)
## 
## Call:errorsarlm(formula = cos ~ Ca + Mg + K + Cu + Zn, data = dfCOS, 
##     listw = Wve)
## 
## Residuals:
##           Min            1Q        Median            3Q           Max 
## -0.9751635956 -0.1964369823 -0.0278153459  0.1714053234  1.2188188322 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                   Estimate     Std. Error  z value   Pr(>|z|)
## (Intercept)  0.45924081633  0.15813934073  2.90403   0.003684
## Ca           0.05043737224  0.00583277103  8.64724 < 2.22e-16
## Mg          -0.13167652885  0.02424287756 -5.43156 5.5865e-08
## K            0.71046908968  0.16511082890  4.30298 1.6851e-05
## Cu           0.07089153768  0.01320021926  5.37048 7.8527e-08
## Zn           0.02511855977  0.01102425883  2.27848   0.022698
## 
## Lambda: 0.885314964, LR test value: 17.0841624, p-value: 3.57592702e-05
## Asymptotic standard error: 0.0792310283
##     z-value: 11.1738416, p-value: < 2.220446e-16
## Wald statistic: 124.854737, p-value: < 2.220446e-16
## 
## Log likelihood: -79.4223948849 for error model
## ML residual variance (sigma squared): 0.0947067355, (sigma: 0.307744595)
## Number of observations: 322 
## Number of parameters estimated: 8 
## AIC: 174.84479, (AIC for lm: 189.928952)
res3 =mser1$residuals
Moran.I(res3, COS.d.inv)
## $observed
## [1] 0.0171871677896
## 
## $expected
## [1] -0.00311526479751
## 
## $sd
## [1] 0.00532332774856
## 
## $p.value
## [1] 0.00013681238486

Spatial lag model

\[Y = \lambda W Y +X \beta + \epsilon \]

Spatial error model

mod4=lagsarlm(formula=cos~Ca+z+Mg+K+Cu,data=dfCOS,listw=Wve)
summary(mod4)
## 
## Call:lagsarlm(formula = cos ~ Ca + z + Mg + K + Cu, data = dfCOS, 
##     listw = Wve)
## 
## Residuals:
##           Min            1Q        Median            3Q           Max 
## -0.9753497653 -0.2076094602 -0.0341018992  0.1700610153  1.2806728576 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##                   Estimate     Std. Error  z value   Pr(>|z|)
## (Intercept) -0.52586179010  0.23306359526 -2.25630   0.024052
## Ca           0.04352539452  0.00531423662  8.19034 2.2204e-16
## z            0.00224411758  0.00111127544  2.01941   0.043445
## Mg          -0.13612907955  0.02308213607 -5.89759 3.6884e-09
## K            0.81752452459  0.16318410542  5.00983 5.4478e-07
## Cu           0.08595419577  0.01027927181  8.36190 < 2.22e-16
## 
## Rho: 0.72516181, LR test value: 5.71520501, p-value: 0.0168186005
## Asymptotic standard error: 0.166968549
##     z-value: 4.34310423, p-value: 1.40483438e-05
## Wald statistic: 18.8625544, p-value: 1.40483438e-05
## 
## Log likelihood: -84.2468107596 for lag model
## ML residual variance (sigma squared): 0.0981788848, (sigma: 0.3133351)
## Number of observations: 322 
## Number of parameters estimated: 8 
## AIC: 184.493622, (AIC for lm: 188.208827)
## LM test for residual autocorrelation
## test value: 24.0191918, p-value: 9.53802311e-07
res4 = mod4$residuals
Moran.I(res4, COS.d.inv)
## $observed
## [1] 0.018535102587
## 
## $expected
## [1] -0.00311526479751
## 
## $sd
## [1] 0.00532319394579
## 
## $p.value
## [1] 4.75862754592e-05
mod5=sacsarlm(formula=cos~Ca+Mg+K+Cu+Zn,data=dfCOS,listw=Wve)
summary(mod5)
## 
## Call:sacsarlm(formula = cos ~ Ca + Mg + K + Cu + Zn, data = dfCOS, 
##     listw = Wve)
## 
## Residuals:
##           Min            1Q        Median            3Q           Max 
## -0.9580011399 -0.1974771619 -0.0288531845  0.1705473999  1.2341558039 
## 
## Type: sac 
## Coefficients: (asymptotic standard errors) 
##                  Estimate    Std. Error  z value   Pr(>|z|)
## (Intercept) -0.1923264920  0.5347074329 -0.35969   0.719082
## Ca           0.0487561910  0.0059898314  8.13983 4.4409e-16
## Mg          -0.1300895170  0.0241143567 -5.39469 6.8641e-08
## K            0.7037490645  0.1644586647  4.27919 1.8758e-05
## Cu           0.0703775170  0.0131576804  5.34878 8.8550e-08
## Zn           0.0262736597  0.0109752937  2.39389   0.016671
## 
## Rho: 0.533547597
## Asymptotic standard error: 0.442877291
##     z-value: 1.20473009, p-value: 0.228307517
## Lambda: 0.852011459
## Asymptotic standard error: 0.184823219
##     z-value: 4.60987241, p-value: 4.02916168e-06
## 
## LR test value: 19.1478698, p-value: 6.95173006e-05
## 
## Log likelihood: -78.3905411642 for sac model
## ML residual variance (sigma squared): 0.0939949928, (sigma: 0.306586028)
## Number of observations: 322 
## Number of parameters estimated: 9 
## AIC: 174.781082, (AIC for lm: 189.928952)
res5 = mod5$residuals
Moran.I(res5, COS.d.inv)
## $observed
## [1] 0.0123444998355
## 
## $expected
## [1] -0.00311526479751
## 
## $sd
## [1] 0.00532265160961
## 
## $p.value
## [1] 0.00367813042932

SAC Spatial Autocorrelation Model

\[Y = \lambda W Y +X \beta + u\\u =\rho W u +\epsilon \]

mod6=sacsarlm(formula=cos~Ca+Mg+K+Cu+Zn,data=dfCOS,listw=Wve,type="mixed")
summary(mod6)
## 
## Call:sacsarlm(formula = cos ~ Ca + Mg + K + Cu + Zn, data = dfCOS, 
##     listw = Wve, type = "mixed")
## 
## Residuals:
##           Min            1Q        Median            3Q           Max 
## -0.8749328856 -0.1939540872 -0.0129975529  0.1715165962  1.2045784185 
## 
## Type: sacmixed 
## Coefficients: (asymptotic standard errors) 
##                   Estimate     Std. Error  z value   Pr(>|z|)
## (Intercept)  1.73825352662  1.14500193337  1.51812  0.1289834
## Ca           0.04846060825  0.00658679573  7.35724 1.8785e-13
## Mg          -0.12084469173  0.02544289609 -4.74964 2.0378e-06
## K            0.67797321767  0.16079616190  4.21635 2.4829e-05
## Cu           0.07309597218  0.01328173661  5.50350 3.7233e-08
## Zn           0.02961477667  0.01118304624  2.64819  0.0080925
## lag.Ca      -0.08741536045  0.06054214919 -1.44388  0.1487738
## lag.Mg      -0.25712002054  0.27452877759 -0.93659  0.3489712
## lag.K        2.88728887028  2.46723920577  1.17025  0.2419000
## lag.Cu       0.18358511759  0.16550285401  1.10926  0.2673195
## lag.Zn      -0.61186242288  0.16741430373 -3.65478  0.0002574
## 
## Rho: 0.564425533
## Asymptotic standard error: 1.07424583
##     z-value: 0.525415614, p-value: 0.599294302
## Lambda: 0.316262512
## Asymptotic standard error: 1.50065834
##     z-value: 0.210749178, p-value: 0.833082998
## 
## LR test value: 41.7877912, p-value: 5.71279243e-07
## 
## Log likelihood: -67.0705804566 for sacmixed model
## ML residual variance (sigma squared): 0.0884357587, (sigma: 0.297381504)
## Number of observations: 322 
## Number of parameters estimated: 14 
## AIC: 162.141161, (AIC for lm: 189.928952)

GNS : General Nesting Spatial Model

\[Y = \lambda W Y +X \beta_1 +W X \beta_2+ u\\u =\rho W u +\epsilon\] ## SDE Spatial Durbin Error Model

mod7=lagsarlm(formula=cos~Ca+Mg+K+Cu,data=dfCOS,listw=Wve,type="mixed")
summary(mod7)
## 
## Call:lagsarlm(formula = cos ~ Ca + Mg + K + Cu, data = dfCOS, listw = Wve, 
##     type = "mixed")
## 
## Residuals:
##           Min            1Q        Median            3Q           Max 
## -0.8784124883 -0.1975703931 -0.0264888029  0.1818513714  1.1598261445 
## 
## Type: mixed 
## Coefficients: (asymptotic standard errors) 
##                  Estimate    Std. Error  z value   Pr(>|z|)
## (Intercept)  0.8354039206  0.3606372640  2.31647 0.02053284
## Ca           0.0462236482  0.0065199685  7.08955 1.3456e-12
## Mg          -0.1290293480  0.0261743121 -4.92962 8.2391e-07
## K            0.7525312556  0.1616061075  4.65658 3.2151e-06
## Cu           0.0970454488  0.0105682018  9.18278 < 2.22e-16
## lag.Ca      -0.0671419351  0.0468486395 -1.43317 0.15181005
## lag.Mg       0.0969553784  0.2147609495  0.45146 0.65166006
## lag.K        2.1608127750  2.2010872599  0.98170 0.32624645
## lag.Cu      -0.3151951837  0.0933889795 -3.37508 0.00073795
## 
## Rho: 0.731523192, LR test value: 5.52673481, p-value: 0.0187280234
## Asymptotic standard error: 0.172407524
##     z-value: 4.2429888, p-value: 2.20562445e-05
## Wald statistic: 18.002954, p-value: 2.20562445e-05
## 
## Log likelihood: -77.3928991596 for mixed model
## ML residual variance (sigma squared): 0.0940718626, (sigma: 0.306711367)
## Number of observations: 322 
## Number of parameters estimated: 11 
## AIC: 176.785798, (AIC for lm: 180.312533)
## LM test for residual autocorrelation
## test value: 3.15555808, p-value: 0.0756688137
res7 = mod7$residuals
Moran.I(res7, COS.d.inv) # No hay depedencia espacial 
## $observed
## [1] 0.00585865512765
## 
## $expected
## [1] -0.00311526479751
## 
## $sd
## [1] 0.0053253921761
## 
## $p.value
## [1] 0.0919656005729
shapiro.test(res7)
## 
##  Shapiro-Wilk normality test
## 
## data:  res7
## W = 0.9793073832, p-value = 0.000135186226
ad.test(res7)
## 
##  Anderson-Darling normality test
## 
## data:  res7
## A = 1.523400659, p-value = 0.000623307138
sf.test(res7)
## 
##  Shapiro-Francia normality test
## 
## data:  res7
## W = 0.9782820093, p-value = 0.00017938898
cvm.test(res7)
## 
##  Cramer-von Mises normality test
## 
## data:  res7
## W = 0.2379572897, p-value = 0.00179826551
hist(res7)

outliers::grubbs.test(res7) # El valor mas alto es un outlier
## 
##  Grubbs test for one outlier
## 
## data:  res7
## G.42 = 3.7756141328, U = 0.9554527383, p-value = 0.0218616865
## alternative hypothesis: highest value 1.15982614445771 is an outlier
which.max(res7) # Maximo 
##  42 
## 145
plot(dfCOS$cos,mod7$fitted.values)

#library(mvoutlier)
#color.plot(cbind(dfCOS$cos,mod7$fitted.values))
#corr.plot(dfCOS$cos,mod7$fitted.values,
 #         quan=1/2, alpha=0.025,
  #        xlab="Observados",
   #       ylab="Estimados")

ggplot(dfCOS, aes(x= dfCOS$cos, y= mod7$fitted.values))+
  geom_point(size= 2)+
  theme_minimal()+
  stat_ellipse(level = 0.9) +
  stat_ellipse(level = 0.95, color = 2) +
  stat_ellipse(level = 0.99, color = 3)
## Warning: Use of `dfCOS$cos` is discouraged. Use `cos` instead.

## Warning: Use of `dfCOS$cos` is discouraged. Use `cos` instead.

## Warning: Use of `dfCOS$cos` is discouraged. Use `cos` instead.

## Warning: Use of `dfCOS$cos` is discouraged. Use `cos` instead.

dfCOS2 = dfCOS 
atipicos =which(dfCOS2$cos>2.5)
dfCOS2 =dfCOS2[-atipicos,]

XY=as.matrix(dfCOS2[,2:3])
COS.d=as.matrix(dist(XY, diag=T, upper=T))
COS.d.inv <-as.matrix(1/COS.d)
diag(COS.d.inv) <- 0
W=as.matrix(COS.d.inv)
SUMAS=apply(W,1,sum)
We=W/SUMAS

# Nueva construccion de matriz de pesos estandarizada
contnb=dnearneigh(coordinates(XY),0,380000,longlat = F)
dlist <- nbdists(contnb, XY)
dlist <- lapply(dlist, function(x) 1/x)
Wve=nb2listw(contnb,glist=dlist,style = "W")
mod7b=lagsarlm(formula=cos~Ca+Mg+K+Cu,data=dfCOS2,listw=Wve,type="mixed")
summary(mod7b)
## 
## Call:lagsarlm(formula = cos ~ Ca + Mg + K + Cu, data = dfCOS2, listw = Wve, 
##     type = "mixed")
## 
## Residuals:
##           Min            1Q        Median            3Q           Max 
## -0.8056843566 -0.2009387852 -0.0315530325  0.1887586753  0.8662568325 
## 
## Type: mixed 
## Coefficients: (asymptotic standard errors) 
##                   Estimate     Std. Error  z value   Pr(>|z|)
## (Intercept)  0.74218522295  0.33849418080  2.19261 0.02833560
## Ca           0.03791302149  0.00623522433  6.08046 1.1984e-09
## Mg          -0.11874290032  0.02458505269 -4.82988 1.3661e-06
## K            0.69717850523  0.15114320552  4.61270 3.9747e-06
## Cu           0.09734873018  0.00988296961  9.85015 < 2.22e-16
## lag.Ca      -0.04751804972  0.04278559114 -1.11061 0.26673682
## lag.Mg       0.12185068998  0.20093419963  0.60642 0.54423532
## lag.K        2.13367338619  2.02925093007  1.05146 0.29304800
## lag.Cu      -0.33383837988  0.08956593865 -3.72729 0.00019355
## 
## Rho: 0.76416651, LR test value: 6.78534866, p-value: 0.00919090763
## Asymptotic standard error: 0.15409003
##     z-value: 4.95922098, p-value: 7.07764211e-07
## Wald statistic: 24.5938728, p-value: 7.07764212e-07
## 
## Log likelihood: -54.1526830112 for mixed model
## ML residual variance (sigma squared): 0.0816918096, (sigma: 0.285817791)
## Number of observations: 318 
## Number of parameters estimated: 11 
## AIC: 130.305366, (AIC for lm: 135.090715)
## LM test for residual autocorrelation
## test value: 3.91513669, p-value: 0.0478531239
res7b = mod7b$residuals
Moran.I(res7b, COS.d.inv) # No hay depedencia espacial 
## $observed
## [1] 0.00704175132041
## 
## $expected
## [1] -0.00315457413249
## 
## $sd
## [1] 0.00539904627465
## 
## $p.value
## [1] 0.0589532299132
shapiro.test(res7b) # Son normales 
## 
##  Shapiro-Wilk normality test
## 
## data:  res7b
## W = 0.9902307868, p-value = 0.0323644349
ad.test(res7b)
## 
##  Anderson-Darling normality test
## 
## data:  res7b
## A = 0.7982286725, p-value = 0.0382990996
sf.test(res7b)
## 
##  Shapiro-Francia normality test
## 
## data:  res7b
## W = 0.9903891847, p-value = 0.0354967612
cvm.test(res7b)
## 
##  Cramer-von Mises normality test
## 
## data:  res7b
## W = 0.1361005899, p-value = 0.0362163695
plot(dfCOS2$cos,mod7b$fitted.values)

#(dfCOS2$cos,mod7b$fitted.values,
 #         quan=1/2, alpha=0.025,
  #        xlab="Observados",
   #       ylab="Estimados")

ggplot(dfCOS2, aes(x= dfCOS2$cos, y= mod7b$fitted.values))+
  geom_point(size= 2)+
  theme_minimal()+
  stat_ellipse(level = 0.9) +
  stat_ellipse(level = 0.95, color = 2) +
  stat_ellipse(level = 0.99, color = 3)
## Warning: Use of `dfCOS2$cos` is discouraged. Use `cos` instead.

## Warning: Use of `dfCOS2$cos` is discouraged. Use `cos` instead.

## Warning: Use of `dfCOS2$cos` is discouraged. Use `cos` instead.

## Warning: Use of `dfCOS2$cos` is discouraged. Use `cos` instead.

library(ggplot2)
library(plotly)
## Warning: package 'plotly' was built under R version 4.0.5
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:crayon':
## 
##     style
## The following object is masked from 'package:Hmisc':
## 
##     subplot
## The following object is masked from 'package:ggmap':
## 
##     wind
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(viridis)
## Warning: package 'viridis' was built under R version 4.0.5
## Loading required package: viridisLite
## Warning: package 'viridisLite' was built under R version 4.0.5
dataq=dfCOS2
dataq$ajustados =mod7b$fitted.values 
q<-ggplot(dataq,aes(x=Long,y=Lat,color=ajustados))+
  geom_point(size = 3) +
  ggtitle("Predicciones del modelo 6")+
  labs(x="x",y="y")+
  scale_colour_gradient(low = "yellow", high = "red", na.value = NA)
ggplotly(q)

Interpolacion AKIMA

library(akima)
## Warning: package 'akima' was built under R version 4.0.5
akima.li <- interp(dataq$Long, dataq$Lat, dataq$ajustados,
                    nx = 50, ny = 50,
                  linear = F)
image(akima.li)
contour(akima.li,add=T)

q<-ggplot(dataq,aes(x=Long,y=Lat,color=cos))+
  geom_point(size = 2) +
  ggtitle("Observados del modelo 6")+
  labs(x="x",y="y")+
  scale_colour_gradient(low = "yellow", high = "red", na.value = NA)
ggplotly(q)

Eliminar mas datos atipicos

Eliminando datos mayores a 2 y menores a 0.5

dfCOS3 = dfCOS 
atipicos =which(dfCOS3$cos>2 | dfCOS3$cos<0.45)
dfCOS3 =dfCOS3[-atipicos,]

XY=as.matrix(dfCOS3[,2:3])
COS.d=as.matrix(dist(XY, diag=T, upper=T))
COS.d.inv <-as.matrix(1/COS.d)
diag(COS.d.inv) <- 0
W=as.matrix(COS.d.inv)
SUMAS=apply(W,1,sum)
We=W/SUMAS
# Nueva construccion de matriz de pesos estandarizada
contnb=dnearneigh(coordinates(XY),0,380000,longlat = F)
dlist <- nbdists(contnb, XY)
dlist <- lapply(dlist, function(x) 1/x)
Wve=nb2listw(contnb,glist=dlist,style = "W")
mod7c=lagsarlm(formula=cos~Ca+Mg+K+Cu,data=dfCOS3,listw=Wve,type="mixed")
summary(mod7c)
## 
## Call:lagsarlm(formula = cos ~ Ca + Mg + K + Cu, data = dfCOS3, listw = Wve, 
##     type = "mixed")
## 
## Residuals:
##           Min            1Q        Median            3Q           Max 
## -0.7713582017 -0.1920765578 -0.0275801628  0.1836768495  0.8423941873 
## 
## Type: mixed 
## Coefficients: (asymptotic standard errors) 
##                   Estimate     Std. Error  z value   Pr(>|z|)
## (Intercept)  0.98624822312  0.37875402661  2.60393  0.0092162
## Ca           0.03374203320  0.00596759486  5.65421 1.5656e-08
## Mg          -0.11440297699  0.02334876191 -4.89974 9.5961e-07
## K            0.61548694610  0.14449146649  4.25968 2.0472e-05
## Cu           0.09636708770  0.00943892445 10.20954 < 2.22e-16
## lag.Ca      -0.04202346787  0.04122382447 -1.01940  0.3080142
## lag.Mg       0.19363503952  0.18654844714  1.03799  0.2992757
## lag.K        1.68394177193  1.83807073383  0.91615  0.3595901
## lag.Cu      -0.36272433182  0.09119747449 -3.97735 6.9687e-05
## 
## Rho: 0.669196956, LR test value: 4.09251279, p-value: 0.0430735606
## Asymptotic standard error: 0.204966068
##     z-value: 3.26491581, p-value: 0.00109496617
## Wald statistic: 10.6596753, p-value: 0.00109496617
## 
## Log likelihood: -35.8967797831 for mixed model
## ML residual variance (sigma squared): 0.0734185313, (sigma: 0.270958542)
## Number of observations: 310 
## Number of parameters estimated: 11 
## AIC: 93.7935596, (AIC for lm: 95.8860724)
## LM test for residual autocorrelation
## test value: 1.25310875, p-value: 0.262959552
res7c = mod7c$residuals
Moran.I(res7c, COS.d.inv) # No hay depedencia espacial 
## $observed
## [1] 0.00344349611471
## 
## $expected
## [1] -0.00323624595469
## 
## $sd
## [1] 0.0055452194235
## 
## $p.value
## [1] 0.228359793893

Se ve que el p valor aumenta en gran medida, lo que indica que ya no hay dependencia espacial

shapiro.test(res7c) # Son normales 
## 
##  Shapiro-Wilk normality test
## 
## data:  res7c
## W = 0.9931568962, p-value = 0.169857498
ad.test(res7c)
## 
##  Anderson-Darling normality test
## 
## data:  res7c
## A = 0.6616534102, p-value = 0.0833485602
sf.test(res7c)
## 
##  Shapiro-Francia normality test
## 
## data:  res7c
## W = 0.9931043412, p-value = 0.147044329
cvm.test(res7c)
## 
##  Cramer-von Mises normality test
## 
## data:  res7c
## W = 0.1120387366, p-value = 0.0765338448

Al hacer las pruebas de normalidad, se puede decir que se cumple el supuesto de normalidad para los residuales del modelo

plot(dfCOS3$cos,mod7c$fitted.values)

ggplot(dfCOS3, aes(x= dfCOS3$cos, y= mod7c$fitted.values))+
  geom_point(size= 2)+
  theme_minimal()+
  stat_ellipse(level = 0.9) +
  stat_ellipse(level = 0.95, color = 2) +
  stat_ellipse(level = 0.99, color = 3)
## Warning: Use of `dfCOS3$cos` is discouraged. Use `cos` instead.

## Warning: Use of `dfCOS3$cos` is discouraged. Use `cos` instead.

## Warning: Use of `dfCOS3$cos` is discouraged. Use `cos` instead.

## Warning: Use of `dfCOS3$cos` is discouraged. Use `cos` instead.

library(ggplot2)
library(plotly)
library(viridis)
dataq=dfCOS3
dataq$ajustados =mod7c$fitted.values 
q<-ggplot(dataq,aes(x=Long,y=Lat,color=ajustados))+
  geom_point(size = 3) +
  ggtitle("Predicciones del modelo 6")+
  labs(x="x",y="y")+
  scale_colour_gradient(low = "yellow", high = "red", na.value = NA)
ggplotly(q)

Primer modelo sin datos atipicos

modelo.arpb=spautolm(cos~1,data=dfCOS3,listw=Wve)
summary(modelo.arpb)
## 
## Call: spautolm(formula = cos ~ 1, data = dfCOS3, listw = Wve)
## 
## Residuals:
##          Min           1Q       Median           3Q          Max 
## -0.739416010 -0.285981134  0.017626671  0.253193098  0.784883924 
## 
## Coefficients: 
##                Estimate  Std. Error  z value   Pr(>|z|)
## (Intercept) 1.205410699 0.105560428 11.41915 < 2.22e-16
## 
## Lambda: 0.810276597 LR test value: 8.36785316 p-value: 0.00381916526 
## Numerical Hessian standard error of lambda: 0.173191163 
## 
## Log likelihood: -118.172509658 
## ML residual variance (sigma squared): 0.124338547, (sigma: 0.352616714)
## Number of observations: 310 
## Number of parameters estimated: 3 
## AIC: 242.345019
res1b = modelo.arpb$fit$residuals
Moran.I(res1b, COS.d.inv)
## $observed
## [1] 0.0130143727956
## 
## $expected
## [1] -0.00323624595469
## 
## $sd
## [1] 0.0055522218586
## 
## $p.value
## [1] 0.00342394511819

Se puede observar como al eleiminar los valores atipicos el p valor mejora en gran medida, con respecto a los datos iniciales.

Segundo modelo sin datos atipicos

mod2 = sacsarlm(cos~1,data=dfCOS3,listw=Wve)
summary(mod2)
## 
## Call:sacsarlm(formula = cos ~ 1, data = dfCOS3, listw = Wve)
## 
## Residuals:
##          Min           1Q       Median           3Q          Max 
## -0.738661932 -0.283699357  0.014964596  0.265591486  0.801220611 
## 
## Type: sac 
## Coefficients: (numerical Hessian approximate standard errors) 
##                Estimate  Std. Error z value Pr(>|z|)
## (Intercept) 0.385578987 0.347690222 1.10897  0.26744
## 
## Rho: 0.675040767
## Approximate (numerical Hessian) standard error: 0.280870905
##     z-value: 2.40338445, p-value: 0.0162440996
## Lambda: 0.67504076
## Approximate (numerical Hessian) standard error: 0.280266855
##     z-value: 2.40856437, p-value: 0.0160154019
## 
## LR test value: 11.639119, p-value: 0.00296891271
## 
## Log likelihood: -116.536876754 for sac model
## ML residual variance (sigma squared): 0.122841107, (sigma: 0.350486957)
## Number of observations: 310 
## Number of parameters estimated: 4 
## AIC: 241.073754, (AIC for lm: 248.712872)
res2 =mod2$residuals
Moran.I(res2, COS.d.inv)
## $observed
## [1] 0.00693430201451
## 
## $expected
## [1] -0.00323624595469
## 
## $sd
## [1] 0.00555205418886
## 
## $p.value
## [1] 0.0669733017529

Modelo 3 sin datos atipicos

mser1b=errorsarlm(formula=cos~Ca+Mg+K+Cu+Zn,data=dfCOS3,listw=Wve)
summary(mser1b)
## 
## Call:errorsarlm(formula = cos ~ Ca + Mg + K + Cu + Zn, data = dfCOS3, 
##     listw = Wve)
## 
## Residuals:
##           Min            1Q        Median            3Q           Max 
## -0.8724711247 -0.1974798065 -0.0209656548  0.1939424679  0.8753692281 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                   Estimate     Std. Error  z value   Pr(>|z|)
## (Intercept)  0.55644123545  0.13083561428  4.25298 2.1094e-05
## Ca           0.03828296785  0.00544205949  7.03465 1.9977e-12
## Mg          -0.11984572249  0.02175931026 -5.50779 3.6337e-08
## K            0.60692792319  0.14826505944  4.09353 4.2485e-05
## Cu           0.07631019666  0.01190405090  6.41044 1.4510e-10
## Zn           0.01708116627  0.00997485968  1.71242   0.086819
## 
## Lambda: 0.8723403, LR test value: 15.2391584, p-value: 9.4718915e-05
## Asymptotic standard error: 0.0878371805
##     z-value: 9.93133312, p-value: < 2.220446e-16
## Wald statistic: 98.6313776, p-value: < 2.220446e-16
## 
## Log likelihood: -40.3641398048 for error model
## ML residual variance (sigma squared): 0.0750516664, (sigma: 0.273955592)
## Number of observations: 310 
## Number of parameters estimated: 8 
## AIC: 96.7282796, (AIC for lm: 109.967438)
res2b =mser1b$residuals
Moran.I(res2b, COS.d.inv)
## $observed
## [1] 0.0152542035384
## 
## $expected
## [1] -0.00323624595469
## 
## $sd
## [1] 0.00554326378349
## 
## $p.value
## [1] 0.000850968257627

Spatial error model sin datos atipicos

mod4b=lagsarlm(formula=cos~Ca+z+Mg+K+Cu,data=dfCOS3,listw=Wve)
summary(mod4b)
## 
## Call:lagsarlm(formula = cos ~ Ca + z + Mg + K + Cu, data = dfCOS3, 
##     listw = Wve)
## 
## Residuals:
##           Min            1Q        Median            3Q           Max 
## -0.8830399687 -0.2046775967 -0.0217052063  0.1879746074  0.9101067995 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##                    Estimate      Std. Error  z value   Pr(>|z|)
## (Intercept) -0.367941317010  0.248193385373 -1.48248   0.138213
## Ca           0.033734010116  0.004915861486  6.86228 6.7770e-12
## z            0.002159771093  0.000996740571  2.16683   0.030248
## Mg          -0.123670087707  0.020602943610 -6.00254 1.9425e-09
## K            0.687426143088  0.146014296484  4.70794 2.5024e-06
## Cu           0.085766675951  0.009220226664  9.30201 < 2.22e-16
## 
## Rho: 0.671955778, LR test value: 4.30912397, p-value: 0.0379084782
## Asymptotic standard error: 0.192945507
##     z-value: 3.48261947, p-value: 0.00049653361
## Wald statistic: 12.1286384, p-value: 0.00049653361
## 
## Log likelihood: -43.1404090004 for lag model
## ML residual variance (sigma squared): 0.0769265385, (sigma: 0.277356338)
## Number of observations: 310 
## Number of parameters estimated: 8 
## AIC: 102.280818, (AIC for lm: 104.589942)
## LM test for residual autocorrelation
## test value: 15.7781123, p-value: 7.12216992e-05
res4b = mod4b$residuals
Moran.I(res4b, COS.d.inv)
## $observed
## [1] 0.014781732924
## 
## $expected
## [1] -0.00323624595469
## 
## $sd
## [1] 0.00554384341832
## 
## $p.value
## [1] 0.00115369308859
mod5b=sacsarlm(formula=cos~Ca+Mg+K+Cu+Zn,data=dfCOS3,listw=Wve)
summary(mod5)
## 
## Call:sacsarlm(formula = cos ~ Ca + Mg + K + Cu + Zn, data = dfCOS, 
##     listw = Wve)
## 
## Residuals:
##           Min            1Q        Median            3Q           Max 
## -0.9580011399 -0.1974771619 -0.0288531845  0.1705473999  1.2341558039 
## 
## Type: sac 
## Coefficients: (asymptotic standard errors) 
##                  Estimate    Std. Error  z value   Pr(>|z|)
## (Intercept) -0.1923264920  0.5347074329 -0.35969   0.719082
## Ca           0.0487561910  0.0059898314  8.13983 4.4409e-16
## Mg          -0.1300895170  0.0241143567 -5.39469 6.8641e-08
## K            0.7037490645  0.1644586647  4.27919 1.8758e-05
## Cu           0.0703775170  0.0131576804  5.34878 8.8550e-08
## Zn           0.0262736597  0.0109752937  2.39389   0.016671
## 
## Rho: 0.533547597
## Asymptotic standard error: 0.442877291
##     z-value: 1.20473009, p-value: 0.228307517
## Lambda: 0.852011459
## Asymptotic standard error: 0.184823219
##     z-value: 4.60987241, p-value: 4.02916168e-06
## 
## LR test value: 19.1478698, p-value: 6.95173006e-05
## 
## Log likelihood: -78.3905411642 for sac model
## ML residual variance (sigma squared): 0.0939949928, (sigma: 0.306586028)
## Number of observations: 322 
## Number of parameters estimated: 9 
## AIC: 174.781082, (AIC for lm: 189.928952)
res5b = mod5b$residuals
Moran.I(res5b, COS.d.inv)
## $observed
## [1] 0.0108829732729
## 
## $expected
## [1] -0.00323624595469
## 
## $sd
## [1] 0.0055429359859
## 
## $p.value
## [1] 0.0108576991626

SAC Spatial Autocorrelation Model sin datos atipicos

\[Y = \lambda W Y +X \beta + u\\u =\rho W u +\epsilon \]

mod6b=sacsarlm(formula=cos~Ca+Mg+K+Cu+Zn,data=dfCOS3,listw=Wve,type="mixed")
summary(mod6b)
## 
## Call:sacsarlm(formula = cos ~ Ca + Mg + K + Cu + Zn, data = dfCOS3, 
##     listw = Wve, type = "mixed")
## 
## Residuals:
##            Min             1Q         Median             3Q            Max 
## -0.77023192723 -0.18462767289 -0.00318083665  0.17228084214  0.84182928839 
## 
## Type: sacmixed 
## Coefficients: (asymptotic standard errors) 
##                   Estimate     Std. Error  z value   Pr(>|z|)
## (Intercept)  1.73342758562  0.90797679816  1.90911   0.056248
## Ca           0.03505715185  0.00603928878  5.80485 6.4425e-09
## Mg          -0.10803063551  0.02270934646 -4.75710 1.9639e-06
## K            0.57231957866  0.14349326234  3.98848 6.6499e-05
## Cu           0.07922472532  0.01188095633  6.66821 2.5894e-11
## Zn           0.02152543676  0.00996147979  2.16087   0.030706
## lag.Ca      -0.05987887969  0.04432013915 -1.35105   0.176678
## lag.Mg      -0.18774267764  0.20609967816 -0.91093   0.362331
## lag.K        2.63873525221  1.80527564178  1.46168   0.143829
## lag.Cu       0.12670118340  0.14468126509  0.87573   0.381179
## lag.Zn      -0.57097612702  0.13970340115 -4.08706 4.3687e-05
## 
## Rho: 0.506700093
## Asymptotic standard error: 0.753583252
##     z-value: 0.672387678, p-value: 0.501336925
## Lambda: -0.0663800317
## Asymptotic standard error: 1.27058199
##     z-value: -0.0522438004, p-value: 0.958334433
## 
## LR test value: 44.1746488, p-value: 1.97697505e-07
## 
## Log likelihood: -25.8963946095 for sacmixed model
## ML residual variance (sigma squared): 0.0690110573, (sigma: 0.262699557)
## Number of observations: 310 
## Number of parameters estimated: 14 
## AIC: 79.7927892, (AIC for lm: 109.967438)
res6b = mod6b$residuals
Moran.I(res6b, COS.d.inv)
## $observed
## [1] -0.000252897856322
## 
## $expected
## [1] -0.00323624595469
## 
## $sd
## [1] 0.00554379432916
## 
## $p.value
## [1] 0.590479076866