Clase 6

#ley de tobler
library(ape)

set.seed(123)
pp = sort.int(rnorm(120, 40, 5),60)
hr = sort.int(rnorm(120, 70, 10),60)
plot(pp,hr)

mod1 = lm(hr~pp)
summary(mod1)

## 
## Call:
## lm(formula = hr ~ pp)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16.1846  -3.8079  -0.9131   3.9651  20.4561 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   6.2889     5.6666    1.11    0.269    
## pp            1.5832     0.1405   11.27   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.856 on 118 degrees of freedom
## Multiple R-squared:  0.5182, Adjusted R-squared:  0.5141 
## F-statistic: 126.9 on 1 and 118 DF,  p-value: < 2.2e-16

cor(hr,pp)

## [1] 0.7198678

# Supuestos ####
# Normalidad de residuos
shapiro.test(mod1$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  mod1$residuals
## W = 0.97517, p-value = 0.02548

library(nortest)
ad.test(mod1$residuals)

## 
##  Anderson-Darling normality test
## 
## data:  mod1$residuals
## A = 1.0905, p-value = 0.007086

# cvm.test(mod1$residuals)
lillie.test(mod1$residuals)

## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  mod1$residuals
## D = 0.094324, p-value = 0.01065

pearson.test(mod1$residuals)

## 
##  Pearson chi-square normality test
## 
## data:  mod1$residuals
## P = 23.733, p-value = 0.01391

sf.test(mod1$residuals)

## 
##  Shapiro-Francia normality test
## 
## data:  mod1$residuals
## W = 0.97471, p-value = 0.02446

# Varianza constante
plot(mod1$residuals, pch = 16)
abline(h=0, col ='red')

mean(mod1$residuals)

## [1] -1.157928e-16

var(mod1$residuals)

## [1] 46.60573

d <- dist(mod1$residuals,
          method = "euclidean")
fit <- hclust(d, method="ward.D2") 
plot(fit)
(groups <- cutree(fit, k=3))

##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18 
##   1   2   2   3   1   1   1   1   1   1   1   1   1   1   3   1   3   2 
##  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36 
##   2   1   2   1   3   3   3   2   3   1   1   3   1   1   3   1   1   1 
##  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54 
##   3   3   1   3   1   3   3   1   1   1   1   1   1   1   1   1   1   1 
##  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72 
##   1   1   1   1   1   1   1   2   1   2   1   2   1   1   2   2   1   3 
##  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90 
##   3   3   1   3   2   2   2   3   3   2   3   1   1   2   1   2   3   3 
##  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 
##   3   1   1   3   1   3   3   3   1   3   1   2   2   2   2   2   2   2 
## 109 110 111 112 113 114 115 116 117 118 119 120 
##   2   2   2   2   2   2   2   2   1   2   2   2

rect.hclust(fit, k=3, border="blue")

clus = data.frame(groups,mod1$residuals)
clus

##     groups mod1.residuals
## 1        1    -2.99446865
## 2        2     9.18855605
## 3        2     3.58553388
## 4        3    -7.17197832
## 5        1     0.08654211
## 6        1     0.59089030
## 7        1    -1.92928874
## 8        1     0.24236937
## 9        1    -3.79728620
## 10       1     1.69621574
## 11       1     1.40118370
## 12       1    -1.37916303
## 13       1     0.34402235
## 14       1    -2.31545303
## 15       3   -15.74826213
## 16       1    -2.29204426
## 17       3   -10.33521708
## 18       2     6.02664018
## 19       2     8.42680297
## 20       1    -1.79497449
## 21       2     2.89081836
## 22       1     1.38858985
## 23       3    -7.21548583
## 24       3    -8.99262661
## 25       3   -10.68338911
## 26       2     8.42680926
## 27       3    -8.02011311
## 28       1     0.59219693
## 29       1    -2.46122031
## 30       3    -7.73448455
## 31       1     1.80546858
## 32       1    -2.11176573
## 33       3    -7.22456305
## 34       1    -1.57451375
## 35       1    -2.94590562
## 36       1    -1.08087262
## 37       3    -5.26105876
## 38       3    -6.50646104
## 39       1    -0.50542515
## 40       3    -7.56395753
## 41       1    -3.83993111
## 42       3    -7.85329658
## 43       3    -9.88155770
## 44       1    -2.50077755
## 45       1    -1.14670279
## 46       1    -1.97494692
## 47       1    -1.27628355
## 48       1    -0.91817978
## 49       1    -0.78152966
## 50       1    -0.82505657
## 51       1    -1.50698181
## 52       1    -0.44514741
## 53       1    -1.13714980
## 54       1    -0.63253145
## 55       1    -1.10961644
## 56       1    -0.59188039
## 57       1    -1.48846041
## 58       1    -2.01280867
## 59       1    -1.73685032
## 60       1    -1.23107489
## 61       1    -2.32115145
## 62       2     5.08884222
## 63       1     2.27497880
## 64       2     5.17762868
## 65       1    -0.21827751
## 66       2     4.52844659
## 67       1     1.71469348
## 68       1     0.81205004
## 69       2     7.36665450
## 70       2     3.85326649
## 71       1     1.09353164
## 72       3   -16.18459924
## 73       3    -6.63119629
## 74       3    -8.11081267
## 75       1     0.15831218
## 76       3    -5.58863728
## 77       2     3.98864985
## 78       2     5.06950147
## 79       2     6.55227686
## 80       3    -5.14666391
## 81       3   -10.84030531
## 82       2     6.18093135
## 83       3    -9.79647896
## 84       1     0.71539883
## 85       1     2.06866309
## 86       2     4.63708008
## 87       1    -3.12954959
## 88       2     3.95725981
## 89       3    -6.50292238
## 90       3    -9.24982735
## 91       3    -6.28773132
## 92       1    -1.51990674
## 93       1    -0.90801595
## 94       3    -8.00133775
## 95       1    -2.98700442
## 96       3    -9.70901124
## 97       3   -10.56466797
## 98       3    -5.56781775
## 99       1    -1.01820614
## 100      3    -4.65459771
## 101      1    -1.60422702
## 102      2     9.66416838
## 103      2     4.53155307
## 104      2    10.05553339
## 105      2    16.09945681
## 106      2     2.78398987
## 107      2     8.19795695
## 108      2    11.34149249
## 109      2     9.56746746
## 110      2    15.09770853
## 111      2     5.79224136
## 112      2    16.57278163
## 113      2    17.18393527
## 114      2     8.63521902
## 115      2    14.71131779
## 116      2    10.63221575
## 117      1     0.46432811
## 118      2     7.26281878
## 119      2     8.09256040
## 120      2    20.45613786

tapply(mod1$residuals, groups, var)

##         1         2         3 
##  2.311072 20.526230  7.585819

bartlett.test(mod1$residuals~groups)

## 
##  Bartlett test of homogeneity of variances
## 
## data:  mod1$residuals by groups
## Bartlett's K-squared = 49.169, df = 2, p-value = 2.104e-11

# Variable respuesta debe ser ALEATORIA
# Variable explicativa debe ser FIJA

# Datos Karla extraer:
# y = CO
# x = uc

library(readxl)
datos <- read_excel("C:/karla.xlsx", 
                    sheet = "hoy")
y = datos$CO
x = datos$Mn

mod2 = lm(y~x)
summary(mod2)

## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7823 -0.5344 -0.0409  0.5637  1.7809 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.973856   0.115063   8.464 1.77e-14 ***
## x           0.026133   0.003363   7.772 9.74e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7763 on 156 degrees of freedom
## Multiple R-squared:  0.2791, Adjusted R-squared:  0.2745 
## F-statistic:  60.4 on 1 and 156 DF,  p-value: 9.739e-13

residuos = mod2$residuals[-which.min(mod2$residuals)]
shapiro.test(residuos)

## 
##  Shapiro-Wilk normality test
## 
## data:  residuos
## W = 0.98782, p-value = 0.1893

plot(residuos, pch = 16)
abline(h=0, col ='red')

mean(residuos)

## [1] 0.01772189

var(residuos)

## [1] 0.5527473

d <- dist(residuos,
          method = "euclidean")
fit <- hclust(d, method="ward.D2") 
plot(fit)
(groups <- cutree(fit, k=3))

##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  18  19 
##   1   1   1   1   1   1   1   2   2   1   1   1   1   1   1   1   1   1 
##  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37 
##   1   1   1   2   1   2   1   3   1   1   1   1   1   1   1   1   1   2 
##  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55 
##   2   2   2   1   1   2   1   3   2   2   1   1   1   1   1   1   1   2 
##  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73 
##   2   1   1   1   2   2   2   2   1   1   1   2   1   1   2   1   1   2 
##  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91 
##   2   3   3   3   2   2   2   2   1   2   3   3   3   1   3   2   2   3 
##  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 109 
##   3   2   3   2   3   2   3   2   2   3   1   2   2   3   1   2   2   2 
## 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 
##   1   1   3   2   3   2   2   1   1   1   1   1   2   1   2   1   1   1 
## 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 
##   2   2   2   2   3   3   3   2   1   3   3   3   2   2   3   2   3   3 
## 146 147 148 149 150 151 152 153 154 155 156 157 158 
##   3   3   3   3   3   2   3   3   2   2   2   1   2

rect.hclust(fit, k=3, border="blue")

clus = data.frame(groups,residuos)
clus

##     groups    residuos
## 1        1 -0.92841502
## 2        1 -0.28458105
## 3        1 -0.50932497
## 4        1 -1.10305176
## 5        1 -0.53249023
## 6        1 -0.29206650
## 7        1 -0.68928831
## 8        2 -0.08467741
## 9        2  0.05334374
## 10       1 -0.61654679
## 11       1 -0.56950736
## 12       1 -0.82969339
## 13       1 -1.24767913
## 14       1 -1.54036889
## 15       1 -0.34519435
## 16       1 -0.48580526
## 18       1 -0.53502707
## 19       1 -0.86683990
## 20       1 -0.99387693
## 21       1 -0.88716248
## 22       1 -0.27841616
## 23       2 -0.03654679
## 24       1 -0.35833731
## 25       2  0.11771446
## 26       1 -0.29351215
## 27       3  1.00232661
## 28       1 -0.38447033
## 29       1 -0.64072678
## 30       1 -0.66510832
## 31       1 -0.51244564
## 32       1 -1.20390286
## 33       1 -0.51774870
## 34       1 -0.61175109
## 35       1 -1.77845118
## 36       1 -0.56521904
## 37       2  0.30536479
## 38       2  0.53939426
## 39       2  0.49286221
## 40       2  0.18878096
## 41       1 -0.94518602
## 42       1 -0.41575349
## 43       2  0.57186862
## 44       1 -0.24604181
## 45       3  0.73490804
## 46       2  0.34499522
## 47       2 -0.13113301
## 48       1 -1.02939549
## 49       1 -1.07947913
## 50       1 -0.34636200
## 51       1 -0.49633492
## 52       1 -0.57785463
## 53       1 -0.95518477
## 54       1 -0.69596057
## 55       2  0.44693752
## 56       2  0.12453962
## 57       1 -0.45546038
## 58       1 -0.51145444
## 59       1 -0.17599004
## 60       2 -0.08234210
## 61       2 -0.08183473
## 62       2 -0.04524851
## 63       2  0.62352728
## 64       1 -0.36871406
## 65       1 -0.32378057
## 66       1 -0.75601358
## 67       2  0.31254441
## 68       1 -1.01377339
## 69       1 -0.85848286
## 70       2  0.37896537
## 71       1 -1.32880238
## 72       1 -0.83048860
## 73       2 -0.03489664
## 74       2  0.37063733
## 75       3  0.82389934
## 76       3  0.91332212
## 77       3  0.85234355
## 78       2  0.14408910
## 79       2  0.21096805
## 80       2  0.08268096
## 81       2  0.02212853
## 82       1 -0.38366123
## 83       2  0.09338317
## 84       3  0.91922374
## 85       3  0.81992595
## 86       3  0.73878279
## 87       1 -0.43691721
## 88       3  1.60535111
## 89       2  0.32321666
## 90       2  0.50007484
## 91       3  1.12828173
## 92       3  0.96705513
## 93       2  0.29501427
## 94       3  1.78093943
## 95       2 -0.05448860
## 96       3  1.46908269
## 97       2  0.22334249
## 98       3  1.18249025
## 99       2  0.48873513
## 100      2  0.59722948
## 101      3  0.98587197
## 102      1 -0.97226946
## 103      2  0.61269522
## 104      2 -0.09612305
## 105      3  1.22636843
## 106      1 -1.02913577
## 107      2 -0.05792659
## 108      2 -0.05316440
## 109      2  0.34154231
## 110      1 -1.09865961
## 111      1 -1.09505607
## 112      3  0.95954279
## 113      2  0.41859274
## 114      3  1.36335465
## 115      2 -0.11538382
## 116      2 -0.05811233
## 117      1 -0.28330257
## 118      1 -0.19608525
## 119      1 -0.73182114
## 120      1 -1.10668382
## 121      1 -0.47776496
## 122      2  0.01607053
## 123      1 -0.20966985
## 124      2  0.08448490
## 125      1 -1.19742726
## 126      1 -0.34583607
## 127      1 -0.74059148
## 128      2  0.45394680
## 129      2  0.06049235
## 130      2  0.22870115
## 131      2  0.25760977
## 132      3  1.17078575
## 133      3  0.89995867
## 134      3  1.15030307
## 135      2 -0.02883779
## 136      1 -0.64133186
## 137      3  1.09124039
## 138      3  0.82437982
## 139      3  0.95126652
## 140      2  0.61908221
## 141      2  0.48955178
## 142      3  1.25611618
## 143      2  0.21111493
## 144      3  1.41190594
## 145      3  0.83241666
## 146      3  0.75705723
## 147      3  1.25179809
## 148      3  0.73764345
## 149      3  0.82049647
## 150      3  1.39993398
## 151      2  0.49927006
## 152      3  1.09415867
## 153      3  0.92844518
## 154      2  0.04909647
## 155      2  0.48578508
## 156      2  0.58673858
## 157      1 -0.62161271
## 158      2  0.47031131

tapply(residuos, groups, var)

##          1          2          3 
## 0.12149718 0.05751811 0.07001335

bartlett.test(residuos~groups)

## 
##  Bartlett test of homogeneity of variances
## 
## data:  residuos by groups
## Bartlett's K-squared = 8.8855, df = 2, p-value = 0.01176

# Independencia de los residuales
acf(mod1$residuals)

#
# par(bg = gray(0.8), fg = 'blue')
plot(x, y, cex = datos$CO*0.6,
     col = 'blue', pch = 16)

plot(x, y, cex = 1.5,
     col = ceiling(datos$CO), pch = 16)
med.col = round(tapply(datos$CO, ceiling(datos$CO), mean),2)
legend('topright', legend = med.col,
       fill = levels(factor(ceiling(datos$CO))))

###
co.dist <- as.matrix(dist(cbind(x,y)))
co.dist.inv <- 1/co.dist
diag(co.dist.inv) <- 0
co.dist.inv[1:5, 1:5]

##            1          2          3          4          5
## 1 0.00000000 0.47221558 0.09413686 0.03214757 0.02783594
## 2 0.47221558 0.00000000 0.11627907 0.03436419 0.02949330
## 3 0.09413686 0.11627907 0.00000000 0.04878029 0.03951313
## 4 0.03214757 0.03436419 0.04878029 0.00000000 0.20617717
## 5 0.02783594 0.02949330 0.03951313 0.20617717 0.00000000

sum.filas = apply(co.dist.inv, 1, sum)
w = co.dist.inv/sum.filas
round(w[1:5, 1:5],3)

##       1     2     3     4     5
## 1 0.000 0.034 0.007 0.002 0.002
## 2 0.016 0.000 0.004 0.001 0.001
## 3 0.003 0.004 0.000 0.002 0.001
## 4 0.002 0.002 0.002 0.000 0.010
## 5 0.001 0.001 0.001 0.007 0.000

dim(w)

## [1] 158 158

im = Moran.I(datos$CO, w)
im$p.value

## [1] 0

im.res = Moran.I(mod2$residuals, w)

ifelse(im.res$p.value<0.05,
       'Dependencia Espacial',
       'No depend. Espacial')

## [1] "Dependencia Espacial"

#####

library(ggplot2)

## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang

set.seed(123)
xy = expand.grid(x = seq(0,90,9), y = seq(0,70,7))
plot(xy$x,xy$y)

dim(xy)

## [1] 121   2

dd = data.frame(xy, ca = sort.int(runif(121,18,22), 10))
ggplot(dd, aes(x, y))+
  geom_tile(aes(fill = ca))

ca.dis <- as.matrix(dist(cbind(dd$x,dd$y)))
ca.dis.inv <- 1/ca.dis
diag(ca.dis.inv) <- 0
ca.dis.inv[1:5, 1:5]

##            1          2          3          4          5
## 1 0.00000000 0.11111111 0.05555556 0.03703704 0.02777778
## 2 0.11111111 0.00000000 0.11111111 0.05555556 0.03703704
## 3 0.05555556 0.11111111 0.00000000 0.11111111 0.05555556
## 4 0.03703704 0.05555556 0.11111111 0.00000000 0.11111111
## 5 0.02777778 0.03703704 0.05555556 0.11111111 0.00000000

sum.filas = apply(ca.dis.inv, 1, sum)
w.ca = ca.dis.inv/sum.filas
round(w.ca[1:5, 1:5],3)

##       1     2     3     4     5
## 1 0.000 0.043 0.022 0.014 0.011
## 2 0.038 0.000 0.038 0.019 0.013
## 3 0.018 0.036 0.000 0.036 0.018
## 4 0.011 0.017 0.034 0.000 0.034
## 5 0.008 0.011 0.017 0.033 0.000

dim(w.ca)

## [1] 121 121

Moran.I(dd$ca, w.ca)

## $observed
## [1] 0.2285506
## 
## $expected
## [1] -0.008333333
## 
## $sd
## [1] 0.009179934
## 
## $p.value
## [1] 0

######
ph20 = runif(144, 5.5, 6.5)
ph40 = runif(144, 6.5, 7)
xy = expand.grid(x = seq(1,120,10), y = seq(1,120,10))
dim(xy)

## [1] 144   2

Clase 6

Carlos Rivera

21 de mayo de 2019