Taller parcial

# Paired longitud (length) and ancho (width) measurements, 13 values each.
longitud <- c(48, 38, 31, 43, 45, 33, 36, 33, 35, 40, 42, 44, 46)
ancho <- c(21, 19, 17, 22, 25, 16, 19, 17, 18, 21, 23, 23, 25)
# Bind both vectors column-wise and print the resulting data frame.
df1 <- data.frame(longitud = longitud, ancho = ancho)
df1

##    longitud ancho
## 1        48    21
## 2        38    19
## 3        31    17
## 4        43    22
## 5        45    25
## 6        33    16
## 7        36    19
## 8        33    17
## 9        35    18
## 10       40    21
## 11       42    23
## 12       44    23
## 13       46    25

# Simple linear regression of longitud on ancho using the rows of df1.
mod1 <- lm(formula = longitud ~ ancho, data = df1)
# Print the residual quantiles, coefficient table, R-squared and F-test.
summary(mod1)

## 
## Call:
## lm(formula = longitud ~ ancho, data = df1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8817 -1.1501 -0.5159  0.7524  7.5816 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   6.1010     5.2251   1.168    0.268    
## ancho         1.6342     0.2528   6.464 4.65e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.666 on 11 degrees of freedom
## Multiple R-squared:  0.7916, Adjusted R-squared:  0.7727 
## F-statistic: 41.79 on 1 and 11 DF,  p-value: 4.649e-05

\[\text{Longitud} = 1.63 \cdot \text{ancho} + 6.10\]

library(ggplot2)
# Scatter plot of longitud against ancho with the fitted straight line
# (method 'lm', confidence band switched off) drawn over the points.
ggplot(data = df1, mapping = aes(x = ancho, y = longitud)) +
  geom_point() +
  geom_smooth(method = 'lm', se = FALSE) +
  labs(x = 'ancho', y = 'longitud')

## `geom_smooth()` using formula 'y ~ x'

ejercicio 1.9

# Simulate o2 values for three sites: 30 normal draws per site with means
# 5.0, 4.9 and 5.9 (in that order) and sd 0.2, rounded to two decimals.
# The seed is fixed and the sites are drawn sequentially so the random-number
# stream is consumed in the order s1, s2, s3.
set.seed(123)
o2 <- round(
  unlist(lapply(c(5.0, 4.9, 5.9), function(m) rnorm(30, mean = m, sd = 0.2))),
  digits = 2
)
# Site factor: 30 consecutive observations per level.
sitio <- gl(n = 3, k = 30, labels = c('s1', 's2', 's3'))

# Assemble the simulated data and print it.
df2 <- data.frame(o2 = o2, sitio = sitio)
df2

##      o2 sitio
## 1  4.89    s1
## 2  4.95    s1
## 3  5.31    s1
## 4  5.01    s1
## 5  5.03    s1
## 6  5.34    s1
## 7  5.09    s1
## 8  4.75    s1
## 9  4.86    s1
## 10 4.91    s1
## 11 5.24    s1
## 12 5.07    s1
## 13 5.08    s1
## 14 5.02    s1
## 15 4.89    s1
## 16 5.36    s1
## 17 5.10    s1
## 18 4.61    s1
## 19 5.14    s1
## 20 4.91    s1
## 21 4.79    s1
## 22 4.96    s1
## 23 4.79    s1
## 24 4.85    s1
## 25 4.87    s1
## 26 4.66    s1
## 27 5.17    s1
## 28 5.03    s1
## 29 4.77    s1
## 30 5.25    s1
## 31 4.99    s2
## 32 4.84    s2
## 33 5.08    s2
## 34 5.08    s2
## 35 5.06    s2
## 36 5.04    s2
## 37 5.01    s2
## 38 4.89    s2
## 39 4.84    s2
## 40 4.82    s2
## 41 4.76    s2
## 42 4.86    s2
## 43 4.65    s2
## 44 5.33    s2
## 45 5.14    s2
## 46 4.68    s2
## 47 4.82    s2
## 48 4.81    s2
## 49 5.06    s2
## 50 4.88    s2
## 51 4.95    s2
## 52 4.89    s2
## 53 4.89    s2
## 54 5.17    s2
## 55 4.85    s2
## 56 5.20    s2
## 57 4.59    s2
## 58 5.02    s2
## 59 4.92    s2
## 60 4.94    s2
## 61 5.98    s3
## 62 5.80    s3
## 63 5.83    s3
## 64 5.70    s3
## 65 5.69    s3
## 66 5.96    s3
## 67 5.99    s3
## 68 5.91    s3
## 69 6.08    s3
## 70 6.31    s3
## 71 5.80    s3
## 72 5.44    s3
## 73 6.10    s3
## 74 5.76    s3
## 75 5.76    s3
## 76 6.11    s3
## 77 5.84    s3
## 78 5.66    s3
## 79 5.94    s3
## 80 5.87    s3
## 81 5.90    s3
## 82 5.98    s3
## 83 5.83    s3
## 84 6.03    s3
## 85 5.86    s3
## 86 5.97    s3
## 87 6.12    s3
## 88 5.99    s3
## 89 5.83    s3
## 90 6.13    s3

# One-way ANOVA of o2 by sitio.
# The variables are read from df2 (data = df2) instead of the free-standing
# global vectors, so the fit is tied to the same data frame that the residual
# diagnostics index through df2$sitio, as the other models in this file do.
mod2 <- aov(o2 ~ sitio, data = df2)
# ANOVA table: degrees of freedom, sums of squares, F value and p-value.
summary(mod2)

##             Df Sum Sq Mean Sq F value Pr(>F)    
## sitio        2  17.83   8.915     278 <2e-16 ***
## Residuals   87   2.79   0.032                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Se puede observar que existen diferencias significativas (F = 278, p < 2e-16) en la cantidad de O2 entre los 3 sitios.

# Shapiro-Wilk normality test applied to the residuals of mod2.
shapiro.test(mod2$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  mod2$residuals
## W = 0.99275, p-value = 0.9072

# Bartlett test of homogeneity of variances of the mod2 residuals,
# grouped by the levels of df2$sitio.
bartlett.test(mod2$residuals, df2$sitio)

## 
##  Bartlett test of homogeneity of variances
## 
## data:  mod2$residuals and df2$sitio
## Bartlett's K-squared = 0.84252, df = 2, p-value = 0.6562

# Re-draw the three site samples with the same seed and distribution
# parameters used to build o2, this time unrounded and kept as one vector
# per site.
set.seed(123)
o2_1 <- rnorm(n = 30, mean = 5.0, sd = 0.2)
o2_2 <- rnorm(n = 30, mean = 4.9, sd = 0.2)
o2_3 <- rnorm(n = 30, mean = 5.9, sd = 0.2)
# Box plot of o2 per site, with the overall mean of o2 as a red line.
boxplot(o2 ~ sitio)
abline(h = mean(o2), col = 'red')
# Overall mean of df2$o2 minus the mean of the first site's sample.
round(mean(df2$o2) - mean(o2_1), digits = 3)

## [1] 0.286

# Overall mean of df2$o2 minus the mean of the second site's sample (o2_2),
# rounded to three decimals.
round(mean(df2$o2)-mean(o2_2), 3)

## [1] 0.341

# Overall mean of df2$o2 minus the mean of the third site's sample (o2_3),
# rounded to three decimals.
round(mean(df2$o2)-mean(o2_3), 3)

## [1] -0.628

# Per-site mean and variance of o2.
# The variances are stored as var_o2 rather than `var`, so the workspace no
# longer holds a numeric object that masks the stats::var() function for the
# rest of the script.
med <- tapply(df2$o2, df2$sitio, mean)
var_o2 <- tapply(df2$o2, df2$sitio, var)

# Add the site means (blue dots) to the open box plot and label each site
# with the overall mean minus that site's sample mean.
points(1:3, med, pch = 16, col = 'blue')
text(
  x = c(1, 2, 3), y = c(5.05, 5.0, 5.96),
  labels = c(
    round(mean(df2$o2) - mean(o2_1), 3),
    round(mean(df2$o2) - mean(o2_2), 3),
    round(mean(df2$o2) - mean(o2_3), 3)
  )
)

ejercicio 1.10

# Simulate 22 values of p for each of three methods. The groups are drawn
# one after another (Bray, Olsen, Mehlich) from normal distributions with
# means 6.0, 6.9, 7.2 and standard deviations 0.62, 0.60, 0.50, using a
# fixed seed so the draws are reproducible.
set.seed(123)
p <- unlist(Map(
  function(media, desv) rnorm(22, mean = media, sd = desv),
  c(6.0, 6.9, 7.2),
  c(0.62, 0.60, 0.50)
))

# Method factor: 22 consecutive observations per level.
metodo <- gl(n = 3, k = 22, labels = c('Bray', 'Olsen', 'Mehlich'))

# Assemble the simulated data and print it.
df3 <- data.frame(p = p, metodo = metodo)
df3

##           p  metodo
## 1  5.652505    Bray
## 2  5.857290    Bray
## 3  6.966399    Bray
## 4  6.043715    Bray
## 5  6.080158    Bray
## 6  7.063340    Bray
## 7  6.285768    Bray
## 8  5.215662    Bray
## 9  5.574151    Bray
## 10 5.723690    Bray
## 11 6.758931    Bray
## 12 6.223085    Bray
## 13 6.248478    Bray
## 14 6.068623    Bray
## 15 5.655378    Bray
## 16 7.107886    Bray
## 17 6.308667    Bray
## 18 4.780697    Bray
## 19 6.434841    Bray
## 20 5.706869    Bray
## 21 5.337949    Bray
## 22 5.864856    Bray
## 23 6.284397   Olsen
## 24 6.462665   Olsen
## 25 6.524976   Olsen
## 26 5.887984   Olsen
## 27 7.402672   Olsen
## 28 6.992024   Olsen
## 29 6.217118   Olsen
## 30 7.652289   Olsen
## 31 7.155879   Olsen
## 32 6.722957   Olsen
## 33 7.437075   Olsen
## 34 7.426880   Olsen
## 35 7.392949   Olsen
## 36 7.313184   Olsen
## 37 7.232351   Olsen
## 38 6.862853   Olsen
## 39 6.716422   Olsen
## 40 6.671717   Olsen
## 41 6.483176   Olsen
## 42 6.775250   Olsen
## 43 6.140762   Olsen
## 44 8.201374   Olsen
## 45 7.803981 Mehlich
## 46 6.638446 Mehlich
## 47 6.998558 Mehlich
## 48 6.966672 Mehlich
## 49 7.589983 Mehlich
## 50 7.158315 Mehlich
## 51 7.326659 Mehlich
## 52 7.185727 Mehlich
## 53 7.178565 Mehlich
## 54 7.884301 Mehlich
## 55 7.087115 Mehlich
## 56 7.958235 Mehlich
## 57 6.425624 Mehlich
## 58 7.492307 Mehlich
## 59 7.261927 Mehlich
## 60 7.307971 Mehlich
## 61 7.389820 Mehlich
## 62 6.948838 Mehlich
## 63 7.033396 Mehlich
## 64 6.690712 Mehlich
## 65 6.664104 Mehlich
## 66 7.351764 Mehlich

# One-way ANOVA of p by metodo using the rows of df3.
mod3 <- aov(formula = p ~ metodo, data = df3)
# ANOVA table: degrees of freedom, sums of squares, F value and p-value.
summary(mod3)

##             Df Sum Sq Mean Sq F value  Pr(>F)    
## metodo       2  15.85   7.925    28.3 1.7e-09 ***
## Residuals   63  17.64   0.280                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Shapiro-Wilk normality test applied to the residuals of mod3.
shapiro.test(mod3$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  mod3$residuals
## W = 0.99574, p-value = 0.9987

# Bartlett test of homogeneity of variances of the mod3 residuals,
# grouped by the levels of df3$metodo.
bartlett.test(mod3$residuals, df3$metodo)

## 
##  Bartlett test of homogeneity of variances
## 
## data:  mod3$residuals and df3$metodo
## Bartlett's K-squared = 3.4507, df = 2, p-value = 0.1781

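Se puede decir que los métodos son diferentes: el ANOVA muestra diferencias significativas entre sus medias (F = 28.3, p = 1.7e-09), y se cumplen los supuestos de normalidad (Shapiro-Wilk, p = 0.9987) y de homogeneidad de varianzas (Bartlett, p = 0.1781).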

# Re-draw the three method samples with the same seed and distribution
# parameters used to build p, kept as one vector per method.
set.seed(123)

bray <- rnorm(22, 6.0, 0.62)
olsen <- rnorm(22, 6.9, 0.60)
mehlich <- rnorm(22, 7.2, 0.50)

# Overall mean of p minus each method's sample mean, rounded to 3 decimals.
ef1 <- round(mean(df3$p) - mean(bray), 3)
ef2 <- round(mean(df3$p) - mean(olsen), 3)
ef3 <- round(mean(df3$p) - mean(mehlich), 3)

# Box plot of p per method with the overall mean as a red horizontal line.
boxplot(p ~ metodo, data = df3)
abline(h = mean(df3$p), col = 'red')

# Per-method mean and variance. The grouping column is spelled out in full
# (df3$metodo): the abbreviated df3$met only resolved through `$` partial
# matching, which stops working as soon as another column starting with
# "met" exists.
med2 <- tapply(df3$p, df3$metodo, mean)
var2 <- tapply(df3$p, df3$metodo, var)
# Mark the method means (blue dots) and label each box with its difference
# from the overall mean.
points(1:3, med2, pch = 16, col = 'blue')
text(
  x = c(1, 2, 3), y = c(5.9, 7.1, 7.08),
  labels = c(ef1, ef2, ef3)
)

# Euclidean distances between observations, computed on the numeric column p
# only. Passing the whole data frame made dist() coerce the factor metodo to
# NA (the source of the former "NAs introduced by coercion" warning) and
# rescale the remaining distances to compensate for the missing column.
d <- dist(df3[, "p", drop = FALSE])

# Dimensions of the distance object d after expanding it to a full matrix.
dim(as.matrix(d))

## [1] 66 66

# Hierarchical clustering of the distances in d with the 'ward.D2' method,
# followed by its dendrogram.
cluster <- hclust(d, method = 'ward.D2')
plot(cluster)
# Cut the tree into three groups and outline them on the dendrogram.
grupos <- cutree(cluster, k = 3)
rect.hclust(cluster, k = 3)

ejercicio 1.11

# Locality factor: alternating runs of 11 observations (l1, then l2).
# It is generated at the full length of p so data.frame() no longer has to
# recycle a 22-element factor silently; the resulting column is identical.
localidad <- gl(2, 11, length(p), c('l1', 'l2'))

df4 <- data.frame(p, metodo, localidad)
# Additive two-factor ANOVA of p on localidad and metodo (no interaction term).
mod4 <- aov(p ~ localidad + metodo, data = df4)
summary(mod4)

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## localidad    1  0.001   0.001   0.005    0.945    
## metodo       2 15.849   7.925  27.858 2.33e-09 ***
## Residuals   62 17.637   0.284                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

library(lattice)

# Box-and-whisker plots of p: by metodo within one panel per localidad ...
bwplot(p ~ metodo | localidad, data = df4)

# ... and by localidad within one panel per metodo.
bwplot(p ~ localidad | metodo, data = df4)

# Shapiro-Wilk normality test applied to the residuals of mod4.
shapiro.test(mod4$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  mod4$residuals
## W = 0.99591, p-value = 0.999

# Bartlett test of homogeneity of variances of the mod4 residuals,
# grouped by the levels of df4$metodo.
bartlett.test(mod4$residuals, df4$metodo)

## 
##  Bartlett test of homogeneity of variances
## 
## data:  mod4$residuals and df4$metodo
## Bartlett's K-squared = 3.4807, df = 2, p-value = 0.1755

ejercicio 1.12

# Temperature label for each of the 12 runs: four consecutive runs per level.
temperatura <- rep(c('80ºc', '100ºc', '120ºc'), each = 4)
# The four pressure labels; data.frame() recycles them across the 12 rows,
# so every temperature level is paired with every pressure once.
presion <- c('120', '130', '140', '150')
# Response values, one row of four per temperature level.
rto <- c(
  9.60, 9.69, 8.40, 9.98,
  11.28, 10.10, 11.01, 10.44,
  9, 9.57, 9.03, 9.80
)
df5 <- data.frame(rto = rto, temperatura = temperatura, presion = presion)
df5

##      rto temperatura presion
## 1   9.60        80ºc     120
## 2   9.69        80ºc     130
## 3   8.40        80ºc     140
## 4   9.98        80ºc     150
## 5  11.28       100ºc     120
## 6  10.10       100ºc     130
## 7  11.01       100ºc     140
## 8  10.44       100ºc     150
## 9   9.00       120ºc     120
## 10  9.57       120ºc     130
## 11  9.03       120ºc     140
## 12  9.80       120ºc     150

# Additive two-factor ANOVA of rto on temperatura and presion (no interaction
# term) using the rows of df5.
mod5 <- aov(formula = rto ~ temperatura + presion, data = df5)
summary(mod5)

##             Df Sum Sq Mean Sq F value Pr(>F)  
## temperatura  2  4.682  2.3410   6.403 0.0325 *
## presion      3  0.601  0.2004   0.548 0.6676  
## Residuals    6  2.194  0.3656                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Shapiro-Wilk normality test applied to the residuals of mod5.
shapiro.test(mod5$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  mod5$residuals
## W = 0.8995, p-value = 0.1563

# Bartlett test of homogeneity of variances of the mod5 residuals,
# grouped by the levels of df5$temperatura.
bartlett.test(mod5$residuals, df5$temperatura)

## 
##  Bartlett test of homogeneity of variances
## 
## data:  mod5$residuals and df5$temperatura
## Bartlett's K-squared = 0.9889, df = 2, p-value = 0.6099

# Bartlett test of homogeneity of variances of the mod5 residuals,
# grouped by the levels of df5$presion.
bartlett.test(mod5$residuals, df5$presion)

## 
##  Bartlett test of homogeneity of variances
## 
## data:  mod5$residuals and df5$presion
## Bartlett's K-squared = 0.33487, df = 3, p-value = 0.9533

library(lattice)
# Box-and-whisker plot of rto by temperatura, one panel per presion level.
bwplot(rto ~ temperatura | presion, data = df5)

ejercicio 1.13

# Selection label for each of the 12 rows: four consecutive rows per level.
seleccion <- rep(c('a7r', 'a8r', 'a16r'), each = 4)
# The four edad labels; data.frame() recycles them across the 12 rows,
# so every selection is paired with every edad once.
edad <- c('50', '45', '40', '35')
# Two responses (Y1, Y2), one row of four values per selection.
Y1 <- c(
  13, 15, 15.06, 10.77,
  16.07, 13.95, 13.89, 10.70,
  12.13, 17.87, 13.08, 10.00
)
Y2 <- c(
  88, 106.67, 105.33, 78.67,
  115.33, 101.33, 108, 83.33,
  83.67, 119.33, 87.67, 73
)
df6 <- data.frame(Y1 = Y1, Y2 = Y2, seleccion = seleccion, edad = edad)
df6

##       Y1     Y2 seleccion edad
## 1  13.00  88.00       a7r   50
## 2  15.00 106.67       a7r   45
## 3  15.06 105.33       a7r   40
## 4  10.77  78.67       a7r   35
## 5  16.07 115.33       a8r   50
## 6  13.95 101.33       a8r   45
## 7  13.89 108.00       a8r   40
## 8  10.70  83.33       a8r   35
## 9  12.13  83.67      a16r   50
## 10 17.87 119.33      a16r   45
## 11 13.08  87.67      a16r   40
## 12 10.00  73.00      a16r   35

# Additive two-factor ANOVA of Y1 on edad and seleccion (no interaction term)
# using the rows of df6.
mod6 <- aov(formula = Y1 ~ edad + seleccion, data = df6)
summary(mod6)

##             Df Sum Sq Mean Sq F value Pr(>F)  
## edad         3  41.42  13.806   4.393 0.0585 .
## seleccion    2   0.29   0.146   0.047 0.9548  
## Residuals    6  18.86   3.143                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

library(lattice)
# Box-and-whisker plot of Y1 by seleccion, one panel per edad level.
bwplot(Y1 ~ seleccion | edad, data = df6)

# Same layout for the second response, Y2.
bwplot(Y2 ~ seleccion | edad, data = df6)

# Additive two-factor ANOVA of Y2 on edad and seleccion (no interaction term)
# using the rows of df6.
mod7 <- aov(formula = Y2 ~ edad + seleccion, data = df6)
summary(mod7)

##             Df Sum Sq Mean Sq F value Pr(>F)  
## edad         3 1508.4   502.8   3.753 0.0789 .
## seleccion    2  254.1   127.0   0.948 0.4387  
## Residuals    6  803.8   134.0                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# 0.05 divided by 2 -- presumably a Bonferroni-adjusted significance level for
# the two responses (Y1, Y2); the value is not referenced again in this file.
correccionbonferroni <- 0.05 / 2
# Tukey HSD pairwise comparisons for each factor of mod6.
TukeyHSD(mod6)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Y1 ~ edad + seleccion, data = df6)
## 
## $edad
##             diff        lwr       upr     p adj
## 40-35  3.5200000 -1.4905850  8.530585 0.1705985
## 45-35  5.1166667  0.1060816 10.127252 0.0459588
## 50-35  3.2433333 -1.7672517  8.253918 0.2145938
## 45-40  1.5966667 -3.4139184  6.607252 0.7009106
## 50-40 -0.2766667 -5.2872517  4.733918 0.9972494
## 50-45 -1.8733333 -6.8839184  3.137252 0.5980095
## 
## $seleccion
##            diff       lwr      upr     p adj
## a7r-a16r 0.1875 -3.658616 4.033616 0.9877660
## a8r-a16r 0.3825 -3.463616 4.228616 0.9503850
## a8r-a7r  0.1950 -3.651116 4.041116 0.9867766

# Tukey HSD pairwise comparisons for each factor of mod7.
TukeyHSD(mod7)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Y2 ~ edad + seleccion, data = df6)
## 
## $edad
##             diff        lwr      upr     p adj
## 40-35  22.000000 -10.715294 54.71529 0.1933412
## 45-35  30.776667  -1.938627 63.49196 0.0634860
## 50-35  17.333333 -15.381960 50.04863 0.3443919
## 45-40   8.776667 -23.938627 41.49196 0.7916534
## 50-40  -4.666667 -37.381960 28.04863 0.9576150
## 50-45 -13.443333 -46.158627 19.27196 0.5308535
## 
## $seleccion
##           diff      lwr     upr     p adj
## a7r-a16r  3.75 -21.3622 28.8622 0.8927022
## a8r-a16r 11.08 -14.0322 36.1922 0.4197810
## a8r-a7r   7.33 -17.7822 32.4422 0.6624808

# Shapiro-Wilk normality test applied to the residuals of mod6.
shapiro.test(mod6$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  mod6$residuals
## W = 0.92563, p-value = 0.3361

# Shapiro-Wilk normality test applied to the residuals of mod7.
shapiro.test(mod7$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  mod7$residuals
## W = 0.95058, p-value = 0.6455

# Bartlett test of homogeneity of variances of the mod6 residuals, grouped by
# the levels of df6$edad. The component is named in full: `mod6$residual`
# only resolved to `residuals` through `$` partial matching.
bartlett.test(mod6$residuals, df6$edad)

## 
##  Bartlett test of homogeneity of variances
## 
## data:  mod6$residuals and df6$edad
## Bartlett's K-squared = 5.2388, df = 3, p-value = 0.1551

# Bartlett test of homogeneity of variances of the mod7 residuals, grouped by
# the levels of df6$edad. The component is named in full: `mod7$residual`
# only resolved to `residuals` through `$` partial matching.
bartlett.test(mod7$residuals, df6$edad)

## 
##  Bartlett test of homogeneity of variances
## 
## data:  mod7$residuals and df6$edad
## Bartlett's K-squared = 5.9648, df = 3, p-value = 0.1133

# Euclidean distances between observations using only the numeric responses
# Y1 and Y2. With all four columns, dist() coerced the character columns to
# numbers: seleccion became NA (the source of the former coercion warning)
# while the edad labels ('50', '45', ...) were silently parsed into numeric
# coordinates and mixed into the distances.
C <- dist(df6[, c("Y1", "Y2")])

# Dimensions of the distance object C after expanding it to a full matrix.
dim(as.matrix(C))

## [1] 12 12

# Hierarchical clustering of the distances in C with the 'ward.D2' method;
# this overwrites the earlier `cluster` object. The dendrogram is then drawn.
cluster <- hclust(C, method = 'ward.D2')
plot(cluster)

Taller parcial

Danilo Patarroyo

2022-06-15

ejercicio 1.9

ejercicio 1.10

ejercicio 1.11

ejercicio 1.12

ejercicio 1.13