# Thirteen paired observations of `longitud` and `ancho`, collected into the
# data frame used by the regression fitted further down.
ancho <- c(21, 19, 17, 22, 25, 16, 19, 17, 18, 21, 23, 23, 25)
longitud <- c(48, 38, 31, 43, 45, 33, 36, 33, 35, 40, 42, 44, 46)
df1 <- data.frame(longitud = longitud, ancho = ancho)
df1
## longitud ancho
## 1 48 21
## 2 38 19
## 3 31 17
## 4 43 22
## 5 45 25
## 6 33 16
## 7 36 19
## 8 33 17
## 9 35 18
## 10 40 21
## 11 42 23
## 12 44 23
## 13 46 25
# Simple linear regression of `longitud` on `ancho`, followed by its
# coefficient table and fit summary.
mod1 <- lm(longitud ~ ancho, data = df1)
summary(mod1)
##
## Call:
## lm(formula = longitud ~ ancho, data = df1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8817 -1.1501 -0.5159 0.7524 7.5816
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.1010 5.2251 1.168 0.268
## ancho 1.6342 0.2528 6.464 4.65e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.666 on 11 degrees of freedom
## Multiple R-squared: 0.7916, Adjusted R-squared: 0.7727
## F-statistic: 41.79 on 1 and 11 DF, p-value: 4.649e-05
\[Longitud = 1.63 \cdot ancho + 6.10\]
library(ggplot2)
# Scatter plot of `longitud` against `ancho` with the least-squares line
# overlaid and no confidence band.
ggplot(df1, aes(x = ancho, y = longitud)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(x = "ancho", y = "longitud")
## `geom_smooth()` using formula 'y ~ x'
# Simulated o2 values: 30 normal draws per site with means 5.0, 4.9 and 5.9
# (sd 0.2 each), rounded to two decimals. The seed makes the draws repeatable.
set.seed(123)
o2 <- round(
  c(
    rnorm(30, mean = 5.0, sd = 0.2),
    rnorm(30, mean = 4.9, sd = 0.2),
    rnorm(30, mean = 5.9, sd = 0.2)
  ),
  digits = 2
)
# Site factor: three levels, 30 consecutive observations each.
sitio <- gl(n = 3, k = 30, length = 90, labels = c("s1", "s2", "s3"))
df2 <- data.frame(o2, sitio)
df2
## o2 sitio
## 1 4.89 s1
## 2 4.95 s1
## 3 5.31 s1
## 4 5.01 s1
## 5 5.03 s1
## 6 5.34 s1
## 7 5.09 s1
## 8 4.75 s1
## 9 4.86 s1
## 10 4.91 s1
## 11 5.24 s1
## 12 5.07 s1
## 13 5.08 s1
## 14 5.02 s1
## 15 4.89 s1
## 16 5.36 s1
## 17 5.10 s1
## 18 4.61 s1
## 19 5.14 s1
## 20 4.91 s1
## 21 4.79 s1
## 22 4.96 s1
## 23 4.79 s1
## 24 4.85 s1
## 25 4.87 s1
## 26 4.66 s1
## 27 5.17 s1
## 28 5.03 s1
## 29 4.77 s1
## 30 5.25 s1
## 31 4.99 s2
## 32 4.84 s2
## 33 5.08 s2
## 34 5.08 s2
## 35 5.06 s2
## 36 5.04 s2
## 37 5.01 s2
## 38 4.89 s2
## 39 4.84 s2
## 40 4.82 s2
## 41 4.76 s2
## 42 4.86 s2
## 43 4.65 s2
## 44 5.33 s2
## 45 5.14 s2
## 46 4.68 s2
## 47 4.82 s2
## 48 4.81 s2
## 49 5.06 s2
## 50 4.88 s2
## 51 4.95 s2
## 52 4.89 s2
## 53 4.89 s2
## 54 5.17 s2
## 55 4.85 s2
## 56 5.20 s2
## 57 4.59 s2
## 58 5.02 s2
## 59 4.92 s2
## 60 4.94 s2
## 61 5.98 s3
## 62 5.80 s3
## 63 5.83 s3
## 64 5.70 s3
## 65 5.69 s3
## 66 5.96 s3
## 67 5.99 s3
## 68 5.91 s3
## 69 6.08 s3
## 70 6.31 s3
## 71 5.80 s3
## 72 5.44 s3
## 73 6.10 s3
## 74 5.76 s3
## 75 5.76 s3
## 76 6.11 s3
## 77 5.84 s3
## 78 5.66 s3
## 79 5.94 s3
## 80 5.87 s3
## 81 5.90 s3
## 82 5.98 s3
## 83 5.83 s3
## 84 6.03 s3
## 85 5.86 s3
## 86 5.97 s3
## 87 6.12 s3
## 88 5.99 s3
## 89 5.83 s3
## 90 6.13 s3
# One-way ANOVA of o2 by site. The model is fitted against df2 rather than
# the free-standing `o2`/`sitio` vectors, so it uses the same data frame the
# later diagnostics group by and does not depend on the globals staying intact.
mod2 <- aov(o2 ~ sitio, data = df2)
summary(mod2)
## Df Sum Sq Mean Sq F value Pr(>F)
## sitio 2 17.83 8.915 278 <2e-16 ***
## Residuals 87 2.79 0.032
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Se puede observar que existen diferencias significativas en la cantidad de O2 entre los 3 sitios (ANOVA: F = 278, p < 2e-16).
# Shapiro-Wilk normality test on the residuals of the one-way ANOVA mod2.
shapiro.test(mod2$residuals)
##
## Shapiro-Wilk normality test
##
## data: mod2$residuals
## W = 0.99275, p-value = 0.9072
# Bartlett test of homogeneity of variances of the mod2 residuals across sites.
bartlett.test(mod2$residuals, df2$sitio)
##
## Bartlett test of homogeneity of variances
##
## data: mod2$residuals and df2$sitio
## Bartlett's K-squared = 0.84252, df = 2, p-value = 0.6562
# Re-draw the three site samples with the same seed that was used for df2.
# NOTE(review): these draws are not rounded, whereas df2$o2 was rounded to two
# decimals, so their means can differ slightly from the per-site means of df2.
set.seed(123)
o2_1 <- rnorm(n = 30, mean = 5.0, sd = 0.2)
o2_2 <- rnorm(n = 30, mean = 4.9, sd = 0.2)
o2_3 <- rnorm(n = 30, mean = 5.9, sd = 0.2)

# Box plot per site, with the grand mean drawn as a red horizontal line.
boxplot(o2 ~ sitio)
abline(h = mean(o2), col = "red")

# Grand mean minus the mean of the first site sample.
round(mean(df2$o2) - mean(o2_1), digits = 3)
## [1] 0.286
round(mean(df2$o2)-mean(o2_2), 3)
## [1] 0.341
round(mean(df2$o2)-mean(o2_3), 3)
## [1] -0.628
# Per-site mean and variance of o2.
# NOTE(review): the second assignment creates a variable called `var`, which
# masks the name of the variance function for plain variable lookups.
med <- tapply(df2$o2, df2$sitio, FUN = mean)
var <- tapply(df2$o2, df2$sitio, FUN = var)

# Mark the site means on the current plot and label each one with
# (grand mean - site sample mean), rounded to three decimals.
points(x = 1:3, y = med, pch = 16, col = "blue")
text(
  x = c(1, 2, 3),
  y = c(5.05, 5.0, 5.96),
  labels = round(
    mean(df2$o2) - c(mean(o2_1), mean(o2_2), mean(o2_3)),
    digits = 3
  )
)
# Simulated p values: 22 normal draws for each of three methods
# (means 6.0, 6.9, 7.2; sds 0.62, 0.60, 0.50), left unrounded.
set.seed(123)
p <- c(
  rnorm(22, mean = 6.0, sd = 0.62),
  rnorm(22, mean = 6.9, sd = 0.60),
  rnorm(22, mean = 7.2, sd = 0.50)
)
# Method factor: three levels, 22 consecutive observations each.
metodo <- gl(n = 3, k = 22, length = 66, labels = c("Bray", "Olsen", "Mehlich"))
df3 <- data.frame(p, metodo)
df3
## p metodo
## 1 5.652505 Bray
## 2 5.857290 Bray
## 3 6.966399 Bray
## 4 6.043715 Bray
## 5 6.080158 Bray
## 6 7.063340 Bray
## 7 6.285768 Bray
## 8 5.215662 Bray
## 9 5.574151 Bray
## 10 5.723690 Bray
## 11 6.758931 Bray
## 12 6.223085 Bray
## 13 6.248478 Bray
## 14 6.068623 Bray
## 15 5.655378 Bray
## 16 7.107886 Bray
## 17 6.308667 Bray
## 18 4.780697 Bray
## 19 6.434841 Bray
## 20 5.706869 Bray
## 21 5.337949 Bray
## 22 5.864856 Bray
## 23 6.284397 Olsen
## 24 6.462665 Olsen
## 25 6.524976 Olsen
## 26 5.887984 Olsen
## 27 7.402672 Olsen
## 28 6.992024 Olsen
## 29 6.217118 Olsen
## 30 7.652289 Olsen
## 31 7.155879 Olsen
## 32 6.722957 Olsen
## 33 7.437075 Olsen
## 34 7.426880 Olsen
## 35 7.392949 Olsen
## 36 7.313184 Olsen
## 37 7.232351 Olsen
## 38 6.862853 Olsen
## 39 6.716422 Olsen
## 40 6.671717 Olsen
## 41 6.483176 Olsen
## 42 6.775250 Olsen
## 43 6.140762 Olsen
## 44 8.201374 Olsen
## 45 7.803981 Mehlich
## 46 6.638446 Mehlich
## 47 6.998558 Mehlich
## 48 6.966672 Mehlich
## 49 7.589983 Mehlich
## 50 7.158315 Mehlich
## 51 7.326659 Mehlich
## 52 7.185727 Mehlich
## 53 7.178565 Mehlich
## 54 7.884301 Mehlich
## 55 7.087115 Mehlich
## 56 7.958235 Mehlich
## 57 6.425624 Mehlich
## 58 7.492307 Mehlich
## 59 7.261927 Mehlich
## 60 7.307971 Mehlich
## 61 7.389820 Mehlich
## 62 6.948838 Mehlich
## 63 7.033396 Mehlich
## 64 6.690712 Mehlich
## 65 6.664104 Mehlich
## 66 7.351764 Mehlich
# One-way ANOVA of p by method, fitted on df3.
mod3 <- aov(p ~ metodo, data = df3)
summary(mod3)
## Df Sum Sq Mean Sq F value Pr(>F)
## metodo 2 15.85 7.925 28.3 1.7e-09 ***
## Residuals 63 17.64 0.280
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Shapiro-Wilk normality test on the residuals of mod3.
shapiro.test(mod3$residuals)
##
## Shapiro-Wilk normality test
##
## data: mod3$residuals
## W = 0.99574, p-value = 0.9987
# Bartlett test of homogeneity of variances of the mod3 residuals across methods.
bartlett.test(mod3$residuals, df3$metodo)
##
## Bartlett test of homogeneity of variances
##
## data: mod3$residuals and df3$metodo
## Bartlett's K-squared = 3.4507, df = 2, p-value = 0.1781
Se puede decir que los métodos difieren: el ANOVA indica que al menos una de las medias de p es distinta entre métodos (F = 28.3, p = 1.7e-09).
# Re-draw the three method samples with the seed used for df3. `p` was not
# rounded, so these reproduce the corresponding groups of df3$p.
set.seed(123)
bray <- rnorm(22, 6.0, 0.62)
olsen <- rnorm(22, 6.9, 0.60)
mehlich <- rnorm(22, 7.2, 0.50)

# Grand mean minus each method mean, rounded to three decimals.
ef1 <- round(mean(df3$p) - mean(bray), 3)
ef2 <- round(mean(df3$p) - mean(olsen), 3)
ef3 <- round(mean(df3$p) - mean(mehlich), 3)

# Box plot per method, read from df3 so the plot does not depend on the
# free-standing `p`/`metodo` vectors. The red line marks the grand mean.
boxplot(p ~ metodo, data = df3)
abline(h = mean(df3$p), col = "red")

# Per-method mean and variance. The column name is spelled out in full:
# `df3$met` only resolved through `$` partial matching, which stops working
# as soon as a second column starting with "met" exists, or on a tibble.
med2 <- tapply(df3$p, df3$metodo, mean)
var2 <- tapply(df3$p, df3$metodo, var)

# Mark the method means and label each with its difference from the grand mean.
points(1:3, med2, pch = 16, col = "blue")
text(x = c(1, 2, 3), y = c(5.9, 7.1, 7.08), labels = c(ef1, ef2, ef3))
# Euclidean distances between observations, computed on the numeric column
# only. Passing all of df3 sent the factor `metodo` through character
# coercion: it became NA (with a coercion warning), the distances were rescaled
# to compensate for the missing column, and `p` was reduced to its formatted
# 7-significant-digit text. `df3["p"]` keeps the row names as labels.
d <- dist(df3["p"])
dim(as.matrix(d))
## [1] 66 66
# Hierarchical clustering of the distance matrix `d` with Ward's criterion
# (ward.D2), drawn as a dendrogram with the three-cluster cut outlined.
cluster <- hclust(d, method = "ward.D2")
plot(cluster)
rect.hclust(cluster, k = 3)
# Cluster membership of each observation for the same three-cluster cut.
grupos <- cutree(cluster, k = 3)
# Locality factor: within each run of 22 observations the first 11 are l1 and
# the last 11 are l2. It is generated at the full length of `p`, so it lines up
# with `p`/`metodo` explicitly instead of relying on data.frame() silently
# recycling a 22-element factor.
localidad <- gl(2, 11, length(p), labels = c("l1", "l2"))
df4 <- data.frame(p, metodo, localidad)

# Additive two-factor ANOVA: locality entered first, then method.
mod4 <- aov(p ~ localidad + metodo, data = df4)
summary(mod4)
## Df Sum Sq Mean Sq F value Pr(>F)
## localidad 1 0.001 0.001 0.005 0.945
## metodo 2 15.849 7.925 27.858 2.33e-09 ***
## Residuals 62 17.637 0.284
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(lattice)
# Box-and-whisker panels: methods within each locality, then localities
# within each method.
bwplot(p ~ metodo | localidad, data = df4)
bwplot(p ~ localidad | metodo, data = df4)
# Shapiro-Wilk normality test on the residuals of mod4.
shapiro.test(mod4$residuals)
##
## Shapiro-Wilk normality test
##
## data: mod4$residuals
## W = 0.99591, p-value = 0.999
# Bartlett test of homogeneity of variances of the mod4 residuals across methods.
bartlett.test(mod4$residuals, df4$metodo)
##
## Bartlett test of homogeneity of variances
##
## data: mod4$residuals and df4$metodo
## Bartlett's K-squared = 3.4807, df = 2, p-value = 0.1755
# Response `rto` recorded once for every temperature x pressure combination
# (3 temperatures x 4 pressures = 12 rows).
#
# The temperature labels are built with the "\u00ba" escape so the ordinal
# sign survives re-encoding of the file (the literals had been corrupted into
# mojibake). They are stored as a factor whose levels follow first appearance,
# so plots and tables list 80, 100, 120 instead of alphabetical 100, 120, 80.
temperatura <- paste0(rep(c(80, 100, 120), each = 4), "\u00ba", "c")
temperatura <- factor(temperatura, levels = unique(temperatura))

# The pressure labels are repeated to the full 12 rows explicitly rather than
# relying on data.frame() recycling a length-4 vector.
presion <- rep(c("120", "130", "140", "150"), times = 3)

rto <- c(
  9.60, 9.69, 8.40, 9.98,
  11.28, 10.10, 11.01, 10.44,
  9, 9.57, 9.03, 9.80
)
df5 <- data.frame(rto, temperatura, presion)
df5
## rto temperatura presion
## 1 9.60 80ºc 120
## 2 9.69 80ºc 130
## 3 8.40 80ºc 140
## 4 9.98 80ºc 150
## 5 11.28 100ºc 120
## 6 10.10 100ºc 130
## 7 11.01 100ºc 140
## 8 10.44 100ºc 150
## 9 9.00 120ºc 120
## 10 9.57 120ºc 130
## 11 9.03 120ºc 140
## 12 9.80 120ºc 150
# Additive two-factor ANOVA of `rto` on temperature and pressure, fitted on df5.
mod5 <- aov(rto ~ temperatura + presion, data = df5)
summary(mod5)
## Df Sum Sq Mean Sq F value Pr(>F)
## temperatura 2 4.682 2.3410 6.403 0.0325 *
## presion 3 0.601 0.2004 0.548 0.6676
## Residuals 6 2.194 0.3656
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Shapiro-Wilk normality test on the residuals of mod5.
shapiro.test(mod5$residuals)
##
## Shapiro-Wilk normality test
##
## data: mod5$residuals
## W = 0.8995, p-value = 0.1563
# Bartlett test of homogeneity of variances of the mod5 residuals across temperatures.
bartlett.test(mod5$residuals, df5$temperatura)
##
## Bartlett test of homogeneity of variances
##
## data: mod5$residuals and df5$temperatura
## Bartlett's K-squared = 0.9889, df = 2, p-value = 0.6099
# Bartlett test of homogeneity of variances of the mod5 residuals across pressures.
bartlett.test(mod5$residuals, df5$presion)
##
## Bartlett test of homogeneity of variances
##
## data: mod5$residuals and df5$presion
## Bartlett's K-squared = 0.33487, df = 3, p-value = 0.9533
library(lattice)
# `rto` by temperature, one panel per pressure level.
bwplot(rto ~ temperatura | presion, data = df5)
# Two responses (Y1, Y2) recorded for every `seleccion` x `edad` combination
# (3 selections x 4 ages = 12 rows).
seleccion <- rep(c("a7r", "a8r", "a16r"), each = 4)
# The age labels are repeated to the full 12 rows explicitly, rather than
# leaving a length-4 vector for data.frame() to recycle silently.
edad <- rep(c("50", "45", "40", "35"), times = 3)
Y1 <- c(
  13, 15, 15.06, 10.77,
  16.07, 13.95, 13.89, 10.70,
  12.13, 17.87, 13.08, 10.00
)
Y2 <- c(
  88, 106.67, 105.33, 78.67,
  115.33, 101.33, 108, 83.33,
  83.67, 119.33, 87.67, 73
)
df6 <- data.frame(Y1, Y2, seleccion, edad)
df6
## Y1 Y2 seleccion edad
## 1 13.00 88.00 a7r 50
## 2 15.00 106.67 a7r 45
## 3 15.06 105.33 a7r 40
## 4 10.77 78.67 a7r 35
## 5 16.07 115.33 a8r 50
## 6 13.95 101.33 a8r 45
## 7 13.89 108.00 a8r 40
## 8 10.70 83.33 a8r 35
## 9 12.13 83.67 a16r 50
## 10 17.87 119.33 a16r 45
## 11 13.08 87.67 a16r 40
## 12 10.00 73.00 a16r 35
# Additive two-factor ANOVA of Y1 on age and selection, fitted on df6.
mod6 <- aov(Y1 ~ edad + seleccion, data = df6)
summary(mod6)
## Df Sum Sq Mean Sq F value Pr(>F)
## edad 3 41.42 13.806 4.393 0.0585 .
## seleccion 2 0.29 0.146 0.047 0.9548
## Residuals 6 18.86 3.143
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(lattice)
# Each response by selection, one panel per age level.
bwplot(Y1 ~ seleccion | edad, data = df6)
bwplot(Y2 ~ seleccion | edad, data = df6)
# Additive two-factor ANOVA of Y2 on age and selection, fitted on df6.
mod7 <- aov(Y2 ~ edad + seleccion, data = df6)
summary(mod7)
## Df Sum Sq Mean Sq F value Pr(>F)
## edad 3 1508.4 502.8 3.753 0.0789 .
## seleccion 2 254.1 127.0 0.948 0.4387
## Residuals 6 803.8 134.0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Significance level 0.05 divided by two.
# NOTE(review): presumably a Bonferroni correction for the two responses
# (Y1, Y2); the value is not referenced anywhere in the visible code.
correccionbonferroni <- 0.05 / 2
# Tukey HSD pairwise comparisons for the Y1 model.
TukeyHSD(mod6)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Y1 ~ edad + seleccion, data = df6)
##
## $edad
## diff lwr upr p adj
## 40-35 3.5200000 -1.4905850 8.530585 0.1705985
## 45-35 5.1166667 0.1060816 10.127252 0.0459588
## 50-35 3.2433333 -1.7672517 8.253918 0.2145938
## 45-40 1.5966667 -3.4139184 6.607252 0.7009106
## 50-40 -0.2766667 -5.2872517 4.733918 0.9972494
## 50-45 -1.8733333 -6.8839184 3.137252 0.5980095
##
## $seleccion
## diff lwr upr p adj
## a7r-a16r 0.1875 -3.658616 4.033616 0.9877660
## a8r-a16r 0.3825 -3.463616 4.228616 0.9503850
## a8r-a7r 0.1950 -3.651116 4.041116 0.9867766
# Tukey HSD pairwise comparisons for the Y2 model.
TukeyHSD(mod7)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Y2 ~ edad + seleccion, data = df6)
##
## $edad
## diff lwr upr p adj
## 40-35 22.000000 -10.715294 54.71529 0.1933412
## 45-35 30.776667 -1.938627 63.49196 0.0634860
## 50-35 17.333333 -15.381960 50.04863 0.3443919
## 45-40 8.776667 -23.938627 41.49196 0.7916534
## 50-40 -4.666667 -37.381960 28.04863 0.9576150
## 50-45 -13.443333 -46.158627 19.27196 0.5308535
##
## $seleccion
## diff lwr upr p adj
## a7r-a16r 3.75 -21.3622 28.8622 0.8927022
## a8r-a16r 11.08 -14.0322 36.1922 0.4197810
## a8r-a7r 7.33 -17.7822 32.4422 0.6624808
# Shapiro-Wilk normality test on the residuals of mod6 (response Y1).
shapiro.test(mod6$residuals)
##
## Shapiro-Wilk normality test
##
## data: mod6$residuals
## W = 0.92563, p-value = 0.3361
# Shapiro-Wilk normality test on the residuals of mod7 (response Y2).
shapiro.test(mod7$residuals)
##
## Shapiro-Wilk normality test
##
## data: mod7$residuals
## W = 0.95058, p-value = 0.6455
# Bartlett tests of homogeneity of variances across `edad` for both models.
# `$residuals` is written out in full: `$residual` only resolved through
# partial matching of list element names, unlike every other diagnostic here.
bartlett.test(mod6$residuals, df6$edad)
##
## Bartlett test of homogeneity of variances
##
## data: mod6$residuals and df6$edad
## Bartlett's K-squared = 5.2388, df = 3, p-value = 0.1551
bartlett.test(mod7$residuals, df6$edad)
##
## Bartlett test of homogeneity of variances
##
## data: mod7$residuals and df6$edad
## Bartlett's K-squared = 5.9648, df = 3, p-value = 0.1133
# Euclidean distances between the 12 rows, computed on the two numeric
# responses only. Selecting columns 1:4 sent the character columns through
# numeric coercion: `seleccion` became NA (with a coercion warning) and the
# `edad` labels were silently treated as numbers and mixed into the distance.
# NOTE(review): if age is meant to take part in the clustering, add it
# explicitly as a numeric column, e.g. as.numeric(df6$edad).
C <- dist(df6[, c("Y1", "Y2")])
dim(as.matrix(C))
## [1] 12 12
# Hierarchical clustering of the distance matrix `C` with Ward's criterion
# (ward.D2), drawn as a dendrogram. This overwrites the earlier `cluster`.
cluster <- hclust(C, method = "ward.D2")
plot(cluster)