# One-way ANOVA: petal length by species ----
# The formula refers to the iris columns directly, so the term is labelled
# "iris$Species" in every table below and in the TukeyHSD `which` argument.
A1 <- aov(formula = iris$Petal.Length ~ iris$Species)
anova(object = A1)
## Analysis of Variance Table
##
## Response: iris$Petal.Length
## Df Sum Sq Mean Sq F value Pr(>F)
## iris$Species 2 437.10 218.551 1180.2 < 2.2e-16 ***
## Residuals 147 27.22 0.185
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pairwise species comparisons (Tukey honest significant differences).
TukeyHSD(x = A1, which = "iris$Species")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = iris$Petal.Length ~ iris$Species)
##
## $`iris$Species`
## diff lwr upr p adj
## versicolor-setosa 2.798 2.59422 3.00178 0
## virginica-setosa 4.090 3.88622 4.29378 0
## virginica-versicolor 1.292 1.08822 1.49578 0
# Normality check on the model residuals, first through a named copy ...
medrano <- A1$residuals
shapiro.test(x = medrano)
##
## Shapiro-Wilk normality test
##
## data: medrano
## W = 0.98108, p-value = 0.03676
# ... and again on the residuals taken straight from the fit (same vector,
# same statistic; only the "data:" label differs).
shapiro.test(x = A1$residuals)
##
## Shapiro-Wilk normality test
##
## data: A1$residuals
## W = 0.98108, p-value = 0.03676
# Homogeneity of variances of the residuals across species.
# NOTE(review): both recorded p-values are below 0.05 (Shapiro 0.037,
# Bartlett 9.2e-13), so the ANOVA assumptions look doubtful for this response.
bartlett.test(x = A1$residuals, g = iris$Species)
##
## Bartlett test of homogeneity of variances
##
## data: A1$residuals and iris$Species
## Bartlett's K-squared = 55.423, df = 2, p-value = 9.229e-13
# One-way ANOVA: petal width by species ----
# Term label is "iris$Species" because the formula uses the columns directly.
A2 <- aov(formula = iris$Petal.Width ~ iris$Species)
anova(object = A2)
## Analysis of Variance Table
##
## Response: iris$Petal.Width
## Df Sum Sq Mean Sq F value Pr(>F)
## iris$Species 2 80.413 40.207 960.01 < 2.2e-16 ***
## Residuals 147 6.157 0.042
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pairwise species comparisons (Tukey honest significant differences).
TukeyHSD(x = A2, which = "iris$Species")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = iris$Petal.Width ~ iris$Species)
##
## $`iris$Species`
## diff lwr upr p adj
## versicolor-setosa 1.08 0.9830903 1.1769097 0
## virginica-setosa 1.78 1.6830903 1.8769097 0
## virginica-versicolor 0.70 0.6030903 0.7969097 0
# Normality of the residuals.
shapiro.test(x = A2$residuals)
##
## Shapiro-Wilk normality test
##
## data: A2$residuals
## W = 0.97217, p-value = 0.003866
# Homogeneity of variances of the residuals across species.
# NOTE(review): both recorded p-values are below 0.05 (Shapiro 0.0039,
# Bartlett 3.1e-09), so the ANOVA assumptions look doubtful for this response.
bartlett.test(x = A2$residuals, g = iris$Species)
##
## Bartlett test of homogeneity of variances
##
## data: A2$residuals and iris$Species
## Bartlett's K-squared = 39.213, df = 2, p-value = 3.055e-09
# One-way ANOVA: sepal length by species ----
# Term label is "iris$Species" because the formula uses the columns directly.
A3 <- aov(formula = iris$Sepal.Length ~ iris$Species)
anova(object = A3)
## Analysis of Variance Table
##
## Response: iris$Sepal.Length
## Df Sum Sq Mean Sq F value Pr(>F)
## iris$Species 2 63.212 31.606 119.26 < 2.2e-16 ***
## Residuals 147 38.956 0.265
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pairwise species comparisons (Tukey honest significant differences).
TukeyHSD(x = A3, which = "iris$Species")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = iris$Sepal.Length ~ iris$Species)
##
## $`iris$Species`
## diff lwr upr p adj
## versicolor-setosa 0.930 0.6862273 1.1737727 0
## virginica-setosa 1.582 1.3382273 1.8257727 0
## virginica-versicolor 0.652 0.4082273 0.8957727 0
# Normality of the residuals (recorded p-value 0.22: no evidence against it).
shapiro.test(x = A3$residuals)
##
## Shapiro-Wilk normality test
##
## data: A3$residuals
## W = 0.9879, p-value = 0.2189
# Homogeneity of variances of the residuals across species.
# NOTE(review): the recorded Bartlett p-value (0.00033) is below 0.05, so the
# equal-variance assumption looks doubtful for this response.
bartlett.test(x = A3$residuals, g = iris$Species)
##
## Bartlett test of homogeneity of variances
##
## data: A3$residuals and iris$Species
## Bartlett's K-squared = 16.006, df = 2, p-value = 0.0003345
# One-way ANOVA: sepal width by species ----
# Term label is "iris$Species" because the formula uses the columns directly.
A4 <- aov(formula = iris$Sepal.Width ~ iris$Species)
anova(object = A4)
## Analysis of Variance Table
##
## Response: iris$Sepal.Width
## Df Sum Sq Mean Sq F value Pr(>F)
## iris$Species 2 11.345 5.6725 49.16 < 2.2e-16 ***
## Residuals 147 16.962 0.1154
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pairwise species comparisons (Tukey honest significant differences).
TukeyHSD(x = A4, which = "iris$Species")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = iris$Sepal.Width ~ iris$Species)
##
## $`iris$Species`
## diff lwr upr p adj
## versicolor-setosa -0.658 -0.81885528 -0.4971447 0.0000000
## virginica-setosa -0.454 -0.61485528 -0.2931447 0.0000000
## virginica-versicolor 0.204 0.04314472 0.3648553 0.0087802
# Normality of the residuals (recorded p-value 0.32: no evidence against it).
shapiro.test(x = A4$residuals)
##
## Shapiro-Wilk normality test
##
## data: A4$residuals
## W = 0.98948, p-value = 0.323
# Homogeneity of variances of the residuals across species (recorded p-value
# 0.35: no evidence against equal variances).
bartlett.test(x = A4$residuals, g = iris$Species)
##
## Bartlett test of homogeneity of variances
##
## data: A4$residuals and iris$Species
## Bartlett's K-squared = 2.0911, df = 2, p-value = 0.3515
# MANOVA: the four measurements jointly, by species ----
# Response matrix with one column per measurement, in the order sepal length,
# sepal width, petal length, petal width.
Y1 <- cbind(
  iris$Sepal.Length,
  iris$Sepal.Width,
  iris$Petal.Length,
  iris$Petal.Width
)
mod_moav <- manova(Y1 ~ iris$Species)
summary(mod_moav)
## Df Pillai approx F num Df den Df Pr(>F)
## iris$Species 2 1.1919 53.466 8 290 < 2.2e-16 ***
## Residuals 147
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Residuals of the multivariate fit.
resid <- mod_moav$residuals
# Correlation matrix of the four measurements, rounded to three decimals and
# printed.
R <- round(cor(iris[, 1:4]), 3)
R
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length 1.000 -0.118 0.872 0.818
## Sepal.Width -0.118 1.000 -0.428 -0.366
## Petal.Length 0.872 -0.428 1.000 0.963
## Petal.Width 0.818 -0.366 0.963 1.000
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
Largo de pétalos con ancho de pétalos (hacer este mismo gráfico con todos los pares posibles)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
##
## first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Scatter plots of the six pairwise combinations of the four iris
# measurements, coloured by species, laid out together on one page.
#
# ggplot2 draws with grid graphics, so the base-graphics screen functions
# (split.screen()/screen()) cannot position these plots: each ggplot would
# simply be printed on a fresh full page. A grid layout viewport is used
# instead, and every plot is printed explicitly into its own layout cell via
# the `vp` argument of ggplot2's print method.
pair_plots <- list(
  iris %>%
    ggplot(aes(x = Petal.Length, y = Petal.Width, col = Species)) +
    geom_point(),
  iris %>%
    ggplot(aes(x = Petal.Length, y = Sepal.Length, col = Species)) +
    geom_point(),
  iris %>%
    ggplot(aes(x = Petal.Length, y = Sepal.Width, col = Species)) +
    geom_point(),
  iris %>%
    ggplot(aes(x = Sepal.Length, y = Sepal.Width, col = Species)) +
    geom_point(),
  iris %>%
    ggplot(aes(x = Sepal.Length, y = Petal.Width, col = Species)) +
    geom_point(),
  iris %>%
    ggplot(aes(x = Sepal.Width, y = Petal.Width, col = Species)) +
    geom_point()
)

# Grid size: three plots per row, as many rows as needed.
n_cols <- 3
n_rows <- ceiling(length(pair_plots) / n_cols)

grid::grid.newpage()
grid::pushViewport(
  grid::viewport(layout = grid::grid.layout(nrow = n_rows, ncol = n_cols))
)
for (i in seq_along(pair_plots)) {
  # Fill the layout row by row: plot i goes to cell (row, col).
  print(
    pair_plots[[i]],
    vp = grid::viewport(
      layout.pos.row = (i - 1) %/% n_cols + 1,
      layout.pos.col = (i - 1) %% n_cols + 1
    )
  )
}
# Leave the layout viewport so later grid drawing starts from the top level.
grid::popViewport()
Matriz de distancia
# Pairwise distances between the 150 flowers, computed on the four numeric
# measurement columns; the full matrix form is 150 x 150.
d <- dist(iris[, 1:4])
dim(as.matrix(d))
## [1] 150 150
Método de Ward (basado en la suma de cuadrados para agrupar)
# Hierarchical clustering of the distance matrix `d` with Ward's criterion
# (the "ward.D2" variant of hclust).
clust <- hclust(d, method = "ward.D2")

# Dendrogram, with a dashed red reference line at height 10 and boxes drawn
# around the three-cluster solution.
plot(clust)
abline(h = 10, col = "red", lty = 2)
rect.hclust(clust, k = 3)

# Cluster membership (1, 2 or 3) of every flower for the three-cluster cut.
grupos <- cutree(clust, k = 3)
Matriz de confusión: virginica es la especie con mayor confusión
# Number of flowers assigned to each cluster.
table(grupos)
## grupos
## 1 2 3
## 50 64 36
# Cross-tabulation of true species (rows) against cluster id (columns).
tbl <- table(iris$Species, grupos)
tbl
## grupos
## 1 2 3
## setosa 50 0 0
## versicolor 0 49 1
## virginica 0 15 35
# Percentage of flowers on the diagonal of the table.
# NOTE(review): this reads as an agreement rate only because cluster ids 1-3
# happen to line up with the species order in the table above; cluster
# numbering is arbitrary, so re-check the alignment if the data or method
# change.
100 * sum(diag(tbl)) / sum(tbl)
## [1] 89.33333