#### Análisis de Varianza en R
#### PARTE I - importar datos
# Import the iris measurements from a local CSV export.
# stringsAsFactors = TRUE keeps Species a factor on R >= 4.0, where the
# read.csv default is FALSE; later steps rely on that, e.g.
# pairs(iris, col = Species) and unclass(Species).
iris <- read.csv(
  "C:\\Users\\Administrator\\Desktop\\iris.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)
# NOTE(review): attach() is kept because the rest of the script refers to the
# columns by bare name (Sepal.Length, Species, ...). The attached copy is a
# snapshot: later changes to `iris` are not reflected in it.
attach(iris)
# Sanity checks on the imported data: column names, first rows, last rows
colnames(iris)
## [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
## [5] "Species"
head(iris, n = 6L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
tail(iris, n = 6L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 145 6.7 3.3 5.7 2.5 virginica
## 146 6.7 3.0 5.2 2.3 virginica
## 147 6.3 2.5 5.0 1.9 virginica
## 148 6.5 3.0 5.2 2.0 virginica
## 149 6.2 3.4 5.4 2.3 virginica
## 150 5.9 3.0 5.1 1.8 virginica
# install.packages("car")
library(car)
## Warning: package 'car' was built under R version 3.3.2
car::some(iris, n = 3) # three randomly chosen rows
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 57 6.3 3.3 4.7 1.6 versicolor
## 84 6.0 2.7 5.1 1.6 versicolor
## 94 5.0 2.3 3.3 1.0 versicolor
# Count missing values per column (is.na() gives a logical matrix, colSums()
# adds up the TRUEs column by column)
colSums(is.na(iris))
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 0 0 0 0 0
#### PARTE II
# Descriptive statistics
# Column means, overall and per species. Species are selected by label rather
# than by row position, so the result does not depend on the row order.
colMeans(iris[1:4]) # all
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 5.843333 3.057333 3.758000 1.199333
colMeans(iris[iris$Species == "setosa", 1:4]) # Iris setosa
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 5.006 3.428 1.462 0.246
colMeans(iris[iris$Species == "versicolor", 1:4]) # Iris versicolor
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 5.936 2.770 4.260 1.326
colMeans(iris[iris$Species == "virginica", 1:4]) # Iris virginica
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 6.588 2.974 5.552 2.026
# Median, standard deviation, variance, minimum and maximum of each of the
# four measurement columns (all rows)
vapply(iris[1:4], median, numeric(1))
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 5.80 3.00 4.35 1.30
vapply(iris[1:4], sd, numeric(1))
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 0.8280661 0.4358663 1.7652982 0.7622377
vapply(iris[1:4], var, numeric(1))
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 0.6856935 0.1899794 3.1162779 0.5810063
vapply(iris[1:4], min, numeric(1))
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 4.3 2.0 1.0 0.1
vapply(iris[1:4], max, numeric(1))
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 7.9 4.4 6.9 2.5
# Per-column summary. summary() has no na.rm argument (the one passed before
# was silently ignored); it reports the number of NAs itself when there are any.
summary(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# usando el paquete "psych"
# install.packages("psych")
library(psych)
## Warning: package 'psych' was built under R version 3.3.2
##
## Attaching package: 'psych'
## The following object is masked from 'package:car':
##
## logit
# psych:: is explicit because Hmisc, attached further down, masks describe()
psych::describe(Sepal.Length)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 150 5.84 0.83 5.8 5.81 1.04 4.3 7.9 3.6 0.31 -0.61 0.07
# rows 1-50 (Iris setosa); psych:: is explicit because Hmisc later masks describe()
psych::describe(iris[1:50, 1:4])
## vars n mean sd median trimmed mad min max range skew
## Sepal.Length 1 50 5.01 0.35 5.0 5.00 0.30 4.3 5.8 1.5 0.11
## Sepal.Width 2 50 3.43 0.38 3.4 3.42 0.37 2.3 4.4 2.1 0.04
## Petal.Length 3 50 1.46 0.17 1.5 1.46 0.15 1.0 1.9 0.9 0.10
## Petal.Width 4 50 0.25 0.11 0.2 0.24 0.00 0.1 0.6 0.5 1.18
## kurtosis se
## Sepal.Length -0.45 0.05
## Sepal.Width 0.60 0.05
## Petal.Length 0.65 0.02
## Petal.Width 1.26 0.01
# rows 51-100 (Iris versicolor); psych:: is explicit because Hmisc later masks describe()
psych::describe(iris[51:100, 1:4])
## vars n mean sd median trimmed mad min max range skew
## Sepal.Length 1 50 5.94 0.52 5.90 5.94 0.52 4.9 7.0 2.1 0.10
## Sepal.Width 2 50 2.77 0.31 2.80 2.78 0.30 2.0 3.4 1.4 -0.34
## Petal.Length 3 50 4.26 0.47 4.35 4.29 0.52 3.0 5.1 2.1 -0.57
## Petal.Width 4 50 1.33 0.20 1.30 1.32 0.22 1.0 1.8 0.8 -0.03
## kurtosis se
## Sepal.Length -0.69 0.07
## Sepal.Width -0.55 0.04
## Petal.Length -0.19 0.07
## Petal.Width -0.59 0.03
# Remaining measurements, all rows; psych:: is explicit because Hmisc, attached
# further down, masks describe()
psych::describe(Sepal.Width)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 150 3.06 0.44 3 3.04 0.44 2 4.4 2.4 0.31 0.14 0.04
psych::describe(Petal.Length)
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 150 3.76 1.77 4.35 3.76 1.85 1 6.9 5.9 -0.27 -1.42
## se
## X1 0.14
psych::describe(Petal.Width)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 150 1.2 0.76 1.3 1.18 1.04 0.1 2.5 2.4 -0.1 -1.36 0.06
# Quantiles (0%, 25%, 50%, 75%, 100%) of each measurement column
vapply(iris[1:4], quantile, numeric(5))
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 0% 4.3 2.0 1.00 0.1
## 25% 5.1 2.8 1.60 0.3
## 50% 5.8 3.0 4.35 1.3
## 75% 6.4 3.3 5.10 1.8
## 100% 7.9 4.4 6.90 2.5
quantile(Petal.Length) # quartiles of petal length
## 0% 25% 50% 75% 100%
## 1.00 1.60 4.35 5.10 6.90
# interquartile range (75th percentile minus 25th percentile)
IQR(Petal.Length)
## [1] 3.5
# usando el paquete "doBy"
# install.packages("doBy")
library(doBy)
## Warning: package 'doBy' was built under R version 3.3.2
# Mean and standard deviation of sepal length for each species
summaryBy(
  Sepal.Length ~ Species,
  data = iris,
  # the names "m" and "s" become the suffixes of the result columns
  FUN = function(values) c(m = mean(values), s = sd(values))
)
## Species Sepal.Length.m Sepal.Length.s
## 1 setosa 5.006 0.3524897
## 2 versicolor 5.936 0.5161711
## 3 virginica 6.588 0.6358796
# usando el paquete "Hmisc"
# install.packages("Hmisc")
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 3.3.2
## Loading required package: lattice
## Loading required package: survival
## Warning: package 'survival' was built under R version 3.3.2
## Loading required package: Formula
## Warning: package 'Formula' was built under R version 3.3.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.3.2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:psych':
##
## describe
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
Hmisc::describe(iris) # Hmisc's describe(), which now masks the psych one
## iris
##
## 5 Variables 150 Observations
## ---------------------------------------------------------------------------
## Sepal.Length
## n missing distinct Info Mean Gmd .05 .10
## 150 0 35 0.998 5.843 0.9462 4.600 4.800
## .25 .50 .75 .90 .95
## 5.100 5.800 6.400 6.900 7.255
##
## lowest : 4.3 4.4 4.5 4.6 4.7, highest: 7.3 7.4 7.6 7.7 7.9
## ---------------------------------------------------------------------------
## Sepal.Width
## n missing distinct Info Mean Gmd .05 .10
## 150 0 23 0.992 3.057 0.4872 2.345 2.500
## .25 .50 .75 .90 .95
## 2.800 3.000 3.300 3.610 3.800
##
## lowest : 2.0 2.2 2.3 2.4 2.5, highest: 3.9 4.0 4.1 4.2 4.4
## ---------------------------------------------------------------------------
## Petal.Length
## n missing distinct Info Mean Gmd .05 .10
## 150 0 43 0.998 3.758 1.979 1.30 1.40
## .25 .50 .75 .90 .95
## 1.60 4.35 5.10 5.80 6.10
##
## lowest : 1.0 1.1 1.2 1.3 1.4, highest: 6.3 6.4 6.6 6.7 6.9
## ---------------------------------------------------------------------------
## Petal.Width
## n missing distinct Info Mean Gmd .05 .10
## 150 0 22 0.99 1.199 0.8676 0.2 0.2
## .25 .50 .75 .90 .95
## 0.3 1.3 1.8 2.2 2.3
##
## lowest : 0.1 0.2 0.3 0.4 0.5, highest: 2.1 2.2 2.3 2.4 2.5
## ---------------------------------------------------------------------------
## Species
## n missing distinct
## 150 0 3
##
## Value setosa versicolor virginica
## Frequency 50 50 50
## Proportion 0.333 0.333 0.333
## ---------------------------------------------------------------------------
# usando el paquete "pastecs"
# install.packages("pastecs")
library(pastecs)
## Warning: package 'pastecs' was built under R version 3.3.2
## Loading required package: boot
##
## Attaching package: 'boot'
## The following object is masked from 'package:survival':
##
## aml
## The following object is masked from 'package:lattice':
##
## melanoma
## The following object is masked from 'package:psych':
##
## logit
## The following object is masked from 'package:car':
##
## logit
stat.desc(x = iris) # all rows; the Species column comes out as NA
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## nbr.val 150.00000000 150.00000000 150.0000000 150.00000000 NA
## nbr.null 0.00000000 0.00000000 0.0000000 0.00000000 NA
## nbr.na 0.00000000 0.00000000 0.0000000 0.00000000 NA
## min 4.30000000 2.00000000 1.0000000 0.10000000 NA
## max 7.90000000 4.40000000 6.9000000 2.50000000 NA
## range 3.60000000 2.40000000 5.9000000 2.40000000 NA
## sum 876.50000000 458.60000000 563.7000000 179.90000000 NA
## median 5.80000000 3.00000000 4.3500000 1.30000000 NA
## mean 5.84333333 3.05733333 3.7580000 1.19933333 NA
## SE.mean 0.06761132 0.03558833 0.1441360 0.06223645 NA
## CI.mean.0.95 0.13360085 0.07032302 0.2848146 0.12298004 NA
## var 0.68569351 0.18997942 3.1162779 0.58100626 NA
## std.dev 0.82806613 0.43586628 1.7652982 0.76223767 NA
## coef.var 0.14171126 0.14256420 0.4697441 0.63555114 NA
stat.desc(x = iris[1:50, ]) # Iris setosa (rows 1-50), all columns
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## nbr.val 50.00000000 50.0000000 50.00000000 50.00000000 NA
## nbr.null 0.00000000 0.0000000 0.00000000 0.00000000 NA
## nbr.na 0.00000000 0.0000000 0.00000000 0.00000000 NA
## min 4.30000000 2.3000000 1.00000000 0.10000000 NA
## max 5.80000000 4.4000000 1.90000000 0.60000000 NA
## range 1.50000000 2.1000000 0.90000000 0.50000000 NA
## sum 250.30000000 171.4000000 73.10000000 12.30000000 NA
## median 5.00000000 3.4000000 1.50000000 0.20000000 NA
## mean 5.00600000 3.4280000 1.46200000 0.24600000 NA
## SE.mean 0.04984957 0.0536078 0.02455980 0.01490377 NA
## CI.mean.0.95 0.10017646 0.1077289 0.04935476 0.02995025 NA
## var 0.12424898 0.1436898 0.03015918 0.01110612 NA
## std.dev 0.35248969 0.3790644 0.17366400 0.10538559 NA
## coef.var 0.07041344 0.1105789 0.11878522 0.42839670 NA
stat.desc(x = iris[1:50, 1:4]) # Iris setosa, measurement columns only
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## nbr.val 50.00000000 50.0000000 50.00000000 50.00000000
## nbr.null 0.00000000 0.0000000 0.00000000 0.00000000
## nbr.na 0.00000000 0.0000000 0.00000000 0.00000000
## min 4.30000000 2.3000000 1.00000000 0.10000000
## max 5.80000000 4.4000000 1.90000000 0.60000000
## range 1.50000000 2.1000000 0.90000000 0.50000000
## sum 250.30000000 171.4000000 73.10000000 12.30000000
## median 5.00000000 3.4000000 1.50000000 0.20000000
## mean 5.00600000 3.4280000 1.46200000 0.24600000
## SE.mean 0.04984957 0.0536078 0.02455980 0.01490377
## CI.mean.0.95 0.10017646 0.1077289 0.04935476 0.02995025
## var 0.12424898 0.1436898 0.03015918 0.01110612
## std.dev 0.35248969 0.3790644 0.17366400 0.10538559
## coef.var 0.07041344 0.1105789 0.11878522 0.42839670
round(x = stat.desc(x = iris[1:50, 1:4]), digits = 2) # rounded to two decimals
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## nbr.val 50.00 50.00 50.00 50.00
## nbr.null 0.00 0.00 0.00 0.00
## nbr.na 0.00 0.00 0.00 0.00
## min 4.30 2.30 1.00 0.10
## max 5.80 4.40 1.90 0.60
## range 1.50 2.10 0.90 0.50
## sum 250.30 171.40 73.10 12.30
## median 5.00 3.40 1.50 0.20
## mean 5.01 3.43 1.46 0.25
## SE.mean 0.05 0.05 0.02 0.01
## CI.mean.0.95 0.10 0.11 0.05 0.03
## var 0.12 0.14 0.03 0.01
## std.dev 0.35 0.38 0.17 0.11
## coef.var 0.07 0.11 0.12 0.43
#### PARTE III Tabulando datos
# Discretise each of the four measurement columns into three equal-width bins
# labelled small / medium / large, stored as ordered factors; Species is
# carried over unchanged. The argument is spelled out as `ordered_result`
# (previously `ordered`, which only worked through partial matching).
iris_discrete <- data.frame(
  lapply(
    iris[1:4],
    cut,
    breaks = 3,
    labels = c("small", "medium", "large"),
    ordered_result = TRUE
  ),
  Species = iris$Species
)
# Descriptive statistics for iris_discrete: first rows
head(iris_discrete, n = 6L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 small medium small small setosa
## 2 small medium small small setosa
## 3 small medium small small setosa
## 4 small medium small small setosa
## 5 small medium small small setosa
## 6 small large small small setosa
print(iris_discrete) # full listing of the discretised data
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 small medium small small setosa
## 2 small medium small small setosa
## 3 small medium small small setosa
## 4 small medium small small setosa
## 5 small medium small small setosa
## 6 small large small small setosa
## 7 small medium small small setosa
## 8 small medium small small setosa
## 9 small medium small small setosa
## 10 small medium small small setosa
## 11 small large small small setosa
## 12 small medium small small setosa
## 13 small medium small small setosa
## 14 small medium small small setosa
## 15 medium large small small setosa
## 16 medium large small small setosa
## 17 small large small small setosa
## 18 small medium small small setosa
## 19 medium large small small setosa
## 20 small large small small setosa
## 21 small medium small small setosa
## 22 small large small small setosa
## 23 small medium small small setosa
## 24 small medium small small setosa
## 25 small medium small small setosa
## 26 small medium small small setosa
## 27 small medium small small setosa
## 28 small medium small small setosa
## 29 small medium small small setosa
## 30 small medium small small setosa
## 31 small medium small small setosa
## 32 small medium small small setosa
## 33 small large small small setosa
## 34 small large small small setosa
## 35 small medium small small setosa
## 36 small medium small small setosa
## 37 small medium small small setosa
## 38 small medium small small setosa
## 39 small medium small small setosa
## 40 small medium small small setosa
## 41 small medium small small setosa
## 42 small small small small setosa
## 43 small medium small small setosa
## 44 small medium small small setosa
## 45 small large small small setosa
## 46 small medium small small setosa
## 47 small large small small setosa
## 48 small medium small small setosa
## 49 small large small small setosa
## 50 small medium small small setosa
## 51 large medium medium medium versicolor
## 52 medium medium medium medium versicolor
## 53 large medium medium medium versicolor
## 54 small small medium medium versicolor
## 55 medium small medium medium versicolor
## 56 medium small medium medium versicolor
## 57 medium medium medium medium versicolor
## 58 small small medium medium versicolor
## 59 medium medium medium medium versicolor
## 60 small small medium medium versicolor
## 61 small small medium medium versicolor
## 62 medium medium medium medium versicolor
## 63 medium small medium medium versicolor
## 64 medium medium medium medium versicolor
## 65 medium medium medium medium versicolor
## 66 medium medium medium medium versicolor
## 67 medium medium medium medium versicolor
## 68 medium small medium medium versicolor
## 69 medium small medium medium versicolor
## 70 medium small medium medium versicolor
## 71 medium medium medium large versicolor
## 72 medium small medium medium versicolor
## 73 medium small medium medium versicolor
## 74 medium small medium medium versicolor
## 75 medium medium medium medium versicolor
## 76 medium medium medium medium versicolor
## 77 large small medium medium versicolor
## 78 medium medium large medium versicolor
## 79 medium medium medium medium versicolor
## 80 medium small medium medium versicolor
## 81 small small medium medium versicolor
## 82 small small medium medium versicolor
## 83 medium small medium medium versicolor
## 84 medium small large medium versicolor
## 85 small medium medium medium versicolor
## 86 medium medium medium medium versicolor
## 87 medium medium medium medium versicolor
## 88 medium small medium medium versicolor
## 89 medium medium medium medium versicolor
## 90 small small medium medium versicolor
## 91 small small medium medium versicolor
## 92 medium medium medium medium versicolor
## 93 medium small medium medium versicolor
## 94 small small medium medium versicolor
## 95 medium small medium medium versicolor
## 96 medium medium medium medium versicolor
## 97 medium medium medium medium versicolor
## 98 medium medium medium medium versicolor
## 99 small small medium medium versicolor
## 100 medium small medium medium versicolor
## 101 medium medium large large virginica
## 102 medium small large large virginica
## 103 large medium large large virginica
## 104 medium medium large large virginica
## 105 medium medium large large virginica
## 106 large medium large large virginica
## 107 small small medium medium virginica
## 108 large medium large large virginica
## 109 medium small large large virginica
## 110 large medium large large virginica
## 111 medium medium large large virginica
## 112 medium small large large virginica
## 113 large medium large large virginica
## 114 medium small large large virginica
## 115 medium small large large virginica
## 116 medium medium large large virginica
## 117 medium medium large large virginica
## 118 large large large large virginica
## 119 large small large large virginica
## 120 medium small large medium virginica
## 121 large medium large large virginica
## 122 medium small medium large virginica
## 123 large small large large virginica
## 124 medium small medium large virginica
## 125 medium medium large large virginica
## 126 large medium large large virginica
## 127 medium small medium large virginica
## 128 medium medium medium large virginica
## 129 medium small large large virginica
## 130 large medium large medium virginica
## 131 large small large large virginica
## 132 large large large large virginica
## 133 medium small large large virginica
## 134 medium small large medium virginica
## 135 medium small large medium virginica
## 136 large medium large large virginica
## 137 medium medium large large virginica
## 138 medium medium large large virginica
## 139 medium medium medium large virginica
## 140 large medium large large virginica
## 141 medium medium large large virginica
## 142 large medium large large virginica
## 143 medium small large large virginica
## 144 large medium large large virginica
## 145 medium medium large large virginica
## 146 medium medium large large virginica
## 147 medium small large large virginica
## 148 medium medium large large virginica
## 149 medium medium large large virginica
## 150 medium medium large large virginica
summary(object = iris_discrete) # counts per level for every column
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## small :59 small :47 small :50 small :50 setosa :50
## medium:71 medium:88 medium:54 medium:54 versicolor:50
## large :20 large :15 large :46 large :46 virginica :50
# Charts for iris_discrete: a bar chart followed by a pie chart of the level
# counts, for sepal length, sepal width and petal length in turn
invisible(lapply(
  c("Sepal.Length", "Sepal.Width", "Petal.Length"),
  function(measure) {
    level_counts <- table(iris_discrete[[measure]])
    barplot(level_counts)
    pie(level_counts)
  }
))

# Cross-tabulations of the discretised variables
table(iris_discrete[["Sepal.Length"]], iris_discrete[["Sepal.Width"]])
##
## small medium large
## small 12 37 10
## medium 31 37 3
## large 4 14 2
table(iris_discrete[["Petal.Length"]], iris_discrete[["Petal.Width"]])
##
## small medium large
## small 50 0 0
## medium 0 48 6
## large 0 6 40
table(iris_discrete[["Petal.Length"]], iris_discrete[["Species"]])
##
## setosa versicolor virginica
## small 50 0 0
## medium 0 48 6
## large 0 2 44
#### PARTE IV gráficos
# Scatterplot matrices
plot(iris)

pairs(iris, col = Species) # all columns, points coloured by species

pairs(iris[, 1:4], col = Species) # measurement columns only

# one matrix per block of 50 rows, each drawn in its own colour
invisible(Map(
  function(rows, colour) pairs(iris[rows, 1:4], col = colour),
  list(1:50, 51:100, 101:150),
  c("black", "red", "green")
))


# Scatterplot matrix with Pearson correlation
#
# Panel function for pairs(): writes the Pearson correlation of x and y in the
# centre of the current panel. The sign is kept, so negative correlations are
# shown as negative.
#   x, y  numeric vectors supplied by pairs() for the current panel
#   ...   further arguments passed by pairs(); accepted and ignored
# Returns the formatted label invisibly.
panel.pearson <- function(x, y, ...) {
  usr <- par("usr") # c(x1, x2, y1, y2): plotting-region limits of this panel
  horizontal <- (usr[1] + usr[2]) / 2
  vertical <- (usr[3] + usr[4]) / 2
  label <- format(cor(x, y), digits = 2)
  text(horizontal, vertical, label)
  invisible(label)
}
# lower panels: scatterplots filled by species; upper panels: panel.pearson
pairs(
  iris[, 1:4],
  upper.panel = panel.pearson,
  main = "Scatterplot matrix with Pearson's correlation for Iris Data",
  pch = 21,
  bg = c("red", "green3", "blue")[unclass(Species)]
)

# upper panels only, with short bold labels on the diagonal
pairs(
  iris[, 1:4],
  labels = c("SL", "SW", "PL", "PW"),
  lower.panel = NULL,
  main = "Scatterplot matrix for Iris Data [3 species]",
  pch = 21,
  bg = c("red", "green3", "blue")[unclass(Species)],
  font.labels = 2,
  cex.labels = 4.5
)

# prueba de correlacion (cor.test)
# Datos de iris data como matrix
library(car)
# the four measurement columns as a numeric matrix
iris_matrix <- as.matrix(iris[, seq_len(4)])
# Pearson correlation matrix over all rows
cor(x = iris_matrix, method = "pearson")
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length 1.0000000 -0.1175698 0.8717538 0.8179411
## Sepal.Width -0.1175698 1.0000000 -0.4284401 -0.3661259
## Petal.Length 0.8717538 -0.4284401 1.0000000 0.9628654
## Petal.Width 0.8179411 -0.3661259 0.9628654 1.0000000
scatterplot(Sepal.Length ~ Sepal.Width)

# tests whether the correlation differs significantly from zero
cor.test(x = Sepal.Length, y = Sepal.Width)
##
## Pearson's product-moment correlation
##
## data: Sepal.Length and Sepal.Width
## t = -1.4403, df = 148, p-value = 0.1519
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.27269325 0.04351158
## sample estimates:
## cor
## -0.1175698
scatterplot(Petal.Length ~ Petal.Width)

# this correlation is significant (see the p-value in the output below)
cor.test(x = Petal.Length, y = Petal.Width)
##
## Pearson's product-moment correlation
##
## data: Petal.Length and Petal.Width
## t = 43.387, df = 148, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9490525 0.9729853
## sample estimates:
## cor
## 0.9628654
# correlation within rows 1-50 (Iris setosa)
scatterplot(Sepal.Length[1:50] ~ Sepal.Width[1:50])

cor.test(x = Sepal.Length[1:50], y = Sepal.Width[1:50])
##
## Pearson's product-moment correlation
##
## data: Sepal.Length[1:50] and Sepal.Width[1:50]
## t = 7.6807, df = 48, p-value = 6.71e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5851391 0.8460314
## sample estimates:
## cor
## 0.7425467
scatterplot(Petal.Length[1:50] ~ Petal.Width[1:50])

# this one is significant
cor.test(x = Petal.Length[1:50], y = Petal.Width[1:50])
##
## Pearson's product-moment correlation
##
## data: Petal.Length[1:50] and Petal.Width[1:50]
## t = 2.4354, df = 48, p-value = 0.01864
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.05870091 0.55842995
## sample estimates:
## cor
## 0.33163
# Same test with the defaults spelled out. The stray positional "spearman" was
# dropped: it did not select the method (the recorded output is Pearson's), it
# was bound to the next unnamed parameter instead.
cor.test(Petal.Length[1:50], Petal.Width[1:50],
         alternative = "two.sided", method = "pearson")
##
## Pearson's product-moment correlation
##
## data: Petal.Length[1:50] and Petal.Width[1:50]
## t = 2.4354, df = 48, p-value = 0.01864
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.05870091 0.55842995
## sample estimates:
## cor
## 0.33163
# otra alternativa para la matriz de dispersión
# install.packages("gpairs")
library(gpairs)
## Warning: package 'gpairs' was built under R version 3.3.2
gpairs(iris) # all five columns
## Loading required package: grid

gpairs(iris[, 1:4]) # measurement columns only

gpairs(iris[101:150, 1:4]) # Iris virginica

# single scatterplot
plot(x = Sepal.Width, y = Sepal.Length)

# the same scatterplot with custom symbol, colours, title and axis labels
plot(
  x = Sepal.Width,
  y = Sepal.Length,
  main = "Sepal Length Against Sepal Width\nfor Iris Dataset", # "\n" splits the title
  xlab = "Sepal width (cm)",
  ylab = "Sepal length (cm)",
  pch = 21,
  col = "red3",
  bg = "black",
  cex = 1.1
)

# Box plots
# basic versions: whole data frame, measurement columns, selected vectors
boxplot(x = iris)

boxplot(x = iris[, 1:4])

boxplot(Sepal.Length, Petal.Width)

boxplot(Sepal.Length, Sepal.Width, Petal.Length, Petal.Width)

# same four boxes, one colour each
boxplot(Sepal.Length, Sepal.Width, Petal.Length, Petal.Width, col = c(2, 3, 4, 7))

# Box plot of sepal length by species.
# The argument is `varwidth` (box widths proportional to the square roots of
# the group sizes); the previous spelling `var.width` is not a boxplot argument.
boxplot(Sepal.Length ~ Species,
        varwidth = TRUE,
        col = c("red", "green", "blue"),
        ylab = "Sepal length (cm)",
        xlab = "Species") # the same call can be repeated for the other measurements

# Two plots side by side: index plot of a measurement next to its
# per-species box plot
par(mfrow = c(1, 2))
plot(x = Petal.Length)
boxplot(Petal.Length ~ Species, col = c(2, 3, 4))

par(mfrow = c(1, 2))
plot(x = Petal.Width)
boxplot(Petal.Width ~ Species, col = c(2, 3, 4))

par(mfrow = c(1, 2))
plot(x = Sepal.Length)
boxplot(Sepal.Length ~ Species, col = c(2, 3, 4)) # try Sepal.Width as well

# diagrama de caja con el paquete "ggplot2"
# install.packages("ggplot2")
library(ggplot2)
# ggplot2 box plots per species; each y-axis label names the variable that is
# actually plotted (the second and third plots used to say "sepal length")
ggplot(iris, aes(x = Species, y = Sepal.Length)) +
  geom_boxplot(fill = "grey80", colour = "blue") +
  scale_x_discrete() + xlab("Species") + ylab("sepal length, cm")

ggplot(iris, aes(x = Species, y = Sepal.Width)) +
  geom_boxplot(fill = "grey80", colour = "blue") +
  scale_x_discrete() + xlab("Species") + ylab("sepal width, cm")

ggplot(iris, aes(x = Species, y = Petal.Length)) +
  geom_boxplot(fill = "grey80", colour = "blue") +
  scale_x_discrete() + xlab("Species") + ylab("petal length, cm")

# Histograms and kernel density plots
par(mfrow = c(1, 1))
hist(x = Petal.Length[1:50]) # Iris setosa rows only

hist(x = Petal.Length[1:50], col = "#CCCCFF", breaks = 10)

plot(density(x = Petal.Length)) # all 150 rows

plot(density(x = Petal.Length[1:50]))

# Petal length histograms for the three species
# NOTE(review): the next four plots repeat the four above unchanged
par(mfrow = c(1, 1))
hist(x = Petal.Length[1:50]) # Iris setosa

hist(x = Petal.Length[1:50], col = "#CCCCFF", breaks = 10)

plot(density(x = Petal.Length))

plot(density(x = Petal.Length[1:50]))

hist(x = Petal.Length[51:100]) # Iris versicolor

hist(x = Petal.Length[51:100], col = "#CCCCFF", breaks = 10)

plot(density(x = Petal.Length)) # all 150 rows, not just versicolor

plot(density(x = Petal.Length[51:100]))

# the same four versicolor plots on one 2 x 2 page
par(mfrow = c(2, 2))
hist(x = Petal.Length[51:100]) # Iris versicolor
hist(x = Petal.Length[51:100], col = "#CCCCFF", breaks = 10)
plot(density(x = Petal.Length))
plot(density(x = Petal.Length[51:100]))

# NOTE(review): only three virginica plots follow, so the first sepal-width
# histogram lands in the fourth cell of this 2 x 2 page
hist(x = Petal.Length[101:150]) # Iris virginica
hist(x = Petal.Length[101:150], col = "#CCCCFF", breaks = 10)
plot(density(x = Petal.Length))
# Sepal width, rows 1-50
hist(x = Sepal.Width[1:50])

hist(x = Sepal.Width[1:50], col = "#CCCCFF", breaks = 10)
plot(density(x = Sepal.Width)) # all 150 rows
# one row, two columns
par(mfrow = c(1, 2))

hist(x = Petal.Length[1:50]) # setosa only
hist(x = Petal.Length[51:100]) # versicolor

# two rows, two columns: the four measurements of Iris virginica
par(mfrow = c(2, 2))
hist(x = Sepal.Length[101:150])
hist(x = Sepal.Width[101:150])
hist(x = Petal.Length[101:150])
hist(x = Petal.Width[101:150])

# Normal Q-Q plot, box plot, histogram and kernel density of sepal length for
# rows 101-150 (Iris virginica), on one 2 x 2 page with an overall title
par(mfrow = c(2, 2))
qqnorm(y = Sepal.Length[101:150], col = "purple")
qqline(y = Sepal.Length[101:150], col = "red")
boxplot(x = Sepal.Length[101:150], col = "green", main = "Boxplot")
hist(x = Sepal.Length[101:150], main = "Histogram", col = "#CCCCFF", breaks = 10)
plot(density(x = Sepal.Length[101:150]), main = " Kernel density")
title(main = "Iris virginica: Sepal Length", outer = TRUE, line = -1.5, cex.main = 2)

# The same page for sepal width of Iris virginica
par(mfrow = c(2, 2))
qqnorm(y = Sepal.Width[101:150], col = "purple")
qqline(y = Sepal.Width[101:150], col = "red")
boxplot(x = Sepal.Width[101:150], main = "Boxplot ")
hist(x = Sepal.Width[101:150], main = "Histogram", col = "#CCCCFF", breaks = 10)
plot(density(x = Sepal.Width[101:150]), main = " Kernel density")
title(main = "Iris virginica: Sepal Width", outer = TRUE, line = -1.5, cex.main = 2)

# The same 2 x 2 page written against a generic `x`, here set to Petal.Length,
# so another variable can be substituted by changing one line
x <- Petal.Length
par(mfrow = c(2, 2))
qqnorm(x[101:150], col = "purple")
qqline(x[101:150], col = "red")
boxplot(x[101:150], main = "Boxplot", col = "green")
hist(x[101:150], breaks = 10, col = "#CCCCFF", main = "Histogram")
plot(density(x[101:150]), main = " Kernel density", col = "red")
# overall title placed and sized like the two sepal pages (line = -1.5,
# cex.main = 2) instead of 25 lines down the page at three times the size
title("Iris virginica: Petal Length", line = -1.5, outer = TRUE, cex.main = 2)

#### ANOVA
# cargar los datos iris incluidos en R
library(car)
# Load the iris data set by name and print all of it.
# NOTE(review): this replaces the `iris` object read from the CSV; the columns
# made available earlier by attach() presumably still come from the CSV copy.
data("iris")
print(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
## 11 5.4 3.7 1.5 0.2 setosa
## 12 4.8 3.4 1.6 0.2 setosa
## 13 4.8 3.0 1.4 0.1 setosa
## 14 4.3 3.0 1.1 0.1 setosa
## 15 5.8 4.0 1.2 0.2 setosa
## 16 5.7 4.4 1.5 0.4 setosa
## 17 5.4 3.9 1.3 0.4 setosa
## 18 5.1 3.5 1.4 0.3 setosa
## 19 5.7 3.8 1.7 0.3 setosa
## 20 5.1 3.8 1.5 0.3 setosa
## 21 5.4 3.4 1.7 0.2 setosa
## 22 5.1 3.7 1.5 0.4 setosa
## 23 4.6 3.6 1.0 0.2 setosa
## 24 5.1 3.3 1.7 0.5 setosa
## 25 4.8 3.4 1.9 0.2 setosa
## 26 5.0 3.0 1.6 0.2 setosa
## 27 5.0 3.4 1.6 0.4 setosa
## 28 5.2 3.5 1.5 0.2 setosa
## 29 5.2 3.4 1.4 0.2 setosa
## 30 4.7 3.2 1.6 0.2 setosa
## 31 4.8 3.1 1.6 0.2 setosa
## 32 5.4 3.4 1.5 0.4 setosa
## 33 5.2 4.1 1.5 0.1 setosa
## 34 5.5 4.2 1.4 0.2 setosa
## 35 4.9 3.1 1.5 0.2 setosa
## 36 5.0 3.2 1.2 0.2 setosa
## 37 5.5 3.5 1.3 0.2 setosa
## 38 4.9 3.6 1.4 0.1 setosa
## 39 4.4 3.0 1.3 0.2 setosa
## 40 5.1 3.4 1.5 0.2 setosa
## 41 5.0 3.5 1.3 0.3 setosa
## 42 4.5 2.3 1.3 0.3 setosa
## 43 4.4 3.2 1.3 0.2 setosa
## 44 5.0 3.5 1.6 0.6 setosa
## 45 5.1 3.8 1.9 0.4 setosa
## 46 4.8 3.0 1.4 0.3 setosa
## 47 5.1 3.8 1.6 0.2 setosa
## 48 4.6 3.2 1.4 0.2 setosa
## 49 5.3 3.7 1.5 0.2 setosa
## 50 5.0 3.3 1.4 0.2 setosa
## 51 7.0 3.2 4.7 1.4 versicolor
## 52 6.4 3.2 4.5 1.5 versicolor
## 53 6.9 3.1 4.9 1.5 versicolor
## 54 5.5 2.3 4.0 1.3 versicolor
## 55 6.5 2.8 4.6 1.5 versicolor
## 56 5.7 2.8 4.5 1.3 versicolor
## 57 6.3 3.3 4.7 1.6 versicolor
## 58 4.9 2.4 3.3 1.0 versicolor
## 59 6.6 2.9 4.6 1.3 versicolor
## 60 5.2 2.7 3.9 1.4 versicolor
## 61 5.0 2.0 3.5 1.0 versicolor
## 62 5.9 3.0 4.2 1.5 versicolor
## 63 6.0 2.2 4.0 1.0 versicolor
## 64 6.1 2.9 4.7 1.4 versicolor
## 65 5.6 2.9 3.6 1.3 versicolor
## 66 6.7 3.1 4.4 1.4 versicolor
## 67 5.6 3.0 4.5 1.5 versicolor
## 68 5.8 2.7 4.1 1.0 versicolor
## 69 6.2 2.2 4.5 1.5 versicolor
## 70 5.6 2.5 3.9 1.1 versicolor
## 71 5.9 3.2 4.8 1.8 versicolor
## 72 6.1 2.8 4.0 1.3 versicolor
## 73 6.3 2.5 4.9 1.5 versicolor
## 74 6.1 2.8 4.7 1.2 versicolor
## 75 6.4 2.9 4.3 1.3 versicolor
## 76 6.6 3.0 4.4 1.4 versicolor
## 77 6.8 2.8 4.8 1.4 versicolor
## 78 6.7 3.0 5.0 1.7 versicolor
## 79 6.0 2.9 4.5 1.5 versicolor
## 80 5.7 2.6 3.5 1.0 versicolor
## 81 5.5 2.4 3.8 1.1 versicolor
## 82 5.5 2.4 3.7 1.0 versicolor
## 83 5.8 2.7 3.9 1.2 versicolor
## 84 6.0 2.7 5.1 1.6 versicolor
## 85 5.4 3.0 4.5 1.5 versicolor
## 86 6.0 3.4 4.5 1.6 versicolor
## 87 6.7 3.1 4.7 1.5 versicolor
## 88 6.3 2.3 4.4 1.3 versicolor
## 89 5.6 3.0 4.1 1.3 versicolor
## 90 5.5 2.5 4.0 1.3 versicolor
## 91 5.5 2.6 4.4 1.2 versicolor
## 92 6.1 3.0 4.6 1.4 versicolor
## 93 5.8 2.6 4.0 1.2 versicolor
## 94 5.0 2.3 3.3 1.0 versicolor
## 95 5.6 2.7 4.2 1.3 versicolor
## 96 5.7 3.0 4.2 1.2 versicolor
## 97 5.7 2.9 4.2 1.3 versicolor
## 98 6.2 2.9 4.3 1.3 versicolor
## 99 5.1 2.5 3.0 1.1 versicolor
## 100 5.7 2.8 4.1 1.3 versicolor
## 101 6.3 3.3 6.0 2.5 virginica
## 102 5.8 2.7 5.1 1.9 virginica
## 103 7.1 3.0 5.9 2.1 virginica
## 104 6.3 2.9 5.6 1.8 virginica
## 105 6.5 3.0 5.8 2.2 virginica
## 106 7.6 3.0 6.6 2.1 virginica
## 107 4.9 2.5 4.5 1.7 virginica
## 108 7.3 2.9 6.3 1.8 virginica
## 109 6.7 2.5 5.8 1.8 virginica
## 110 7.2 3.6 6.1 2.5 virginica
## 111 6.5 3.2 5.1 2.0 virginica
## 112 6.4 2.7 5.3 1.9 virginica
## 113 6.8 3.0 5.5 2.1 virginica
## 114 5.7 2.5 5.0 2.0 virginica
## 115 5.8 2.8 5.1 2.4 virginica
## 116 6.4 3.2 5.3 2.3 virginica
## 117 6.5 3.0 5.5 1.8 virginica
## 118 7.7 3.8 6.7 2.2 virginica
## 119 7.7 2.6 6.9 2.3 virginica
## 120 6.0 2.2 5.0 1.5 virginica
## 121 6.9 3.2 5.7 2.3 virginica
## 122 5.6 2.8 4.9 2.0 virginica
## 123 7.7 2.8 6.7 2.0 virginica
## 124 6.3 2.7 4.9 1.8 virginica
## 125 6.7 3.3 5.7 2.1 virginica
## 126 7.2 3.2 6.0 1.8 virginica
## 127 6.2 2.8 4.8 1.8 virginica
## 128 6.1 3.0 4.9 1.8 virginica
## 129 6.4 2.8 5.6 2.1 virginica
## 130 7.2 3.0 5.8 1.6 virginica
## 131 7.4 2.8 6.1 1.9 virginica
## 132 7.9 3.8 6.4 2.0 virginica
## 133 6.4 2.8 5.6 2.2 virginica
## 134 6.3 2.8 5.1 1.5 virginica
## 135 6.1 2.6 5.6 1.4 virginica
## 136 7.7 3.0 6.1 2.3 virginica
## 137 6.3 3.4 5.6 2.4 virginica
## 138 6.4 3.1 5.5 1.8 virginica
## 139 6.0 3.0 4.8 1.8 virginica
## 140 6.9 3.1 5.4 2.1 virginica
## 141 6.7 3.1 5.6 2.4 virginica
## 142 6.9 3.1 5.1 2.3 virginica
## 143 5.8 2.7 5.1 1.9 virginica
## 144 6.8 3.2 5.9 2.3 virginica
## 145 6.7 3.3 5.7 2.5 virginica
## 146 6.7 3.0 5.2 2.3 virginica
## 147 6.3 2.5 5.0 1.9 virginica
## 148 6.5 3.0 5.2 2.0 virginica
## 149 6.2 3.4 5.4 2.3 virginica
## 150 5.9 3.0 5.1 1.8 virginica
# Sepal.Length by Species in a 1 x 2 layout: design plot (per-level means,
# the plot.design default) beside the per-species box plots.
# `data = iris` is passed explicitly so the columns are taken from the data
# frame itself instead of relying on the attach()ed search path, matching
# the cdplot() call below.
par(mfrow = c(1, 2))
plot.design(Sepal.Length ~ Species, data = iris)
boxplot(Sepal.Length ~ Species, data = iris)

# Back to a single panel for the conditional density of Species given
# Sepal.Length.
par(mfrow = c(1, 1))
cdplot(Species ~ Sepal.Length, data = iris)

# Export the data frame as a tab-separated text file.
write.table(
  x = iris,
  file = "C:/Users/Administrator/Desktop/irisCOMPUTER.txt",
  sep = "\t"
)
# install.packages("xlsx")
library(xlsx)
## Warning: package 'xlsx' was built under R version 3.3.2
## Loading required package: rJava
## Warning: package 'rJava' was built under R version 3.3.2
## Loading required package: xlsxjars
## Warning: package 'xlsxjars' was built under R version 3.3.2
# Save the iris data frame as an Excel workbook (xlsx package).
write.xlsx(
  x = iris,
  file = "C:/Users/Administrator/Desktop/irisCOMPUTER.xlsx"
)
# One-way ANOVA: Petal.Width explained by Species.
# Fit the linear model once, then print its analysis-of-variance table.
model1 <- lm(formula = Petal.Width ~ Species, data = iris)
anova(object = model1)
## Analysis of Variance Table
##
## Response: Petal.Width
## Df Sum Sq Mean Sq F value Pr(>F)
## Species 2 80.413 40.207 960.01 < 2.2e-16 ***
## Residuals 147 6.157 0.042
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Coefficient table, residual summary and overall fit for model1.
summary(object = model1)
##
## Call:
## lm(formula = Petal.Width ~ Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.626 -0.126 -0.026 0.154 0.474
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.24600 0.02894 8.50 1.96e-14 ***
## Speciesversicolor 1.08000 0.04093 26.39 < 2e-16 ***
## Speciesvirginica 1.78000 0.04093 43.49 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2047 on 147 degrees of freedom
## Multiple R-squared: 0.9289, Adjusted R-squared: 0.9279
## F-statistic: 960 on 2 and 147 DF, p-value: < 2.2e-16
# Diagnostic plots produced by the plot method for the fitted lm object.
plot(x = model1)




# install.packages("multcomp")
library(multcomp)
## Warning: package 'multcomp' was built under R version 3.3.2
## Loading required package: mvtnorm
## Warning: package 'mvtnorm' was built under R version 3.3.2
## Loading required package: TH.data
## Warning: package 'TH.data' was built under R version 3.3.2
## Loading required package: MASS
##
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
# Contrast matrix for the pairwise species comparisons on model1.
# One row per comparison; the three columns line up with the model
# coefficients (intercept, versicolor, virginica), so the intercept column
# is always 0.
KT <- matrix(
  c(
    0, 1, 0,
    0, 0, 1,
    0, 1, -1
  ),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(
    c("versicolor-Setosa", "virginica-Setosa", "versicolor-virginica"),
    NULL
  )
)
# Simultaneous tests of the linear hypotheses in KT against model1
# (multcomp::glht).
summary(glht(model = model1, linfct = KT))
##
## Simultaneous Tests for General Linear Hypotheses
##
## Fit: lm(formula = Petal.Width ~ Species, data = iris)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## versicolor-Setosa == 0 1.08000 0.04093 26.39 <2e-16 ***
## virginica-Setosa == 0 1.78000 0.04093 43.49 <2e-16 ***
## versicolor-virginica == 0 -0.70000 0.04093 -17.10 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
# Plot: 1 x 2 layout with the glht contrasts for model1 in the first panel
# and the Petal.Width by Species box plots in the second.
par(mfrow = c(1, 2))
plot(glht(model = model1, linfct = KT))
# NOTE(review): the original carried a trailing "swap" remark on the box
# plot call; its intent is unclear.
boxplot(Petal.Width ~ Species, data = iris)

# Levene's test for homogeneity of variances across the species groups
# install.packages("lawstat")
library(lawstat)
## Warning: package 'lawstat' was built under R version 3.3.2
## Loading required package: Kendall
## Warning: package 'Kendall' was built under R version 3.3.2
## Loading required package: VGAM
## Warning: package 'VGAM' was built under R version 3.3.2
## Loading required package: stats4
## Loading required package: splines
##
## Attaching package: 'VGAM'
## The following objects are masked from 'package:boot':
##
## logit, simplex
## The following objects are masked from 'package:psych':
##
## fisherz, logistic, logit
## The following object is masked from 'package:car':
##
## logit
##
## Attaching package: 'lawstat'
## The following object is masked from 'package:car':
##
## levene.test
# Compare the spread of Petal.Width between the Species groups.
levene.test(iris[, "Petal.Width"], iris[, "Species"])
##
## modified robust Brown-Forsythe Levene-type test based on the
## absolute deviations from the median
##
## data: iris[, "Petal.Width"]
## Test Statistic = 19.892, p-value = 2.261e-08
# Weighted refit of the one-way model: every observation is weighted by the
# inverse of the sample variance of Petal.Width within its own species.
svar <- tapply(iris[, "Petal.Width"], iris[, "Species"], var)
# Indexing svar by the Species column expands the per-species variances to
# one value per row of iris.
wiris <- data.frame(iris, weight = 1 / svar[iris[, "Species"]])
# `weights` is spelled out in full: the previous `weight =` only reached
# lm() through partial argument matching.
model2 <- lm(Petal.Width ~ Species, weights = weight, data = wiris)
summary(model2)
##
## Call:
## lm(formula = Petal.Width ~ Species, data = wiris, weights = weight)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -2.2793 -0.4588 -0.1315 0.8183 3.3591
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.24600 0.01490 16.51 <2e-16 ***
## Speciesversicolor 1.08000 0.03169 34.08 <2e-16 ***
## Speciesvirginica 1.78000 0.04160 42.79 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1 on 147 degrees of freedom
## Multiple R-squared: 0.946, Adjusted R-squared: 0.9452
## F-statistic: 1287 on 2 and 147 DF, p-value: < 2.2e-16