# Load the built-in iris data set and show its structure.
data("iris")
str(iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Relative frequency of each species: counts divided by the number of rows.
table(iris[["Species"]]) / nrow(iris)
## 
##     setosa versicolor  virginica 
##  0.3333333  0.3333333  0.3333333
# Arithmetic mean of the petal width.
mean(iris[["Petal.Width"]])
## [1] 1.199333
# Draw 10 values from a normal distribution with mean 20 and sd 10.
# No seed is set in this snippet, so the figures printed below change
# from one run to the next.
vec <- rnorm(n = 10, mean = 20, sd = 10)
mean(vec)
## [1] 20.80885
# Trimmed mean: drops 10% of the observations at each end before averaging.
mean(vec, trim = 0.1)
## [1] 20.88742
median(vec)
## [1] 18.82706
# Mode(s) of a variable.
#
# Tabulates `var` with table() and returns the label(s) of the most
# frequent value(s). Ties are all returned, in table() order.
#
# var: a vector or factor. Values that table() drops (NA by default) are
#      not counted.
# Returns a character vector of labels, even for numeric input (the labels
# come from the table names). Returns character(0) when there is nothing
# to tabulate (empty or all-NA non-factor input); without this guard max()
# would warn on a zero-length table and the result would be NULL.
moda <- function(var) {
  frec_var <- table(var)
  if (length(frec_var) == 0L) {
    return(character(0))
  }
  names(frec_var)[frec_var == max(frec_var)]
}
# Mode of a numeric column and of a factor. For Species all three levels
# come back because they are tied for the highest count.
moda(iris[["Sepal.Length"]])
## [1] "5"
moda(iris[["Species"]])
## [1] "setosa"     "versicolor" "virginica"
# Quartiles of sepal length; the 50% quantile matches the median below.
quantile(iris[["Sepal.Length"]], probs = seq(from = 0, to = 1, by = 0.25))
##   0%  25%  50%  75% 100% 
##  4.3  5.1  5.8  6.4  7.9
median(iris[["Sepal.Length"]])
## [1] 5.8
# Column-wise summary of the whole data set: six-number summary for the
# numeric columns, level counts for the Species factor.
summary(object = iris)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Summary of petal length computed separately for each species.
with(iris, tapply(Petal.Length, Species, summary))
## $setosa
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.400   1.500   1.462   1.575   1.900 
## 
## $versicolor
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3.00    4.00    4.35    4.26    4.60    5.10 
## 
## $virginica
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.500   5.100   5.550   5.552   5.875   6.900
# Measures of spread for sepal length.
# Range width: largest value minus smallest value.
diff(range(iris[["Sepal.Length"]]))
## [1] 3.6
# Sample variance.
var(iris[["Sepal.Length"]])
## [1] 0.6856935
# Sample standard deviation.
sd(iris[["Sepal.Length"]])
## [1] 0.8280661
# Interquartile range (3rd quartile minus 1st quartile).
IQR(iris[["Sepal.Length"]])
## [1] 1.3

Análisis multivariado:

Covarianza

# Sample covariance between sepal length and sepal width.
with(iris, cov(Sepal.Length, Sepal.Width))
## [1] -0.042434

Covarianza de todas las variables

# Covariance matrix of the four numeric measurements; the fifth column
# (Species, a factor) is left out.
cov(iris[, -5])
##              Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length    0.6856935  -0.0424340    1.2743154   0.5162707
## Sepal.Width    -0.0424340   0.1899794   -0.3296564  -0.1216394
## Petal.Length    1.2743154  -0.3296564    3.1162779   1.2956094
## Petal.Width     0.5162707  -0.1216394    1.2956094   0.5810063

Correlación de Pearson

# Pearson correlation matrix of the four numeric measurements.
# "pearson" is cor()'s default method; it is spelled out here for clarity.
cor(iris[, 1:4], method = "pearson")
##              Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length    1.0000000  -0.1175698    0.8717538   0.8179411
## Sepal.Width    -0.1175698   1.0000000   -0.4284401  -0.3661259
## Petal.Length    0.8717538  -0.4284401    1.0000000   0.9628654
## Petal.Width     0.8179411  -0.3661259    0.9628654   1.0000000

Visualización de datos

# Three random series (15 draws each from a normal with mean 10, sd 5)
# drawn on one plot as a line, as points, and as line + points.
serie_lineas <- rnorm(15, 10, 5)
serie_puntos <- rnorm(15, 10, 5)
serie_ambos <- rnorm(15, 10, 5)

# Size the y axis from all three series; otherwise the axis is fixed by the
# first series alone and values of the other two can fall outside the plot.
rango_y <- range(serie_lineas, serie_puntos, serie_ambos)

# `pch` is omitted here: it has no effect on a type = "l" plot.
# `ylab` keeps the label the plot had when the rnorm() call was passed inline.
plot(serie_lineas, col = "red", type = "l", ylim = rango_y,
     ylab = "rnorm(15, 10, 5)")
lines(serie_puntos, col = "blue", type = "p")
lines(serie_ambos, col = "green", type = "b")
title(main = "Mi primer gráfico en R")

# The legend keys mirror how each series is actually drawn: a line only,
# a point symbol only, and both. NA suppresses the unused line or symbol.
legend("topright", c("lineas", "puntos", "lineas_y_puntos"),
       lty = c(1, NA, 1), pch = c(NA, 1, 1),
       col = c("red", "blue", "green"), bty = "n", cex = .75)

Llamando paquetes y visualizando datos

library(ggplot2)
# Mean of each numeric measurement per species. With an unnamed `by` list,
# aggregate() names the grouping column "Group.1".
df <- aggregate(x = iris[, 1:4], by = list(iris$Species), FUN = mean)
# Base-graphics bar chart of the mean petal length for each species.
barplot(
  Petal.Length ~ Group.1,
  data = df,
  xlab = "Species",
  ylab = "Petal Length"
)

# ggplot2 bar chart of the mean petal length per species.
# geom_col() draws bars whose height is the y value, which is what
# geom_bar(stat = "identity") does.
ggplot(df, aes(x = Group.1, y = Petal.Length)) +
  geom_col(width = 0.5, colour = "red", fill = "white") +
  labs(
    x = "Especies",
    y = "Longitud de petalo",
    title = "Especies de iris"
  )

# Base-graphics pie chart of the number of observations per species.
conteo_especies <- table(iris$Species)
pie(conteo_especies)

Gráfico de torta con ggplot

# Pie chart of the mean petal length per species: a single stacked bar
# wrapped into a circle with coord_polar(theta = "y").
# `fill` must be mapped inside aes() so that each species gets its own
# slice colour. Passed outside aes() it is not used as a mapping, and a
# constant fill = "white" on the geom would paint every slice white.
ggplot(df, aes(x = "", y = Petal.Length, fill = Group.1)) +
  geom_bar(width = 0.5, colour = "red", stat = "identity") +
  xlab("Especies") +
  ylab("Longitud de petalo") +
  ggtitle("Especies de iris") +
  # Without this the legend title would be the column name "Group.1".
  labs(fill = "Especies") +
  coord_polar(theta = "y")

BOXPLOTS

# Box plot of sepal length for each species, boxes filled by species.
ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species)) +
  geom_boxplot() +
  labs(x = "Especies", y = "Longitud de sepalo")

# Scatter plot of sepal width against sepal length, coloured by species.
# shape = 19 is a solid circle.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, color = Species)) +
  geom_point(shape = 19, size = 3)

Matriz de gráficos de dispersión

# Scatterplot matrix of the four numeric measurements. The integer code of
# each species factor level selects the plotting symbol, and the factor
# itself selects the colour.
pairs(
  iris[, 1:4],
  pch = as.integer(iris$Species),
  col = iris$Species
)