Ejemplo de Análisis Exploratorio Univariado

A continuación vamos a realizar un ejemplo de exploración descriptiva de datos en R y de visualización con la librería ggplot2. Para esto usaremos datos de prueba incluidos en R en el paquete datasets, en particular la base de datos iris.

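Uno de los indicadores que se usarán es la varianza muestral \(s^2\) (su raíz cuadrada es la desviación estándar que calcula sd()), donde \(\bar{x}\) es la media muestral y \(n\) el número de observaciones:

\[s^2 =\frac{\sum_{i=1}^{n} (x_i -\bar{x} )^2}{n-1}\]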

# Load the built-in iris data set and preview its first five rows.
data("iris")
head(iris, n = 5)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa

Como se puede observar en la tabla, que muestra solo los primeros 5 registros, la base de datos contiene 5 variables, entre ellas la longitud y el ancho del pétalo y la especie de la flor. En total, la base tiene 150 registros.

# Análisis Exploratorio de una Variable Cuantitativa

# Descriptive statistics for the petal length column, one indicator per call.
mean(iris[["Petal.Length"]]) # arithmetic mean
## [1] 3.758
median(iris[["Petal.Length"]]) # median
## [1] 4.35
sd(iris[["Petal.Length"]]) # standard deviation
## [1] 1.765298
max(iris[["Petal.Length"]]) # largest value
## [1] 6.9
min(iris[["Petal.Length"]]) # smallest value
## [1] 1
length(iris[["Petal.Length"]]) # number of observations
## [1] 150
summary(iris[["Petal.Length"]]) # minimum, quartiles, mean and maximum
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.600   4.350   3.758   5.100   6.900
# Attach the plotting packages used below. library() stops with an error when
# a package is not installed, whereas require() only warns and returns FALSE,
# which would let the script carry on and fail later at the first plot call.
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Histogram of petal length using the default binning.
ggplot(iris, aes(x = Petal.Length)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Box plot of petal length over all observations.
ggplot(iris, aes(y = Petal.Length)) +
  geom_boxplot()

# Scatter plot of petal length (y) against petal width (x).
ggplot(iris, aes(x = Petal.Width, y = Petal.Length)) +
  geom_point()

# The same petal length histogram drawn with three built-in themes.

# Black-and-white theme.
ggplot(iris, aes(x = Petal.Length)) +
  geom_histogram() +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Minimal theme.
ggplot(iris, aes(x = Petal.Length)) +
  geom_histogram() +
  theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Classic theme.
ggplot(iris, aes(x = Petal.Length)) +
  geom_histogram() +
  theme_classic()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of petal length with blue bars, custom axis labels and a title.
# The fill colour is set as a constant on the layer. Writing fill = "blue"
# inside aes() maps it as a one-level variable instead, so ggplot2 picks a
# default palette colour and adds a legend entry labelled "blue".
ggplot(data = iris, mapping = aes(x = Petal.Length)) +
  geom_histogram(fill = "blue") +
  theme_bw() +
  xlab("Long. de Petalo") +
  ylab("Total") +
  ggtitle("Histograma de Long. del Petalo")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of petal length with bars filled by species.
ggplot(iris, aes(x = Petal.Length, fill = Species)) +
  geom_histogram() +
  theme_bw() +
  labs(
    x = "Long. de Petalo",
    y = "Total",
    title = "Histograma de Long. del Petalo"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Box plot of petal length per species, with one fill colour per species.
ggplot(iris, aes(x = Species, y = Petal.Length, fill = Species)) +
  geom_boxplot() +
  theme_bw() +
  labs(
    y = "Long. de Petalo",
    x = "Species",
    title = "Diagrama de Cajas de Long. del Petalo"
  )

# Scatter plot of petal length against petal width, points coloured by species.
ggplot(iris, aes(x = Petal.Width, y = Petal.Length, colour = Species)) +
  geom_point()

# Scatter plot with the default smoother (loess here, per the message below).
ggplot(iris, aes(x = Petal.Width, y = Petal.Length)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Same scatter plot with a single linear-model fit over all points.
ggplot(iris, aes(x = Petal.Width, y = Petal.Length)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

# Colour is mapped to species in the plot-level aes(), so the smoother layer
# inherits it as well.
ggplot(iris, aes(x = Petal.Width, y = Petal.Length, colour = Species)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

# Scatter plot split into one column panel per species.
ggplot(iris, aes(x = Petal.Width, y = Petal.Length, colour = Species)) +
  geom_point() +
  facet_grid(. ~ Species)

# Same faceted scatter plot with a linear-model smoother layer added.
ggplot(iris, aes(x = Petal.Width, y = Petal.Length, colour = Species)) +
  geom_point() +
  facet_grid(. ~ Species) +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

# Interactive plots ----
# Store the per-species box plot in g1, then pass it to ggplotly().
g1 <- ggplot(iris, aes(x = Species, y = Petal.Length, fill = Species)) +
  geom_boxplot() +
  theme_bw() +
  labs(
    y = "Long. de Petalo",
    x = "Species",
    title = "Diagrama de Cajas de Long. del Petalo"
  )
ggplotly(g1)