A continuación vamos a realizar un ejemplo de exploración descriptiva de datos en R y de visualización con la librería ggplot2. Para esto usaremos datos de prueba que vienen incluidos en R en la librería datasets, en particular la base de datos iris.
Entre los indicadores que se usarán está la varianza muestral \(s^2\), cuya raíz cuadrada es la desviación estándar que calcula `sd()`:
\[s^2 =\frac{\sum_{i=1}^{n} (x_i -\bar{x})^2}{n-1}\]
# Load the built-in iris data set and preview its first five rows.
data("iris")
head(iris, n = 5)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
Como se puede observar en la tabla (que muestra solo los primeros 5 registros), la base de datos, con un total de 150 registros, contiene 5 variables, como la longitud y el ancho del pétalo o la especie de la flor.
# A. Análisis exploratorio de una variable cuantitativa
# Descriptive statistics for petal length; the column is extracted once
# and reused by every call below.
petal_length <- iris$Petal.Length

# Mean
mean(petal_length)
## [1] 3.758
# Median
median(petal_length)
## [1] 4.35
# Standard deviation
sd(petal_length)
## [1] 1.765298
# Maximum
max(petal_length)
## [1] 6.9
# Minimum
min(petal_length)
## [1] 1
# Number of observations
length(petal_length)
## [1] 150
# Five-number summary plus the mean
summary(petal_length)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.600 4.350 3.758 5.100 6.900
library(ggplot2)
require(ggplot2)
require(plotly)
## Loading required package: plotly
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Histogram of petal length with default settings (stat_bin reports that
# it is using 30 bins).
ggplot(iris, aes(x = Petal.Length)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Box plot of petal length over all observations.
ggplot(iris, aes(y = Petal.Length)) +
  geom_boxplot()
# Scatter plot of petal length against petal width.
ggplot(iris, aes(x = Petal.Width, y = Petal.Length)) +
  geom_point()
# The same petal-length histogram drawn with three built-in themes.
# Black-and-white theme
ggplot(iris, aes(x = Petal.Length)) +
  geom_histogram() +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Minimal theme
ggplot(iris, aes(x = Petal.Length)) +
  geom_histogram() +
  theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Classic theme
ggplot(iris, aes(x = Petal.Length)) +
  geom_histogram() +
  theme_classic()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of petal length with blue bars, the black-and-white theme,
# and custom axis labels and title.
# `fill` is set as a constant on the geom rather than inside aes(): inside
# aes() the string "blue" is mapped as a one-level data value, so the bars
# get the default discrete colour and a spurious "blue" legend appears.
ggplot(iris, aes(x = Petal.Length)) +
  geom_histogram(fill = "blue") +
  theme_bw() +
  xlab("Long. de Petalo") +
  ylab("Total") +
  ggtitle("Histograma de Long. del Petalo")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of petal length with the bar fill mapped to species.
ggplot(iris, aes(x = Petal.Length, fill = Species)) +
  geom_histogram() +
  theme_bw() +
  xlab("Long. de Petalo") +
  ylab("Total") +
  ggtitle("Histograma de Long. del Petalo")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Box plot of petal length, one box per species, filled by species.
ggplot(iris, aes(x = Species, y = Petal.Length, fill = Species)) +
  geom_boxplot() +
  theme_bw() +
  ylab("Long. de Petalo") +
  xlab("Species") +
  ggtitle("Diagrama de Cajas de Long. del Petalo")
# Scatter plot of petal length against petal width, coloured by species.
ggplot(iris, aes(x = Petal.Width, y = Petal.Length, colour = Species)) +
  geom_point()
# Same scatter without colour, plus the default smoother (reported below
# as loess).
ggplot(iris, aes(x = Petal.Width, y = Petal.Length)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Scatter with a single linear-model fit over all observations.
ggplot(iris, aes(x = Petal.Width, y = Petal.Length)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'
# Colour mapped to species in the plot-level aes(), followed by a
# linear-model smoother.
ggplot(iris, aes(x = Petal.Width, y = Petal.Length, colour = Species)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'
# One panel per species, points only.
ggplot(iris, aes(x = Petal.Width, y = Petal.Length, colour = Species)) +
  geom_point() +
  facet_grid(~Species)
# One panel per species, points plus a linear-model smoother.
ggplot(iris, aes(x = Petal.Width, y = Petal.Length, colour = Species)) +
  geom_point() +
  facet_grid(~Species) +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'
## Gráficos dinámicos
# Build the per-species box plot of petal length as a ggplot object, then
# pass it to plotly's ggplotly().
g1 <- ggplot(iris, aes(x = Species, y = Petal.Length, fill = Species)) +
  geom_boxplot() +
  theme_bw() +
  ylab("Long. de Petalo") +
  xlab("Species") +
  ggtitle("Diagrama de Cajas de Long. del Petalo")
ggplotly(g1)