A continuación vamos a realizar un ejemplo de exploración descriptiva de datos en R y visualización con la librería ggplot2. Para esto usaremos datos de prueba que se encuentran en R en la librería datasets, particularmente la base de datos iris.
Algunos indicadores que se utilizarán serán la varianza muestral: \(s^2\)
\[s^2 = \frac{\sum_{i=1}^{n}(x_i -\bar{x})^2}{n-1}\]
# Load the built-in iris data set and preview its first five rows.
data("iris")
head(iris, n = 5)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
La tabla muestra los primeros 5 registros. La base de datos contiene un total de 150 registros y 5 variables: longitud y ancho del sépalo, longitud y ancho del pétalo, y la especie de la flor.
# A. Exploratory analysis of a quantitative variable (petal length)
mean(iris[["Petal.Length"]]) # mean
## [1] 3.758
median(iris[["Petal.Length"]]) # median
## [1] 4.35
sd(iris[["Petal.Length"]]) # standard deviation
## [1] 1.765298
max(iris[["Petal.Length"]]) # maximum
## [1] 6.9
min(iris[["Petal.Length"]]) # minimum
## [1] 1
length(iris[["Petal.Length"]]) # number of observations
## [1] 150
summary(iris[["Petal.Length"]]) # quartiles, mean and range in one call
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.600 4.350 3.758 5.100 6.900
# Attach the plotting packages. library() stops with an error when a package
# is not installed, whereas require() only returns FALSE and lets the script
# continue until the first call into the missing package fails.
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Histogram of petal length using the default number of bins.
ggplot(iris, aes(x = Petal.Length)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Boxplot of petal length (the variable is mapped to the x axis).
ggplot(iris, aes(x = Petal.Length)) +
  geom_boxplot()
# Scatter plot of petal length against petal width.
ggplot(iris, aes(x = Petal.Width, y = Petal.Length)) +
  geom_point()
# Petal-length histogram with bars filled by species, on a black-and-white
# theme and with custom axis labels and title.
ggplot(iris, aes(x = Petal.Length, fill = Species)) +
  geom_histogram() +
  theme_bw() +
  labs(
    x = "longitud del petalo",
    y = "total",
    title = "Histograma long pétalo"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Boxplots of petal length, one box per species. Species is mapped to x so
# that the axis labelled "Species" shows the species names; without an x
# aesthetic the boxes are only dodged around a meaningless continuous axis.
ggplot(iris, aes(x = Species, y = Petal.Length, fill = Species)) +
  geom_boxplot() +
  theme_bw() +
  xlab("Species") +
  ylab("long petalo") +
  ggtitle("diag cajas long petalo")
# Petal length against petal width, coloured by species, with a
# linear-model smoother added on top of the points.
ggplot(iris, aes(x = Petal.Width, y = Petal.Length, colour = Species)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'
# Same scatter plot split into one panel per species with facet_grid().
ggplot(iris, aes(x = Petal.Width, y = Petal.Length, colour = Species)) +
  geom_point() +
  facet_grid(~Species)
# Panels per species via facet_wrap(), each with its linear-model smoother.
ggplot(iris, aes(x = Petal.Width, y = Petal.Length, colour = Species)) +
  geom_point() +
  facet_wrap(~Species) +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'
# Interactive plots: build the faceted scatter plot once, store it in g1,
# and hand it to plotly for an interactive rendering.
g1 <- ggplot(
  iris,
  aes(x = Petal.Width, y = Petal.Length, colour = Species)
) +
  geom_point() +
  facet_wrap(~Species) +
  geom_smooth(method = "lm")
ggplotly(g1)
## `geom_smooth()` using formula 'y ~ x'