Variables cualitativas y cuantitativas

Variables cualitativas

Son variables categóricas y se dividen en dos tipos: ordinales y nominales.

Variables cuantitativas

Se expresan numéricamente y se dividen en dos tipos: discretas y continuas.

Paquetes

library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(dslabs)
library(viridisLite)
library(RColorBrewer)

Dataset

# Load the `murders` data set into the session (presumably the one provided
# by the dslabs package attached above).
data(murders)

Gráficos

Diagramas de barras

# `region` is a nominal qualitative variable; list its categories.
levels(murders$region)
## [1] "Northeast"     "South"         "North Central" "West"
# Absolute frequency of each region.
tabla <- table(murders$region)
tabla
## 
##     Northeast         South North Central          West 
##             9            17            12            13
# Bar chart of the frequency table.
barplot(
  tabla,
  main = "Diagrama de barras",
  xlab = "Regiones",
  ylab = "Frecuencia",
  ylim = c(0, 20),
  col = "blue"
)

#help("barplot")
  1. Poner el valor sobre cada columna

Diagrama circular

# Pie chart of the region counts. Slices and their borders share the same
# four-colour rocket palette, which is repeated in the legend.
pie(
  tabla,
  labels = names(tabla),
  col = rocket(4),
  border = rocket(4),
  main = "Diagrama circular"
)
legend("bottomright", legend = names(tabla), fill = rocket(4), cex = 0.75)

  1. Crear una tabla de porcentajes y graficarla mostrando el dato

Variables ordinales que requieren un orden específico.

# Rebuild the region factor with an explicit level order so the bars are
# drawn in that order instead of the default one.
region <- factor(
  murders$region,
  levels = c("South", "West", "North Central", "Northeast")
)
tabla1 <- table(region)
barplot(tabla1)

Histograma

# Murder rate per 100,000 inhabitants. The pipe already supplies the data
# frame, so it is not passed to mutate() a second time.
murders <- murders %>%
  mutate(rate = total / population * 100000)

# Histogram of the rate; bar fill and border cycle through the same three
# "Paired" colours, and labels = TRUE prints the count above each bar.
hist(
  murders$rate,
  main = "Histograma de la tasa de asesinatos en EEUU",
  xlab = "Tasa de asesinatos",
  xlim = c(0, 20),
  ylim = c(0, 25),
  col = brewer.pal(3, "Paired"),
  border = brewer.pal(3, "Paired"),
  labels = TRUE
)

  1. Modificar el eje x para que se muestre de 1 en 1
  2. Agregar dos líneas verticales (Median, Mean)

Histograma - Regla de Sturges

# Histogram of population with the number of bins chosen by Sturges' rule.
# "Set1" provides at most 9 colours, so exactly 9 are requested; asking for
# more only triggers an RColorBrewer warning and still returns 9.
hist(
  murders$population,
  breaks = "Sturges",
  main = "Histogram of Population",
  xlab = "Population in US",
  col = brewer.pal(9, "Set1"),
  border = brewer.pal(9, "Set1"),
  ylim = c(0, 40),
  labels = TRUE
)

Gráfico de densidad

# Histogram on the density scale (probability = TRUE) so that the kernel
# density estimate drawn afterwards shares the same y axis.
# "Set1" provides at most 9 colours, so exactly 9 are requested; asking for
# more only triggers an RColorBrewer warning and still returns 9.
hist(
  murders$population,
  breaks = "Sturges",
  probability = TRUE,
  main = "Histogram of Population Density",
  xlab = "Population in US",
  col = brewer.pal(9, "Set1"),
  border = brewer.pal(9, "Set1"),
  ylim = c(0, 12e-08)
)
# Overlay the estimated density curve.
lines(density(murders$population), col = "black", lwd = 2)

  1. Interpretar

Boxplot

# Boxplot of the murder rate; outliers are hidden and the y axis is limited
# to 0-6.
boxplot(
  murders$rate,
  main = "Boxplot",
  ylab = "Tasa de asesinatos",
  ylim = c(0, 6),
  outline = FALSE,
  col = "Blue"
)
# Add the mean as a point on the single box (located at x = 1).
points(x = 1, y = mean(murders$rate), col = "black", pch = 20)
# Print the rounded mean at a fixed position inside the box.
text(x = 1.1, y = 2.3, labels = paste(" ", round(mean(murders$rate), 2)))

# Five-number summary plus the mean of the rate.
summary(murders$rate)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.3196  1.2526  2.6871  2.7791  3.3861 16.4528
  1. Agregar los puntos que se muestran al ejecutar summary(murders$rate) en el boxplot.

Comparación variables cuantitativas y cualitativas

# Scatter plot: total murders vs population (in millions).

x <- murders$population / 10^6
y <- murders$total

# Points are coloured by the integer code of each region level.
plot(
  x, y,
  main = "Total vs Population",
  xlab = "Population/10^6",
  ylab = "Total",
  col = murders$region,
  pch = 20
)
# The legend labels follow levels(), so the fill colours must follow the same
# level codes (1, 2, ...). unique() returns regions in order of first
# appearance in the data, which would pair labels with the wrong colours.
legend(
  "bottomright",
  legend = levels(murders$region),
  fill = seq_along(levels(murders$region)),
  cex = 0.75
)

# Boxplot of the murder rate for each region (outliers hidden).
boxplot(
  rate ~ region,
  data = murders,
  main = "Boxplot",
  ylim = c(0, 6),
  outline = FALSE,
  col = brewer.pal(9, "Set1")
)
# Horizontal reference line at the overall mean rate, with its rounded value
# printed at a fixed position near the left edge.
abline(h = mean(murders$rate), lwd = 2, col = "blue")
text(
  x = 0.5,
  y = 3,
  labels = paste("", round(mean(murders$rate), 2)),
  col = "blue"
)

# Several plots in one figure

# Split the device into 1 row x 2 columns, keeping the previous settings so
# they can be restored once both panels are drawn.
old_par <- par(mfrow = c(1, 2))

# Left panel: total murders vs population, coloured by region level code.
plot(
  x, y,
  main = "Total vs Population",
  xlab = "Population/10^6",
  ylab = "Total",
  col = murders$region,
  pch = 20
)
# The legend labels follow levels(), so the fill colours must follow the same
# level codes. unique() returns regions in order of first appearance in the
# data, which would pair labels with the wrong colours.
legend(
  "bottomright",
  legend = levels(murders$region),
  fill = seq_along(levels(murders$region)),
  cex = 0.75
)

# Right panel: murder rate by region with the overall mean as reference line.
boxplot(
  rate ~ region,
  data = murders,
  main = "Boxplot",
  ylim = c(0, 6),
  outline = FALSE,
  col = brewer.pal(9, "Set1")
)
abline(h = mean(murders$rate), col = "blue", lwd = 2)
text(
  paste("", round(mean(murders$rate), 2)),
  x = 0.5, y = 3, col = "blue", cex = 0.5
)

# Restore the single-panel layout for any later plots.
par(old_par)