Variables cualitativas y cuantitativas

Variables cualitativas

Son variables categóricas y se dividen en dos tipos: ordinales y nominales.

Variables cuantitativas

Se expresan numéricamente y se dividen en dos tipos: discretas y continuas.

#install.packages("dslabs")
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(dslabs)
library(viridisLite)
library(RColorBrewer)
# Dataset
data("murders")

Gráficos

Diagrama de barras

# `region` is a nominal qualitative variable: list its categories.
levels(murders[["region"]])
## [1] "Northeast"     "South"         "North Central" "West"
# Count how many rows of `murders` fall in each region.
tabla <- table(murders[["region"]])
print(tabla)
## 
##     Northeast         South North Central          West 
##             9            17            12            13
# Bar chart of the counts; barplot() returns the bar midpoints, which are
# reused to write each count 2 units above the top of its bar.
bp <- barplot(
  tabla,
  col = "blue",
  ylim = c(0, 20),
  xlab = "Regiones",
  ylab = "Frecuencia",
  main = "Gráfico de barras"
)
text(x = bp, y = tabla + 2, labels = format(tabla), cex = 1)

#help("barplot")

Diagrama de torta

# Pie chart of the region counts, one "rocket" palette colour per slice
# (the same palette is used for the slice borders and the legend swatches).
pie(
  tabla,
  labels = names(tabla),
  main = "Diagrama circular",
  col = rocket(4),
  border = rocket(4)
)
legend(
  "bottomright",
  legend = names(tabla),
  fill = rocket(4),
  cex = 0.75
)

# Crear la tabla de porcentajes por región y graficar mostrando el dato

# Region labels and their counts, typed in by hand, gathered in a data frame
# with columns `region` and `x`.
region <- c("Northeast", "South", "North Central", "West")
x <- c(9, 17, 12, 13)
df_region_x <- data.frame(region = region, x = x)
# df_region_x
#' Convert counts to whole-number percentages of their total.
#'
#' The share is computed from `sum(x)`, so the function works for any set of
#' counts. The earlier hard-coded factor 1.96 (roughly 100 / 51) was only
#' correct when the counts added up to 51.
#'
#' @param x Numeric vector of counts.
#' @return Numeric vector the same length as `x`: each element's share of
#'   `sum(x)` in percent, rounded to the nearest integer. Because of rounding
#'   the shares are not guaranteed to add up to exactly 100. If `sum(x)` is
#'   zero the division yields `NaN`.
porc <- function(x) {
  round(x / sum(x) * 100)
}
# Run the counts through porc() and store the result as a new column.
porcentaje <- porc(x)
df_region_x[["porcentaje"]] <- porcentaje
print(df_region_x)
##          region  x porcentaje
## 1     Northeast  9         18
## 2         South 17         33
## 3 North Central 12         24
## 4          West 13         25
# Pie chart with each slice labelled by its rounded percentage.
# Slice sizes, slice labels and legend entries are all read from
# `df_region_x`, so the legend cannot drift out of step with the slices
# (previously the sizes came from `x` and the legend from `tabla`).
pie(
  df_region_x$x,
  labels = paste0(df_region_x$porcentaje, "%"),
  col = viridis(nrow(df_region_x)),
  main = "Diagrama circular",
  border = rocket(nrow(df_region_x))
)
legend(
  "bottomright",
  legend = df_region_x$region,
  cex = 0.75,
  fill = viridis(nrow(df_region_x))
)

Variables ordinales que requieren un orden específico.

# Rebuild the region factor with an explicit level order; table() and
# barplot() then present the categories in exactly that order.
region <- factor(
  murders[["region"]],
  levels = c("South", "West", "North Central", "Northeast")
)
tabla1 <- table(region)
barplot(height = tabla1)

# Histograma

# Add the murder rate per 100,000 inhabitants as column `rate`.
# The data frame already reaches mutate() through the pipe; passing it again
# as an unnamed argument only re-evaluated the whole table inside mutate().
murders <- murders %>%
  mutate(rate = total / population * 100000)

# Histogram of the rate with bar counts printed on top (labels = TRUE).
hist(
  murders$rate,
  main = "Histograma de la tasa de asesinatos en EEUU por arma de fuego",
  xlim = c(0, 20),
  ylim = c(0, 25),
  col = brewer.pal(9, "Set1"),
  xlab = "Tasa de asesinatos",
  labels = TRUE,
  border = brewer.pal(3, "Paired")
)
# Draw an additional x axis with a tick at every unit.
axis(1, at = seq(0, 25, by = 1))

# Boxplot

# Box plot of the murder rate with outliers hidden and the y axis cut at 6.
boxplot(
  murders$rate,
  main = "Boxplot",
  ylab = "Tasa de asesinatos",
  ylim = c(0, 6),
  outline = FALSE,
  col = "Blue"
)

# Mark the mean with a black dot; a single value handed to points() is drawn
# at x = 1, which is the position of the only box.
points(x = 1, y = mean(murders$rate), pch = 20, col = "black")
# Print the rounded mean at a fixed position (x = 1.1, y = 2.3).
text(x = 1.1, y = 2.3, labels = paste(" ", round(mean(murders$rate), 2)))
# Overlay the values returned by summary(murders$rate) as jittered red dots.
stripchart(
  summary(murders$rate),
  add = TRUE,
  vertical = TRUE,
  method = "jitter",
  pch = 19,
  col = "red"
)

summary(murders$rate)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.3196  1.2526  2.6871  2.7791  3.3861 16.4528

Comparación de variables

# Scatter plot: total murders (y) against population in millions (x).
x <- murders$population / 10^6
y <- murders$total

# `col = murders$region` colours each point by the integer code of its
# region level, so level 1 gets palette colour 1, level 2 colour 2, and so on.
plot(
  x, y,
  main = "Total vs Population",
  col = murders$region,
  pch = 20,
  xlab = "Population/10^6",
  ylab = "Total"
)
# The legend labels are in levels() order, so the swatches must use the level
# codes 1..k in that same order. unique(murders$region) is ordered by first
# appearance in the data instead, which paired labels with the wrong colours.
legend(
  "bottomright",
  legend = levels(murders$region),
  fill = seq_along(levels(murders$region))
)

# Box plot of the murder rate split by region; outliers are hidden and the
# y axis is cut at 6.
boxplot(
  rate ~ region,
  data = murders,
  ylim = c(0, 6),
  outline = FALSE,
  col = brewer.pal(9, "Set1")
)
# Horizontal reference line at the overall mean rate, with its rounded value
# written in small blue text at the left edge.
abline(h = mean(murders$rate), lwd = 2, col = "blue")
text(
  x = 0.5,
  y = 3,
  labels = paste("", round(mean(murders$rate), 2)),
  cex = 0.5,
  col = "blue"
)

# Varios gráficos

# Lay out the next two plots side by side (1 row, 2 columns).
# NOTE(review): the 1x2 layout stays active for any plot drawn afterwards;
# reset with par(mfrow = c(1, 1)) if that is not wanted.
par(mfrow = c(1, 2))

# Left panel: total murders against population in millions, coloured by the
# integer code of each point's region level.
plot(
  x, y,
  main = "Total vs Population",
  col = murders$region,
  pch = 20,
  xlab = "Population/10^6",
  ylab = "Total"
)
# Swatches use level codes 1..k so they line up with the levels() labels.
# unique(murders$region) is ordered by first appearance and mismatched them.
legend(
  "bottomright",
  legend = levels(murders$region),
  fill = seq_along(levels(murders$region))
)

# Right panel: murder rate by region with a line at the overall mean rate.
boxplot(
  rate ~ region,
  data = murders,
  col = brewer.pal(9, "Set1"),
  outline = FALSE,
  ylim = c(0, 6)
)
abline(h = mean(murders$rate), col = "blue", lwd = 2)
text(
  paste("", round(mean(murders$rate), 2)),
  x = 0.5,
  y = 3,
  col = "blue",
  cex = 0.5
)