Variables cualitativas y cuantitativas

Variables cualitativas

Son variables categóricas y se dividen en dos tipos: ordinales y nominales

Variables cuantitativas

Se expresan numéricamente y se dividen en: discretas y continuas

library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(dslabs)
library(viridisLite)
library(RColorBrewer)

Dataset

# Load the `murders` dataset into the session (presumably the one shipped
# with the dslabs package attached above; confirm).
data("murders")

Gráficos

Diagramas de barras

# Region is a qualitative nominal variable; list its categories.
levels(murders[["region"]])
## [1] "Northeast"     "South"         "North Central" "West"
# Frequency table: number of rows per region, printed for inspection.
tabla <- table(murders[["region"]])
tabla
## 
##     Northeast         South North Central          West 
##             9            17            12            13
# Bar chart of the region frequencies. barplot() returns the x midpoints of
# the bars, which are kept so the counts can be written on top of each bar.
barras <- barplot(
  height = tabla,
  col = "blue",
  ylim = c(0, 20),
  xlab = "Regiones",
  ylab = "Frecuencia",
  main = "Diagrama de barras"
)

# Write each count at the top of its bar; pos = 3 puts the text just above.
text(barras, tabla, labels = tabla, cex = 0.8, pos = 3)

## Diagrama de torta

# Share of each region as a percentage of the total, rounded to one decimal.
porcentajes <- round((tabla / sum(tabla)) * 100, 1)

# Build the palette once so slices, borders and legend always use the same
# colours (one colour per category in `tabla`).
colores <- rocket(length(tabla))

pie(tabla,
    labels = names(tabla),
    col = colores,
    main = "Diagrama circular",
    border = colores)

# Mid-angle (in radians) of every slice: with its defaults pie() lays the
# slices out counter-clockwise starting at angle 0, each one spanning a share
# of the full circle proportional to its count.
angles <- cumsum(tabla) - tabla / 2
angles <- 2 * pi * angles / sum(tabla)

# Choose black or white text per slice from the perceived brightness of its
# fill colour, so the label stays readable on the lightest palette entries.
brillo <- colSums(col2rgb(colores) * c(0.299, 0.587, 0.114)) / 255
color_texto <- ifelse(brillo > 0.5, "black", "white")

# Write the percentage inside each slice, at radius 0.5 along its mid-angle.
text(x = 0.5 * cos(angles),
     y = 0.5 * sin(angles),
     labels = sprintf("%.1f%%", porcentajes),
     cex = 0.9,
     col = color_texto)

legend("bottomright",
       legend = names(tabla),
       cex = 0.75,
       fill = colores)

### Variables ordinales que requieren un orden específico

# Rebuild the region factor with an explicit level order; the bars are drawn
# in that order.
orden_regiones <- c("South", "West", "North Central", "Northeast")
region <- factor(murders$region, levels = orden_regiones)
tabla1 <- table(region)
barplot(tabla1)

## Histograma

# Murder rate per 100,000 inhabitants, added as a new column. The pipe already
# supplies `murders` as the data argument, so it must not be passed again.
murders <- murders %>%
  mutate(rate = total / population * 100000)

# Histogram of the rate; the same three-colour palette is recycled across the
# bars for both fill and border, and labels = TRUE prints the bar counts.
colores_hist <- brewer.pal(3, "Paired")
hist(murders$rate,
     main = "Histograma de la tasa de asesinatos en EEUU",
     xlim = c(0, 20),
     ylim = c(0, 25),
     col = colores_hist,
     border = colores_hist,
     xlab = "Tasa de asesinatos",
     labels = TRUE)

# Add x-axis ticks at every integer from 0 to 20.
axis(1, at = seq(0, 20, by = 1))

# NOTE(review): help() with no topic shows the help page for help() itself;
# this looks like a leftover from interactive use. Confirm it is still wanted.
help()

Boxplot

# Single boxplot of the murder rate. Outliers are not drawn and the y axis is
# limited to 0-6, so anything above 6 falls outside the visible area.
tasa <- murders$rate
boxplot(tasa,
        main = "Boxplot",
        ylab = "Tasa de asesinatos",
        col = "Blue",
        ylim = c(0, 6),
        outline = FALSE)

# Add the mean: a black dot on the box (x = 1) and its rounded value as text.
media_tasa <- mean(tasa)
points(x = 1, y = media_tasa, col = "black", pch = 20)
text(x = 1.1, y = 2.3, labels = paste(" ", round(media_tasa, 2)))

# Summary statistics without the 4th entry (the mean, already drawn above).
summary_stats <- summary(tasa)[-4]
n_stats <- length(summary_stats)

# Mark each remaining statistic on the box in red.
points(rep(1, n_stats), summary_stats, col = "red", pch = 16)

# Print the rounded value just above each marker (pos = 3).
text(rep(1.1, n_stats), summary_stats, labels = round(summary_stats, 2), pos = 3)

# Print the summary statistics of the rate column.
summary(murders[["rate"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.3196  1.2526  2.6871  2.7791  3.3861 16.4528

Clase 07

# Scatter plot: total murders vs population (in millions), coloured by region.

x <- murders$population / 10^6
y <- murders$total

# A factor passed as `col` is drawn with its integer level codes, so level k
# gets palette colour k. The y axis shows `total`, hence its label.
plot(x, y,
     main = "Total vs Population",
     col = murders$region,
     pch = 20,
     xlab = "Population/10^6",
     ylab = "Total")

# The legend must list colours in level order (1, 2, ...) to match the points;
# unique(murders$region) would give them in order of first appearance instead.
legend("bottomright",
       legend = levels(murders$region),
       fill = seq_along(levels(murders$region)))

# Boxplot of the murder rate for each region, with the overall mean overlaid.

media_global <- mean(murders$rate)

boxplot(rate ~ region,
        data = murders,
        main = "Boxplot",
        ylim = c(0, 6),
        outline = FALSE,
        col = brewer.pal(9, "Set1"))

# Horizontal reference line at the overall mean, plus its rounded value as text.
abline(h = media_global, col = "blue")
text(x = 0.5, y = 3, labels = paste("", round(media_global, 2)), col = "blue")

# Several plots in one figure: scatter plot on the left, boxplots on the right.

# Split the device into 1 row x 2 columns.
# NOTE(review): this layout is not restored afterwards; confirm that is intended.
par(mfrow = c(1, 2))

# Left panel: total murders vs population (in millions), coloured by region.
# A factor passed as `col` is drawn with its integer level codes, and the
# y axis shows `total`, hence its label.
plot(x, y,
     main = "Total vs Population",
     col = murders$region,
     pch = 20,
     xlab = "Population/10^6",
     ylab = "Total")

# Legend colours follow level order (1, 2, ...) so they match the points;
# unique(murders$region) would give them in order of first appearance instead.
legend("bottomright",
       legend = levels(murders$region),
       fill = seq_along(levels(murders$region)),
       cex = 0.75)

# Right panel: rate per region with the overall mean as a reference line.
boxplot(rate ~ region,
        data = murders,
        col = brewer.pal(9, "Set1"),
        outline = FALSE,
        ylim = c(0, 6),
        main = "Boxplot")
abline(h = mean(murders$rate), col = "blue", lwd = 2)
text(x = 0.5, y = 3,
     labels = paste("", round(mean(murders$rate), 2)),
     col = "blue",
     cex = 0.5)