Variables cualitativas: son variables categóricas y se dividen en dos tipos: ordinales y nominales.
Variables cuantitativas: se expresan numéricamente y se dividen en dos tipos: discretas y continuas.
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(dslabs)
library(viridisLite)
library(RColorBrewer)
# Load the `murders` data set (provided by the dslabs package).
data(murders)

# `region` is a nominal qualitative variable; list its categories.
levels(murders[["region"]])
## [1] "Northeast" "South" "North Central" "West"

# Absolute frequency of each region; reused by the plots that follow.
tabla <- table(murders[["region"]])
tabla
##
## Northeast South North Central West
## 9 17 12 13
# Bar chart of the region frequencies.
# barplot() returns the x midpoints of the bars; they are kept in `barras`
# so the counts can be written on top of each bar.
barras <- barplot(
  height = tabla,
  col = "blue",
  ylim = c(0, 20),
  xlab = "Regiones",
  ylab = "Frecuencia",
  main = "Diagrama de barras"
)

# pos = 3 draws each label just above its (x, y) anchor, i.e. above the bar.
text(barras, tabla, labels = tabla, cex = 0.8, pos = 3)
## Pie chart
# Share of each region as a percentage, rounded to one decimal place.
porcentajes <- round((tabla / sum(tabla)) * 100, 1)

# Build the palette once so slices, borders and legend always use the same
# colours, one per category in `tabla`.
slice_colors <- rocket(length(tabla))

pie(tabla,
    labels = names(tabla),
    col = slice_colors,
    main = "Diagrama circular",
    border = slice_colors)

# Angular midpoint of every slice in radians. With its default settings pie()
# starts at angle 0 and lays the slices out counter-clockwise, so the midpoint
# of slice i is the cumulative count up to i minus half of its own count.
angles <- cumsum(tabla) - tabla / 2
angles <- 2 * pi * angles / sum(tabla)

# Pick a readable label colour per slice: the lightest palette colours are
# nearly white, so a fixed white label would be invisible on them.
luminance <- colSums(col2rgb(slice_colors) * c(0.299, 0.587, 0.114)) / 255
label_colors <- ifelse(luminance > 0.6, "black", "white")

# Write the percentages inside the slices (radius 0.5 is inside the pie).
text(x = 0.5 * cos(angles),
     y = 0.5 * sin(angles),
     labels = sprintf("%.1f%%", porcentajes),
     cex = 0.9,
     col = label_colors)

legend("bottomright",
       legend = names(tabla),
       cex = 0.75,
       fill = slice_colors)
### Ordinal variables that need a specific category order
# Rebuild the region factor with an explicit level order; table() and
# barplot() then follow that order instead of the original one.
region_order <- c("South", "West", "North Central", "Northeast")
region <- factor(murders$region, levels = region_order)

tabla1 <- table(region)
barplot(tabla1)
## Histogram
# Add the murder rate per 100,000 inhabitants. The data frame is supplied by
# the pipe only; it is not passed a second time as an argument to mutate().
murders <- murders %>%
  mutate(rate = total / population * 100000)

# labels = TRUE prints the count on top of every bar.
hist(murders$rate,
     main = "Histograma de la tasa de asesinatos en EEUU",
     xlab = "Tasa de asesinatos",
     xlim = c(0, 20),
     ylim = c(0, 25),
     col = brewer.pal(3, "Paired"),
     border = brewer.pal(3, "Paired"),
     labels = TRUE)

# Add a tick mark at every integer rate on the x axis.
axis(1, at = seq(0, 20, by = 1))
# Boxplot of the murder rate. Outliers are not drawn and the y axis is
# limited to 0-6.
mean_rate <- mean(murders$rate)

boxplot(murders$rate,
        main = "Boxplot",
        ylab = "Tasa de asesinatos",
        col = "Blue",
        outline = FALSE,
        ylim = c(0, 6))

# Mark the mean on the box (a single box is drawn at x = 1) and print its
# value next to it at a fixed height.
points(x = 1, y = mean_rate, pch = 20, col = "black")
text(x = 1.1, y = 2.3, labels = paste(" ", round(mean_rate, 2)))

# Summary statistics without the 4th entry (the mean, already drawn above).
summary_stats <- summary(murders$rate)[-4]
n_stats <- length(summary_stats)

# Draw each summary value as a red dot on the box ...
points(rep(1, n_stats), summary_stats, pch = 16, col = "red")
# ... and label it with its rounded value, slightly to the right and above.
text(rep(1.1, n_stats), summary_stats, labels = round(summary_stats, 2), pos = 3)

summary(murders$rate)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.3196 1.2526 2.6871 2.7791 3.3861 16.4528
# Scatter plot: total murders against population (in millions).
x <- murders$population / 10^6
y <- murders$total

# col = murders$region colours each point by the integer code of its region
# level (1 = first level, 2 = second level, ...).
plot(x, y,
     main = "Total vs Populations",
     col = murders$region,
     pch = 20,
     xlab = "Population/10^6",
     ylab = "Total murders")

# The legend labels are in level order, so the swatches must use the level
# codes 1..k in that same order. unique(murders$region) would give the regions
# in order of appearance in the data and pair labels with the wrong colours.
legend("bottomright",
       legend = levels(murders$region),
       fill = seq_along(levels(murders$region)))
# Boxplot of the murder rate by region (outliers hidden, y axis cut at 6).
overall_mean <- mean(murders$rate)

boxplot(rate ~ region,
        data = murders,
        main = "Boxplot",
        col = brewer.pal(9, "Set1"),
        ylim = c(0, 6),
        outline = FALSE)

# Horizontal reference line at the overall mean rate, with its rounded value
# printed near the left edge of the plot.
abline(h = overall_mean, col = "blue")
text(x = 0.5, y = 3, labels = paste("", round(overall_mean, 2)), col = "blue")
# Several plots in one figure: one row, two columns.
# par() returns the previous settings; they are restored at the end so later
# plots are not silently drawn in a two-panel layout.
old_par <- par(mfrow = c(1, 2))

# Left panel: total murders against population (in millions), coloured by the
# integer code of each state's region level.
plot(x, y,
     main = "Total vs Population",
     col = murders$region,
     pch = 20,
     xlab = "Population/10^6",
     ylab = "Total murders")

# Swatches use the level codes 1..k in level order so they match the labels
# and the point colours. unique(murders$region) follows order of appearance
# in the data and would mismatch them.
legend("bottomright",
       legend = levels(murders$region),
       fill = seq_along(levels(murders$region)),
       cex = 0.75)

# Right panel: murder rate by region with the overall mean as reference line.
boxplot(rate ~ region,
        data = murders,
        col = brewer.pal(9, "Set1"),
        outline = FALSE,
        ylim = c(0, 6),
        main = "Boxplot")
abline(h = mean(murders$rate), col = "blue", lwd = 2)
text(paste("", round(mean(murders$rate), 2)), x = 0.5, y = 3, col = "blue", cex = 0.5)

# Back to the layout that was active before this figure.
par(old_par)