# Variables cualitativas: variables categóricas; se dividen en dos: ordinales y nominales.
# Variables cuantitativas: se expresan numéricamente; se dividen en discretas y continuas.
#paquetes
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(dslabs)
library(viridisLite)
library(RColorBrewer)
library(ggplot2)
# Load the `murders` dataset (made available by library(dslabs) above).
data("murders")

# Bar chart ----
# `region` is a nominal qualitative variable (unordered categories).
levels(murders$region)
## [1] "Northeast" "South" "North Central" "West"

# Absolute frequency of observations per region.
tabla <- table(murders$region)
tabla
##
## Northeast South North Central West
## 9 17 12 13

# barplot() returns the x midpoint of every bar; keep it so that each count
# can be written directly above its column.
bar_mids <- barplot(
  tabla,
  main = "Gráfico de barras",
  ylab = "Frecuencia",
  xlab = "Regiones",
  ylim = c(0, 20),
  col = "blue"
)
# Print the value on top of each column (pos = 3 places the text above the
# given coordinate; ylim leaves room above the tallest bar).
text(x = bar_mids, y = as.vector(tabla), labels = as.vector(tabla), pos = 3)
# Pie chart ----
# Each slice is labelled with its share of the total, shown as a percentage
# rounded to three decimals.
slice_labels <- paste(round(tabla * 100 / sum(tabla), 3), "%")
pie(
  tabla,
  labels = slice_labels,
  main = "Diagrama circular",
  col = rainbow(4)
)
# Legend mapping each slice colour to its region name.
legend(
  "bottomright",
  legend = names(tabla),
  fill = rainbow(4),
  cex = 0.75
)
# Categories that need a specific order ----
# Rebuild the region factor with an explicit level order so the bars are drawn
# in that sequence. The factor itself remains nominal (it is not created with
# ordered = TRUE); only the order of its levels changes.
region_order <- c("South", "West", "North Central", "Northeast")
region <- factor(murders$region, levels = region_order)
tabla1 <- table(region)
barplot(tabla1)
# Histogram ----
# Murder rate per 100,000 inhabitants. The data frame already arrives through
# the pipe, so mutate() only needs the definition of the new column.
murders <- murders %>%
  mutate(rate = total / population * 100000)

# labels = TRUE prints each bin count above its bar. xaxt = "n" suppresses the
# default x axis so that the custom axis below is the only one drawn.
hist(
  murders$rate,
  main = "Histograma de la tasa de asesinatos en EEUU",
  xlab = "Tasa de asesinatos",
  xlim = c(0, 20),
  ylim = c(0, 25),
  col = brewer.pal(3, "Paired"),
  border = brewer.pal(3, "Paired"),
  labels = TRUE,
  xaxt = "n"
)
# axis() customises the plot axes: here, one tick per unit on the x axis.
axis(1, at = 0:20)
# Boxplot ----
# Outliers are hidden (outline = FALSE) and the y axis is limited to 0-6.
boxplot(
  murders$rate,
  col = "Blue",
  ylab = "Tasa de asesinatos",
  outline = FALSE,
  main = "Boxplot",
  ylim = c(0, 6)
)

# Add the mean ----
# A single boxplot is centred at x = 1, so the point is placed there
# explicitly instead of relying on points() using the index as x.
rate_mean <- mean(murders$rate)
points(x = 1, y = rate_mean, col = "black", pch = 20)
text(x = 1.1, y = 2.3, labels = paste(" ", round(rate_mean, 2)))

# Overlay the values returned by summary() on the plot. Values outside ylim
# (such as the maximum, 16.4528) fall outside the plotting region.
# `legend` is not a graphical parameter of points(), so it is not passed.
summary_stats <- summary(murders$rate)
stat_values <- as.numeric(summary_stats)
points(rep(1, length(stat_values)), stat_values, col = "yellow", pch = 20)

# Label each statistic next to its point.
text(
  rep(1.1, length(stat_values)),
  stat_values,
  labels = names(summary_stats),
  col = "grey",
  cex = 0.8
)

summary_stats
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.3196 1.2526 2.6871 2.7791 3.3861 16.4528
# Comparing quantitative and qualitative variables ----
## Scatter plot: total murders vs population (in millions)
x <- murders$population / 10^6
y <- murders$total
# Passing the factor as `col` colours each point by the integer code of its
# region level.
plot(
  x, y,
  main = "Total vs Population",
  col = murders$region,
  pch = 20,
  xlab = "Population/10^6",
  ylab = "Total murders"
)
# The legend labels are in level order, so the fill colours must be the level
# codes 1..k in that same order. unique() returns regions in order of first
# appearance in the data, which pairs the labels with the wrong colours.
legend(
  "bottomright",
  legend = levels(murders$region),
  fill = seq_along(levels(murders$region)),
  cex = 0.75
)
# Boxplot of murder rate by region ----
# Outliers are hidden and the y axis is limited to 0-6.
boxplot(
  rate ~ region,
  data = murders,
  main = "Boxplot",
  ylim = c(0, 6),
  outline = FALSE,
  col = brewer.pal(9, "Set1")
)
# Horizontal reference line at the overall mean rate, with its value printed
# near the left edge of the plot.
overall_rate <- mean(murders$rate)
abline(h = overall_rate, col = "blue", lwd = 2)
text(x = 0.5, y = 3, labels = paste("", round(overall_rate, 2)), col = "blue")
# Two-panel figure: scatter plot (left) and boxplot by region (right) ----
# par() returns the previous settings; keep them so the layout can be restored
# once both panels are drawn.
old_par <- par(mfrow = c(1, 2))

# Left panel: total murders vs population, coloured by region level code.
plot(
  x, y,
  main = "Total vs Population",
  col = murders$region,
  pch = 20,
  xlab = "Population/10^6",
  ylab = "Total murders"
)
# Fill colours follow the level codes 1..k so they line up with the level
# labels; unique() would give first-appearance order and mismatch the colours.
legend(
  "bottomright",
  legend = levels(murders$region),
  fill = seq_along(levels(murders$region)),
  cex = 0.75
)

# Right panel: rate by region with the overall mean as a reference line.
boxplot(
  rate ~ region,
  data = murders,
  col = brewer.pal(9, "Set1"),
  outline = FALSE,
  ylim = c(0, 6),
  main = "Boxplot"
)
panel_mean <- mean(murders$rate)
abline(h = panel_mean, col = "blue", lwd = 2)
text(
  x = 0.5, y = 3,
  labels = paste("", round(panel_mean, 2)),
  col = "blue",
  cex = 0.5
)

# Restore the single-panel layout so later plots are not affected.
par(old_par)