# Variables cualitativas: variables categóricas; se dividen en dos: ordinales y nominales.
# Variables cuantitativas: se expresan numéricamente; se dividen en discretas y continuas.

#paquetes

library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(dslabs)
library(viridisLite)
library(RColorBrewer)
library(ggplot2)
data("murders")

# Bar chart ----

# `region` is a nominal qualitative variable; list its categories.
levels(murders$region)
## [1] "Northeast"     "South"         "North Central" "West"

# Frequency table: number of observations in each region.
tabla <- table(murders$region)
tabla
## 
##     Northeast         South North Central          West 
##             9            17            12            13

# One bar per region; the y axis is fixed to 0-20.
barplot(
  height = tabla,
  col = "blue",
  ylim = c(0, 20),
  xlab = "Regiones",
  ylab = "Frecuencia",
  main = "Gráfico de barras"
)

#Poner el valor sobre cada columna

# Pie chart ----
# Tasks: 1. show the value on top of each column;
#        2. show the percentages on the chart itself.

# Each slice is labelled with its share of the total (in percent, rounded to
# 3 decimals); the legend maps the slice colours back to the region names.
pie(
  x = tabla,
  labels = paste(round(tabla * 100 / sum(tabla), 3), "%"),
  col = rainbow(4),
  main = "Diagrama circular"
)
legend(
  "bottomright",
  legend = names(tabla),
  fill = rainbow(4),
  cex = 0.75
)

# Ordinal variables, which need a specific category order ----

# `region` is nominal; re-levelling the factor is how the categories are
# forced into a chosen order when one is wanted.
region <- factor(
  murders$region,
  levels = c("South", "West", "North Central", "Northeast")
)

# The bars follow the level order defined above.
tabla1 <- table(region)
barplot(height = tabla1)

# Histogram ----
# Task: 1. make the x axis show ticks one unit apart.

# Murder rate per 100,000 inhabitants. The data frame already arrives through
# the pipe, so it is not passed to mutate() a second time (an extra unnamed
# data frame argument would be spliced back over every existing column).
murders <- murders %>%
  mutate(rate = total / population * 100000)

# xaxt = "n" suppresses the default x axis so that the custom axis drawn below
# is the only one on the plot (otherwise both axes are drawn on top of each
# other).
hist(
  murders$rate,
  main = "Histograma de la tasa de asesinatos en EEUU",
  xlab = "Tasa de asesinatos",
  xlim = c(0, 20),
  ylim = c(0, 25),
  col = brewer.pal(3, "Paired"),
  border = brewer.pal(3, "Paired"),
  labels = TRUE,
  xaxt = "n"
)

# axis() customises the plot axes: side 1 is the x axis, with one tick per
# unit across the whole 0-20 range set by xlim.
axis(1, at = 0:20)

# Boxplot ----

# Single box for the murder rate; outliers are hidden and the y axis is
# limited to 0-6.
boxplot(
  murders$rate,
  col = "Blue",
  ylab = "Tasa de asesinatos",
  outline = FALSE,
  main = "Boxplot",
  ylim = c(0, 6)
)

# Add the mean ----
# The point is drawn at x = 1 (stated explicitly instead of relying on the
# implicit index used when points() receives a single vector). The numeric
# label is placed at a fixed position (x = 1.1, y = 2.3), not at the mean.
mean_rate <- mean(murders$rate)
points(1, mean_rate, col = "black", pch = 20)
text(x = 1.1, y = 2.3, labels = paste(" ", round(mean_rate, 2)))

# Overlay the values returned by summary() on the plot.
# `legend` is not a graphical parameter of points(), so it is not passed here;
# the text() call below is what labels the points.
# Values above the y limit of 6 (see Max. in the output below) fall outside
# the plotting region.
summary_stats <- summary(murders$rate)
points(
  rep(1, length(summary_stats)),
  summary_stats,
  col = "yellow",
  pch = 20
)

# Label each summary statistic next to its point.
text(
  rep(1.1, length(summary_stats)),
  summary_stats,
  labels = names(summary_stats),
  col = "grey",
  cex = 0.8
)

summary_stats
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.3196  1.2526  2.6871  2.7791  3.3861 16.4528

# Comparing quantitative and qualitative variables ----

## Scatter plot: total vs population

x <- murders$population / 10^6  # population in millions
y <- murders$total

# Points are coloured by region: the factor is used as its integer codes,
# which index the current colour palette. The y axis shows `total`, so it is
# labelled accordingly (it was previously mislabelled "Population").
plot(
  x, y,
  main = "Total vs Population",
  col = murders$region,
  pch = 20,
  xlab = "Population/10^6",
  ylab = "Total"
)

# The legend labels are in level order, so the fill colours must be the level
# codes 1..k in that same order. unique(murders$region) returns the regions in
# order of first appearance in the data, which pairs labels with the wrong
# colours.
legend(
  "bottomright",
  legend = levels(murders$region),
  fill = seq_along(levels(murders$region)),
  cex = 0.75
)

# Boxplot: rate by region ----

# One box per region; outliers are hidden and the y axis is limited to 0-6.
boxplot(
  rate ~ region,
  data = murders,
  main = "Boxplot",
  ylim = c(0, 6),
  outline = FALSE,
  col = brewer.pal(9, "Set1")
)

# Horizontal reference line at the overall mean rate, with the rounded value
# printed at a fixed position near the left edge.
abline(h = mean(murders$rate), lwd = 2, col = "blue")
text(
  x = 0.5,
  y = 3,
  labels = paste("", round(mean(murders$rate), 2)),
  col = "blue"
)

# Show the scatter plot and the boxplot side by side (1 row, 2 columns).
# NOTE: the 1x2 layout stays in effect for later plots until par(mfrow = ...)
# is set again.
par(mfrow = c(1, 2))

# Left panel: total vs population (in millions), coloured by region. The y
# axis shows `total`, so it is labelled accordingly (it was previously
# mislabelled "Population").
plot(
  x, y,
  main = "Total vs Population",
  col = murders$region,
  pch = 20,
  xlab = "Population/10^6",
  ylab = "Total"
)

# Legend colours follow the factor's level codes 1..k so they line up with the
# labels from levels(); unique(murders$region) is in order of first appearance
# and pairs labels with the wrong colours.
legend(
  "bottomright",
  legend = levels(murders$region),
  fill = seq_along(levels(murders$region)),
  cex = 0.75
)

# Right panel: rate by region, with the overall mean as a reference line and
# its rounded value printed in a smaller font.
boxplot(
  rate ~ region,
  data = murders,
  col = brewer.pal(9, "Set1"),
  outline = FALSE,
  ylim = c(0, 6),
  main = "Boxplot"
)
abline(h = mean(murders$rate), col = "blue", lwd = 2)
text(
  x = 0.5,
  y = 3,
  labels = paste("", round(mean(murders$rate), 2)),
  col = "blue",
  cex = 0.5
)