4_Plots

#Plots

#Variables cualitativas (categóricas): ordinales y nominales

#Variables cuantitativas: continuas y discretas

#Clasificar las variables de su dataset

library(dslabs)
library(dplyr)

## 
## Adjuntando el paquete: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(viridisLite)
library(RColorBrewer)

# Load the `murders` data set into the workspace
# (presumably shipped with dslabs, which is attached above -- confirm).
data("murders")

# Dataset description
# NOTE: the two bare string literals below are top-level expressions, not
# comments, so R evaluates and auto-prints them; the `## [1] ...` lines that
# follow each one are that printed output.

# Nominal qualitative variables:
"
  -State
  -Abb
  -Region
  "

## [1] "\n  -State\n  -Abb\n  -Region\n  "

# Discrete quantitative variables:
  "
  -Population
  -Total
  "

## [1] "\n  -Population\n  -Total\n  "

# Bar chart

# A bar chart needs frequencies, so first count the states in each region.
Tabla_region <- table(murders$region)
Tabla_region

## 
##     Northeast         South North Central          West 
##             9            17            12            13

# Draw the chart

# One pastel colour per region, used for both the fill and the outline of the
# bars (a single named colour such as "blue" would also be accepted).
colores_region <- brewer.pal(4, "Pastel2")

barplot(
  height = Tabla_region,                  # counts per region
  col    = colores_region,                # bar fill
  border = colores_region,                # bar outline
  ylim   = c(0, 20),                      # range of the Y (frequency) axis
  xlab   = "Regiones",                    # X-axis title
  ylab   = "Frecuencia",                  # Y-axis title
  main   = "Grafico de barras (region)"   # main title
)

# Horizontal guide lines only: nx = NA suppresses the vertical lines and
# ny = NULL aligns the horizontal ones with the Y-axis tick marks.
grid(nx = NA, ny = NULL, col = "lightgray", lty = "dotted", lwd = par("lwd"))

# Pie chart

# Share of states in each region, as a percentage rounded to one decimal.
porcentajes <- round(Tabla_region * 100 / sum(Tabla_region), 1)
porcentajes

## 
##     Northeast         South North Central          West 
##          17.6          33.3          23.5          25.5

# Build the palette once so the slices and the legend always use the same
# colours in the same order.
colores_sectores <- brewer.pal(4, "Accent")

pie(
  porcentajes,
  labels = paste(porcentajes, "%"),   # slice labels, e.g. "17.6 %"
  main = "Regiones",                  # title (spelling fixed: was "Regiónes")
  col = colores_sectores,
  border = colores_sectores
)

# The slices are labelled with percentages only, so the legend maps each
# colour back to its region name.
legend("bottomright", legend = names(porcentajes), fill = colores_sectores)

# Quantitative variables

# Central-tendency summaries of the state populations.
media <- mean(murders$population)
mediana <- median(murders$population)

# Mode: tabulate the values, sort the negated counts so the most frequent
# value comes first, and convert its label back to a number.
# NOTE(review): if every population value is distinct, all counts tie and the
# result is just whichever value sorts first -- confirm a mode is meaningful.
moda <- local({
  frecuencias <- table(murders$population)
  mas_frecuente <- names(sort(-frecuencias))[1]
  as.numeric(mas_frecuente)
})

# Frequency histogram of the state populations; the number of bins is chosen
# with Sturges' rule and the Y axis is fixed to 0-30.
with(
  murders,
  hist(
    population,
    breaks = "Sturges",
    col    = brewer.pal(8, "Pastel2"),   # bar fill colours
    border = brewer.pal(8, "Set1"),      # bar outline colours
    ylim   = c(0, 30),
    xlab   = "Population in US",
    main   = "Histogram of population"
  )
)

# Same histogram on the density scale (bar areas sum to 1) so that a kernel
# density estimate can be drawn on top of it.
hist(
  x      = murders$population,
  breaks = "Sturges",
  freq   = FALSE,                        # density scale rather than counts
  col    = brewer.pal(8, "Pastel2"),     # bar fill colours
  border = brewer.pal(4, "Set1"),        # bar outline colours
  xlab   = "Population in US",
  main   = "Histogram of population"
)

# Overlay the kernel density estimate as a solid black curve.
lines(density(murders$population), col = "black", lwd = 2)
     

     
     #Interpretar los gráficos
     
     #Es una distribución asimétrica hacia la derecha (sesgo positivo): la cola larga corresponde a los estados más poblados, por lo que el promedio es mayor que la mediana

     
     # Mark the mean and the median on the current plot with dashed vertical
     # lines (the mode is not drawn). abline() recycles `col` across the
     # values in `v`, so the mean is red and the median is green.
     abline(v = c(media, mediana), col = c("red", "green"), lwd = 2, lty = 2)

     # Legend entries use the same colours, width and dash pattern as the lines.
     legend(
       "topright",
       legend = c("Media", "Mediana"),
       col = c("red", "green"),
       lwd = 2,
       lty = 2
     )

4_Plots

Camilo Valencia

2024-09-04