Título

## Cargar librerías

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(dslabs)
library(viridisLite)
library(RColorBrewer)
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.1     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

## Install RColorBrewer only when it is not available yet. Calling
## install.packages() unconditionally repeats the installation attempt on
## every run and, when the package is already attached, only produces a
## "package is in use and will not be installed" warning.
if (!requireNamespace("RColorBrewer", quietly = TRUE)) {
  install.packages("RColorBrewer")
}

library(RColorBrewer)



## Open the help pages of the base-graphics functions used in this script
?barplot

## starting httpd help server ... done

?pie
?legend
?text

Variables cualitativas

Son variables categóricas y se dividen en ordinales y nominales.

Variables cuantitativas

Discretas (enteros, conteo) y continuas (decimales, medición)

## Load the `murders` data set, naming the package that provides it
data("murders", package = "dslabs")

Diagrama de barras

## `region` is a nominal qualitative variable; list its categories
levels(murders$region)

## [1] "Northeast"     "South"         "North Central" "West"

## Frequency table: number of rows of `murders` in each region
tabla <- table(murders$region)
print(tabla)

## 
##     Northeast         South North Central          West 
##             9            17            12            13

## Bar chart of the frequencies, one colour per region, with a legend
barplot(
  height = tabla,
  col = viridis(4),
  ylim = c(0, 20),
  xlab = "Regiones",
  ylab = "Frecuencia",
  main = "Diagrama de barras",
  legend.text = names(tabla)
)

## Exercise: write the value above each bar

## The 4 passed to the colour function is the number of colours requested

## Basic pie chart of the regional frequencies
pie(tabla, col = viridis(4), labels = levels(murders$region))

## pie() and legend() below have to be run together (select both calls)
paleta_torta <- rocket(4)
pie(
  tabla,
  main = "Diagrama de torta",
  labels = levels(murders$region),
  col = paleta_torta,
  border = paleta_torta
)
legend(
  "topright",
  legend = names(tabla),
  fill = paleta_torta,
  cex = 0.75
)

## Homework: create a column with the percentages
## and show those percentages on the pie chart

Requiero un orden específico

## Rebuild the factor with an explicit level order; the table and the bars
## then follow that order
orden_regiones <- c("South", "West", "North Central", "Northeast")
region <- factor(murders$region, levels = orden_regiones)
tabla1 <- table(region)
barplot(height = tabla1, xlab = "Regiones", ylab = "Frecuencia")

Histograma

## Add `rate`: total per 100,000 of population.
## The pipe already supplies `murders` as the data argument of mutate(), so
## the data frame must not be passed a second time inside the call.
murders <- murders %>%
  mutate(rate = total / population * 100000)
head(murders)

##        state abb region population total     rate
## 1    Alabama  AL  South    4779736   135 2.824424
## 2     Alaska  AK   West     710231    19 2.675186
## 3    Arizona  AZ   West    6392017   232 3.629527
## 4   Arkansas  AR  South    2915918    93 3.189390
## 5 California  CA   West   37253956  1257 3.374138
## 6   Colorado  CO   West    5029196    65 1.292453

## Histogram of the murder rate with fixed axis limits
hist(
  x = murders$rate,
  col = brewer.pal(4, "Paired"),
  xlim = c(0, 25),
  ylim = c(0, 50),
  xlab = "Tasa de asesinatos",
  ylab = "Frecuencia",
  main = "Histograma tasa de asesinatos"
)

##axis()

## `breaks` is the grouping parameter (natural breaks)
## Change the number of divisions of the X axis

## Row position of the highest rate
with(murders, which.max(rate))

## [1] 9

## Row position of the lowest rate
with(murders, which.min(rate))

## [1] 46

Boxplot

## Select and run all the lines of this block together

media_tasa <- mean(murders$rate)

boxplot(
  murders$rate,
  main = "Boxplot",
  ylab = "Tasa de asesinatos",
  col = "Blue",
  outline = TRUE,
  ylim = c(0, 6)
)

## Add the mean

## The single box is drawn at x = 1, so the mean marker goes there
points(x = 1, y = media_tasa, pch = 20, col = "Black")
text(x = 1.1, y = 2.3, labels = paste(" ", round(media_tasa, 2)))

Título

## Cuando la variable es cualitativa siempre debe trabajarse como FACTOR

## Identificar el tipo de variable a trabajar con mi BD
## Generar el gráfico a partir de estos datos

##midata <- read.csv("co_properties.csv")

#Diagrama de dispersion total vs. population ## 20240806 - Clase 07

## Scatter plot data: population in millions (x) against `total` (y)
x <- murders$population / 10^6
y <- murders$total

## Points are coloured by the integer code of each region level.
## levels() provides the legend labels; `fill` must use the codes
## 1..nlevels in that same order. unique(murders$region) follows the order
## in which regions first appear in the data, which pairs the labels with
## the wrong colours.
## The y axis shows `total`, not the population, so it is labelled "Total".
plot(x, y,
     main = "Total vs. Population",
     col = murders$region,
     pch = 20,
     xlab = "population/10^6",
     ylab = "Total")
legend("bottomright",
       legend = levels(murders$region),
       fill = seq_along(levels(murders$region)),
       cex = 0.75)

Boxplot vs. Region

## outline = FALSE removes the outliers from the plot
boxplot(rate ~ region,
        data = murders,
        col = brewer.pal(9, "Set1"),
        outline = FALSE,
        ylim = c(0, 6),
        main = "Boxplot")

## Horizontal reference line at the overall mean rate, labelled with its value
media_global <- mean(murders$rate)
abline(h = media_global, col = "blue", lwd = 1)
## round() takes an integer number of decimals; the previous value 1.5 was
## not one. Two decimals matches the mean label of the single boxplot.
text(x = 0.5,
     y = 3,
     labels = paste("", round(media_global, 2)),
     col = "blue")

Varios gráficos

## mfrow = c(rows, columns) sets how many plots share the device.
## par() returns the previous settings; keep them so they can be restored
## once both panels are drawn.
par_previo <- par(mfrow = c(1, 2))

## Plot 1: scatter plot of `y` against `x`, coloured by region.
## The legend swatches use the level codes 1..nlevels so they match the point
## colours; unique(murders$region) follows first-appearance order and would
## mislabel them. The y axis shows the totals, hence the "Total" label.
plot(x, y,
     main = "Total vs. Population",
     col = murders$region,
     pch = 20,
     xlab = "population/10^6",
     ylab = "Total")
legend("bottomright",
       legend = levels(murders$region),
       fill = seq_along(levels(murders$region)),
       cex = 0.75)

## Plot 2: rate by region without outliers, plus the overall mean as a line
boxplot(rate ~ region,
        data = murders,
        col = brewer.pal(9, "Set1"),
        outline = FALSE,
        ylim = c(0, 6),
        main = "Boxplot")
media_paneles <- mean(murders$rate)
abline(h = media_paneles, col = "blue", lwd = 1)
## round() takes an integer number of decimals; 1.5 was not a valid choice
text(x = 0.5,
     y = 3,
     labels = paste("", round(media_paneles, 2)),
     col = "blue",
     cex = 0.25)

## Restore the layout that was active before this section
par(par_previo)

20240801 - Clase 06

Andrea Zabala Quimbayo

2024-08-01

Título

Variables cualitativas

Son variables categóricas y se dividen en ordinales y nominales.

Variables cuantitativas

Discretas (enteros, conteo) y continuas (decimales, medición)

Diagrama de barras

Requiero un orden específico

Histograma

Boxplot

Título

Boxplot vs. Region

Varios gráficos

Título