## Cargar librerías
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(dslabs)
library(viridisLite)
library(RColorBrewer)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Install RColorBrewer only when it is not already available.
# An unconditional install.packages() call attempts a fresh install on every
# run and, when the package is already attached, only produces a
# "package is in use and will not be installed" warning.
if (!requireNamespace("RColorBrewer", quietly = TRUE)) {
  install.packages("RColorBrewer")
}
library(RColorBrewer)
# Help pages for the base-graphics functions used in this script
utils::help(topic = "barplot")
## starting httpd help server ... done
utils::help(topic = "pie")
utils::help(topic = "legend")
utils::help(topic = "text")
# Load the murders data set and list the categories of `region`
data("murders")
levels(murders[["region"]]) # nominal qualitative variable
## [1] "Northeast" "South" "North Central" "West"
# Frequency table: number of rows per region
tabla <- table(murders[["region"]])
print(tabla)
##
## Northeast South North Central West
## 9 17 12 13
# Bar chart of the frequencies, one viridis colour per region,
# with the region names shown in the legend
barplot(
  height = tabla,
  col = viridis(n = 4),
  ylim = c(0, 20),
  xlab = "Regiones",
  ylab = "Frecuencia",
  main = "Diagrama de barras",
  legend.text = names(tabla)
)
## Ejercicio escribir el valor sobre cada columna
## El número 4 en el valor del color es el número de colores que utilizo
# First pie chart: one slice per region, coloured with four viridis colours
pie(
  x = tabla,
  col = viridis(n = 4),
  labels = levels(murders[["region"]])
)
# Second pie chart plus its legend. legend() draws on the plot that is
# currently open, so pie() and legend() have to be run together.
pie(
  x = tabla,
  main = "Diagrama de torta",
  col = rocket(n = 4),
  border = rocket(n = 4),
  labels = levels(murders[["region"]])
)
legend(
  x = "topright",
  cex = 0.75,
  fill = rocket(n = 4),
  legend = names(tabla)
)
## Tarea: crear una columna con los porcentajes
## Mostrar esos % en la torta
# Rebuild the region factor with an explicit level order, which sets the
# order of the bars below
region <- factor(
  x = murders[["region"]],
  levels = c("South", "West", "North Central", "Northeast")
)
# Frequencies per region, following the new level order
tabla1 <- table(region)
barplot(
  height = tabla1,
  xlab = "Regiones",
  ylab = "Frecuencia"
)
# Add `rate`: murders per 100,000 inhabitants.
# The data frame reaches mutate() through the pipe only. The original also
# passed `murders` as an extra unnamed argument, which dplyr treats as
# "create one column per column of this data frame", rewriting every existing
# column with itself before computing `rate`.
murders <- murders %>%
  mutate(rate = total / population * 100000)
head(murders)
## state abb region population total rate
## 1 Alabama AL South 4779736 135 2.824424
## 2 Alaska AK West 710231 19 2.675186
## 3 Arizona AZ West 6392017 232 3.629527
## 4 Arkansas AR South 2915918 93 3.189390
## 5 California CA West 37253956 1257 3.374138
## 6 Colorado CO West 5029196 65 1.292453
# Histogram of the murder rate, filled with four colours from the
# "Paired" ColorBrewer palette
hist(
  x = murders[["rate"]],
  col = brewer.pal(n = 4, name = "Paired"),
  xlim = c(0, 25),
  ylim = c(0, 50),
  xlab = "Tasa de asesinatos",
  ylab = "Frecuencia",
  main = "Histograma tasa de asesinatos"
)
## axis()
# `breaks` is the hist() parameter that controls how values are grouped
# Exercise: change the number of divisions on the x axis
# Row index of the highest murder rate
which.max(murders[["rate"]])
## [1] 9
# Row index of the lowest murder rate
which.min(murders[["rate"]])
## [1] 46
# Run boxplot(), points() and text() together: the last two draw on top of
# the boxplot that is currently open.
boxplot(
  x = murders[["rate"]],
  main = "Boxplot",
  ylab = "Tasa de asesinatos",
  ylim = c(0, 6),
  outline = TRUE,
  col = "Blue"
)
# Add the mean as a dot on the single box, which is drawn at x = 1
points(x = 1, y = mean(murders[["rate"]]), pch = 20, col = "Black")
# Write the mean, rounded to two decimals, next to the box
text(
  x = 1.1,
  y = 2.3,
  labels = paste0("  ", round(mean(murders[["rate"]]), digits = 2))
)
## Cuando la variable es cualitativa siempre debe trabajarse como FACTOR
## Identificar el tipo de variable a trabajar con mi BD
## Generar el gráfico a partir de estos datos
##midata <- read.csv("co_properties.csv")
# Scatter plot: total murders vs. population ## 20240806 - Class 07
x <- murders$population / 10^6
y <- murders$total
# Colour index per point: the level code (1, 2, ...) of each state's region
region_codes <- as.integer(murders$region)
plot(x, y,
  main = "Total vs. Population",
  col = region_codes,
  pch = 20,
  xlab = "population/10^6",
  # y holds murders$total, so the axis is labelled "Total"
  # (the original label said "Population")
  ylab = "Total"
)
# levels() supplies the legend labels; fill supplies the swatch colours.
# The swatches use the level codes in level order so that each label gets the
# colour its points were drawn with. The original used unique(murders$region),
# which follows the order in which regions first appear in the data, not the
# order of levels(), so labels and swatches could be mismatched.
legend("bottomright",
  legend = levels(murders$region),
  fill = seq_along(levels(murders$region)),
  cex = 0.75
)
# Box plot of the murder rate for each region.
# outline = FALSE leaves the outliers out of the plot.
boxplot(rate ~ region,
  data = murders,
  col = brewer.pal(9, "Set1"),
  outline = FALSE,
  ylim = c(0, 6),
  main = "Boxplot"
)
# Overall mean rate, computed once and reused for the line and its label
mean_rate <- mean(murders$rate)
# Horizontal reference line at the overall mean
abline(h = mean_rate, col = "blue", lwd = 1)
# Label with the rounded mean. `digits` must be a whole number of decimal
# places; the original passed 1.5.
text(paste("", round(mean_rate, digits = 2)),
  x = 0.5,
  y = 3, col = "blue"
)
# Lay out the next plots on a grid: mfrow = c(number of rows, number of columns).
# par() returns the previous settings, which are kept so the layout can be
# restored once both panels are drawn.
old_par <- par(mfrow = c(1, 2))
# Plot 1: total murders vs. population (in millions), coloured by region
plot(x, y,
  main = "Total vs. Population",
  # level code (1, 2, ...) of each state's region, used as colour index
  col = as.integer(murders$region),
  pch = 20,
  xlab = "population/10^6",
  # y holds the murder totals, so the axis is labelled "Total"
  # (the original label said "Population")
  ylab = "Total"
)
# Swatches use the level codes in level order so each label is paired with
# the colour of its points. The original used unique(murders$region), which
# follows order of appearance in the data rather than the order of levels().
legend("bottomright",
  legend = levels(murders$region),
  fill = seq_along(levels(murders$region)),
  cex = 0.75
)
# Plot 2: murder rate by region, outliers not drawn
boxplot(rate ~ region,
  data = murders,
  col = brewer.pal(9, "Set1"),
  outline = FALSE,
  ylim = c(0, 6),
  main = "Boxplot"
)
# Overall mean rate, computed once and reused for the line and its label
mean_rate <- mean(murders$rate)
abline(h = mean_rate, col = "blue", lwd = 1)
# `digits` must be a whole number of decimal places; the original passed 1.5
text(paste("", round(mean_rate, digits = 2)),
  x = 0.5,
  y = 3, col = "blue", cex = 0.25
)
# Restore the layout that was active before this two-panel figure
par(old_par)