Primero, cargamos las librerías necesarias para analizar y visualizar los datos:
library(tidyverse)
library(ggplot2)
library(ggthemes)
library(gridExtra)
library(ggrepel)
library(readxl)
Luego cargamos la base de datos de inseguridad de 21 ciudades a nivel mundial.
# Read the city-level insecurity workbook, coerce every indicator column to
# numeric in a single pass, and print the structure of the result.
# NOTE(review): the path is user-specific; confirm it resolves on the machine
# that runs this script.
Inseguridad_ciudades <- read_excel("~/IIB/Inseguridad ciudades.xlsx") %>%
  mutate(
    across(
      c(
        Ano, Tasa_hom, Tasa_hur, Tasa_PP, Tasa_LP,
        Extor, Sec, Perp_ins, Puesto_ICCA, ICCA
      ),
      as.numeric
    )
  )
str(Inseguridad_ciudades)
## tibble [57 × 13] (S3: tbl_df/tbl/data.frame)
## $ Ciudad : chr [1:57] "Rio de Janeiro (Regiao Metropolitana), Brasil" "Rio de Janeiro (Regiao Metropolitana), Brasil" "Sao Paulo, Brasil" "Sao Paulo, Brasil" ...
## $ Region : chr [1:57] "Latinoamerica" "Latinoamerica" "Latinoamerica" "Latinoamerica" ...
## $ Ano : num [1:57] 2022 2023 2022 2023 2021 ...
## $ Tasa_hom : num [1:57] 15.7 16.9 16.3 16.5 3.8 ...
## $ Tasa_hur : num [1:57] 1026 1050 2519 2548 1853 ...
## $ Tasa_PP : num [1:57] 201 218 990 1532 NA ...
## $ Tasa_LP : num [1:57] 313 330 589 651 268 ...
## $ Extor : num [1:57] 13.99 19.5 0.15 0.35 NA ...
## $ Sec : num [1:57] 1.29 0.77 0.1 0.03 NA NA NA 0.29 0.31 0.58 ...
## $ Perp_ins : num [1:57] 0.86 0.86 NA NA NA NA NA 0.832 0.801 NA ...
## $ ICCA : num [1:57] 29 39 27 48 37 41 51 53 51 17 ...
## $ Puesto_ICCA: num [1:57] 66 57 74 40 48 52 33 29 33 118 ...
## $ Poblacion : num [1:57] 13728000 13728000 22620000 22620000 8242459 ...
Ahora filtramos los datos por año para crear la primera visualización:
# One table per year, used by the yearly plots.
# `Ano` is coerced to numeric when the data is loaded, so compare it with
# numbers rather than relying on implicit number-to-string coercion.
Ins22 <- Inseguridad_ciudades %>% filter(Ano == 2022)
Ins23 <- Inseguridad_ciudades %>% filter(Ano == 2023)
# Bar chart of the homicide rate (Tasa_hom) per city in Ins22, with bars
# sorted by rate and filled by region.
Ins22 %>%
  ggplot(aes(x = reorder(Ciudad, Tasa_hom), y = Tasa_hom, fill = Region)) +
  geom_col() +
  labs(
    x = "Ciudad",
    y = "Tasa x 100.000 habs",
    title = "Tasa de Homicidios por Ciudad - 2022"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 18, face = "bold"),
    axis.title = element_text(face = "bold"),
    # Tilt the city names so they do not collide along the x axis.
    axis.text.x = element_text(color = "black", angle = 45, hjust = 1)
  )
# Scatter plot of the homicide rate (Tasa_hom) against Puesto_ICCA for the
# cities in Ins22: colour = region, point size = population, with city labels
# and a linear trend line.
ggplot(Ins22, aes(x = Tasa_hom, y = Puesto_ICCA)) +
  geom_point(aes(color = Region, size = Poblacion), alpha = 0.7) +
  scale_size_continuous(range = c(2, 12)) +
  labs(
    title = "Tasa de Homicidios vs Puesto ICCA",
    x = "Tasa de Homicidios",
    y = "Puesto ICCA",
    size = "Población",
    color = "Región"
  ) +
  theme_minimal() +
  theme(
    axis.title = element_text(face = "bold"),
    plot.title = element_text(face = "bold", size = 18)
  ) +
  # max.overlaps = Inf keeps every city label; with the default limit ggrepel
  # silently drops the labels it cannot place without overlap.
  geom_text_repel(
    aes(label = Ciudad),
    size = 2.5, color = "#5A5A5A",
    box.padding = 0.7, point.padding = 0.9,
    max.overlaps = Inf
  ) +
  # No alpha mapping: a constant inside aes() is treated as data and only adds
  # a spurious "alpha" legend entry.
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 13 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Scatter plot of the homicide rate (Tasa_hom) against the ICCA column for the
# cities in Ins22: colour = region, point size = population, with city labels
# and a linear trend line.
ggplot(Ins22, aes(x = Tasa_hom, y = ICCA)) +
  geom_point(aes(color = Region, size = Poblacion), alpha = 0.7) +
  scale_size_continuous(range = c(2, 12)) +
  labs(
    title = "Tasa de Homicidios vs Desempeño MICE",
    x = "Tasa de Homicidios",
    y = "Número de eventos MICE en 2022",
    size = "Población",
    color = "Región"
  ) +
  theme_minimal() +
  theme(
    axis.title = element_text(face = "bold"),
    plot.title = element_text(face = "bold", size = 18)
  ) +
  # max.overlaps = Inf keeps every city label; with the default limit ggrepel
  # silently drops the labels it cannot place without overlap.
  geom_text_repel(
    aes(label = Ciudad),
    size = 2.5, color = "#5A5A5A",
    box.padding = 0.7, point.padding = 0.9,
    max.overlaps = Inf
  ) +
  # No alpha mapping: a constant inside aes() is treated as data and only adds
  # a spurious "alpha" legend entry.
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 13 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Bar chart of the theft rate (Tasa_hur) per city in Ins22, with bars sorted
# by rate and filled by region.
Ins22 %>%
  ggplot(aes(x = reorder(Ciudad, Tasa_hur), y = Tasa_hur, fill = Region)) +
  geom_col() +
  labs(
    x = "Ciudad",
    y = "Tasa x 100.000 habs",
    title = "Tasa de Hurtos por Ciudad - 2022"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 18, face = "bold"),
    axis.title = element_text(face = "bold"),
    # Tilt the city names so they do not collide along the x axis.
    axis.text.x = element_text(color = "black", angle = 45, hjust = 1)
  )
# Scatter plot of the theft rate (Tasa_hur) against Puesto_ICCA for the cities
# in Ins22: colour = region, point size = population, with city labels and a
# linear trend line.
ggplot(Ins22, aes(x = Tasa_hur, y = Puesto_ICCA)) +
  geom_point(aes(color = Region, size = Poblacion), alpha = 0.7) +
  scale_size_continuous(range = c(2, 12)) +
  labs(
    title = "Tasa de Hurtos vs Puesto ICCA",
    x = "Tasa de Hurtos",
    y = "Puesto ICCA",
    size = "Población",
    color = "Región"
  ) +
  theme_minimal() +
  theme(
    axis.title = element_text(face = "bold"),
    plot.title = element_text(face = "bold", size = 18)
  ) +
  # max.overlaps = Inf keeps every city label; with the default limit ggrepel
  # silently drops the labels it cannot place without overlap.
  geom_text_repel(
    aes(label = Ciudad),
    size = 2.5, color = "#5A5A5A",
    box.padding = 0.7, point.padding = 0.9,
    max.overlaps = Inf
  ) +
  # No alpha mapping: a constant inside aes() is treated as data and only adds
  # a spurious "alpha" legend entry.
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 14 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Scatter plot of the theft rate (Tasa_hur) against the ICCA column for the
# cities in Ins22: colour = region, point size = population, with city labels
# and a linear trend line.
ggplot(Ins22, aes(x = Tasa_hur, y = ICCA)) +
  geom_point(aes(color = Region, size = Poblacion), alpha = 0.7) +
  scale_size_continuous(range = c(2, 12)) +
  labs(
    title = "Tasa de Hurtos vs Desempeño MICE",
    x = "Tasa de Hurtos",
    y = "Número de eventos MICE en 2022",
    size = "Población",
    color = "Región"
  ) +
  theme_minimal() +
  theme(
    axis.title = element_text(face = "bold"),
    plot.title = element_text(face = "bold", size = 18)
  ) +
  # max.overlaps = Inf keeps every city label; with the default limit ggrepel
  # silently drops the labels it cannot place without overlap.
  geom_text_repel(
    aes(label = Ciudad),
    size = 2.5, color = "#5A5A5A",
    box.padding = 0.7, point.padding = 0.9,
    max.overlaps = Inf
  ) +
  # No alpha mapping: a constant inside aes() is treated as data and only adds
  # a spurious "alpha" legend entry.
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 14 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Bar chart of the Tasa_PP rate per city in Ins22, with bars sorted by rate
# and filled by region.
Ins22 %>%
  ggplot(aes(x = reorder(Ciudad, Tasa_PP), y = Tasa_PP, fill = Region)) +
  geom_col() +
  labs(
    x = "Ciudad",
    y = "Tasa x 100.000 habs",
    title = "Tasa de Hurtos a Personas por Ciudad - 2022"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 18, face = "bold"),
    axis.title = element_text(face = "bold"),
    # Tilt the city names so they do not collide along the x axis.
    axis.text.x = element_text(color = "black", angle = 45, hjust = 1)
  )
# Scatter plot of the Tasa_PP rate against Puesto_ICCA for the cities in
# Ins22: colour = region, point size = population, with city labels and a
# linear trend line.
ggplot(Ins22, aes(x = Tasa_PP, y = Puesto_ICCA)) +
  geom_point(aes(color = Region, size = Poblacion), alpha = 0.7) +
  scale_size_continuous(range = c(2, 12)) +
  labs(
    title = "Tasa de Hurtos a Personas vs Puesto ICCA",
    x = "Tasa de Hurtos a Personas",
    y = "Puesto ICCA",
    size = "Población",
    color = "Región"
  ) +
  theme_minimal() +
  theme(
    axis.title = element_text(face = "bold"),
    plot.title = element_text(face = "bold", size = 18)
  ) +
  # max.overlaps = Inf keeps every city label; with the default limit ggrepel
  # silently drops the labels it cannot place without overlap.
  geom_text_repel(
    aes(label = Ciudad),
    size = 2.5, color = "#5A5A5A",
    box.padding = 0.7, point.padding = 0.9,
    max.overlaps = Inf
  ) +
  # No alpha mapping: a constant inside aes() is treated as data and only adds
  # a spurious "alpha" legend entry.
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 15 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Scatter plot of the Tasa_PP rate against the ICCA column for the cities in
# Ins22: colour = region, point size = population, with city labels and a
# linear trend line.
ggplot(Ins22, aes(x = Tasa_PP, y = ICCA)) +
  geom_point(aes(color = Region, size = Poblacion), alpha = 0.7) +
  scale_size_continuous(range = c(2, 12)) +
  labs(
    title = "Tasa de Hurtos a Personas vs Desempeño MICE",
    x = "Tasa de Hurtos a Personas",
    y = "Número de eventos MICE en 2022",
    size = "Población",
    color = "Región"
  ) +
  theme_minimal() +
  theme(
    axis.title = element_text(face = "bold"),
    plot.title = element_text(face = "bold", size = 18)
  ) +
  # max.overlaps = Inf keeps every city label; with the default limit ggrepel
  # silently drops the labels it cannot place without overlap.
  geom_text_repel(
    aes(label = Ciudad),
    size = 2.5, color = "#5A5A5A",
    box.padding = 0.7, point.padding = 0.9,
    max.overlaps = Inf
  ) +
  geom_smooth(method = "lm", se = FALSE, alpha = 0.2)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 14 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Bar chart of the Tasa_LP rate per city in Ins22, with bars sorted by rate
# and filled by region.
Ins22 %>%
  ggplot(aes(x = reorder(Ciudad, Tasa_LP), y = Tasa_LP, fill = Region)) +
  geom_col() +
  labs(
    x = "Ciudad",
    y = "Tasa x 100.000 habs",
    title = "Tasa de Lesiones Personales por Ciudad - 2022"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 18, face = "bold"),
    axis.title = element_text(face = "bold"),
    # Tilt the city names so they do not collide along the x axis.
    axis.text.x = element_text(color = "black", angle = 45, hjust = 1)
  )
## Warning: Removed 1 rows containing missing values (`position_stack()`).
# Scatter plot of the Tasa_LP rate against Puesto_ICCA for the cities in
# Ins22: colour = region, point size = population, with city labels and a
# linear trend line.
ggplot(Ins22, aes(x = Tasa_LP, y = Puesto_ICCA)) +
  geom_point(aes(color = Region, size = Poblacion), alpha = 0.7) +
  scale_size_continuous(range = c(2, 12)) +
  labs(
    title = "Tasa de Lesiones Personales vs Puesto ICCA",
    x = "Tasa de Lesiones Personales",
    y = "Puesto ICCA",
    size = "Población",
    color = "Región"
  ) +
  theme_minimal() +
  theme(
    axis.title = element_text(face = "bold"),
    plot.title = element_text(face = "bold", size = 18)
  ) +
  # max.overlaps = Inf keeps every city label; with the default limit ggrepel
  # silently drops the labels it cannot place without overlap.
  geom_text_repel(
    aes(label = Ciudad),
    size = 2.5, color = "#5A5A5A",
    box.padding = 0.7, point.padding = 0.9,
    max.overlaps = Inf
  ) +
  geom_smooth(method = "lm", se = FALSE, alpha = 0.2)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Warning: Removed 2 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 10 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Scatter plot of the Tasa_LP rate against the ICCA column for the cities in
# Ins22: colour = region, point size = population, with city labels and a
# linear trend line.
ggplot(Ins22, aes(x = Tasa_LP, y = ICCA)) +
  geom_point(aes(color = Region, size = Poblacion), alpha = 0.7) +
  scale_size_continuous(range = c(2, 12)) +
  labs(
    title = "Tasa de Lesiones Personales vs Desempeño MICE",
    x = "Tasa de Lesiones Personales",
    y = "Número de eventos MICE en 2022",
    size = "Población",
    color = "Región"
  ) +
  theme_minimal() +
  theme(
    axis.title = element_text(face = "bold"),
    plot.title = element_text(face = "bold", size = 18)
  ) +
  # max.overlaps = Inf keeps every city label; with the default limit ggrepel
  # silently drops the labels it cannot place without overlap.
  geom_text_repel(
    aes(label = Ciudad),
    size = 2.5, color = "#5A5A5A",
    box.padding = 0.7, point.padding = 0.9,
    max.overlaps = Inf
  ) +
  geom_smooth(method = "lm", se = FALSE, alpha = 0.2)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Warning: Removed 2 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Per-region mean of each selected indicator, ignoring missing values.
# Every row of Inseguridad_ciudades feeds the means (no filter by year).
# NOTE(review): Tasa_LP and Perp_ins are not aggregated here; confirm that is
# intentional.
Inseguridad_regiones <- Inseguridad_ciudades %>%
  group_by(Region) %>%
  summarise(
    across(
      c(Tasa_hom, Tasa_hur, Tasa_PP, Extor, Sec, ICCA, Puesto_ICCA),
      ~ mean(.x, na.rm = TRUE)
    )
  )
# Reshape the regional summary to long format: one row per Region/indicator
# pair, with the indicator name in `Variable` and its mean in `Valor`.
# pivot_longer() is used because tidyr has superseded gather().
Inseguridad_regiones_long <- Inseguridad_regiones %>%
  pivot_longer(cols = -Region, names_to = "Variable", values_to = "Valor")
# Grouped bar chart: one cluster of bars per indicator, one bar per region,
# all sharing a single y axis.
Inseguridad_regiones_long %>%
  ggplot(aes(x = Variable, y = Valor, fill = Region)) +
  geom_col(position = "dodge") +
  labs(
    x = "Variable",
    y = "Promedio",
    title = "Análisis de Inseguridad y Desempeño MICE por Región"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    plot.title = element_text(size = 18, face = "bold"),
    axis.title = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
## Warning: Removed 3 rows containing missing values (`geom_bar()`).
# Grouped bar chart with one panel per indicator, so each indicator gets its
# own y scale.
ggplot(Inseguridad_regiones_long, aes(x = Variable, y = Valor, fill = Region)) +
  geom_bar(stat = "identity", position = "dodge") +
  # scales = "free" also frees the x axis. With "free_y" alone every panel
  # keeps slots for all seven indicators on x and its bars are squeezed into
  # a fraction of the panel width.
  facet_wrap(~ Variable, scales = "free", ncol = 7) +
  labs(
    title = "Análisis de Inseguridad y Desempeño MICE por Región",
    y = "Promedio",
    x = "Variable"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title = element_text(face = "bold"),
    plot.title = element_text(face = "bold", size = 18),
    legend.title = element_blank(),
    legend.position = "bottom"
  )
## Warning: Removed 3 rows containing missing values (`geom_bar()`).