Vamos a utilizar la biblioteca de tidyverse para nuestro procesamiento de datos
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Report which tidyverse version is installed in this session.
utils::packageVersion(pkg = "tidyverse")
## [1] '2.0.0'
Empezamos importando las estimaciones de población a mitad de año de CONAPO
# Load the population estimates CSV into `pob` (column types are guessed by
# readr).
pob <- read_csv(file = "data/00_Pob_Mitad_1950_2070.csv")
## Rows: 711040 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): ENTIDAD, SEXO
## dbl (3): ANIO, EDAD, POBLACION
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Convertimos el dataframe a formato tidy
# Read the marriages-by-state table (wide: one `Entidad` column plus one
# column per year) and reshape it to long format, one row per entity-year.
#
# Every column other than `Entidad` is a year, so the year columns are
# selected by name (`-Entidad`) instead of by a hard-coded position range;
# the reshape keeps working when another year is appended to the CSV.
# `year` is taken from the column headers and therefore stays a character
# column; convert it where a numeric year is needed.
mar_state <- read_csv("data/01_marriages_by_state.csv") %>%
  pivot_longer(
    cols = -Entidad,
    names_to = "year",
    values_to = "marriages"
  ) %>%
  rename(entidad = Entidad)
## Rows: 33 Columns: 33
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Entidad
## num (32): 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Line chart of yearly marriages, one coloured line per state.
# The national aggregate row is dropped so it does not dwarf the states.
# `year` comes from the CSV header as text; it is converted to a number so the
# x axis is a continuous time axis with sparse, readable ticks instead of one
# categorical tick label per year.
mar_state %>%
  filter(entidad != "Estados Unidos Mexicanos") %>%
  mutate(year = as.numeric(year)) %>%
  # filter(year >= 2020) %>%
  ggplot(aes(x = year, y = marriages, group = entidad, color = entidad)) +
  geom_line(linewidth = 0.8, alpha = 0.7) +
  labs(
    title = "Matrimonios por año y entidad",
    x = "Año",
    y = "Matrimonios",
    color = "Entidad"
  ) +
  theme_minimal()
# Small multiples: one panel per state showing marriages per year.
# The national aggregate row is excluded.
# `year` is stored as text, so it is converted to a number and drawn on a
# continuous axis; the numeric breaks below then match the axis type instead
# of relying on numeric breaks being coerced against character levels of a
# discrete scale.
mar_state %>%
  filter(entidad != "Estados Unidos Mexicanos") %>%
  mutate(year = as.numeric(year)) %>%
  ggplot(aes(x = year, y = marriages, group = entidad)) +
  geom_line() +
  facet_wrap(~ entidad, ncol = 8) +
  labs(
    title = "Matrimonios por año (1993-2024)",
    x = "Año",
    y = "Matrimonios"
  ) +
  # Only three ticks per panel so the labels stay legible in narrow facets.
  scale_x_continuous(breaks = c(2000, 2010, 2020)) +
  theme_minimal()
# Total population per year: POBLACION summed over every row of `pob` that
# shares the same year. The year column is renamed to `year` so it lines up
# with the `year` column of the marriages table.
# NOTE(review): assumes `pob` holds no pre-aggregated (e.g. national) rows;
# if it does, they would be double counted here -- confirm against the CSV.
pob_tot <- pob %>%
  rename(year = ANIO) %>%
  group_by(year) %>%
  summarise(total_pob = sum(POBLACION))
# National marriage rate per year: marriages per 1,000 inhabitants.
# Keeps only the national aggregate row, converts the text `year` to a number
# so it is type-compatible with `pob_tot$year`, and joins the yearly total
# population. The join key is stated explicitly so the join cannot silently
# pick up extra columns that happen to share a name in both tables.
mar_state %>%
  filter(entidad == "Estados Unidos Mexicanos") %>%
  mutate(year = as.numeric(year)) %>%
  inner_join(pob_tot, by = join_by(year)) %>%
  mutate(marriage_rate = marriages / total_pob * 1000) %>%
  ggplot(aes(x = year, y = marriage_rate)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Tasa de Matrimonios por año (1993-2024)",
    x = "Año",
    y = "Tasa de Matrimonios"
  ) +
  theme_minimal()
## Joining with `by = join_by(year)`
# Read the marriages-by-sex table and keep only its first five columns; the
# console output reports a sixth, unnamed column (`...6`), which is discarded.
mar_by_sex <- "data/04_marriage_by_sex.csv" %>%
  read_csv() %>%
  select(1:5)
## New names:
## Rows: 97 Columns: 6
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (2): Entidad, Sexo num (3): Total, Hombre, Mujer lgl (1): ...6
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...6`
Seleccionamos los matrimonios entre personas del mismo sexo
# Per-entity count taken from the `Hombre` column of the rows whose `Sexo` is
# "Hombre"; the "Total" entity row is left out.
# NOTE(review): presumably the table is a cross-tab of the spouses' sexes, so
# this is the male-male marriage count -- confirm against the data source.
mat_men <- mar_by_sex %>%
  filter(Entidad != "Total", Sexo == "Hombre") %>%
  select(Entidad, Hombre)
# Per-entity count taken from the `Mujer` column of the rows whose `Sexo` is
# "Mujer"; the "Total" entity row is left out.
# NOTE(review): presumably the table is a cross-tab of the spouses' sexes, so
# this is the female-female marriage count -- confirm against the data source.
mat_wom <- mar_by_sex %>%
  filter(Entidad != "Total", Sexo == "Mujer") %>%
  select(Entidad, Mujer)
# Combine both per-entity tables into one row per entity with the columns
# `Hombre` and `Mujer`. The key is named explicitly so the join does not
# depend on whichever column names the two tables happen to share.
mat_homo <- inner_join(mat_men, mat_wom, by = join_by(Entidad))
## Joining with `by = join_by(Entidad)`
Graficando
# Horizontal stacked bar chart of the `Hombre` and `Mujer` counts per entity.
# The two count columns are stacked into long format so `Sexo` can drive the
# fill colour; entities are ordered by their combined count (sum of both).
# The y-axis label previously contained mis-encoded characters and is
# restored to "Número de matrimonios".
mat_homo %>%
  pivot_longer(
    cols = c(Hombre, Mujer),
    names_to = "Sexo",
    values_to = "Matrimonios"
  ) %>%
  ggplot(aes(
    x = reorder(Entidad, Matrimonios, sum),
    y = Matrimonios,
    fill = Sexo
  )) +
  geom_col() +
  # Flip so the entity names are read horizontally along the vertical axis.
  coord_flip() +
  labs(
    title = "Matrimonios entre personas del mismo sexo\npor entidad federativa (2024)",
    x = "Entidad",
    y = "Número de matrimonios",
    fill = "Sexo"
  ) +
  theme_minimal()