library(haven)
## Warning: package 'haven' was built under R version 4.4.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(scales)
## Warning: package 'scales' was built under R version 4.4.3
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.4.3
##
## Adjuntando el paquete: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'stringr' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ stringr 1.6.0
## ✔ forcats 1.0.0 ✔ tibble 3.3.0
## ✔ purrr 1.2.0 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(svglite)
## Warning: package 'svglite' was built under R version 4.4.3
library(cobalt)
## Warning: package 'cobalt' was built under R version 4.4.3
## cobalt (Version 4.6.2, Build Date: 2026-01-29)
library(patchwork)
## Warning: package 'patchwork' was built under R version 4.4.3
library(sandwich)
## Warning: package 'sandwich' was built under R version 4.4.3
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.4.3
## Cargando paquete requerido: zoo
## Warning: package 'zoo' was built under R version 4.4.3
##
## Adjuntando el paquete: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Load the survey data from the Stata file.
# NOTE(review): this is an absolute, machine-specific path, so the script only
# runs where this exact folder exists -- consider a project-relative path.
gb_frame <- read_dta("C:/Users/Hp/OneDrive/Escritorio/IOB Module II/Unit IV. Quant action lab/Analysis/gender bias.dta")

# Convert categorical variables to factors in a single pass instead of one
# assignment per column. all_of() stops with an error naming any listed column
# that is missing from the data.
gb_frame <- gb_frame %>%
  mutate(
    across(
      all_of(c(
        "genero", "consentimiento", "highest_educ", "region",
        "country_ca", "country_sa", "country_na",
        "country_af", "country_as", "country_eu",
        "experience_001", "experience", "out1_vote",
        "policy_family", "policy_defense", "policy_socinc",
        "policy_trade", "policy_financial", "policy_educ",
        "policy_crime", "policy_env", "policy_health",
        "policy_transparency",
        "country_all", "treatment", "age_cat", "check_all"
      )),
      as_factor
    )
  )
##============================================================================##
## Helpers shared by the descriptive distribution graphs (Steps 2-4)
##============================================================================##

# Count the rows of `data` in each category of `var` and add `percent`, the
# category's share of all rows (a proportion between 0 and 1).
respondent_share <- function(data, var) {
  data %>%
    count({{ var }}) %>%
    mutate(percent = n / sum(n))
}

# Horizontal bar chart of a table produced by respondent_share().
#   dist  : table with the category column `var` and a `percent` column
#   var   : unquoted name of the category column
#   title : plot title
plot_respondent_share <- function(dist, var, title) {
  ggplot(dist, aes(x = {{ var }}, y = percent)) +
    geom_col(fill = "#2C3E50", width = .7) +
    # scales::percent is namespace-qualified because the data column is also
    # called `percent`.
    geom_text(
      aes(label = scales::percent(percent, accuracy = 0.1)),
      hjust = -0.2,
      size = 3.5
    ) +
    coord_flip() +
    # Extra room at the upper end of the axis so the bar labels are not clipped.
    scale_y_continuous(
      labels = percent_format(),
      expand = expansion(mult = c(0, .08))
    ) +
    labs(
      x = NULL,
      y = "Percent of respondents",
      title = title,
      caption = "Source: Short political survey"
    ) +
    theme_classic(base_size = 12) +
    theme(
      plot.title = element_text(size = 12),
      axis.title.x = element_text(size = 12)
    )
}

##============================================================================##
## Step 2: Create age distribution descriptive graph
##============================================================================##
age_dist <- respondent_share(gb_frame, age_cat)
figure3_age <- plot_respondent_share(
  age_dist, age_cat,
  title = "Age distribution of respondents"
)
figure3_age

# Explicit size: without it ggsave() uses the size of the active graphics
# device, so the file would differ between sessions.
ggsave("Figure3_age.svg", figure3_age, width = 7, height = 5)

##============================================================================##
## Step 3: Create region distribution descriptive graph
##============================================================================##
region_dist <- respondent_share(gb_frame, region)
figure1_region <- plot_respondent_share(
  region_dist, region,
  title = "Region distribution of respondents"
)
figure1_region

ggsave("Figure1_region.svg", figure1_region, width = 7, height = 5)

##============================================================================##
## Step 4: Create education distribution descriptive graph
##============================================================================##
edu_dist <- respondent_share(gb_frame, highest_educ)
figure2_edu <- plot_respondent_share(
  edu_dist, highest_educ,
  title = "Education level distribution of respondents"
)
figure2_edu

ggsave("Figure2_edu.svg", figure2_edu, width = 7, height = 5)
##============================================================================##
## Step 4: Balance graphic age
##============================================================================##
# NOTE(review): this header reuses the number 4 from the education graph
# section; renumber when the script's step list is next revised.

# Overlaid density curves of `age`, one per treatment group.
figure4_age_balance <- ggplot(
  gb_frame,
  aes(x = age, fill = factor(treatment))
) +
  geom_density(alpha = .65, color = NA) +
  # NOTE(review): labels are assigned by position, so this assumes the first
  # level of `treatment` is the control group -- confirm against the data.
  scale_fill_manual(
    values = c("black", "#0B3C6F"),
    name = "Group",
    labels = c("Control", "Treatment")
  ) +
  labs(
    title = "Age distribution by treatment status",
    x = "Age",
    y = "Density"
  ) +
  theme_classic(base_size = 8) +
  theme(
    plot.title = element_text(size = 14, face = "plain"),
    legend.position = "top",
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 12),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 12)
  )
figure4_age_balance

# Explicit size: without it ggsave() uses the size of the active graphics
# device, so the file would differ between sessions.
ggsave("Figure4_age_balance.svg", figure4_age_balance, width = 7, height = 5)
##============================================================================##
## Helpers shared by the stacked balance graphs (Steps 5-7)
##============================================================================##

# Count rows by `var` and `treatment`, then add `percent`: each treatment
# group's share within its `var` category. `.by` groups for this one mutate()
# only, so the result is returned ungrouped.
treatment_share <- function(data, var) {
  data %>%
    count({{ var }}, treatment) %>%
    mutate(percent = n / sum(n), .by = {{ var }})
}

# Horizontal stacked bar chart of a table produced by treatment_share().
#   dist    : table with the category column `var`, `treatment` and `percent`
#   var     : unquoted name of the category column
#   title   : plot title
#   y_label : label for the percentage axis
plot_treatment_share <- function(dist, var, title, y_label) {
  ggplot(
    dist,
    aes(x = {{ var }}, y = percent, fill = factor(treatment))
  ) +
    geom_col(width = .7, position = "stack", color = "white") +
    # scales::percent is namespace-qualified because the data column is also
    # called `percent`; labels are centred inside each stacked segment.
    geom_text(
      aes(label = scales::percent(percent, accuracy = 0.1)),
      position = position_stack(vjust = .5),
      color = "white",
      size = 4
    ) +
    coord_flip() +
    # NOTE(review): labels are assigned by position, so this assumes the first
    # level of `treatment` is the control group -- confirm against the data.
    scale_fill_manual(
      values = c("black", "#0B3C6F"),
      name = "Group",
      labels = c("Control", "Treatment")
    ) +
    scale_y_continuous(labels = percent_format()) +
    labs(
      title = title,
      x = NULL,
      y = y_label
    ) +
    theme_classic(base_size = 16) +
    theme(
      plot.title = element_text(size = 14, face = "plain"),
      axis.title = element_text(size = 12),
      legend.position = "top"
    )
}

##============================================================================##
## Step 5: Balance graphic education
##============================================================================##
educ_dist_group <- treatment_share(gb_frame, highest_educ)
figure5_edu_balance <- plot_treatment_share(
  educ_dist_group, highest_educ,
  title = "Educational attainment by treatment status",
  y_label = "Percent within education category"
)
figure5_edu_balance

# Explicit size: without it ggsave() uses the size of the active graphics
# device, so the file would differ between sessions.
ggsave("Figure5_edu_balance.svg", figure5_edu_balance, width = 7, height = 5)

##============================================================================##
## Step 6: Balance graphic region
##============================================================================##
region_dist_group <- treatment_share(gb_frame, region)
figure6_reg_balance <- plot_treatment_share(
  region_dist_group, region,
  title = "Region by treatment status",
  y_label = "Percent within each region"
)
figure6_reg_balance

ggsave("Figure6_reg_balance.svg", figure6_reg_balance, width = 7, height = 5)

##============================================================================##
## Step 7: Balance graph gender
##============================================================================##
# Respondents who preferred not to state a gender are excluded; filter() also
# drops rows where `genero` is missing, because the comparison is NA there.
gender_dist_group <- gb_frame %>%
  filter(genero != "4. I prefer not to respond") %>%
  treatment_share(genero)
figure7_gen_balance <- plot_treatment_share(
  gender_dist_group, genero,
  title = "Gender by treatment status",
  y_label = "Percent within each gender"
)
figure7_gen_balance

ggsave("Figure7_gen_balance.svg", figure7_gen_balance, width = 7, height = 5)
# Combine the four balance figures into one 2 x 2 panel (patchwork).
balance_figurescomb <- figure5_edu_balance + figure6_reg_balance +
  figure7_gen_balance + figure4_age_balance +
  plot_layout(ncol = 2)

# Saved at 14 x 10 in so that each of the four panels keeps a 7 x 5 in
# footprint; an explicit size also stops ggsave() from falling back to the
# size of the active graphics device.
ggsave("all_balancefig.svg", balance_figurescomb, width = 14, height = 10)