Marco Tulio Eguez Hurtado
Soc-3320: Methodology and Research II
Instructor: Sébastien Parker
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
# List of packages
packages <- c("tidyverse", "fst", "gt", "scales", "viridis", "patchwork", "ggrepel", "ggridges")
# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
# Load the packages
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
##
## Attaching package: 'scales'
##
##
## The following object is masked from 'package:purrr':
##
## discard
##
##
## The following object is masked from 'package:readr':
##
## col_factor
##
##
## Loading required package: viridisLite
##
##
## Attaching package: 'viridis'
##
##
## The following object is masked from 'package:scales':
##
## viridis_pal
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "fst" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "gt" "fst" "lubridate" "forcats" "stringr" "dplyr"
## [7] "purrr" "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[4]]
## [1] "scales" "gt" "fst" "lubridate" "forcats" "stringr"
## [7] "dplyr" "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [13] "tidyverse" "stats" "graphics" "grDevices" "utils" "datasets"
## [19] "methods" "base"
##
## [[5]]
## [1] "viridis" "viridisLite" "scales" "gt" "fst"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
##
## [[6]]
## [1] "patchwork" "viridis" "viridisLite" "scales" "gt"
## [6] "fst" "lubridate" "forcats" "stringr" "dplyr"
## [11] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [16] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [21] "datasets" "methods" "base"
##
## [[7]]
## [1] "ggrepel" "patchwork" "viridis" "viridisLite" "scales"
## [6] "gt" "fst" "lubridate" "forcats" "stringr"
## [11] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [16] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [21] "utils" "datasets" "methods" "base"
##
## [[8]]
## [1] "ggridges" "ggrepel" "patchwork" "viridis" "viridisLite"
## [6] "scales" "gt" "fst" "lubridate" "forcats"
## [11] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [16] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [21] "grDevices" "utils" "datasets" "methods" "base"
gss <- read_fst("C:/Users/marco/OneDrive/R ecosystem/RStudio/gss2022.fst")
table(gss$abany)
##
## yes no
## 16626 22628
## don't know iap
## 0 0
## I don't have a job dk, na, iap
## 0 0
## no answer not imputable_(2147483637)
## 0 0
## not imputable_(2147483638) refused
## 0 0
## skipped on web uncodeable
## 0 0
## not available in this release not available in this year
## 0 0
## see codebook
## 0
unique(gss$abany)
## [1] <NA> yes no
## 15 Levels: yes no don't know iap I don't have a job dk, na, iap ... see codebook
gss %>%
filter(!is.na(abany)) %>%
distinct(year) %>%
arrange(year) %>%
pull()
## [1] 1977 1978 1980 1982 1983 1984 1985 1987 1988 1989 1990 1991 1993 1994 1996
## [16] 1998 2000 2002 2004 2006 2008 2010 2012 2014 2016 2018 2021 2022
Explore the year range and see if there is any missing data.
gss_clean <- gss %>%
mutate(
abortion_any_reason = case_when(
abany == "yes" ~ "Support",
abany == "no" ~ "Oppose",
TRUE ~ NA_character_
),
abortion_any_reason = factor(abortion_any_reason,
levels = c("Support", "Oppose")
),
education = case_when(
degree %in% c("less than high school", "high school") ~ "High School or Less",
degree %in% c("associate/junior college") ~ "Some College",
degree %in% c("bachelor's", "graduate") ~ "Bachelor’s or Higher",
TRUE ~ NA_character_
),
education = factor(education,
levels = c("High School or Less",
"Some College",
"Bachelor’s or Higher")
),
political_views = case_when(
polviews %in% c("extremely liberal",
"liberal",
"slightly liberal") ~ "Liberal",
polviews %in% c("moderate, middle of the road") ~ "Moderate",
polviews %in% c("slightly conservative",
"conservative",
"extremely conservative") ~ "Conservative",
TRUE ~ NA_character_
),
political_views = factor(political_views, levels = c("Liberal", "Moderate", "Conservative")
),
sex = case_when(
sex == "male" ~ "Male",
sex == "female" ~ "Female",
TRUE ~ NA_character_
),
sex = factor(sex, levels = c("Male", "Female"))
)
table(gss_clean$abortion_any_reason)
##
## Support Oppose
## 16626 22628
table(gss_clean$education, useNA = "ifany")
##
## High School or Less Some College Bachelor’s or Higher
## 50638 4355 17201
## <NA>
## 196
table(gss_clean$political_views, useNA = "ifany")
##
## Liberal Moderate Conservative <NA>
## 17604 23992 21122 9672
table(gss_clean$sex, useNA = "ifany")
##
## Male Female <NA>
## 31977 40301 112
Here I transformed the variables to match the ones requested on the exercise utilizing the mutate() function, and then I converted them into meaningful categories with the factor() function.
table(gss$degree)
##
## less than high school high school
## 14192 36446
## associate/junior college bachelor's
## 4355 11248
## graduate don't know
## 5953 0
## iap I don't have a job
## 0 0
## dk, na, iap no answer
## 0 0
## not imputable_(2147483637) not imputable_(2147483638)
## 0 0
## refused skipped on web
## 0 0
## uncodeable not available in this release
## 0 0
## not available in this year see codebook
## 0 0
gss %>%
filter(!is.na(degree), !is.na(abany)) %>%
distinct(year) %>%
arrange(year) %>%
pull()
## [1] 1977 1978 1980 1982 1983 1984 1985 1987 1988 1989 1990 1991 1993 1994 1996
## [16] 1998 2000 2002 2004 2006 2008 2010 2012 2014 2016 2018 2021 2022
missing some values. I check the years where we have available data regarding the categories we ant to explore in the dataset.
abortion_support_stats <- gss_clean %>%
select(education, political_views, sex, abortion_any_reason) %>%
pivot_longer(
cols = -abortion_any_reason,
names_to = "category",
values_to = "characteristic"
) %>%
filter(!is.na(characteristic), !is.na(abortion_any_reason)) %>%
group_by(characteristic, category) %>%
summarize(
total = n(),
support = sum(abortion_any_reason == "Support"),
support_pct = (support / total) * 100,
.groups = "drop"
) %>%
mutate(
category = case_when(
category == "education" ~ "Education",
category == "political_views" ~ "Political Views",
category == "sex" ~ "Gender",
TRUE ~ category
)
)
abortion_support_stats
## # A tibble: 8 × 5
## characteristic category total support support_pct
## <fct> <chr> <int> <int> <dbl>
## 1 High School or Less Education 27601 10180 36.9
## 2 Some College Education 2373 1083 45.6
## 3 Bachelor’s or Higher Education 9193 5333 58.0
## 4 Liberal Political Views 10395 6189 59.5
## 5 Moderate Political Views 14040 5881 41.9
## 6 Conservative Political Views 12450 3854 31.0
## 7 Male Gender 17229 7475 43.4
## 8 Female Gender 22000 9142 41.6
I got stuck trying to make separate measures and then join them together. After a while, I discovered this youtube video which help me to solve this problem by using the pivot_longer() command. After merging the dataset, I performed the required calculations and then mutate my characteristics to match the ones required in the exercise. This is the video link: https://youtu.be/UR-4vBEN3Fw?si=rv5wg8hAvv22oKHp
abortion_support_edu_table <- abortion_support_stats %>%
select(characteristic, total, support, support_pct) %>%
gt() %>%
tab_header(
title = md("**Support for Abortion Rights by Demographic Characteristics**"),
subtitle = "General Social Survey 1977-2022"
) %>%
fmt_number(
columns = c(support_pct),
decimals = 1
)%>%
fmt_number(
columns = c(total, support),
decimals = 0
) %>%
cols_label(
characteristic = "Characteristic",
total = "Sample Size",
support = "Support Count",
support_pct = "Support (%)"
) %>%
tab_source_note(
source_note = "Note: Responses to: 'Please tell me whether or not you think it should be possible for a pregnant woman to obtain a legal abortion if: The woman wants it for any reason?' Sample includes all valid responses across survey years."
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_column_labels()
)%>%
tab_style(
style = cell_text(align = "center"),
locations = cells_body(columns = c(total, support, support_pct))
)%>%
tab_style(
style = cell_text(align = "center"),
locations = cells_column_labels()
)
abortion_support_edu_table
| Support for Abortion Rights by Demographic Characteristics | |||
| General Social Survey 1977-2022 | |||
| Characteristic | Sample Size | Support Count | Support (%) |
|---|---|---|---|
| High School or Less | 27,601 | 10,180 | 36.9 |
| Some College | 2,373 | 1,083 | 45.6 |
| Bachelor’s or Higher | 9,193 | 5,333 | 58.0 |
| Liberal | 10,395 | 6,189 | 59.5 |
| Moderate | 14,040 | 5,881 | 41.9 |
| Conservative | 12,450 | 3,854 | 31.0 |
| Male | 17,229 | 7,475 | 43.4 |
| Female | 22,000 | 9,142 | 41.6 |
| Note: Responses to: 'Please tell me whether or not you think it should be possible for a pregnant woman to obtain a legal abortion if: The woman wants it for any reason?' Sample includes all valid responses across survey years. | |||
After merging my data and doing the necessary calculations, I did my table using the gt() package.
gender_trends <- gss_clean %>%
filter(!is.na(sex), !is.na(abortion_any_reason), !is.na(year)) %>%
group_by(year, sex) %>%
summarize(
oppose = mean(abortion_any_reason == "Oppose"),
n = n(),
.groups = "drop"
)
gender_trends
## # A tibble: 56 × 4
## year sex oppose n
## <int> <fct> <dbl> <int>
## 1 1977 Male 0.611 665
## 2 1977 Female 0.633 814
## 3 1978 Male 0.653 619
## 4 1978 Female 0.677 865
## 5 1980 Male 0.589 616
## 6 1980 Female 0.589 790
## 7 1982 Male 0.610 734
## 8 1982 Female 0.618 1026
## 9 1983 Male 0.648 653
## 10 1983 Female 0.665 862
## # ℹ 46 more rows
I chose to make three different graphs with individual calculations for each graph as I was struggling to merge all the data into a single graph. I started by filtering my data to remove missing values. Then I did my calculations.
last_points_gender <- gender_trends %>%
group_by(sex) %>%
slice_max(order_by = year, n = 1)
ggplot(gender_trends,
aes(x = year, y = oppose, color = sex)) +
geom_line(size = 1.2) +
scale_color_manual(
values = c(
"Male" = "#005F73",
"Female" = "#C098D0"
)
) +
geom_label_repel(
data = last_points_gender,
aes(label = sex),
nudge_x = 2,
direction = "y",
hjust = 0,
segment.size = 0.3,
segment.color = "grey70",
box.padding = 0.5,
point.padding = 0.5,
size = 3.5,
fontface = "bold",
label.size = 0.1,
label.r = unit(0.2, "lines"),
fill = alpha("white", 0.7)
) +
scale_y_continuous(
labels = scales::percent_format(),
limits = c(0.3, 0.7),
breaks = seq(0.3, 0.7, by = 0.1),
expand = c(0.01, 0.01)
) +
scale_x_continuous(
breaks = seq(1977, 2022, by = 5),
limits = c(1975, 2025),
expand = c(0.01, 0.01)
) +
labs(
title = "Opposition to Abortion Rights Over Time",
subtitle = "By Gender (Binary)",
x = NULL,
y = "Proportion Opposing Abortion Rights"
) +
theme_minimal() +
theme(
plot.title = element_text(
face = "bold",
size = 14,
margin = margin(b = 10)
),
plot.subtitle = element_text(
color = "grey40",
size = 12,
margin = margin(b = 20)
),
axis.line = element_line(color = "black", size = 0.5),
axis.ticks = element_line(color = "black", size = 0.5),
panel.grid = element_blank(),
axis.text = element_text(size = 10),
legend.position = "none",
plot.margin = margin(20, 20, 20, 20)
)
I use the ggplot 2 package to make the graph and I followed the steps provided in tutorial 4. I did this for all the graphs I needed to do for this section.
education_trends <- gss_clean %>%
filter(!is.na(education), !is.na(abortion_any_reason), !is.na(year)) %>%
group_by(year, education) %>%
summarize(
oppose = mean(abortion_any_reason == "Oppose"),
n = n(),
.groups = "drop"
)
education_trends
## # A tibble: 84 × 4
## year education oppose n
## <int> <fct> <dbl> <int>
## 1 1977 High School or Less 0.656 1233
## 2 1977 Some College 0.562 32
## 3 1977 Bachelor’s or Higher 0.428 208
## 4 1978 High School or Less 0.698 1231
## 5 1978 Some College 0.6 40
## 6 1978 Bachelor’s or Higher 0.495 210
## 7 1980 High School or Less 0.629 1134
## 8 1980 Some College 0.636 44
## 9 1980 Bachelor’s or Higher 0.379 224
## 10 1982 High School or Less 0.651 1438
## # ℹ 74 more rows
last_points_edu <- education_trends %>%
group_by(education) %>%
slice_max(order_by = year, n = 1)
ggplot(education_trends,
aes(x = year, y = oppose, color = education)) +
geom_line(size = 1.2) +
scale_color_manual(
values = c(
"High School or Less" = "#005F73",
"Some College" = "#C098D0",
"Bachelor’s or Higher" = "#48A0A8"
)
) +
geom_label_repel(
data = last_points_edu,
aes(label = education),
nudge_x = 2,
direction = "y",
hjust = 0,
segment.size = 0.3,
segment.color = "grey70",
box.padding = 0.5,
point.padding = 0.5,
size = 3.5,
fontface = "bold",
label.size = 0.1,
label.r = unit(0.2, "lines"),
fill = alpha("white", 0.7)
) +
scale_y_continuous(
labels = scales::percent_format(),
limits = c(0.2, 0.8),
breaks = seq(0.2, 0.8, by = 0.1),
expand = c(0.01, 0.01)
) +
scale_x_continuous(
breaks = seq(1977, 2022, by = 5),
limits = c(1975, 2025),
expand = c(0.01, 0.01)
) +
# Add labels
labs(
title = "Opposition to Abortion Rights Over Time",
subtitle = "By Education Level",
x = NULL,
y = "Proportion Opposing Abortion Rights"
) +
theme_minimal() +
theme(
plot.title = element_text(
face = "bold",
size = 14,
margin = margin(b = 10)
),
plot.subtitle = element_text(
color = "grey40",
size = 12,
margin = margin(b = 20)
),
axis.line = element_line(color = "black", size = 0.5),
axis.ticks = element_line(color = "black", size = 0.5),
panel.grid = element_blank(),
axis.text = element_text(size = 10),
legend.position = "none",
plot.margin = margin(20, 20, 20, 20)
)
political_views_trends <- gss_clean %>%
filter(!is.na(political_views), !is.na(abortion_any_reason), !is.na(year)) %>%
group_by(year, political_views) %>%
summarize(
oppose = mean(abortion_any_reason == "Oppose"),
n = n(),
.groups = "drop"
)
political_views_trends
## # A tibble: 84 × 4
## year political_views oppose n
## <int> <fct> <dbl> <int>
## 1 1977 Liberal 0.551 410
## 2 1977 Moderate 0.622 548
## 3 1977 Conservative 0.668 452
## 4 1978 Liberal 0.558 398
## 5 1978 Moderate 0.713 534
## 6 1978 Conservative 0.692 461
## 7 1980 Liberal 0.503 358
## 8 1980 Moderate 0.605 555
## 9 1980 Conservative 0.627 461
## 10 1982 Liberal 0.511 499
## # ℹ 74 more rows
last_points_pol <- political_views_trends %>%
group_by(political_views) %>%
slice_max(order_by = year, n = 1)
ggplot(political_views_trends,
aes(x = year, y = oppose, color = political_views)) +
geom_line(size = 1.2) +
scale_color_manual(
values = c(
"Liberal" = "#377EB8",
"Moderate" = "#4DAF4A",
"Conservative" = "#E41A1C"
)
) +
geom_label_repel(
data = last_points_pol,
aes(label = political_views),
nudge_x = 2,
direction = "y",
hjust = 0,
segment.size = 0.3,
segment.color = "grey70",
box.padding = 0.5,
point.padding = 0.5,
size = 3.5,
fontface = "bold",
label.size = 0.1,
label.r = unit(0.2, "lines"),
fill = alpha("white", 0.7)
) +
scale_y_continuous(
labels = scales::percent_format(),
limits = c(0.1, 0.8),
breaks = seq(0.1, 0.8, by = 0.1),
expand = c(0.01, 0.01)
) +
scale_x_continuous(
breaks = seq(1977, 2022, by = 5),
limits = c(1975, 2025),
expand = c(0.01, 0.01)
) +
labs(
title = "Opposition to Abortion Rights Over Time",
subtitle = "By Political Views",
x = NULL,
y = "Proportion Opposing Abortion Rights"
) +
theme_minimal() +
theme(
plot.title = element_text(
face = "bold",
size = 14,
margin = margin(b = 10)
),
plot.subtitle = element_text(
color = "grey40",
size = 12,
margin = margin(b = 20)
),
axis.line = element_line(color = "black", size = 0.5),
axis.ticks = element_line(color = "black", size = 0.5),
panel.grid = element_blank(),
axis.text = element_text(size = 10),
legend.position = "none",
plot.margin = margin(20, 20, 20, 20)
)
The analysis of abortion attitudes for any reasons in American society, based on the General Social Survey (1977-2022), reveals distinct patterns across demographic groups and over time. Education emerges as a strong predictor of support for abortion rights for any reason. Individuals with a Bachelor’s or Higher degree exhibit the highest support (58.0%), followed by those with Some College (45.6%) and High School or Less (36.9%). This 21.1% gap between the highest and lowest education levels underscores the persistent influence of educational attainment on social attitudes.
Political ideology further stratifies views. Liberals show the strongest support (59.5%), while Conservatives oppose abortion rights for any reason most frequently (69.0% opposition, equivalent to 31.0% support). Moderates occupy an intermediate position, with 41.9% support. These disparities reflect deep-rooted partisan divides, with Conservative opposition remaining consistently high over time, as seen in the trend graph where Conservative opposition hovers near 60–70% historically, compared to Liberals’ opposition declining from around 55% in 1977 to approximately 20% by 2022.
Gender differences are less pronounced but notable. Males report slightly higher support (43.4%) compared to females (41.6%). Over time, opposition among both genders has declined, converging near 40% by 2022, though females initially exhibited marginally higher opposition and by 2022 they show slightly less opposition.
Temporal trends highlight evolving societal attitudes. Opposition to abortion rights has decreased across nearly all groups since 1977, with the sharpest declines among Bachelor’s or Higher holders (from 42% opposition in 1977 to around 30% in 2022) and Liberals (from 55% to around 40% opposition). These shifts suggest growing acceptance of abortion rights among more educated and politically progressive demographics, while resistance remains entrenched among Conservatives and less-educated groups.
library(fst)
hungary_data <- read_fst("C:/Users/marco/OneDrive/R ecosystem/RStudio/hungary_data.fst")
table(hungary_data$freehms)
##
## 1 2 3 4 5 7 8 9
## 2239 4517 3806 2297 2260 216 1305 2
library(fst)
france_data <- read_fst("C:/Users/marco/OneDrive/R ecosystem/RStudio/france_data.fst")
table(france_data$freehms)
##
## 1 2 3 4 5 7 8
## 11046 4702 1537 679 921 43 110
I chose to do the exercise with the individual datasets because my desktop was having trouble processing the entire ess dataset.
hungary_attitudes <- hungary_data %>%
mutate(
cntry = "Hungary",
attitudes = case_when(
freehms %in% 1:2 ~ "Support",
freehms == 3 ~ "Neutral",
freehms %in% 4:5 ~ "Oppose",
TRUE ~ NA_character_
),
attitudes = factor(
attitudes,
levels = c("Support", "Neutral",
"Oppose")
),
education = case_when(
eisced %in% 1:2 ~ "Lower Secondary or Less",
eisced %in% 3:4 ~ "Upper Secondary",
eisced %in% 5:7 ~ "Tertiary",
TRUE ~ NA_character_
),
education = factor(
education,
levels = c("Lower Secondary or Less", "Upper Secondary",
"Tertiary")
),
residence = case_when(
domicil == 1 ~ "Major city",
domicil %in% 2:3 ~ "Urban periphery",
domicil %in% 4:5 ~ "Rural",
TRUE ~ NA_character_
),
residence = factor(
residence,
levels = c("Major city", "Urban periphery",
"Rural")
)
)
table(hungary_attitudes$attitudes)
##
## Support Neutral Oppose
## 6756 3806 4557
table(hungary_attitudes$education)
##
## Lower Secondary or Less Upper Secondary Tertiary
## 4137 9080 3329
table(hungary_attitudes$residence)
##
## Major city Urban periphery Rural
## 4196 6460 5971
Trying to improve the efficiency of my code, I merged all the naming process into a single chunk of code for each country. I followed the direction of the exercise when transforming the dataset, similar to task 1.
france_attitudes <- france_data %>%
mutate(
cntry = "France",
attitudes = case_when(
freehms %in% 1:2 ~ "Support",
freehms == 3 ~ "Neutral",
freehms %in% 4:5 ~ "Oppose",
TRUE ~ NA_character_
),
attitudes = factor(
attitudes,
levels = c("Support", "Neutral",
"Oppose")
),
education = case_when(
eisced %in% 1:2 ~ "Lower Secondary or Less",
eisced %in% 3:4 ~ "Upper Secondary",
eisced %in% 5:7 ~ "Tertiary",
TRUE ~ NA_character_
),
education = factor(
education,
levels = c("Lower Secondary or Less", "Upper Secondary",
"Tertiary")
),
residence = case_when(
domicil == 1 ~ "Major city",
domicil %in% 2:3 ~ "Urban periphery",
domicil %in% 4:5 ~ "Rural",
TRUE ~ NA_character_
),
residence = factor(
residence,
levels = c("Major city", "Urban periphery",
"Rural")
)
)
table(france_attitudes$attitudes)
##
## Support Neutral Oppose
## 15748 1537 1600
table(france_attitudes$education)
##
## Lower Secondary or Less Upper Secondary Tertiary
## 4218 6529 4949
table(france_attitudes$residence)
##
## Major city Urban periphery Rural
## 3584 8618 6830
edu_france_stats <- france_attitudes %>%
filter(!is.na(education), !is.na(attitudes)) %>%
group_by(education) %>%
summarize(
group_size = n(),
oppose = sum(attitudes == "Oppose"),
oppose_pct = (oppose / group_size) * 100,
.groups = "drop"
) %>%
mutate(
sample_pct = (group_size / sum(group_size)) * 100
)
edu_france_stats
## # A tibble: 3 × 5
## education group_size oppose oppose_pct sample_pct
## <fct> <int> <int> <dbl> <dbl>
## 1 Lower Secondary or Less 4168 580 13.9 26.7
## 2 Upper Secondary 6490 430 6.63 41.6
## 3 Tertiary 4928 193 3.92 31.6
I performed my calculations including the conditional probability P(A|B).
edu_france_table <- edu_france_stats %>%
select(education, group_size, sample_pct, oppose_pct) %>%
gt() %>%
tab_header(
title = md("**Opposition to LGBTQ+ Rights in France**"),
subtitle = "by Education Level"
) %>%
fmt_number(
columns = c(oppose_pct, sample_pct),
decimals = 1
)%>%
cols_label(
education = "Education",
group_size = "Group Size",
sample_pct = "Sample (%)",
oppose_pct = "Oppose (%)"
) %>%
tab_source_note(
source_note = "Data: European Social Survey (ESS)"
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_column_labels()
)%>%
tab_style(
style = cell_text(align = "center"),
locations = cells_body(columns = c(group_size, sample_pct, oppose_pct))
)
edu_france_table
| Opposition to LGBTQ+ Rights in France | |||
| by Education Level | |||
| Education | Group Size | Sample (%) | Oppose (%) |
|---|---|---|---|
| Lower Secondary or Less | 4168 | 26.7 | 13.9 |
| Upper Secondary | 6490 | 41.6 | 6.6 |
| Tertiary | 4928 | 31.6 | 3.9 |
| Data: European Social Survey (ESS) | |||
I started with the select function as I did not need the oppose variable created above and I focused on the variables asked in the exercise. I used the gt package to make the table and I was guided by exercise 4. I repeated the same process with the rest of my tables.
edu_hungary_stats <- hungary_attitudes %>%
filter(!is.na(education), !is.na(attitudes)) %>%
group_by(education) %>%
summarize(
group_size = n(),
oppose = sum(attitudes == "Oppose"),
oppose_pct = (oppose / group_size) * 100,
.groups = "drop"
) %>%
mutate(
sample_pct = (group_size / sum(group_size)) * 100
)
edu_hungary_stats
## # A tibble: 3 × 5
## education group_size oppose oppose_pct sample_pct
## <fct> <int> <int> <dbl> <dbl>
## 1 Lower Secondary or Less 3476 1344 38.7 23.1
## 2 Upper Secondary 8434 2476 29.4 56.1
## 3 Tertiary 3126 713 22.8 20.8
edu_hungary_table <- edu_hungary_stats %>%
select(education, group_size, sample_pct, oppose_pct) %>%
gt() %>%
tab_header(
title = md("**Opposition to LGBTQ+ Rights in Hungary**"),
subtitle = "by Education Level"
) %>%
fmt_number(
columns = c(oppose_pct, sample_pct),
decimals = 1
)%>%
cols_label(
education = "Education",
group_size = "Group Size",
sample_pct = "Sample (%)",
oppose_pct = "Oppose (%)"
) %>%
tab_source_note(
source_note = "Data: European Social Survey (ESS)"
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_column_labels()
)%>%
tab_style(
style = cell_text(align = "center"),
locations = cells_body(columns = c(group_size, sample_pct, oppose_pct))
)
edu_hungary_table
| Opposition to LGBTQ+ Rights in Hungary | |||
| by Education Level | |||
| Education | Group Size | Sample (%) | Oppose (%) |
|---|---|---|---|
| Lower Secondary or Less | 3476 | 23.1 | 38.7 |
| Upper Secondary | 8434 | 56.1 | 29.4 |
| Tertiary | 3126 | 20.8 | 22.8 |
| Data: European Social Survey (ESS) | |||
place_france_stats <- france_attitudes %>%
filter(!is.na(residence), !is.na(attitudes)) %>%
group_by(residence) %>%
summarize(
group_size = n(),
oppose = sum(attitudes == "Oppose"),
oppose_pct = (oppose / group_size) * 100,
.groups = "drop"
) %>%
mutate(
sample_pct = (group_size / sum(group_size)) * 100
)
place_france_stats
## # A tibble: 3 × 5
## residence group_size oppose oppose_pct sample_pct
## <fct> <int> <int> <dbl> <dbl>
## 1 Major city 3556 312 8.77 18.8
## 2 Urban periphery 8543 728 8.52 45.3
## 3 Rural 6780 560 8.26 35.9
place_france_table <- place_france_stats %>%
select(residence, group_size, sample_pct, oppose_pct) %>%
gt() %>%
tab_header(
title = md("**Opposition to LGBTQ+ Rights in France**"),
subtitle = "by Place of Residence"
) %>%
fmt_number(
columns = c(oppose_pct, sample_pct),
decimals = 1
)%>%
cols_label(
residence = "Place of Residence",
group_size = "Group Size",
sample_pct = "Sample (%)",
oppose_pct = "Oppose (%)"
) %>%
tab_source_note(
source_note = "Data: European Social Survey (ESS)"
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_column_labels()
)%>%
tab_style(
style = cell_text(align = "center"),
locations = cells_body(columns = c(group_size, sample_pct, oppose_pct))
)
place_france_table
| Opposition to LGBTQ+ Rights in France | |||
| by Place of Residence | |||
| Place of Residence | Group Size | Sample (%) | Oppose (%) |
|---|---|---|---|
| Major city | 3556 | 18.8 | 8.8 |
| Urban periphery | 8543 | 45.3 | 8.5 |
| Rural | 6780 | 35.9 | 8.3 |
| Data: European Social Survey (ESS) | |||
place_hungary_stats <- hungary_attitudes %>%
filter(!is.na(residence), !is.na(attitudes)) %>%
group_by(residence) %>%
summarize(
group_size = n(),
oppose = sum(attitudes == "Oppose"),
oppose_pct = (oppose / group_size) * 100,
.groups = "drop"
) %>%
mutate(
sample_pct = (group_size / sum(group_size)) * 100
)
place_hungary_stats
## # A tibble: 3 × 5
## residence group_size oppose oppose_pct sample_pct
## <fct> <int> <int> <dbl> <dbl>
## 1 Major city 3870 1063 27.5 25.6
## 2 Urban periphery 5921 1806 30.5 39.2
## 3 Rural 5315 1684 31.7 35.2
place_hungary_table <- place_hungary_stats %>%
select(residence, group_size, sample_pct, oppose_pct) %>%
gt() %>%
tab_header(
title = md("**Opposition to LGBTQ+ Rights in Hungary**"),
subtitle = "by Place of Residence"
) %>%
fmt_number(
columns = c(oppose_pct, sample_pct),
decimals = 1
)%>%
cols_label(
residence = "Place of Residence",
group_size = "Group Size",
sample_pct = "Sample (%)",
oppose_pct = "Oppose (%)"
) %>%
tab_source_note(
source_note = "Data: European Social Survey (ESS)"
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_column_labels()
)%>%
tab_style(
style = cell_text(align = "center"),
locations = cells_body(columns = c(group_size, sample_pct, oppose_pct))
)
place_hungary_table
| Opposition to LGBTQ+ Rights in Hungary | |||
| by Place of Residence | |||
| Place of Residence | Group Size | Sample (%) | Oppose (%) |
|---|---|---|---|
| Major city | 3870 | 25.6 | 27.5 |
| Urban periphery | 5921 | 39.2 | 30.5 |
| Rural | 5315 | 35.2 | 31.7 |
| Data: European Social Survey (ESS) | |||
edu_france_plot <- ggplot(edu_france_stats,
aes(x = oppose_pct, y = reorder(education, oppose_pct))) +
geom_col(
width = 0.6,
fill = "#114B5F",
alpha = 0.9
) +
geom_point(
size = 3,
color = "#028090"
) +
geom_text(
aes(label = sprintf("%.1f%%", oppose_pct)),
hjust = -0.5,
family = "sans",
size = 4,
fontface = "bold",
color = "#114B5F"
) +
scale_x_continuous(
limits = c(0, max(edu_france_stats$oppose_pct) * 1.2),
breaks = seq(0, 15, by = 3),
labels = scales::label_number(suffix = "%")
) +
labs(
title = "Educational Differences in Opposition to \nLGBTQ+ Rights in France",
subtitle = "Conditional Probability",
caption = "Data: European Social Survey",
x = "Probability of Opposition to LGBTQ+ Rights",
y = NULL
) +
theme_minimal() +
theme(
plot.title = element_text(
family = "sans",
face = "bold",
size = 16,
margin = margin(b = 10)
),
plot.subtitle = element_text(
family = "sans",
color = "#114B5F",
size = 12,
margin = margin(b = 20)
),
plot.caption = element_text(
color = "grey40",
margin = margin(t = 20)
),
axis.text = element_text(
family = "sans",
size = 11,
color = "grey20"
),
axis.text.y = element_text(
face = "bold"
),
axis.title.x = element_text(
margin = margin(t = 10),
color = "grey20"
),
panel.grid.major.x = element_line(
color = "grey95",
size = 0.3
),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank(),
plot.margin = margin(30, 30, 30, 30)
)
edu_france_plot
I made the graphs using the ggplot 2 package and I utilized the data I previously calculated and I followed the steps of tutorial 4 to make the formatting of the plot. I repeate dthis steps for all the remaining tables and changed the colors for a more professional look.
edu_hungary_plot <- ggplot(edu_hungary_stats,
aes(x = oppose_pct, y = reorder(education, oppose_pct))) +
geom_col(
width = 0.6,
fill = "#7E6B8F",
alpha = 0.9
) +
geom_point(
size = 3,
color = "#4A4063"
) +
geom_text(
aes(label = sprintf("%.1f%%", oppose_pct)),
hjust = -0.5,
family = "sans",
size = 4,
fontface = "bold",
color = "#7E6B8F"
) +
scale_x_continuous(
limits = c(0, max(edu_hungary_stats$oppose_pct) * 1.2),
breaks = seq(0, 40, by = 5),
labels = scales::label_number(suffix = "%")
) +
labs(
title = "Educational Differences in Opposition to \nLGBTQ+ Rights in Hungary",
subtitle = "Conditional Probability",
caption = "Data: European Social Survey",
x = "Probability of Opposition to LGBTQ+ Rights",
y = NULL
) +
theme_minimal() +
theme(
plot.title = element_text(
family = "sans",
face = "bold",
size = 16,
margin = margin(b = 10)
),
plot.subtitle = element_text(
family = "sans",
color = "#7E6B8F",
size = 12,
margin = margin(b = 20)
),
plot.caption = element_text(
color = "grey40",
margin = margin(t = 20)
),
axis.text = element_text(
family = "sans",
size = 11,
color = "grey20"
),
axis.text.y = element_text(
face = "bold"
),
axis.title.x = element_text(
margin = margin(t = 10),
color = "grey20"
),
panel.grid.major.x = element_line(
color = "grey95",
size = 0.3
),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank(),
plot.margin = margin(30, 30, 30, 30)
)
edu_hungary_plot
place_france_plot <- ggplot(place_france_stats,
aes(x = oppose_pct, y = reorder(residence, oppose_pct))) +
geom_col(
width = 0.6,
fill = "#2E86AB",
alpha = 0.9
) +
geom_point(
size = 3,
color = "#084B83"
) +
geom_text(
aes(label = sprintf("%.1f%%", oppose_pct)),
hjust = -0.5,
family = "sans",
size = 4,
fontface = "bold",
color = "#2E86AB"
) +
scale_x_continuous(
limits = c(0, max(place_france_stats $oppose_pct) * 1.2),
breaks = seq(0, 10, by = 2),
labels = scales::label_number(suffix = "%")
) +
labs(
title = "Opposition to LGBTQ+ Rights by Place of \nResidence in France",
subtitle = "Conditional Probability",
caption = "Data: European Social Survey",
x = "Probability of Opposition to LGBTQ+ Rights",
y = NULL
) +
theme_minimal() +
theme(
plot.title = element_text(
family = "sans",
face = "bold",
size = 16,
margin = margin(b = 10)
),
plot.subtitle = element_text(
family = "sans",
color = "#2E86AB",
size = 12,
margin = margin(b = 20)
),
plot.caption = element_text(
color = "grey40",
margin = margin(t = 20)
),
axis.text = element_text(
family = "sans",
size = 11,
color = "grey20"
),
axis.text.y = element_text(
face = "bold"
),
axis.title.x = element_text(
margin = margin(t = 10),
color = "grey20"
),
panel.grid.major.x = element_line(
color = "grey95",
size = 0.3
),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank(),
plot.margin = margin(30, 30, 30, 30)
)
place_france_plot
place_hungary_plot <- ggplot(place_hungary_stats,
aes(x = oppose_pct, y = reorder(residence, oppose_pct))) +
geom_col(
width = 0.6,
fill = "#D4B483",
alpha = 0.9
) +
geom_point(
size = 3,
color = "#B68D4C"
) +
geom_text(
aes(label = sprintf("%.1f%%", oppose_pct)),
hjust = -0.5,
family = "sans",
size = 4,
fontface = "bold",
color = "#D4B483"
) +
scale_x_continuous(
limits = c(0, max(place_hungary_stats$oppose_pct) * 1.2),
breaks = seq(0, 35, by = 5),
labels = scales::label_number(suffix = "%")
) +
labs(
title = "Opposition to LGBTQ+ Rights by Place of \nResidence in Hungary",
subtitle = "Conditional Probability",
caption = "Data: European Social Survey",
x = "Probability of Opposition to LGBTQ+ Rights",
y = NULL
) +
theme_minimal() +
theme(
plot.title = element_text(
family = "sans",
face = "bold",
size = 16,
margin = margin(b = 10)
),
plot.subtitle = element_text(
family = "sans",
color = "#D4B483",
size = 12,
margin = margin(b = 20)
),
plot.caption = element_text(
color = "grey40",
margin = margin(t = 20)
),
axis.text = element_text(
family = "sans",
size = 11,
color = "grey20"
),
axis.text.y = element_text(
face = "bold"
),
axis.title.x = element_text(
margin = margin(t = 10),
color = "grey20"
),
panel.grid.major.x = element_line(
color = "grey95",
size = 0.3
),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank(),
plot.margin = margin(30, 30, 30, 30)
)
place_hungary_plot
The analysis of LGBTQ+ rights opposition in Hungary and France reveals distinct patterns across education levels and residential settings. In France, opposition decreases markedly with higher education: 13.9% among those with lower secondary education or less, 6.6% for upper secondary, and 3.9% for tertiary-educated individuals. This 10% gap between the lowest and highest education tiers underscores education’s role in fostering acceptance. Hungary exhibits a similar inverse relationship but with significantly higher opposition rates: 38.7% (lower secondary), 29.4% (upper secondary), and 22.8% (tertiary), reflecting a 15.9% disparity between the highest and the lowest. Notably, opposition in Hungary’s least educated cohort is nearly triple that of France’s equivalent group, highlighting sharp cross-national differences.
Residential patterns diverge between the two countries. On the one hand, in France, probability of opposition remains relatively stable across urban and rural areas, ranging narrowly from 8.8% (major cities) to 8.3% (rural). On the other hand, Hungary shows a clear urban-rural divide: probability of opposition peaks in rural areas (31.7%), followed by urban peripheries (30.5%), and is lowest in major cities (27.5%), a 4.2% gap. This suggests urbanization and exposure to diverse perspectives may mitigate opposition in Hungary, whereas France’s uniformly lower rates indicate broader societal acceptance regardless of residence.
These trends align with broader socio-political contexts. Hungary’s higher opposition rates—particularly among less educated and rural populations—may reflect its conservative political climate and restrictive LGBTQ+ policies. France’s progressive stance and emphasis on secular values likely contribute to lower opposition, amplified by education’s stronger moderating effect. Both countries, however, demonstrate that education systematically reduces opposition, emphasizing its critical role in shaping attitudes.