## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
##
## Attaching package: 'scales'
##
##
## The following object is masked from 'package:purrr':
##
## discard
##
##
## The following object is masked from 'package:readr':
##
## col_factor
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "gt" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "scales" "gt" "lubridate" "forcats" "stringr" "dplyr"
## [7] "purrr" "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[4]]
## [1] "fst" "scales" "gt" "lubridate" "forcats" "stringr"
## [7] "dplyr" "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [13] "tidyverse" "stats" "graphics" "grDevices" "utils" "datasets"
## [19] "methods" "base"
##
## [[5]]
## [1] "ggridges" "fst" "scales" "gt" "lubridate" "forcats"
## [7] "stringr" "dplyr" "purrr" "readr" "tidyr" "tibble"
## [13] "ggplot2" "tidyverse" "stats" "graphics" "grDevices" "utils"
## [19] "datasets" "methods" "base"
##
## [[6]]
## [1] "ggrepel" "ggridges" "fst" "scales" "gt" "lubridate"
## [7] "forcats" "stringr" "dplyr" "purrr" "readr" "tidyr"
## [13] "tibble" "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [19] "utils" "datasets" "methods" "base"
##
## [[7]]
## [1] "patchwork" "ggrepel" "ggridges" "fst" "scales" "gt"
## [7] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [13] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [19] "grDevices" "utils" "datasets" "methods" "base"
First, let’s examine the abortion variable (abany
) and
the other variables requested in the guidelines:
## [1] "Distribution of abortion views:"
##
## yes no
## 16626 22628
## don't know iap
## 0 0
## I don't have a job dk, na, iap
## 0 0
## no answer not imputable_(2147483637)
## 0 0
## not imputable_(2147483638) refused
## 0 0
## skipped on web uncodeable
## 0 0
## not available in this release not available in this year
## 0 0
## see codebook <NA>
## 0 33136
## [1] "\nDistribution of political views:"
##
## extremely liberal liberal
## 2081 7623
## slightly liberal moderate, middle of the road
## 7900 23992
## slightly conservative conservative
## 9596 9361
## extremely conservative don't know
## 2165 0
## iap I don't have a job
## 0 0
## dk, na, iap no answer
## 0 0
## not imputable_(2147483637) not imputable_(2147483638)
## 0 0
## refused skipped on web
## 0 0
## uncodeable not available in this release
## 0 0
## not available in this year see codebook
## 0 0
## <NA>
## 9672
## [1] "\nDistribution of education:"
##
## less than high school high school
## 14192 36446
## associate/junior college bachelor's
## 4355 11248
## graduate don't know
## 5953 0
## iap I don't have a job
## 0 0
## dk, na, iap no answer
## 0 0
## not imputable_(2147483637) not imputable_(2147483638)
## 0 0
## refused skipped on web
## 0 0
## uncodeable not available in this release
## 0 0
## not available in this year see codebook
## 0 0
## <NA>
## 196
## [1] "\nDistribution of gender:"
##
## male female
## 31977 40301
## don't know iap
## 0 0
## I don't have a job dk, na, iap
## 0 0
## no answer not imputable_(2147483637)
## 0 0
## not imputable_(2147483638) refused
## 0 0
## skipped on web uncodeable
## 0 0
## not available in this release not available in this year
## 0 0
## see codebook <NA>
## 0 112
Now, let’s clean the data according to the assignment requirements.
gss <- gss %>%
mutate(
abortion_view = case_when(
abany == "yes" ~ "Support",
abany == "no" ~ "Oppose",
TRUE ~ NA_character_
),
abortion_view = factor(abortion_view),
# Clean political views - three categories
political_view = case_when(
polviews %in% c("extremely liberal", "liberal", "slightly liberal") ~ "Liberal",
polviews == "moderate, middle of the road" ~ "Moderate",
polviews %in% c("extremely conservative", "conservative", "slightly conservative") ~ "Conservative",
TRUE ~ NA_character_
),
political_view = factor(political_view,
levels = c("Liberal", "Moderate", "Conservative")),
education = case_when(
degree %in% c("less than high school", "high school") ~ "High School or Less",
degree == "associate/junior college" ~ "Some College",
degree %in% c("bachelor's", "graduate") ~ "Bachelor's or Higher",
TRUE ~ NA_character_
),
education = factor(education,
levels = c("High School or Less", "Some College", "Bachelor's or Higher")),
# Clean gender - binary as coded in GSS
gender = case_when(
sex == "male" ~ "Male",
sex == "female" ~ "Female",
TRUE ~ NA_character_
),
gender = factor(gender)
)
Importantly, let’s check our recoding:
## [1] "Cleaned abortion views:"
##
## Oppose Support <NA>
## 22628 16626 33136
## [1] "\nCleaned political views:"
##
## Liberal Moderate Conservative <NA>
## 17604 23992 21122 9672
## [1] "\nCleaned education:"
##
## High School or Less Some College Bachelor's or Higher
## 50638 4355 17201
## <NA>
## 196
## [1] "\nCleaned gender:"
##
## Female Male <NA>
## 40301 31977 112
Let’s create a combined table showing support for abortion rights across all the social/political characteristics (note, you can do it all at once, but we will do it in separate chunks similar to the tutorial):
political_summary <- gss %>%
filter(!is.na(abortion_view), !is.na(political_view)) %>%
group_by(political_view) %>%
summarize(
n = n(),
n_support = sum(abortion_view == "Support"),
pct_support = round(100 * n_support/n, 1)
)
political_summary
## # A tibble: 3 × 4
## political_view n n_support pct_support
## <fct> <int> <int> <dbl>
## 1 Liberal 10395 6189 59.5
## 2 Moderate 14040 5881 41.9
## 3 Conservative 12450 3854 31
You can even do a check by taking the Number supporting reported (e.g., 6189) divided by the sample size for that given political views label (e.g., 10395) = 59.5.
education_summary <- gss %>%
filter(!is.na(abortion_view), !is.na(education)) %>%
group_by(education) %>%
summarize(
n = n(),
n_support = sum(abortion_view == "Support"),
pct_support = round(100 * n_support/n, 1)
)
education_summary
## # A tibble: 3 × 4
## education n n_support pct_support
## <fct> <int> <int> <dbl>
## 1 High School or Less 27601 10180 36.9
## 2 Some College 2373 1083 45.6
## 3 Bachelor's or Higher 9193 5333 58
gender_summary <- gss %>%
filter(!is.na(abortion_view), !is.na(gender)) %>%
group_by(gender) %>%
summarize(
n = n(),
n_support = sum(abortion_view == "Support"),
pct_support = round(100 * n_support/n, 1)
)
gender_summary
## # A tibble: 2 × 4
## gender n n_support pct_support
## <fct> <int> <int> <dbl>
## 1 Female 22000 9142 41.6
## 2 Male 17229 7475 43.4
Now we have all the values we want to report. Now, we can combine all the values with a bind_rows. There are other ways to complete this part of the assignment, including calculating all of the values at once and then putting them into the formatted gt table.
all_values <- bind_rows(
political_summary %>%
mutate(characteristic = "Political Views",
category = political_view) %>%
select(-political_view),
education_summary %>%
mutate(characteristic = "Education",
category = education) %>%
select(-education),
gender_summary %>%
mutate(characteristic = "Gender",
category = gender) %>%
select(-gender)
)
all_values %>%
gt(
groupname_col = "characteristic"
) %>%
tab_header(
title = md("**Support for Legal Abortion by Social Characteristics**"),
subtitle = "GSS, 1977-2022"
) %>%
fmt_number(
columns = c(n),
decimals = 0,
use_seps = TRUE
) %>%
cols_label(
category = "Category",
n = "Sample Size",
n_support = "Number Supporting",
pct_support = "% Support"
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_row_groups()
)
Support for Legal Abortion by Social Characteristics | |||
GSS, 1977-2022 | |||
Sample Size | Number Supporting | % Support | Category |
---|---|---|---|
Political Views | |||
10,395 | 6189 | 59.5 | Liberal |
14,040 | 5881 | 41.9 | Moderate |
12,450 | 3854 | 31.0 | Conservative |
Education | |||
27,601 | 10180 | 36.9 | High School or Less |
2,373 | 1083 | 45.6 | Some College |
9,193 | 5333 | 58.0 | Bachelor's or Higher |
Gender | |||
22,000 | 9142 | 41.6 | Female |
17,229 | 7475 | 43.4 | Male |
Make sure to always double check what displays in your formatted table vs. what you calculated to ensure it matches.
Now, let’s create a visualization showing opposition to abortion over time for all three variables as requested in the guidelines. The steps are very similar but instead we are doing for “Oppose” instead of “Support”. That could have been handled all at once, but we are breaking down the assignment per task.
political_trends <- gss %>%
filter(!is.na(abortion_view), !is.na(political_view)) %>%
group_by(year, political_view) %>%
summarize(
n = n(),
prop_oppose = mean(abortion_view == "Oppose"),
.groups = "drop"
)
education_trends <- gss %>%
filter(!is.na(abortion_view), !is.na(education)) %>%
group_by(year, education) %>%
summarize(
n = n(),
prop_oppose = mean(abortion_view == "Oppose"),
.groups = "drop"
)
gender_trends <- gss %>%
filter(!is.na(abortion_view), !is.na(gender)) %>%
group_by(year, gender) %>%
summarize(
n = n(),
prop_oppose = mean(abortion_view == "Oppose"),
.groups = "drop"
)
all_trends <- bind_rows(
political_trends %>%
mutate(characteristic = "Political Views",
category = political_view),
education_trends %>%
mutate(characteristic = "Education",
category = education),
gender_trends %>%
mutate(characteristic = "Gender",
category = gender)
)
last_points <- all_trends %>%
group_by(characteristic, category) %>%
slice_max(order_by = year, n = 1)
ggplot(all_trends,
aes(x = year, y = prop_oppose, color = category)) +
geom_line(size = 1.2) +
scale_color_manual(
values = c(
"Liberal" = "#2E74C0",
"Moderate" = "#6C6C6C",
"Conservative" = "#CB454A",
"High School or Less" = "#6C3C89",
"Some College" = "#C098D0",
"Bachelor's or Higher" = "#005F73",
"Female" = "#E69F00",
"Male" = "#56B4E9"
)
) +
geom_label_repel(
data = last_points,
aes(label = category),
nudge_x = 2,
direction = "y",
hjust = 0,
segment.size = 0.3,
segment.color = "grey70",
box.padding = 0.5,
point.padding = 0.5,
size = 3.5,
fontface = "bold",
label.size = 0.1,
label.r = unit(0.2, "lines"),
fill = alpha("white", 0.7)
) +
facet_wrap(~characteristic, scales = "free_y") +
scale_y_continuous(
labels = scales::percent_format(),
limits = c(0, 1.0),
breaks = seq(0, 1.0, by = 0.2),
expand = c(0.01, 0.01)
) +
scale_x_continuous(
breaks = seq(1975, 2025, by = 10),
limits = c(1972, 2027),
expand = c(0.01, 0.01)
) +
labs(
title = "Opposition to Legal Abortion Across Social Groups",
subtitle = "Percentage opposing abortion when 'woman wants it for any reason'",
x = NULL,
y = "Proportion Opposing",
caption = "Source: General Social Survey, 1977-2022"
) +
theme_minimal() +
theme(
plot.title = element_text(
face = "bold",
size = 14,
margin = margin(b = 10)
),
plot.subtitle = element_text(
color = "grey40",
size = 12,
margin = margin(b = 20)
),
axis.line = element_line(color = "black", size = 0.5),
axis.ticks = element_line(color = "black", size = 0.5),
panel.grid = element_blank(),
strip.text = element_text(face = "bold", size = 12),
strip.background = element_rect(fill = "grey95", color = NA),
legend.position = "none",
axis.text = element_text(size = 9, color = "black"),
plot.margin = margin(20, 20, 20, 20)
)
Not required but for ease of interpretation:
milestone_years <- c(1977, 1985, 1990, 2000, 2010, 2022)
opposition_milestones <- all_trends %>%
filter(year %in% milestone_years) %>%
mutate(pct_oppose = round(prop_oppose * 100, 1)) %>%
select(year, characteristic, category, pct_oppose) %>%
pivot_wider(
id_cols = c(characteristic, category),
names_from = year,
values_from = pct_oppose
) %>%
arrange(characteristic, category)
opposition_milestones
## # A tibble: 8 × 8
## characteristic category `1977` `1985` `1990` `2000` `2010` `2022`
## <chr> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Education High School or Less 65.6 67.3 61.2 63.6 62.2 49.5
## 2 Education Some College 56.2 50 40.4 63.9 59.8 45.5
## 3 Education Bachelor's or Higher 42.8 46.7 43.7 48.9 43.4 29.7
## 4 Gender Female 63.3 64.5 56.7 60.2 57.9 41.3
## 5 Gender Male 61.1 61.3 56.4 60.1 54.5 45.2
## 6 Political Views Liberal 55.1 48.5 41.6 44.7 37.6 18
## 7 Political Views Moderate 62.2 65.6 54.5 58.3 57 41.8
## 8 Political Views Conservative 66.8 69.3 68.9 72.5 70.4 67.8
Based on the table and visualization, I’ll provide a clear and focused interpretation of the abortion attitude patterns:
The GSS data reveal significant differences and substantial changes in abortion attitudes across social groups from 1977 to 2022, with three distinct patterns emerging.
Political ideology shows the most dramatic divergence. Conservative opposition to abortion has remained consistently high (67-70% in recent decades) with minimal change from the 66.8% opposition recorded in 1977. In stark contrast, liberal opposition has plummeted from 55.1% in 1977 to just 18.0% in 2022. This growing polarization is particularly pronounced after 1990, with the ideological gap expanding from approximately 20 percentage points to over 50 percentage points by 2022. Moderates follow a middle trajectory, declining from 62.2% opposition in 1977 to 41.8% in 2022.
Educational differences show persistent but narrowing gaps. Opposition among those with bachelor’s degrees or higher has declined modestly from 42.8% in 1977 to 29.7% in 2022. Those with high school education or less demonstrated a larger decline from 65.6% to 49.5% during the same period. The “some college” group typically falls between these extremes, showing more fluctuation over time, with notable variation in 2000 (63.9%) and 2022 (45.5%). note: this is directly related to the smaller sample size relative to any of the other sub-categories (i.e., smaller sample sizes will be more prone to year-to-year fluctuation since they are more sensitive to outliers and to compositional differences).
Gender differences remain surprisingly modest despite abortion directly related to women healthcare issues (there are theories about direct interests, which here are not really supported). Both women and men show parallel declines in opposition, from about 63.3% and 61.1% respectively in 1977 to 41.3% and 45.2% in 2022. This relatively small gender gap (never exceeding 4 percentage points in recent surveys) suggests that abortion attitudes are not significantly structured by gender.
These patterns demonstrate that abortion views have become increasingly defined by political ideology and educational attainment rather than gender, reflecting abortion’s transformation to a defining partisan divide in American politics.
First, let’s examine the variables we’ll be working with:
## [1] "LGBTQ+ rights variable (freehms):"
##
## 1 2 3 4 5 7 8 9
## 152529 167624 69465 43607 36444 1592 18458 836
## [1] "\nEducation variable (eisced):"
##
## 0 1 2 3 4 5 6 7 55 77 88 99
## 73306 38823 71917 74258 87348 49268 42651 49558 1052 483 497 1394
## [1] "\nDomicile variable (domicil):"
##
## 1 2 3 4 5 7 8 9
## 107787 55216 149256 148750 27534 137 493 1382
## [1] "\nCountry distribution:"
##
## AL AT BE BG CH CY CZ DE DK EE ES FI FR
## 1201 15225 17451 13240 16925 6065 20090 34425 12408 16856 19452 19532 19038
## GB GR HR HU IE IL IS IT LT LU LV ME MK
## 20979 12558 6535 16642 22233 16218 3975 10178 11652 3187 3921 2478 1429
## NL NO PL PT RO RS RU SE SI SK TR UA XK
## 18329 16065 17689 17881 2146 3548 12458 18216 13484 11292 4272 9987 1295
Now, let’s clean the data according to the assignment requirements (need also to consult survey documentation/ESS data portal):
ess_clean <- ess %>%
filter(cntry %in% c("FR", "HU")) %>%
mutate(
lgbtq_rights = case_when(
freehms %in% c(1, 2) ~ "Support",
freehms == 3 ~ "Neutral",
freehms %in% c(4, 5) ~ "Oppose",
TRUE ~ NA_character_
),
lgbtq_rights = factor(lgbtq_rights,
levels = c("Support", "Neutral", "Oppose")),
education = case_when(
eisced %in% 1:2 ~ "Lower Secondary or Less",
eisced %in% 3:4 ~ "Upper Secondary",
eisced %in% 5:7 ~ "Tertiary",
TRUE ~ NA_character_
),
education = factor(education,
levels = c("Lower Secondary or Less",
"Upper Secondary",
"Tertiary")),
residence = case_when(
domicil == 1 ~ "Major city",
domicil %in% c(2, 3) ~ "Urban periphery",
domicil %in% c(4, 5) ~ "Rural",
TRUE ~ NA_character_
),
residence = factor(residence,
levels = c("Major city", "Urban periphery", "Rural")),
country = case_when(
cntry == "FR" ~ "France",
cntry == "HU" ~ "Hungary",
TRUE ~ NA_character_
),
country = factor(country)
)
Let’s check what we did:
## [1] "Checking LGBTQ+ rights distribution:"
##
## Support Neutral Oppose <NA>
## 22504 5343 6157 1676
## [1] "\nChecking education distribution:"
##
## Lower Secondary or Less Upper Secondary Tertiary
## 8355 15609 8278
## <NA>
## 3438
## [1] "\nChecking residence distribution:"
##
## Major city Urban periphery Rural <NA>
## 7780 15078 12801 21
## [1] "\nChecking country distribution:"
##
## France Hungary
## 19038 16642
Now let’s create the four required tables, starting with France and education:
france_edu <- ess_clean %>%
filter(country == "France",
!is.na(lgbtq_rights),
!is.na(education)) %>%
mutate(total_n = n()) %>%
group_by(education) %>%
summarize(
n = n(),
pct_of_sample = round(100 * n/first(total_n), 1),
pct_oppose = round(100 * mean(lgbtq_rights == "Oppose"), 1)
)
france_edu
## # A tibble: 3 × 4
## education n pct_of_sample pct_oppose
## <fct> <int> <dbl> <dbl>
## 1 Lower Secondary or Less 4168 26.7 13.9
## 2 Upper Secondary 6490 41.6 6.6
## 3 Tertiary 4928 31.6 3.9
france_edu_table <- france_edu %>%
gt() %>%
tab_header(
title = md("**Opposition to LGBTQ+ Rights by Education in France**"),
subtitle = "ESS 2002-2020"
) %>%
fmt_number(
columns = n,
decimals = 0,
use_seps = TRUE
) %>%
fmt_number(
columns = c(pct_of_sample, pct_oppose),
decimals = 1
) %>%
cols_label(
education = "Education Level",
n = "Group Size",
pct_of_sample = "% of Sample",
pct_oppose = "% Opposing"
) %>%
tab_source_note(
source_note = md("Data: European Social Survey. *% opposing represents conditional probabilities.*")
)
france_edu_table
Opposition to LGBTQ+ Rights by Education in France | |||
ESS 2002-2020 | |||
Education Level | Group Size | % of Sample | % Opposing |
---|---|---|---|
Lower Secondary or Less | 4,168 | 26.7 | 13.9 |
Upper Secondary | 6,490 | 41.6 | 6.6 |
Tertiary | 4,928 | 31.6 | 3.9 |
Data: European Social Survey. % opposing represents conditional probabilities. |
Repeating, but for Hungary now:
hungary_edu <- ess_clean %>%
filter(country == "Hungary",
!is.na(lgbtq_rights),
!is.na(education)) %>%
mutate(total_n = n()) %>%
group_by(education) %>%
summarize(
n = n(),
pct_of_sample = round(100 * n/first(total_n), 1),
pct_oppose = round(100 * mean(lgbtq_rights == "Oppose"), 1)
)
hungary_edu
## # A tibble: 3 × 4
## education n pct_of_sample pct_oppose
## <fct> <int> <dbl> <dbl>
## 1 Lower Secondary or Less 3476 23.1 38.7
## 2 Upper Secondary 8434 56.1 29.4
## 3 Tertiary 3126 20.8 22.8
hungary_edu_table <- hungary_edu %>%
gt() %>%
tab_header(
title = md("**Opposition to LGBTQ+ Rights by Education in Hungary**"),
subtitle = "ESS 2002-2020"
) %>%
fmt_number(
columns = n,
decimals = 0,
use_seps = TRUE
) %>%
fmt_number(
columns = c(pct_of_sample, pct_oppose),
decimals = 1
) %>%
cols_label(
education = "Education Level",
n = "Group Size",
pct_of_sample = "% of Sample",
pct_oppose = "% Opposing"
) %>%
tab_source_note(
source_note = md("Data: European Social Survey. *% opposing represents conditional probabilities.*")
)
hungary_edu_table
Opposition to LGBTQ+ Rights by Education in Hungary | |||
ESS 2002-2020 | |||
Education Level | Group Size | % of Sample | % Opposing |
---|---|---|---|
Lower Secondary or Less | 3,476 | 23.1 | 38.7 |
Upper Secondary | 8,434 | 56.1 | 29.4 |
Tertiary | 3,126 | 20.8 | 22.8 |
Data: European Social Survey. % opposing represents conditional probabilities. |
Now France with place of residence:
france_res <- ess_clean %>%
filter(country == "France",
!is.na(lgbtq_rights),
!is.na(residence)) %>%
mutate(total_n = n()) %>%
group_by(residence) %>%
summarize(
n = n(),
pct_of_sample = round(100 * n/first(total_n), 1),
pct_oppose = round(100 * mean(lgbtq_rights == "Oppose"), 1)
)
france_res
## # A tibble: 3 × 4
## residence n pct_of_sample pct_oppose
## <fct> <int> <dbl> <dbl>
## 1 Major city 3556 18.8 8.8
## 2 Urban periphery 8543 45.3 8.5
## 3 Rural 6780 35.9 8.3
france_res_table <- france_res %>%
gt() %>%
tab_header(
title = md("**Opposition to LGBTQ+ Rights by Residence in France**"),
subtitle = "ESS 2002-2020"
) %>%
fmt_number(
columns = n,
decimals = 0,
use_seps = TRUE
) %>%
fmt_number(
columns = c(pct_of_sample, pct_oppose),
decimals = 1
) %>%
cols_label(
residence = "Place of Residence",
n = "Group Size",
pct_of_sample = "% of Sample",
pct_oppose = "% Opposing"
) %>%
tab_source_note(
source_note = md("Data: European Social Survey. *% opposing represents conditional probabilities.*")
)
france_res_table
Opposition to LGBTQ+ Rights by Residence in France | |||
ESS 2002-2020 | |||
Place of Residence | Group Size | % of Sample | % Opposing |
---|---|---|---|
Major city | 3,556 | 18.8 | 8.8 |
Urban periphery | 8,543 | 45.3 | 8.5 |
Rural | 6,780 | 35.9 | 8.3 |
Data: European Social Survey. % opposing represents conditional probabilities. |
Repeating for Hungary:
hungary_res <- ess_clean %>%
filter(country == "Hungary",
!is.na(lgbtq_rights),
!is.na(residence)) %>%
mutate(total_n = n()) %>%
group_by(residence) %>%
summarize(
n = n(),
pct_of_sample = round(100 * n/first(total_n), 1),
pct_oppose = round(100 * mean(lgbtq_rights == "Oppose"), 1)
)
hungary_res
## # A tibble: 3 × 4
## residence n pct_of_sample pct_oppose
## <fct> <int> <dbl> <dbl>
## 1 Major city 3870 25.6 27.5
## 2 Urban periphery 5921 39.2 30.5
## 3 Rural 5315 35.2 31.7
hungary_res_table <- hungary_res %>%
gt() %>%
tab_header(
title = md("**Opposition to LGBTQ+ Rights by Residence in Hungary**"),
subtitle = "ESS 2002-2020"
) %>%
fmt_number(
columns = n,
decimals = 0,
use_seps = TRUE
) %>%
fmt_number(
columns = c(pct_of_sample, pct_oppose),
decimals = 1
) %>%
cols_label(
residence = "Place of Residence",
n = "Group Size",
pct_of_sample = "% of Sample",
pct_oppose = "% Opposing"
) %>%
tab_source_note(
source_note = md("Data: European Social Survey. *% opposing represents conditional probabilities.*")
)
hungary_res_table
Opposition to LGBTQ+ Rights by Residence in Hungary | |||
ESS 2002-2020 | |||
Place of Residence | Group Size | % of Sample | % Opposing |
---|---|---|---|
Major city | 3,870 | 25.6 | 27.5 |
Urban periphery | 5,921 | 39.2 | 30.5 |
Rural | 5,315 | 35.2 | 31.7 |
Data: European Social Survey. % opposing represents conditional probabilities. |
Now let’s create the four required visualizations following the conditional probability pattern, notably shown in Tutorial 4. First France x Edu:
ggplot(france_edu,
aes(x = pct_oppose, y = reorder(education, pct_oppose))) +
geom_col(
width = 0.7,
fill = "#2E74C0",
alpha = 0.9
) +
geom_point(
size = 3,
color = "#1B4B78"
) +
geom_text(
aes(label = sprintf("%.1f%%", pct_oppose)),
hjust = -0.5,
size = 4,
fontface = "bold",
color = "#2E74C0"
) +
scale_x_continuous(
limits = c(0, max(france_edu$pct_oppose) * 1.2),
breaks = seq(0, 15, by = 5),
labels = scales::label_number(suffix = "%")
) +
labs(
title = "Opposition to LGBTQ+ Rights by Education in France",
subtitle = "ESS 2002-2020",
x = "Percentage Opposing",
y = NULL
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(color = "grey40", size = 12),
panel.grid.major.x = element_line(color = "grey95", size = 0.3),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank(),
plot.margin = margin(30, 30, 30, 30)
)
Now, Hungary x Edu:
ggplot(hungary_edu,
aes(x = pct_oppose, y = reorder(education, pct_oppose))) +
geom_col(
width = 0.7,
fill = "#CB454A",
alpha = 0.9
) +
geom_point(
size = 3,
color = "#7A2A2D"
) +
geom_text(
aes(label = sprintf("%.1f%%", pct_oppose)),
hjust = -0.5,
size = 4,
fontface = "bold",
color = "#CB454A"
) +
scale_x_continuous(
limits = c(0, max(hungary_edu$pct_oppose) * 1.2),
breaks = seq(0, 40, by = 10),
labels = scales::label_number(suffix = "%")
) +
labs(
title = "Opposition to LGBTQ+ Rights by Education in Hungary",
subtitle = "ESS 2002-2020",
x = "Percentage Opposing",
y = NULL
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(color = "grey40", size = 12),
panel.grid.major.x = element_line(color = "grey95", size = 0.3),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank(),
plot.margin = margin(30, 30, 30, 30)
)
France x Place:
ggplot(france_res,
aes(x = pct_oppose, y = reorder(residence, pct_oppose))) +
geom_col(
width = 0.7,
fill = "#2E74C0",
alpha = 0.9
) +
geom_point(
size = 3,
color = "#1B4B78"
) +
geom_text(
aes(label = sprintf("%.1f%%", pct_oppose)),
hjust = -0.5,
size = 4,
fontface = "bold",
color = "#2E74C0"
) +
scale_x_continuous(
limits = c(0, max(france_res$pct_oppose) * 1.2),
breaks = seq(0, 10, by = 2),
labels = scales::label_number(suffix = "%")
) +
labs(
title = "Opposition to LGBTQ+ Rights by Residence in France",
subtitle = "ESS 2002-2020",
x = "Percentage Opposing",
y = NULL
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(color = "grey40", size = 12),
panel.grid.major.x = element_line(color = "grey95", size = 0.3),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank(),
plot.margin = margin(30, 30, 30, 30)
)
Hungary x Place:
ggplot(hungary_res,
aes(x = pct_oppose, y = reorder(residence, pct_oppose))) +
geom_col(
width = 0.7,
fill = "#CB454A",
alpha = 0.9
) +
geom_point(
size = 3,
color = "#7A2A2D"
) +
geom_text(
aes(label = sprintf("%.1f%%", pct_oppose)),
hjust = -0.5,
size = 4,
fontface = "bold",
color = "#CB454A"
) +
scale_x_continuous(
limits = c(0, max(hungary_res$pct_oppose) * 1.2),
breaks = seq(0, 35, by = 5),
labels = scales::label_number(suffix = "%")
) +
labs(
title = "Opposition to LGBTQ+ Rights by Residence in Hungary",
subtitle = "ESS 2002-2020",
x = "Percentage Opposing",
y = NULL
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(color = "grey40", size = 12),
panel.grid.major.x = element_line(color = "grey95", size = 0.3),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank(),
plot.margin = margin(30, 30, 30, 30)
)
The ESS data reveal differences in LGBTQ+ rights attitudes between France and Hungary, with notable patterns across the conditional probabilties for educational and geographic groups.
France demonstrates remarkably low opposition to LGBTQ+ rights, with a clear educational gradient. Among those with lower secondary education or less, 13.9% oppose LGBTQ+ rights, while this drops to just 6.6% among those with upper secondary education and falls further to a mere 3.9% among those with tertiary education. This represents a substantial 10 percentage point difference between the lowest and highest education groups. Interestingly, geography shows minimal impact on attitudes in France, with opposition remaining consistently low regardless of location (8.8% in major cities, 8.5% in urban peripheries, and 8.3% in rural areas).
In stark contrast, Hungary exhibits much higher opposition across all demographics. Educational differences are particularly pronounced, with 38.7% of those with lower secondary education or less opposing LGBTQ+ rights, compared to 29.4% among those with upper secondary education and 22.8% among those with tertiary education. This represents a 15.9 percentage point gap between the lowest and highest education groups - even larger than in France. Unlike France, Hungary also demonstrates geographic variation, with major cities showing lower opposition (27.5%) compared to urban peripheries (30.5%) and rural areas (31.7%).
The cross-national comparison is especially revealing. The opposition level among Hungary’s most progressive demographic (tertiary-educated at 22.8%) exceeds that of France’s least progressive group (lower secondary or less at 13.9%). This indicates fundamental differences in national contexts. While education emerges as important on highlighting patterns of attitudes in both countries, residential differences appear as relatively important only in Hungary.
Here is one way to do it (note: the output should be the same as above):
combined_data <- bind_rows(
france_edu %>% mutate(country = "France", demographic = "Education"),
hungary_edu %>% mutate(country = "Hungary", demographic = "Education"),
france_res %>% mutate(country = "France", demographic = "Residence",
education = residence) %>% select(-residence),
hungary_res %>% mutate(country = "Hungary", demographic = "Residence",
education = residence) %>% select(-residence)
)
combined_table <- combined_data %>%
pivot_wider(
names_from = country,
values_from = c(n, pct_of_sample, pct_oppose)
) %>%
select(demographic, education,
n_France, pct_of_sample_France, pct_oppose_France,
n_Hungary, pct_of_sample_Hungary, pct_oppose_Hungary) %>%
arrange(demographic, education)
combined_gt <- combined_table %>%
gt(groupname_col = "demographic") %>%
tab_header(
title = md("**Opposition to LGBTQ+ Rights: France vs. Hungary**"),
subtitle = "Comparison by Education Level and Place of Residence (ESS 2002-2020)"
) %>%
fmt_number(
columns = c(n_France, n_Hungary),
decimals = 0,
use_seps = TRUE
) %>%
fmt_number(
columns = contains("pct"),
decimals = 1
) %>%
cols_label(
education = "Group",
n_France = "Group Size",
pct_of_sample_France = "% of Sample",
pct_oppose_France = "% Opposing",
n_Hungary = "Group Size",
pct_of_sample_Hungary = "% of Sample",
pct_oppose_Hungary = "% Opposing"
) %>%
tab_spanner(
label = md("**France**"),
columns = c(n_France, pct_of_sample_France, pct_oppose_France)
) %>%
tab_spanner(
label = md("**Hungary**"),
columns = c(n_Hungary, pct_of_sample_Hungary, pct_oppose_Hungary)
) %>%
tab_style(
style = list(
cell_fill(color = "aliceblue")
),
locations = cells_body(
columns = c(n_France, pct_of_sample_France, pct_oppose_France)
)
) %>%
tab_style(
style = list(
cell_fill(color = "#FFEFEF")
),
locations = cells_body(
columns = c(n_Hungary, pct_of_sample_Hungary, pct_oppose_Hungary)
)
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_row_groups()
) %>%
tab_source_note(
source_note = md("Data: European Social Survey. *% opposing represents conditional probabilities.*")
)
combined_gt
Opposition to LGBTQ+ Rights: France vs. Hungary | ||||||
Comparison by Education Level and Place of Residence (ESS 2002-2020) | ||||||
Group |
France
|
Hungary
|
||||
---|---|---|---|---|---|---|
Group Size | % of Sample | % Opposing | Group Size | % of Sample | % Opposing | |
Education | ||||||
Lower Secondary or Less | 4,168 | 26.7 | 13.9 | 3,476 | 23.1 | 38.7 |
Upper Secondary | 6,490 | 41.6 | 6.6 | 8,434 | 56.1 | 29.4 |
Tertiary | 4,928 | 31.6 | 3.9 | 3,126 | 20.8 | 22.8 |
Residence | ||||||
Major city | 3,556 | 18.8 | 8.8 | 3,870 | 25.6 | 27.5 |
Urban periphery | 8,543 | 45.3 | 8.5 | 5,921 | 39.2 | 30.5 |
Rural | 6,780 | 35.9 | 8.3 | 5,315 | 35.2 | 31.7 |
Data: European Social Survey. % opposing represents conditional probabilities. |
And two examples of a visualization. One simpler where we just combine all the visuals created before together. And another where we try something else to compare all the values together:
france_edu_plot <- ggplot(france_edu,
aes(x = pct_oppose, y = reorder(education, pct_oppose))) +
geom_col(
width = 0.7,
fill = "#2E74C0",
alpha = 0.9
) +
geom_point(
size = 3,
color = "#1B4B78"
) +
geom_text(
aes(label = sprintf("%.1f%%", pct_oppose)),
hjust = -0.5,
size = 4,
fontface = "bold",
color = "#2E74C0"
) +
scale_x_continuous(
limits = c(0, max(france_edu$pct_oppose) * 1.2),
breaks = seq(0, 15, by = 5),
labels = scales::label_number(suffix = "%")
) +
labs(
title = "France: Education",
x = "Percentage Opposing",
y = NULL
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 12),
panel.grid.major.x = element_line(color = "grey95", size = 0.3),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank(),
plot.margin = margin(10, 10, 10, 10)
)
hungary_edu_plot <- ggplot(hungary_edu,
aes(x = pct_oppose, y = reorder(education, pct_oppose))) +
geom_col(
width = 0.7,
fill = "#CB454A",
alpha = 0.9
) +
geom_point(
size = 3,
color = "#7A2A2D"
) +
geom_text(
aes(label = sprintf("%.1f%%", pct_oppose)),
hjust = -0.5,
size = 4,
fontface = "bold",
color = "#CB454A"
) +
scale_x_continuous(
limits = c(0, max(hungary_edu$pct_oppose) * 1.2),
breaks = seq(0, 40, by = 10),
labels = scales::label_number(suffix = "%")
) +
labs(
title = "Hungary: Education",
x = "Percentage Opposing",
y = NULL
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 12),
panel.grid.major.x = element_line(color = "grey95", size = 0.3),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank(),
plot.margin = margin(10, 10, 10, 10)
)
france_res_plot <- ggplot(france_res,
aes(x = pct_oppose, y = reorder(residence, pct_oppose))) +
geom_col(
width = 0.7,
fill = "#2E74C0",
alpha = 0.9
) +
geom_point(
size = 3,
color = "#1B4B78"
) +
geom_text(
aes(label = sprintf("%.1f%%", pct_oppose)),
hjust = -0.5,
size = 4,
fontface = "bold",
color = "#2E74C0"
) +
scale_x_continuous(
limits = c(0, max(france_res$pct_oppose) * 1.2),
breaks = seq(0, 10, by = 2),
labels = scales::label_number(suffix = "%")
) +
labs(
title = "France: Residence",
x = "Percentage Opposing",
y = NULL
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 12),
panel.grid.major.x = element_line(color = "grey95", size = 0.3),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank(),
plot.margin = margin(10, 10, 10, 10)
)
hungary_res_plot <- ggplot(hungary_res,
aes(x = pct_oppose, y = reorder(residence, pct_oppose))) +
geom_col(
width = 0.7,
fill = "#CB454A",
alpha = 0.9
) +
geom_point(
size = 3,
color = "#7A2A2D"
) +
geom_text(
aes(label = sprintf("%.1f%%", pct_oppose)),
hjust = -0.5,
size = 4,
fontface = "bold",
color = "#CB454A"
) +
scale_x_continuous(
limits = c(0, max(hungary_res$pct_oppose) * 1.2),
breaks = seq(0, 35, by = 5),
labels = scales::label_number(suffix = "%")
) +
labs(
title = "Hungary: Residence",
x = "Percentage Opposing",
y = NULL
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 12),
panel.grid.major.x = element_line(color = "grey95", size = 0.3),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank(),
plot.margin = margin(10, 10, 10, 10)
)
combined_plots <- (france_edu_plot + hungary_edu_plot) / (france_res_plot + hungary_res_plot)
combined_plots_with_title <- combined_plots +
plot_annotation(
title = "Opposition to LGBTQ+ Rights in France and Hungary",
subtitle = "ESS 2002-2020",
caption = "Data: European Social Survey. Percentages represent conditional probabilities.",
theme = theme(
plot.title = element_text(face = "bold", size = 16),
plot.subtitle = element_text(size = 12, color = "grey40"),
plot.caption = element_text(size = 10, color = "grey40", hjust = 0)
)
)
combined_plots_with_title
combined_data <- bind_rows(
france_edu %>% mutate(country = "France", demographic = "Education", group = education) %>% select(-education),
hungary_edu %>% mutate(country = "Hungary", demographic = "Education", group = education) %>% select(-education),
france_res %>% mutate(country = "France", demographic = "Residence", group = residence) %>% select(-residence),
hungary_res %>% mutate(country = "Hungary", demographic = "Residence", group = residence) %>% select(-residence)
)
combined_data$category <- paste(combined_data$country, combined_data$demographic, combined_data$group)
all_in_one_plot <- ggplot(combined_data,
aes(x = reorder(category, pct_oppose), y = pct_oppose, fill = country)) +
geom_col(
width = 0.7,
alpha = 0.9
) +
geom_point(
aes(color = country),
size = 3
) +
geom_text(
aes(label = sprintf("%.1f%%", pct_oppose)),
hjust = -0.3,
size = 3.5,
fontface = "bold"
) +
scale_y_continuous(
labels = scales::label_percent(scale = 1),
limits = c(0, max(combined_data$pct_oppose) * 1.2),
breaks = seq(0, 40, by = 10)
) +
scale_fill_manual(
values = c(
"France" = "#2E74C0",
"Hungary" = "#CB454A"
)
) +
scale_color_manual(
values = c(
"France" = "#1B4B78",
"Hungary" = "#7A2A2D"
)
) +
labs(
title = "Opposition to LGBTQ+ Rights: All Conditional Probabilities",
subtitle = "Ordered from highest to lowest opposition (ESS 2002-2020)",
x = NULL,
y = "Percentage Opposing",
caption = "Data: European Social Survey"
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 16, margin = margin(b = 10)),
plot.subtitle = element_text(color = "grey40", size = 12, margin = margin(b = 20)),
axis.text.x = element_text(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_line(color = "grey95", size = 0.3),
legend.position = "top",
legend.title = element_blank(),
legend.text = element_text(size = 10, face = "bold"),
plot.margin = margin(20, 20, 20, 20)
) +
coord_flip()
all_in_one_plot
Trying one more improved version:
improved_categories <- combined_data %>%
mutate(
demographic_group = paste(demographic, group),
country_color = country
)
improved_plot <- ggplot(improved_categories,
aes(x = reorder(demographic_group, pct_oppose), y = pct_oppose, fill = country_color)) +
geom_col(
width = 0.7,
position = position_dodge(width = 0.8),
alpha = 0.9
) +
geom_point(
aes(color = country_color),
position = position_dodge(width = 0.8),
size = 3
) +
geom_text(
aes(label = sprintf("%.1f%%", pct_oppose)),
position = position_dodge(width = 0.8),
hjust = -0.3,
size = 3.5,
fontface = "bold"
) +
scale_y_continuous(
labels = scales::label_percent(scale = 1),
limits = c(0, max(improved_categories$pct_oppose) * 1.2),
breaks = seq(0, 40, by = 10)
) +
scale_fill_manual(
values = c(
"France" = "#2E74C0",
"Hungary" = "#CB454A"
),
name = "Country"
) +
scale_color_manual(
values = c(
"France" = "#1B4B78",
"Hungary" = "#7A2A2D"
),
name = "Country"
) +
labs(
title = "Opposition to LGBTQ+ Rights by Demographic Categories",
subtitle = "Comparing France and Hungary (ESS 2002-2020)",
x = NULL,
y = "Percentage Opposing",
caption = "Data: European Social Survey"
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 16, margin = margin(b = 10)),
plot.subtitle = element_text(color = "grey40", size = 12, margin = margin(b = 20)),
axis.text.x = element_text(size = 9),
axis.text.y = element_text(size = 9),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_line(color = "grey95", size = 0.3),
legend.position = "top",
legend.title = element_text(size = 10, face = "bold"),
legend.text = element_text(size = 10),
plot.margin = margin(20, 20, 20, 20)
) +
coord_flip() +
guides(color = "none")
improved_plot