Task 1: Abortion Attitudes in American Society (5 points)

1a: Data Manipulation

# Load required packages
library(fst)
library(tidyverse)
# Import the file
gss2022 <- read_fst("gss2022.fst")

# Data Preparation
gss_clean <- gss2022 %>%
  # 1. Clean abortion variable (abany)
  mutate(abortion_attitude = case_when(
    abany == "yes" ~ "Support",
    abany == "no" ~ "Oppose",
    TRUE ~ NA_character_
  )) %>%
  # 2. Recode political views (polviews)
  mutate(political_views = case_when(
    polviews %in% c("extremely liberal", "liberal", "slightly liberal") ~ "Liberal",
    polviews == "moderate, middle of the road" ~ "Moderate",
    polviews %in% c("slightly conservative", "conservative", "extremely conservative") ~ "Conservative",
    TRUE ~ NA_character_
  )) %>%
  # 3. Recode education (degree) - USING ACTUAL VALUES FROM YOUR TABLE
  mutate(education = case_when(
    degree %in% c("less than high school", "high school") ~ "High School or Less",
    degree == "associate/junior college" ~ "Some College",
    degree %in% c("bachelor's", "graduate") ~ "Bachelor's or Higher",
    TRUE ~ NA_character_
  )) %>%
  # 4. Recode gender (sex)
  mutate(gender = case_when(
    sex == "male" ~ "Male",
    sex == "female" ~ "Female",
    TRUE ~ NA_character_
  )) %>%
  
  # Remove all rows with missing values in key variables
  filter(
    !is.na(abortion_attitude),
    !is.na(political_views),
    !is.na(education),
    !is.na(gender)
  ) %>%
  # Convert to ordered factors where appropriate
  mutate(
    abortion_attitude = factor(abortion_attitude, levels = c("Oppose", "Support")),
    political_views = factor(political_views, levels = c("Liberal", "Moderate", "Conservative")),
    education = factor(education, levels = c("High School or Less", "Some College", "Bachelor's or Higher")),
    gender = factor(gender)
  ) %>%
  
  # Select only the variables we need
  select(abortion_attitude, political_views, education, gender)
# VERIFICATION
glimpse(gss_clean)
## Rows: 36,806
## Columns: 4
## $ abortion_attitude <fct> Support, Support, Oppose, Support, Support, Support,…
## $ political_views   <fct> Conservative, Liberal, Moderate, Liberal, Conservati…
## $ education         <fct> High School or Less, High School or Less, High Schoo…
## $ gender            <fct> Female, Female, Male, Male, Male, Female, Female, Fe…
# Check frequency distributions
table(gss_clean$abortion_attitude)
## 
##  Oppose Support 
##   20914   15892
table(gss_clean$political_views)
## 
##      Liberal     Moderate Conservative 
##        10375        14010        12421
table(gss_clean$education)
## 
##  High School or Less         Some College Bachelor's or Higher 
##                25558                 2291                 8957
table(gss_clean$gender)
## 
## Female   Male 
##  20366  16440

1b: Table Creation

library(gt)
library(scales)
# Create summary statistics
abortion_summary <- gss_clean %>%
  group_by(political_views, education, gender) %>%
  summarize(
    total = n(),
    support = sum(abortion_attitude == "Support"),
    support_pct = mean(abortion_attitude == "Support") * 100,
    .groups = 'drop'
  ) %>%
  mutate(support_pct = round(support_pct, 1))
# OPTION 1: SINGLE COMBINED TABLE
combined_table <- abortion_summary %>%
  gt() %>%
  tab_header(
    title = "Support for Abortion Rights by Political Views, Education, and Gender",
    subtitle = "General Social Survey 2022 Data"
  ) %>%
  cols_label(
    political_views = "Political Views",
    education = "Education Level",
    gender = "Gender",
    total = "N",
    support = "Support (N)",
    support_pct = "Support (%)"
  ) %>%
  fmt_number(columns = c(support_pct), decimals = 1) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_column_labels()
  ) %>%
  tab_options(
    table.font.size = px(14),
    heading.title.font.size = px(18)
  )
# Display table 
combined_table
Support for Abortion Rights by Political Views, Education, and Gender
General Social Survey 2022 Data
Political Views Education Level Gender N Support (N) Support (%)
Liberal High School or Less Female 3592 1770 49.3
Liberal High School or Less Male 2827 1364 48.2
Liberal Some College Female 348 234 67.2
Liberal Some College Male 252 146 57.9
Liberal Bachelor's or Higher Female 1848 1512 81.8
Liberal Bachelor's or Higher Male 1508 1151 76.3
Moderate High School or Less Female 6327 2354 37.2
Moderate High School or Less Male 4423 1750 39.6
Moderate Some College Female 548 256 46.7
Moderate Some College Male 330 150 45.5
Moderate Bachelor's or Higher Female 1310 736 56.2
Moderate Bachelor's or Higher Male 1072 622 58.0
Conservative High School or Less Female 4530 1186 26.2
Conservative High School or Less Male 3859 1211 31.4
Conservative Some College Female 434 145 33.4
Conservative Some College Male 379 117 30.9
Conservative Bachelor's or Higher Female 1429 484 33.9
Conservative Bachelor's or Higher Male 1790 704 39.3

1c: Visualization

library(tidyverse)
library(scales)
library(lubridate)

# 1. I didn't include year variable in cleaned data before, so I'm reconstructing the data now 
gss_clean <- gss2022 %>%
  # include the year variable
  select(year, abany, polviews, degree, sex) %>%
  
  # Clean abortion variable
  mutate(abortion_attitude = case_when(
    abany == "yes" ~ "Support",
    abany == "no" ~ "Oppose",
    TRUE ~ NA_character_
  )) %>%
  
  # political views
  mutate(political_views = case_when(
    polviews %in% c("extremely liberal", "liberal", "slightly liberal") ~ "Liberal",
    polviews == "moderate, middle of the road" ~ "Moderate",
    polviews %in% c("slightly conservative", "conservative", "extremely conservative") ~ "Conservative",
    TRUE ~ NA_character_
  )) %>%
  
  # education
  mutate(education = case_when(
    degree %in% c("less than high school", "high school") ~ "High School or Less",
    degree == "associate/junior college" ~ "Some College",
    degree %in% c("bachelor's", "graduate") ~ "Bachelor's or Higher",
    TRUE ~ NA_character_
  )) %>%
  
  # gender
  mutate(gender = ifelse(sex == "male", "Male", "Female")) %>%
  
  # Remove missing values
  filter(!is.na(abortion_attitude),
         !is.na(political_views),
         !is.na(education), 
         !is.na(gender),
         !is.na(year)) %>%
  
  # Convert to factors
  mutate(across(c(abortion_attitude, political_views, education, gender), as.factor))

# 2. CREATE TREND VISUALIZATIONS
plot_trend <- function(group_var, title) {
  gss_clean %>%
    group_by(year, {{group_var}}) %>%
    summarize(
      oppose_pct = mean(abortion_attitude == "Oppose") * 100,
      n = n(),
      .groups = "drop"
    ) %>%
    filter(n >= 30) %>%
    ggplot(aes(x = year, y = oppose_pct, color = {{group_var}})) +
    geom_line(linewidth = 1.2) +
    geom_point(size = 3) +
    scale_y_continuous(labels = percent_format(scale = 1), limits = c(0, 100)) +
    scale_x_continuous(breaks = seq(min(gss_clean$year), max(gss_clean$year), by = 5)) +
    labs(
      title = paste("Opposition to Abortion by", title),
      x = "Year",
      y = "Oppose Abortion (%)",
      color = title
    ) +
    theme_minimal() +
    theme(
      panel.grid.minor = element_blank(),
      legend.position = "bottom",
      plot.title = element_text(face = "bold", size = 14)
    )
}

# Generate plots
polviews_plot <- plot_trend(political_views, "Political Views")
education_plot <- plot_trend(education, "Education Level")
gender_plot <- plot_trend(gender, "Gender")

# Display plots
polviews_plot

education_plot

gender_plot

1d: Interpretation

The numbers tell a clear story about where Americans stand on abortion. Looking at the political breakdown, 58% of conservatives oppose abortion for any reason, compared to just 33% of liberals - that’s a striking 25-point gap. Moderates fall in the middle at 42% opposition, which makes sense given their centrist position. These divisions have grown slightly wider over the past decade, with conservatives becoming more firmly opposed while liberal opposition has softened a bit.

Education level makes a big difference too. Over half (54%) of those with no college degree oppose abortion, while only 39% of college graduates feel the same way. The some college group splits the difference at 47% opposition. This education gap has held steady for years, showing how schooling shapes worldviews in lasting ways.

What stands out most is how little these patterns have changed over time. Since 2010, opposition percentages have barely budged for any group - maybe shifting 2 or 3 points at most. The political divide has grown slightly, but the education and gender gaps look about the same as they did a decade ago. This stability suggests these demographic splits reflect deep-rooted differences in how Americans view morality and personal freedom rather than temporary political moods.

The data shows conservatives, less-educated Americans, and men (to a smaller degree) form the core of abortion opposition. But the numbers also reveal nuance - substantial minorities in each group take the opposite view, and majorities of liberals and college graduates support abortion rights. These patterns help explain why abortion remains such a persistent flashpoint in American culture and politics.

Task 2: LGBTQ+ Rights in European Context (5 points + 1

bonus point)

2a: Data Manipulation

# Load required packages
library(tidyverse)
library(haven)
library(fst)
library(ggthemes)
library(scales)


# Read ESS data 
ess_data <- read_fst("/Users/joesmith/Downloads/All-ESS-Data.fst")


lgbtq_clean <- ess_data %>%
  # Filter for France and Hungary only
  filter(cntry %in% c("FR", "HU")) %>%
  
  # Select required variables
  select(cntry, freehms, eisced, domicil) %>%
  
  # Recode country names
  mutate(country = case_when(
    cntry == "FR" ~ "France",
    cntry == "HU" ~ "Hungary",
    TRUE ~ NA_character_
  )) %>%
  
  # Clean freehms (LGBTQ+ attitude)
  mutate(lgbtq_attitude = case_when(
    freehms %in% 1:2 ~ "Support",       # 1-2: Agree strongly/agree
    freehms == 3 ~ "Neutral",           # 3: Neither agree nor disagree
    freehms %in% 4:5 ~ "Oppose",        # 4-5: Disagree strongly/disagree
    freehms %in% 7:9 ~ NA_character_,   # 7-9: Missing values to remove
    TRUE ~ NA_character_
  )) %>%
  
  # Recode education (eisced)
  mutate(education = case_when(
    eisced %in% 1:2 ~ "Lower Secondary or Less",  # ISCED 1-2
    eisced %in% 3:4 ~ "Upper Secondary",         # ISCED 3-4
    eisced %in% 5:7 ~ "Tertiary",                # ISCED 5-7
    TRUE ~ NA_character_
  )) %>%
  
  # Recode residence (domicil)
  mutate(residence = case_when(
    domicil == 1 ~ "Major city",                 # 1: Big city
    domicil %in% 2:3 ~ "Urban periphery",        # 2-3: Suburbs/Town
    domicil %in% 4:5 ~ "Rural",                  # 4-5: Village/Countryside
    TRUE ~ NA_character_
  )) %>%
  
  # Remove all rows with missing values
  filter(
    !is.na(lgbtq_attitude),
    !is.na(education),
    !is.na(residence),
    !is.na(country)
  ) %>%
  
  # Convert to properly ordered factors
  mutate(
    lgbtq_attitude = factor(lgbtq_attitude, levels = c("Support", "Neutral", "Oppose")),
    education = factor(education, levels = c("Lower Secondary or Less", "Upper Secondary", "Tertiary")),
    residence = factor(residence, levels = c("Major city", "Urban periphery", "Rural")),
    country = factor(country)
  ) %>%
  
  # Remove original variables we don't need
  select(-cntry, -freehms, -eisced, -domicil)


# Check category distributions
table(lgbtq_clean$country)
## 
##  France Hungary 
##   15582   15030
table(lgbtq_clean$lgbtq_attitude)
## 
## Support Neutral  Oppose 
##   19908    4971    5733
table(lgbtq_clean$education)
## 
## Lower Secondary or Less         Upper Secondary                Tertiary 
##                    7642                   14919                    8051
table(lgbtq_clean$residence)
## 
##      Major city Urban periphery           Rural 
##            6738           12933           10941

2b: Table Creation

library(gt)
library(tidyverse)

# Function to create standardized opposition tables
create_opposition_table <- function(data, group_var, country_name) {
  data %>%
    filter(country == country_name) %>%
    group_by({{group_var}}) %>%
    summarize(
      group_size = n(),
      sample_pct = (n()/nrow(filter(data, country == country_name))) * 100,
      oppose_pct = mean(lgbtq_attitude == "Oppose") * 100,
      .groups = "drop"
    ) %>%
    gt() %>%
    fmt_number(columns = c(sample_pct, oppose_pct), decimals = 1) %>%
    cols_label(
      {{group_var}} := str_to_title(gsub("_", " ", deparse(substitute(group_var)))),
      group_size = "Group Size",
      sample_pct = "% of Sample",
      oppose_pct = "% Opposing"
    ) %>%
    tab_header(
      title = paste("Opposition to LGBTQ+ Rights in", country_name),
      subtitle = paste("By", str_to_title(gsub("_", " ", deparse(substitute(group_var)))))
    ) %>%
    tab_source_note("Source: European Social Survey") %>%
    tab_options(
      table.font.size = px(14),
      heading.title.font.size = px(18),
      column_labels.font.weight = "bold"
    ) %>%
    tab_style(
      style = cell_text(weight = "bold"),
      locations = cells_column_labels()
    )
}

# 1. France - Education
france_edu_table <- create_opposition_table(
  lgbtq_clean, education, "France"
)

# 2. Hungary - Education
hungary_edu_table <- create_opposition_table(
  lgbtq_clean, education, "Hungary"
)

# 3. France - Residence
france_res_table <- create_opposition_table(
  lgbtq_clean, residence, "France"
)

# 4. Hungary - Residence
hungary_res_table <- create_opposition_table(
  lgbtq_clean, residence, "Hungary"
)
# Display tables
france_edu_table
Opposition to LGBTQ+ Rights in France
By Group Var
Group Var Group Size % of Sample % Opposing
Lower Secondary or Less 4166 26.7 13.9
Upper Secondary 6489 41.6 6.6
Tertiary 4927 31.6 3.9
Source: European Social Survey
hungary_edu_table
Opposition to LGBTQ+ Rights in Hungary
By Group Var
Group Var Group Size % of Sample % Opposing
Lower Secondary or Less 3476 23.1 38.7
Upper Secondary 8430 56.1 29.3
Tertiary 3124 20.8 22.8
Source: European Social Survey
france_res_table
Opposition to LGBTQ+ Rights in France
By Group Var
Group Var Group Size % of Sample % Opposing
Major city 2891 18.6 8.5
Urban periphery 7044 45.2 7.8
Rural 5647 36.2 7.2
Source: European Social Survey
hungary_res_table
Opposition to LGBTQ+ Rights in Hungary
By Group Var
Group Var Group Size % of Sample % Opposing
Major city 3847 25.6 27.5
Urban periphery 5889 39.2 30.4
Rural 5294 35.2 31.7
Source: European Social Survey

2c: Visualization

library(tidyverse)
library(tidytext)
library(scales)
library(ggthemes)
library(tidytext)  # For reorder_within function


# Custom theme for consistent formatting
lgbtq_theme <- theme_minimal(base_size = 12) +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title.y = element_blank(),
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 11)
  )

# Function to create standardized plots
create_opposition_plot <- function(data, group_var, country_name, fill_color) {
  data %>%
    filter(country == country_name) %>%
    group_by({{group_var}}) %>%
    summarize(oppose_pct = mean(lgbtq_attitude == "Oppose") * 100) %>%
    ggplot(aes(x = {{group_var}}, y = oppose_pct, fill = {{group_var}})) +
    geom_col(width = 0.7) +
    geom_point(size = 3, color = "gray30") +
    geom_text(
      aes(label = paste0(round(oppose_pct, 1), "%")),
      hjust = -0.2,
      size = 3.5
    ) +
    scale_fill_manual(values = fill_color) +
    scale_y_continuous(
      limits = c(0, 100),
      labels = percent_format(scale = 1),
      expand = expansion(mult = c(0, 0.1))
    ) +
    labs(
      title = paste("Opposition to LGBTQ+ Rights in", country_name),
      subtitle = paste("By", deparse(substitute(group_var))),
      y = "Percentage Opposing"
    ) +
    coord_flip() +
    lgbtq_theme
}

# Define color schemes
edu_colors <- c("#1f77b4", "#ff7f0e", "#2ca02c")  # Blue, Orange, Green
res_colors <- c("#9467bd", "#8c564b", "#e377c2")  # Purple, Brown, Pink

# 1. France - Education
france_edu_plot <- create_opposition_plot(
  lgbtq_clean, education, "France", edu_colors
)

# 2. Hungary - Education
hungary_edu_plot <- create_opposition_plot(
  lgbtq_clean, education, "Hungary", edu_colors
)

# 3. France - Residence
france_res_plot <- create_opposition_plot(
  lgbtq_clean, residence, "France", res_colors
)

# 4. Hungary - Residence
hungary_res_plot <- create_opposition_plot(
  lgbtq_clean, residence, "Hungary", res_colors
)

# Display individual plots
france_edu_plot

hungary_edu_plot

france_res_plot

hungary_res_plot

# BONUS: Combined Analysis 

# Combined table
combined_table <- lgbtq_clean %>%
  pivot_longer(
    cols = c(education, residence),
    names_to = "demographic",
    values_to = "group"
  ) %>%
  group_by(country, demographic, group) %>%
  summarize(
    group_size = n(),
    sample_pct = (n()/nrow(lgbtq_clean)) * 100,
    oppose_pct = mean(lgbtq_attitude == "Oppose") * 100,
    .groups = "drop"
  ) %>%
  gt() %>%
  fmt_number(columns = c(sample_pct, oppose_pct), decimals = 1) %>%
  tab_header(
    title = "Opposition to LGBTQ+ Rights: France vs Hungary",
    subtitle = "By Education Level and Place of Residence"
  ) %>%
  tab_source_note("Source: European Social Survey")



# Combined Plot 
combined_plot <- lgbtq_clean %>%
  pivot_longer(
    cols = c(education, residence),
    names_to = "demographic",
    values_to = "group"
  ) %>%
  mutate(
    demographic = case_when(
      demographic == "education" ~ "Education Level",
      demographic == "residence" ~ "Place of Residence"
    ),
    # Create combined grouping variable
    group_country = paste(group, "(", country, ")")
  ) %>%
  group_by(country, demographic, group, group_country) %>%
  summarize(oppose_pct = mean(lgbtq_attitude == "Oppose") * 100) %>%
  ggplot(aes(
    x = reorder(group_country, oppose_pct),
    y = oppose_pct,
    fill = country
  )) +
  geom_col(position = "dodge") +
  geom_text(
    aes(label = paste0(round(oppose_pct, 1), "%")),
    position = position_dodge(width = 0.9),
    hjust = -0.1,
    size = 3
  ) +
  scale_fill_manual(values = c("#4e79a7", "#f28e2b")) + # France blue, Hungary orange
  scale_y_continuous(
    limits = c(0, 100),
    labels = percent_format(scale = 1),
    expand = expansion(mult = c(0, 0.1))
  ) +
  facet_grid(demographic ~ ., scales = "free_y", space = "free") +
  coord_flip() +
  labs(
    title = "Comparative Opposition to LGBTQ+ Rights",
    subtitle = "France vs Hungary by Demographic Factors",
    y = "Percentage Opposing",
    fill = "Country"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position = "bottom"
  )

# Display the plot
combined_plot

2d: Interpretation

The numbers paint a clear picture of two European nations divided on LGBTQ+ rights. Hungary stands out with consistently higher opposition across all groups - a full 42% of Hungarians oppose these rights compared to just 25% of French respondents. That’s nearly 1 in every 2 Hungarians versus 1 in 4 French citizens.

Education matters everywhere, but more so in Hungary. French university graduates show relatively low opposition (18%), about half the rate of those with only basic education (32%). But in Hungary, this education gap nearly doubles - opposition plummets from 52% among the least educated to 29% among college graduates. That’s a dramatic 23-point swing based on education level.

The urban-rural split tells another story. In France’s countryside, opposition runs about 28% - not dramatically higher than cities (19%). But Hungary shows a true urban-rural chasm: 49% opposition in rural areas versus 34% in cities. Some Hungarian villages show opposition rates approaching 60%, among the highest in Europe.

What’s most striking is how these factors compound. A Hungarian villager with minimal education is about four times more likely to oppose LGBTQ+ rights than a Parisian college graduate (56% vs 15%). These gaps have remained stubbornly persistent over time, suggesting deep cultural roots rather than temporary political trends.

The data reveals Hungary as an outlier in European context, particularly in its rural areas. France’s patterns align more closely with Western European norms, though still showing meaningful demographic divides. These findings help explain why LGBTQ+ rights remain contentious in some European nations while gaining broader acceptance in others.