library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(gt)
library(scales)
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
library(ggridges)
library(fst)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine

TASK ONE

gss <- read_fst("gss2022.fst")
gss_clean <- gss %>%
  mutate(
    abortion = case_when(
      abany == "yes" ~ "Support",
      abany == "no" ~ "Oppose", 
    TRUE ~ NA_character_),
    abortion = factor(abortion, levels = c("Support", "Oppose"))) %>% 
   mutate(
    politic = case_when( 
      polviews %in% c("extremely liberal", "liberal", "slightly liberal") ~ "Liberal", 
      polviews %in% c("moderate, middle of the road") ~ "Moderate",
      polviews %in% c("slightly conservative", "conservative", "extremely conservative") ~ "Conservative",
      TRUE ~ NA_character_
    ), 
    politic = factor(politic, levels = c("Liberal", "Moderate", "Conservative"))) %>%
  mutate(
    education = case_when(
      degree %in% c("less than high school", "high school") ~ "High School or Less",
      degree %in% c("associate/junior college") ~ "Some College",
      degree %in% c("bachelor's", "graduate") ~ "Bachelor's or Higher",
      TRUE ~ NA_character_
    ), 
    education = factor(education, levels = c("High School or Less", "Some College", "Bachelor's or Higher"))) %>%
   mutate(
    gender = case_when(
      sex %in% c("male") ~ "Male",
      sex %in% c("female") ~ "Female",
      TRUE ~ NA_character_
    ), 
    gender = factor(gender, levels = c("Male", "Female"))) 

gss_clean <- gss_clean %>%
  select(abortion, politic, education, gender, year) %>%
  count(
    abortion, politic, education, gender, year) %>%
  filter(
    !is.na(abortion), !is.na(politic), !is.na(education), !is.na(gender), !is.na(year)) 

gss_clean <- gss_clean %>%
  mutate(total_n = n(),
    prop = n/sum(n),
    pct = round(100 * prop, 2)) %>%
  mutate(pct = round(pct, digits = 3)) 
#cleaning data and assigning factors to the abortion, political views, education, and gender variables. Then I counted by those variables and created percentages.
gss %>%
  filter(!is.na(abany)) %>%
  distinct(year) %>%
  arrange(year) %>%
  pull()
##  [1] 1977 1978 1980 1982 1983 1984 1985 1987 1988 1989 1990 1991 1993 1994 1996
## [16] 1998 2000 2002 2004 2006 2008 2010 2012 2014 2016 2018 2021 2022
#finding years that this variable was analyzed.
sum(gss_clean$n)
## [1] 36806
sum(gss_clean$prop) #this should be the total responses in the whole cleaned data.
## [1] 1
gss_education <- gss_clean %>%
  select(abortion, education, pct, prop, n) %>% #selecting
group_by(abortion, education) %>%
summarise(Percent = sum(prop, na.rm=TRUE),
            Respondents = sum(n, na.rm = TRUE)) %>% #summarising to make picot wider work across two values, percent and respondents.
  pivot_wider(names_from = abortion, values_from = c(Percent, Respondents)) 
## `summarise()` has grouped output by 'abortion'. You can override using the
## `.groups` argument.
colnames(gss_education) <- c("Education", "Percentage", "Percent_Oppose", "Respondents", "Respondents_Oppose") #renaming

gss_educational <- gss_education %>% select(Respondents_Oppose, Respondents) %>% mutate(New2=rowSums(.)) #here i am calculating the total sample size of yes and no. When i tried to add a total_n column, it broke my table. So i decided to add the respondents supporting and opposing columns together.
colnames(gss_educational) <- c("Drop1", "Drop2", "Sample") #the drop columns are the other unneeded percentages.

gss_education <- cbind(gss_educational, gss_education) %>% #here I am combining the two tables and mobing the sample to the start of the table
  relocate(Sample, .after = Education)

gss_education$Drop1 <- NULL
gss_education$Drop2 <- NULL
sum(gss_education$Respondents_Oppose)
## [1] 20914
sum(gss_education$Respondents)
## [1] 15892
sum(gss_education$Percent_Oppose)
## [1] 0.5682226
sum(gss_education$Percentage) #this checks out, adding the two together gives the number from gss_clean above. THis is why when I add the two columns together I am using that as a sample. I couldn't get "n" to show, I kept getting a lot of NA's everywhere. Percentages work too.
## [1] 0.4317774
gss_education %>% 
  gt(rowname_col = "row", groupname_col = "group") %>% 
  cols_hide(c("Percent_Oppose", "Respondents_Oppose")) %>%
  tab_header(md("**Abortion Support across Different Education Levels**")) %>%
  tab_source_note(md("*Data taken from the GSS website link provided in the assignment instructions.*")) %>%
  fmt_percent(Percentage, decimals = 1) %>%
   tab_options(
    table.border.top.width = 4, 
    table.border.bottom.width = 4,
    column_labels.border.bottom.width = 2,
    heading.title.font.size = px(18),
    heading.subtitle.font.size = px(12),
    source_notes.font.size = px(10),
    data_row.padding = px(16)
  ) %>%
  cols_label(Education = md("*Education*"), Percentage = md("*Percentage*"), Respondents = md("*Respondents*"), Sample = md("*Sample Size*")) %>%
  cols_align("center")
Abortion Support across Different Education Levels
Education Sample Size Percentage Respondents
High School or Less 25558 26.2% 9635
Some College 2291 2.8% 1048
Bachelor's or Higher 8957 14.2% 5209
Data taken from the GSS website link provided in the assignment instructions.
#table creation! At the beginning I am hiding the opposing data since we are looking for support. The title has abortion support so it is clear what is being shown. I formated the percentage column, and added thicker lines to the table. 
gss_gender <- gss_clean %>%
  select(abortion, gender, pct, prop, n) %>%
group_by(abortion, gender) %>%
summarise(Percent = sum(prop, na.rm=TRUE),
            Respondents = sum(n, na.rm = TRUE)) %>%
  pivot_wider(names_from = abortion, values_from = c(Percent, Respondents)) 
## `summarise()` has grouped output by 'abortion'. You can override using the
## `.groups` argument.
colnames(gss_gender) <- c("Gender", "Percentage", "Percent_Oppose", "Respondents", "Respondents_Oppose")

gss_genders <- gss_gender %>% select(Respondents_Oppose, Respondents) %>% mutate(Sample=rowSums(.)) 
colnames(gss_genders) <- c("Drop1", "Drop2", "Sample")

gss_gender <- cbind(gss_genders, gss_gender) %>%
  relocate(Sample, .after = Gender)

gss_gender$Drop1 <- NULL
gss_gender$Drop2 <- NULL

#did all the same things as above but just with gender.
sum(gss_gender$Respondents)
## [1] 15892
sum(gss_gender$Respondents_Oppose)
## [1] 20914
sum(gss_gender$Percent_Oppose)
## [1] 0.5682226
sum(gss_gender$Percentage)
## [1] 0.4317774
gss_gender %>% 
  gt(rowname_col = "row", groupname_col = "group") %>% 
  cols_hide(c("Percent_Oppose", "Respondents_Oppose")) %>%
  tab_header(md("**Abortion Support across Genders**")) %>%
  tab_source_note(md("*Data taken from the GSS website link provided in the assignment instructions.*")) %>%
  fmt_percent(Percentage, decimals = 1) %>%
   tab_options(
    table.border.top.width = 4, 
    table.border.bottom.width = 4,
    column_labels.border.bottom.width = 2,
    heading.title.font.size = px(18),
    heading.subtitle.font.size = px(12),
    source_notes.font.size = px(10),
    data_row.padding = px(16)
  ) %>%
  cols_label(Gender = md("*Gender*"), Percentage = md("*Percentage*"), Respondents = md("*Respondents*"), Sample = md("*Sample Size*")) %>%
  cols_align("center")
Abortion Support across Genders
Gender Sample Size Percentage Respondents
Male 16440 19.6% 7215
Female 20366 23.6% 8677
Data taken from the GSS website link provided in the assignment instructions.
#gt table for gender. Hiding opposing opinions. Renaming columns.
gss_politic <- gss_clean %>%
  select(abortion, politic, pct, prop, n) %>%
group_by(abortion, politic) %>%
summarise(Percent = sum(prop, na.rm=TRUE),
            Respondents = sum(n, na.rm = TRUE)) %>%
  pivot_wider(names_from = abortion, values_from = c(Percent, Respondents)) 
## `summarise()` has grouped output by 'abortion'. You can override using the
## `.groups` argument.
colnames(gss_politic) <- c("Political_Views", "Percentage", "Percent_Oppose", "Respondents", "Respondents_Oppose")

gss_political <- gss_politic %>% select(Respondents_Oppose, Respondents) %>% mutate(Sample=rowSums(.)) 
colnames(gss_political) <- c("Drop1", "Drop2", "Sample")

gss_politic <- cbind(gss_political, gss_politic) %>%
  relocate(Sample, .after = Political_Views)

gss_politic$Drop1 <- NULL
gss_politic$Drop2 <- NULL

  
  #political views data prep. I am summarising across percent and respondents so that the values from abortion show for both of those values. 
sum(gss_politic$Respondents)
## [1] 15892
sum(gss_politic$Respondents_Oppose)
## [1] 20914
sum(gss_politic$Percent_Oppose)
## [1] 0.5682226
sum(gss_politic$Percentage)
## [1] 0.4317774
gss_politic %>% 
  gt(rowname_col = "row", groupname_col = "group") %>% 
  cols_hide(c("Percent_Oppose", "Respondents_Oppose")) %>%
  tab_header(md("**Abortion Support across Different Political Views**")) %>%
  tab_source_note(md("*Data taken from the GSS website link provided in the assignment instructions.*")) %>%
  fmt_percent(Percentage, decimals = 1) %>%
   tab_options(
    table.border.top.width = 4, 
    table.border.bottom.width = 4,
    column_labels.border.bottom.width = 2,
    heading.title.font.size = px(18),
    heading.subtitle.font.size = px(12),
    source_notes.font.size = px(10),
    data_row.padding = px(16)
  ) %>%
  cols_label(Political_Views = md("*Political Views*"), Percentage = md("*Percentage*"), Respondents = md("*Respondents*"), Sample = md("*Sample Size*")) %>%
  cols_align("center")
Abortion Support across Different Political Views
Political Views Sample Size Percentage Respondents
Liberal 10375 16.8% 6177
Moderate 14010 15.9% 5868
Conservative 12421 10.5% 3847
Data taken from the GSS website link provided in the assignment instructions.
#gt table for political views. Showing support of abortion. 
gss_time_education <- gss_clean %>%
  select(abortion, education, year, n, prop, pct) %>%
  group_by(education, year, abortion) %>%
  summarise(across(starts_with("prop"), ~sum(., na.rm = TRUE))) %>%
  pivot_wider(names_from = abortion, values_from = prop)
## `summarise()` has grouped output by 'education', 'year'. You can override using
## the `.groups` argument.
#here I move on to the timeline for the ggplots. I am adding year into the select and grouping by year. This time I am also summarising starting with proportion, so that the values are filled with the proportion. That makes it easier to format the ggplot in percentages. Because if I filled with percent, formatting with percent would times that by 100. Prop is better.
gss_time_gender <- gss_clean %>%
  select(abortion, gender, year, n, prop, pct) %>%
  group_by(gender, year, abortion) %>%
  summarise(across(starts_with("prop"), ~sum(., na.rm = TRUE))) %>%
  pivot_wider(names_from = abortion, values_from = prop)
## `summarise()` has grouped output by 'gender', 'year'. You can override using
## the `.groups` argument.
#time line data for gender
gss_time_politic <- gss_clean %>%
  select(abortion, politic, year, n, prop, pct) %>%
  group_by(politic, year, abortion) %>%
  summarise(across(starts_with("prop"), ~sum(., na.rm = TRUE))) %>%
  pivot_wider(names_from = abortion, values_from = prop)
## `summarise()` has grouped output by 'politic', 'year'. You can override using
## the `.groups` argument.
#timeline data for political views.
gss_time_politic
## # A tibble: 84 × 4
## # Groups:   politic, year [84]
##    politic  year Support  Oppose
##    <fct>   <int>   <dbl>   <dbl>
##  1 Liberal  1977 0.00500 0.00609
##  2 Liberal  1978 0.00478 0.00603
##  3 Liberal  1980 0.00484 0.00489
##  4 Liberal  1982 0.00663 0.00693
##  5 Liberal  1983 0.00201 0.00280
##  6 Liberal  1984 0.00462 0.00435
##  7 Liberal  1985 0.00505 0.00475
##  8 Liberal  1987 0.00636 0.00701
##  9 Liberal  1988 0.00361 0.00342
## 10 Liberal  1989 0.00408 0.00323
## # ℹ 74 more rows
ggplot(gss_time_education, 
       aes(x = year, y = Oppose, color = education)) +
  geom_line(size = 1.2) +
  scale_color_manual(
    values = c(
      "High School or Less" = "#2AAA8A",
      "Some College" = "#097969",
      "Bachelor's or Higher" = "#023020")) +
  scale_y_continuous(
    labels = scales::percent_format(),
    limits = c(0, 0.025),
    breaks = seq(0, 0.025, by = 0.005)
  ) + 
  scale_x_continuous(
    breaks = seq(1975, 2020, by = 5),
    limits = c(1972, 2027), 
    expand = c(0.01, 0.01)) + theme_minimal() +
  labs(title = ("Abortion Opposition across Different Education Levels Over Time"), subtitle = ("Proportion disagreeing: '[Do] you think it should be possible for a pregnant woman to obtain a legal abortion?'"), x = (NULL), y = ("Opposition Percentages"), color = "Education") +
  theme(
    plot.title = element_text(
      face = "bold",
      size = 14,
      margin = margin(b = 10)
    ),
    plot.subtitle = element_text(
      color = "grey40",
      size = 9), axis.line = element_line(color = "black", size = 0.75), legend.title = element_text(face = "bold"), panel.grid = element_blank(),
    ) 
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#Plot for educationg over time. I am really happy with how this turned out. The percentages are different because they are over time, so it makes sense that it is lower per year than overall. I included the question asked in GSS in the subtitle. Y-axis is in percent, and the years are showing on the x-axis by 5 year gaps.
ggplot(gss_time_gender, 
       aes(x = year, y = Oppose, color = gender)) +
  geom_line(size = 1.2) + 
  scale_color_manual(
    values = c(
      "Male" = "#48A0A8",
      "Female" = "#51074a")) + theme_minimal() + 
      scale_y_continuous(
    labels = scales::percent_format(),
    limits = c(0, 0.02),
    breaks = seq(0, 0.02, by = 0.005)
  ) + 
  scale_x_continuous(
    breaks = seq(1975, 2020, by = 5),
    limits = c(1972, 2027), 
    expand = c(0.01, 0.01)) +
  labs(title = ("Abortion Opposition across Genders Over Time"), subtitle = ("Proportion disagreeing: '[Do] you think it should be possible for a pregnant woman to obtain a legal abortion?'"), x = (NULL), y = ("Opposition Percentages"), color = "Gender") +
  theme(
    plot.title = element_text(
      face = "bold",
      size = 14,
      margin = margin(b = 10)
    ),
    plot.subtitle = element_text(
      color = "grey40",
      size = 9), axis.line = element_line(color = "black", size = 0.75), legend.title = element_text(face = "bold"), panel.grid = element_blank(),
    ) 

#I like how clean the plot is. I chose colors that stand out from one another.
ggplot(gss_time_politic, 
       aes(x = year, y = Oppose, color = politic)) +
  geom_line(size = 1.2) + 
  scale_color_manual(
    values = c(
      "Liberal" = "#DC143C",
      "Moderate" = "#808080",
      "Conservative" = "#1F51FF")) +
    scale_y_continuous(
    labels = scales::percent_format(),
    limits = c(0, 0.015),
    breaks = seq(0, 0.015, by = 0.005)
  ) + 
  scale_x_continuous(
    breaks = seq(1975, 2020, by = 5),
    limits = c(1972, 2027), 
    expand = c(0.01, 0.01)) + theme_minimal() +
  labs(title = ("Abortion Opposition across Different Political Views Over Time"), subtitle = ("Proportion disagreeing: '[Do] you think it should be possible for a pregnant woman to obtain a legal abortion?'"), x = (NULL), y = ("Opposition Percentages"), color = "Political Views") +
  theme(
    plot.title = element_text(
      face = "bold",
      size = 14,
      margin = margin(b = 10)
    ),
    plot.subtitle = element_text(
      color = "grey40",
      size = 9), axis.line = element_line(color = "black", size = 0.75), legend.title = element_text(face = "bold"), panel.grid = element_blank(),
    )

#I chose politically appropriate colors for this one.

Interpretation There were many patterns in this task that were surprising and went against what I was expecting for patterns. Looking at the GT table comparing education and abortion, the highest support of abortion at 26.2% came from the ‘High School or Less’ category. This looks high because there were more respondents in this category (9635 compared to 1048 in the ‘Some College’ category), but also because we are only looking at the support values in these tables. When we switch to the education plot, we see that the category ‘High School or Less’ is actually the biggest opposition towards abortion. However, these opinions are in a downwards trend overall, with what looks like a small increase beginning in 2022, that is when Roe V Wade was overturned. Maybe some opinions are reflective of events happening at the time.

One pattern that did not surprise me was the gender and abortion values in the GT table. With female support being 23.6%, and male 19.6%. It is common to hear female support is higher than male support when it comes to abortion. However, looking at the ggplot for opposition, it is actually females who oppose abortion at the highest rates over time. The female line is sort of parallel to the male one, just higher percentages. This was surprising, but similarly to the education plot, opposition is decreasing.

Finally, looking at the political views GT table, I was expecting to see liberal percentages supporting abortion to be higher, which they were. Liberal percentage is at 16.9% compared to conservative percentage at 10.5%. This makes sense just thinking about what the different parties stand for. When switching to the ggplot on opposition, it checks that the conservative respondents were the biggest opposition. I was surprised to see that the moderate category was very similar to the conservative respondents. The lines are very similar, they spike almost identically around 2005, with liberals also having a smaller spike in that year. I wonder what happened then? Categories are decreasing in opposition, except for conservative respondents, which seem to be on and upward trend.

In the gpplots overall, all the lines keep the same patterns as other categorys within their plot. Sure some spikes are muted and some are bigger than others, but over all, the trends are very similar. Respondents seem to be moving in similar directions. This says a lot about the current social norms, values, and morals of our society. I do not have literature to back this up, but just looking at the numbers we can see that overall, opposition to abortion is decreasing, while support is increasing. I even imputted “Support” in the place of “Oppose” in the aes line of all the plots to verify that my assumptions about these numbers were on track. I believe that I have the right idea.

TASK TWO

ess <- read_fst("All-ESS-Data.fst")
ess_clean <- ess %>%
  filter(cntry == "FR" | cntry == "HU") %>%
  group_by(cntry) %>%
   mutate(
    lgbtq = case_when( 
      freehms %in% c(1:2) ~ "Support", 
      freehms %in% c(3) ~ "Neutral",
      freehms %in% c(4:5) ~ "Oppose",
      TRUE ~ NA_character_
      ), 
    lgbtq = factor(lgbtq, levels = c("Support", "Neutral", "Oppose"))) %>%
  mutate(
    education = case_when( 
      eisced %in% c(1:2) ~ "Lower Secondary or Less", 
      eisced %in% c(3:4) ~ "Upper Secondary",
      eisced %in% c(5:7) ~ "Tertiary",
      TRUE ~ NA_character_
    ), 
    education = factor(education, levels = c("Lower Secondary or Less", "Upper Secondary", "Tertiary"))) %>%
  mutate(
    residence = case_when(
      domicil %in% c(1) ~ "Major City",
      domicil %in% c(2:3) ~ "Urban Periphery",
      domicil %in% c(4:5) ~ "Rural",
      TRUE ~ NA_character_
    ), 
    residence = factor(residence, levels = c("Major City", "Urban Periphery", "Rural"))) 
#data cleaning! Assigning values to numbers, also filtering to focus just on france and hungary.
check_missing <- ess_clean %>%
  summarize(across(c(education, lgbtq, residence),
                  ~sum(is.na(.))))
#here I am checking missing values just out of curiosity
patterns_FR <- ess_clean %>%
  filter(!is.na(education), !is.na(lgbtq), !is.na(residence), cntry == "FR") %>%
  mutate(total_n = n()) %>%
  group_by(education, residence) %>%
  summarize(
    n = n(),
    pct_of_sample = n/first(total_n) * 100,
    n_none = sum(lgbtq == "Oppose"),
    pct_none = n_none/n * 100
  ) 
## `summarise()` has grouped output by 'education'. You can override using the
## `.groups` argument.
colnames(patterns_FR) <- c("EducationFR", "ResidenceFR", "GroupSizeFR", "PercentSampleFR", "NumberOpposeFR", "PercentOpposeFR")
#It was important to me to do two seperate data sets for the table for the two countries. I often run into trouble when I try to do it together. I often get NA's when trying to flip the table. So by making two data sets, yeah it is a little more work and takes more space, but I don't run into that issue and I can just use cbind to put them together. 
patterns_HU <- ess_clean %>%
  filter(!is.na(education), !is.na(lgbtq), !is.na(residence), cntry == "HU") %>%
  mutate(total_n = n()) %>%
  group_by(education, residence) %>%
  summarize(
    n = n(),
    pct_of_sample = n/first(total_n) * 100,
    n_none = sum(lgbtq == "Oppose"),
    pct_none = n_none/n * 100
  )
## `summarise()` has grouped output by 'education'. You can override using the
## `.groups` argument.
colnames(patterns_HU) <- c("EducationHU", "ResidenceHU", "GroupSizeHU", "PercentSampleHU", "NumberOpposeHU", "PercentOpposeHU")
#Data set for hungary.
group_size_total <- ess_clean %>%
  filter(!is.na(education), !is.na(lgbtq), !is.na(residence)) %>%
  mutate(total_n = n()) %>%
  group_by(education, residence) %>%
  summarize(
    n = n())
## `summarise()` has grouped output by 'education'. You can override using the
## `.groups` argument.
colnames(group_size_total) <- c("EducationGroup", "ResidenceGroup", "GroupSize")
#Here is where I found the total sample size.
patterns <- cbind(patterns_FR, patterns_HU, group_size_total) %>%
  relocate(GroupSize, .after = ResidenceFR)
patterns$EducationHU = NULL
patterns$ResidenceHU = NULL
patterns$GroupSizeFR = NULL
patterns$GroupSizeHU = NULL
patterns$EducationGroup = NULL
patterns$ResidenceGroup = NULL
#used cbind to put it all together and to drop the repeated columns
patterns %>% 
  gt(rowname_col = "row", groupname_col = "group") %>%
  tab_header(md("**Opposition to LGBTQ+ Rights across Demographics in France and Hungary**"), subtitle = md("*Distribution and Conditional Probabilities*")) %>%
  tab_source_note(md("*Data: European Social Survey.*")) %>%
   tab_options(
    table.border.top.width = 4, 
    table.border.bottom.width = 4,
    column_labels.border.bottom.width = 2,
    heading.title.font.size = px(18),
    heading.subtitle.font.size = px(14),
    data_row.padding = px(12)
  ) %>%
  cols_align(
    align = "center") %>%
  tab_spanner(label = md("**France**"), columns = c(PercentSampleFR, NumberOpposeFR, PercentOpposeFR)) %>%
  tab_spanner(label = md("**Hungary**"), columns = c(PercentSampleHU, NumberOpposeHU, PercentOpposeHU)) %>%
  cols_label(EducationFR = md("*Education Level*"), ResidenceFR = md("*Residence Area*"), GroupSize = md("*Group Size*"), PercentSampleFR = md("*Percent of Sample*"), NumberOpposeFR = md("*Number Opposing*"), PercentOpposeFR = md("*Percent Opposing*"), PercentSampleHU = md("*Percent of Sample*"), NumberOpposeHU = md("*Number Opposing*"), PercentOpposeHU = md("*Percent Opposing*")) %>%
  tab_footnote(footnote = md("*'Percent Opposing' shows conditional probability: P(Oppose|Education Level, Place of Residence)*"), locations = cells_column_labels(columns = c(PercentOpposeFR, PercentOpposeHU))) %>%
  fmt_number(columns = PercentSampleFR, decimals = 2) %>%
  fmt_number(columns = PercentOpposeFR, decimals = 2) %>%
  fmt_number(columns = PercentSampleHU, decimals = 2) %>%
  fmt_number(columns = PercentOpposeHU, decimals = 2)
Opposition to LGBTQ+ Rights across Demographics in France and Hungary
Distribution and Conditional Probabilities
Education Level Residence Area Group Size
France
Hungary
Percent of Sample Number Opposing Percent Opposing1 Percent of Sample Number Opposing Percent Opposing1
Lower Secondary or Less Major City 1112 4.19 113 17.30 3.05 171 37.25
Lower Secondary or Less Urban Periphery 3164 12.30 259 13.51 8.30 466 37.37
Lower Secondary or Less Rural 3366 10.24 208 13.03 11.78 707 39.94
Upper Secondary Major City 3115 6.08 75 7.91 14.42 618 28.52
Upper Secondary Urban Periphery 6351 18.95 198 6.71 22.61 1028 30.25
Upper Secondary Rural 5453 16.61 157 6.07 19.06 828 28.90
Tertiary Major City 2511 8.28 59 4.57 8.12 270 22.11
Tertiary Urban Periphery 3418 13.95 91 4.19 8.28 299 24.04
Tertiary Rural 2122 9.39 43 2.94 4.38 143 21.70
Data: European Social Survey.
1 ‘Percent Opposing’ shows conditional probability: P(Oppose|Education Level, Place of Residence)
#Here is my gt table. Even though cbind may not have been the most effective way to create this table, this is the outcome that I wanted. I wanted to be able to use tab_spanner and have seperate categories for the two countries so that it is easier to read. I also added a footnote of what the conditional probability calculation is. Even though 
education_data_FR <- ess_clean %>%
  filter(!is.na(education), !is.na(lgbtq), cntry == "FR") %>%
  group_by(education, cntry) %>%
  summarize(
    n = n(),
    n_none = sum(lgbtq == "Oppose"),
    p_none = n_none/n,
    pct_none = p_none * 100
  )
## `summarise()` has grouped output by 'education'. You can override using the
## `.groups` argument.
#Creating data for the ggplot. Creating conditional probability values. For france education.
education_data_HU <- ess_clean %>%
  filter(!is.na(education), !is.na(lgbtq), cntry == "HU") %>%
  group_by(education, cntry) %>%
  summarize(
    n = n(),
    n_none = sum(lgbtq == "Oppose"),
    p_none = n_none/n,
    pct_none = p_none * 100
  )
## `summarise()` has grouped output by 'education'. You can override using the
## `.groups` argument.
#Same thing but for education in hungary
residence_data_FR <- ess_clean %>%
  filter(!is.na(residence), !is.na(lgbtq), cntry == "FR") %>%
  group_by(residence, cntry) %>%
  summarize(
    n = n(),
    n_none = sum(lgbtq == "Oppose"),
    p_none = n_none/n,
    pct_none = p_none * 100
  )
## `summarise()` has grouped output by 'residence'. You can override using the
## `.groups` argument.
#France residence values
residence_data_HU <- ess_clean %>%
  filter(!is.na(residence), !is.na(lgbtq), cntry == "HU") %>%
  group_by(residence, cntry) %>%
  summarize(
    n = n(),
    n_none = sum(lgbtq == "Oppose"),
    p_none = n_none/n,
    pct_none = p_none * 100
  )
## `summarise()` has grouped output by 'residence'. You can override using the
## `.groups` argument.
#Hungary residence values
education_plot_FR <- ggplot(education_data_FR, 
       aes(x = pct_none, y = education)) +
  geom_col(
    width = 0.6,
    fill = "#000091",
    alpha = 0.9
  ) +
  geom_point(
    size = 3,
    color = "#ed2939"
  ) +
  geom_text(
    aes(label = sprintf("%.1f%%", pct_none)),
    hjust = -0.5,
    family = "sans",
    size = 4,
    fontface = "bold",
    color = "black"
  ) +
  scale_x_continuous(
    limits = c(0, max(education_data_FR$pct_none) * 1.2),
    breaks = seq(0, 20, by = 5),
    labels = scales::label_number(suffix = "%")
  ) +
  labs(
    title = "Opposition to LGBTQ+ rights across Education Levels in France",
    caption = "Data: European Social Survey",
    x = "Probability of Opposition",
    y = NULL
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(
      face = "bold",
      size = 11,
      margin = margin(b = 10)
    ),
    plot.caption = element_text(
      color = "grey40",
      margin = margin(t = 20)
    ),
    # Axis formatting
    axis.text = element_text(
      size = 11,
      color = "black"
    ),
    axis.text.y = element_text(
      face = "bold",
      size = 9
    ),
    axis.title.x = element_text(
      margin = margin(t = 10),
      face = "bold",
      size = 9
    ),
    panel.grid.major.x = element_line(
      color = "grey95",
      size = 0.3
    ),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    plot.margin = margin(30, 30, 30, 30)
  )
#I then went ahead and created 4 seperate ggplots. I couldn't facet them all because there are three different variables. I know I wont get the bonus point, but I still wanted to try to put them together. 
education_plot_HU <- ggplot(education_data_HU, 
       aes(x = pct_none, y = education)) +
  geom_col(
    width = 0.6,
    fill = "#477050",
    alpha = 0.9
  ) +
  geom_point(
    size = 3,
    color = "#CE2939"
  ) +
  geom_text(
    aes(label = sprintf("%.1f%%", pct_none)),
    hjust = -0.5,
    family = "sans",
    size = 4,
    fontface = "bold",
    color = "black"
  ) +
  scale_x_continuous(
    limits = c(0, max(education_data_HU$pct_none) * 1.2),
    breaks = seq(0, 40, by = 5),
    labels = scales::label_number(suffix = "%")
  ) +
  labs(
    title = "Opposition to LGBTQ+ rights across Education Levels in Hungary",
    caption = "Data: European Social Survey",
    x = "Probability of Opposition",
    y = NULL
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(
      face = "bold",
      size = 11,
      margin = margin(b = 10)
    ),
    plot.caption = element_text(
      color = "grey40",
      margin = margin(t = 20)
    ),
    # Axis formatting
    axis.text = element_text(
      size = 11,
      color = "black"
    ),
    axis.text.y = element_text(
      face = "bold",
      size = 9
    ),
    axis.title.x = element_text(
      margin = margin(t = 10),
      face = "bold",
      size = 9
    ),
    panel.grid.major.x = element_line(
      color = "grey95",
      size = 0.3
    ),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    plot.margin = margin(30, 30, 30, 30)
  )
#another plot
residence_plot_FR <- ggplot(residence_data_FR, 
       aes(x = pct_none, y = residence)) +
  geom_col(
    width = 0.6,
    fill = "#000091",
    alpha = 0.9
  ) +
  geom_point(
    size = 3,
    color = "#ed2939"
  ) +
  geom_text(
    aes(label = sprintf("%.1f%%", pct_none)),
    hjust = -0.5,
    family = "sans",
    size = 4,
    fontface = "bold",
    color = "black"
  ) +
  scale_x_continuous(
    limits = c(0, max(residence_data_FR$pct_none) * 1.2),
    breaks = seq(0, 20, by = 2),
    labels = scales::label_number(suffix = "%")
  ) +
  labs(
    title = "Opposition to LGBTQ+ rights across Areas of Residence in France",
    caption = "Data: European Social Survey",
    x = "Probability of Opposition",
    y = NULL
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(
      face = "bold",
      size = 11,
      margin = margin(b = 10)
    ),
    plot.caption = element_text(
      color = "grey40",
      margin = margin(t = 20)
    ),
    # Axis formatting
    axis.text = element_text(
      size = 11,
      color = "black"
    ),
    axis.text.y = element_text(
      face = "bold",
      size = 9
    ),
    axis.title.x = element_text(
      margin = margin(t = 10),
      face = "bold",
      size = 9
    ),
    panel.grid.major.x = element_line(
      color = "grey95",
      size = 0.3
    ),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    plot.margin = margin(30, 30, 30, 30)
  )
#another plot
residence_plot_HU <- ggplot(residence_data_HU, 
       aes(x = pct_none, y = residence)) +
  geom_col(
    width = 0.6,
    fill = "#477050",
    alpha = 0.9
  ) +
  geom_point(
    size = 3,
    color = "#CE2939"
  ) +
  geom_text(
    aes(label = sprintf("%.1f%%", pct_none)),
    hjust = -0.5,
    family = "sans",
    size = 4,
    fontface = "bold",
    color = "black"
  ) +
  scale_x_continuous(
    limits = c(0, max(residence_data_HU$pct_none) * 1.2),
    breaks = seq(0, 40, by = 5),
    labels = scales::label_number(suffix = "%")
  ) +
  labs(
    title = "Opposition to LGBTQ+ rights across Areas of Residence in Hungary",
    caption = "Data: European Social Survey",
    x = "Probability of Opposition",
    y = NULL
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(
      face = "bold",
      size = 11,
      margin = margin(b = 10)
    ),
    plot.caption = element_text(
      color = "grey40",
      margin = margin(t = 20)
    ),
    # Axis formatting
    axis.text = element_text(
      size = 11,
      color = "black"
    ),
    axis.text.y = element_text(
      face = "bold",
      size = 9
    ),
    axis.title.x = element_text(
      margin = margin(t = 10),
      face = "bold",
      size = 9
    ),
    panel.grid.major.x = element_line(
      color = "grey95",
      size = 0.3
    ),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    plot.margin = margin(30, 30, 30, 30)
  )
#another plot
grid.arrange(education_plot_FR, education_plot_HU, residence_plot_FR, residence_plot_HU, ncol = 2, nrow = 2, top = "Opposition to LGBTQ+ rights across Demographics in France and Hungary")

#here are the combined plots. I picked the red and blue for Frances flag, and the same for Hungary. I like this side by side comparison because its clearer. 

INTERPRETATION The first thing that stood out to me in the plot is that place of residence does not seem to be a big factor in peoples opposition on LGBTQ+ rights. The percentages across places of residence are not very different. This is the case for both countries. France having a 0.5% difference from Rural to Major City, and Hungary having a 4.2% difference from Rural to Major City. However, education seems to be a much more influencial variable when it comes to opposition to LGBTQ+ rights. Both country education plots are right skewed. We can see that the lower the education level, the higher the probability of opposing LGBTQ+ rights.

In the GT table, it is clear that hungary had higher respondents for opposing lgbtq rights. However, the random percentages and respondent numbers were not telling me much until I created the plot. The numbers seemed very high in the GT table, for example, in Hungary, between around 20-40% of people in all categories are opposing to LGBTQ+ rights. That seems so so high. We can also see this in the plots, Hungary percentages are towering over France. I wonder what this says about the general acceptance of the people in Hungary, or what is considered a cultural norm. 38.7% is definitely quite high. France is also low, which is great, with 13.9% being the nations highest value.