Final

# Inspect the raw distribution of `class` across all of its factor levels
# (levels with no observations are shown with a count of 0)
table(gss_data[["class"]])

## 
##                   lower class                 working class 
##                          4601                         30940 
##                  middle class                   upper class 
##                         31014                          2302 
##                      no class                    don't know 
##                             1                             0 
##                           iap            I don't have a job 
##                             0                             0 
##                   dk, na, iap                     no answer 
##                             0                             0 
##    not imputable_(2147483637)    not imputable_(2147483638) 
##                             0                             0 
##                       refused                skipped on web 
##                             0                             0 
##                    uncodeable not available in this release 
##                             0                             0 
##    not available in this year                  see codebook 
##                             0                             0

# Values of `class` that actually occur in the data (including NA)
unique(gss_data[["class"]])

## [1] middle class  working class upper class   lower class   <NA>         
## [6] no class     
## 18 Levels: lower class working class middle class upper class ... see codebook

# Inspect the raw distribution of `degree` across all of its factor levels
table(gss_data[["degree"]])

## 
##         less than high school                   high school 
##                         14192                         36446 
##      associate/junior college                    bachelor's 
##                          4355                         11248 
##                      graduate                    don't know 
##                          5953                             0 
##                           iap            I don't have a job 
##                             0                             0 
##                   dk, na, iap                     no answer 
##                             0                             0 
##    not imputable_(2147483637)    not imputable_(2147483638) 
##                             0                             0 
##                       refused                skipped on web 
##                             0                             0 
##                    uncodeable not available in this release 
##                             0                             0 
##    not available in this year                  see codebook 
##                             0                             0

# Values of `degree` that actually occur in the data (including NA)
unique(gss_data[["degree"]])

## [1] bachelor's               less than high school    high school             
## [4] graduate                 associate/junior college <NA>                    
## 18 Levels: less than high school high school ... see codebook

# Inspect the raw distribution of `polint` across all of its factor levels
table(gss_data[["polint"]])

## 
##               very interested             fairly interested 
##                           864                          1330 
##           somewhat interested           not very interested 
##                          1630                           932 
##         not at all interested                  can't choose 
##                           561                             0 
##                           iap            I don't have a job 
##                             0                             0 
##                   dk, na, iap                     no answer 
##                             0                             0 
##    not imputable_(2147483637)    not imputable_(2147483638) 
##                             0                             0 
##                       refused                skipped on web 
##                             0                             0 
##                    uncodeable not available in this release 
##                             0                             0 
##    not available in this year                  see codebook 
##                             0                             0

# Values of `polint` that actually occur in the data (including NA)
unique(gss_data[["polint"]])

## [1] <NA>                  fairly interested     somewhat interested  
## [4] not at all interested not very interested   very interested      
## 18 Levels: very interested fairly interested ... see codebook

# Recode `class` into a four-level factor with title-case labels, with levels
# listed from lowest to highest class. Any response outside these four
# categories (e.g. "no class") and any missing value becomes NA.
gss_data <- gss_data %>%
  mutate(
    class_cleaned = factor(
      class,
      levels = c("lower class", "working class", "middle class", "upper class"),
      labels = c("Lower Class", "Working Class", "Middle Class", "Upper Class")
    )
  ) %>%
  # Keep only respondents aged 18-29 who have a valid class response
  filter(!is.na(class_cleaned), age >= 18, age <= 29)

# Sanity check: tabulate the recoded variable
count(gss_data, class_cleaned)

##   class_cleaned    n
## 1   Lower Class  914
## 2 Working Class 7184
## 3  Middle Class 5255
## 4   Upper Class  302

# Recode `polint` into a five-level factor with title-case labels, with levels
# listed from most to least interested. Responses outside these five
# categories and missing values become NA.
gss_data <- gss_data %>%
  mutate(
    polint_cleaned = factor(
      polint,
      levels = c(
        "very interested",
        "fairly interested",
        "somewhat interested",
        "not very interested",
        "not at all interested"
      ),
      labels = c(
        "Very Interested",
        "Fairly Interested",
        "Somewhat Interested",
        "Not Very Interested",
        "Not at All Interested"
      )
    )
  ) %>%
  # Keep only respondents aged 18-29 who have a valid political-interest response
  filter(!is.na(polint_cleaned), age >= 18, age <= 29)

# Sanity check: tabulate the recoded variable
count(gss_data, polint_cleaned)

##          polint_cleaned   n
## 1       Very Interested  57
## 2     Fairly Interested 163
## 3   Somewhat Interested 243
## 4   Not Very Interested 172
## 5 Not at All Interested 109

# Recode `degree` into a four-level factor with title-case labels, with levels
# listed from least to most education. 'associate/junior college' is
# deliberately left out of the levels to simplify the scale (it is fairly
# uncommon, especially among ages 18-29), so those respondents become NA and
# are dropped from the sample by the filter below, along with missing values.
gss_data <- gss_data %>%
  mutate(
    degree_cleaned = factor(
      degree,
      levels = c("less than high school", "high school", "bachelor's", "graduate"),
      labels = c("Less Than High School", "High School", "Bachelor's", "Graduate")
    )
  ) %>%
  # Keep only respondents aged 18-29 who have a retained degree category
  filter(!is.na(degree_cleaned), age >= 18, age <= 29)

# Sanity check: tabulate the recoded variable
count(gss_data, degree_cleaned)

##          degree_cleaned   n
## 1 Less Than High School  93
## 2           High School 477
## 3            Bachelor's 104
## 4              Graduate  20

# Add a numeric score for each cleaned factor by looking its label up in a
# named vector. Higher scores mean higher class, more political interest and
# more education; a label with no entry (or an NA) yields NA.
gss_data <- gss_data %>%
  mutate(
    class_cleaned_numeric = unname(c(
      "Lower Class" = 0,
      "Working Class" = 1,
      "Middle Class" = 2,
      "Upper Class" = 3
    )[as.character(class_cleaned)]),
    # Scored in the reverse of the factor's level order, so 4 = most interested
    polint_cleaned_numeric = unname(c(
      "Not at All Interested" = 0,
      "Not Very Interested" = 1,
      "Somewhat Interested" = 2,
      "Fairly Interested" = 3,
      "Very Interested" = 4
    )[as.character(polint_cleaned)]),
    degree_cleaned_numeric = unname(c(
      "Less Than High School" = 0,
      "High School" = 1,
      "Bachelor's" = 2,
      "Graduate" = 3
    )[as.character(degree_cleaned)])
  )

# Give the numeric scores descriptive (non-syntactic) column names; the
# summary table, plots and model formulas below refer to these names, which
# must be wrapped in backticks when used in code
gss_data <- gss_data %>%
  rename(
    `Social Class` = class_cleaned_numeric,
    `Political Interest` = polint_cleaned_numeric,
    `Educational Attainment` = degree_cleaned_numeric
  )

# Descriptive statistics table for the three numeric analysis variables
gss_data %>%
  select(`Social Class`, `Political Interest`, `Educational Attainment`) %>%
  datasummary_skim(histogram = TRUE)

	Unique	Mean	SD	Median	Max
Social Class	4	1.3	0.6	1.0	3.0
Political Interest	5	1.8	1.1	2.0	4.0
Educational Attainment	4	1.1	0.6	1.0	3.0

# Boxplot of the numeric political-interest score within each social class.
# `class_cleaned` is already a factor with the desired level order, so it is
# mapped directly to both the x-axis and the fill colour.
ggplot(
  gss_data,
  aes(x = class_cleaned, y = `Political Interest`, fill = class_cleaned)
) +
  geom_boxplot() +
  scale_fill_manual(
    values = c(
      "Lower Class" = "#FF9999",
      "Working Class" = "#66B2FF",
      "Middle Class" = "#99FF99",
      "Upper Class" = "#FFCC99"
    )
  ) +
  labs(
    title = "Political Interest by Socioeconomic Status",
    x = "Socioeconomic Status",
    y = "Political Interest",
    fill = "Socioeconomic Status"
  ) +
  theme_minimal()

# 100% stacked (proportional) horizontal bar chart: for each level of
# educational attainment, the share of respondents in each social class,
# drawn in a separate panel for every level of political interest.
# `degree_cleaned` and `class_cleaned` are already factors with the desired
# level order, so they are mapped directly.
ggplot(gss_data, aes(x = degree_cleaned, fill = class_cleaned)) +
  geom_bar(position = "fill", width = 0.7) +
  facet_wrap(~ polint_cleaned, ncol = 2) +
  scale_fill_manual(
    name = "Socioeconomic Status",
    values = c(
      "Lower Class" = "#FF9999",   # light red
      "Working Class" = "#66B2FF", # light blue
      "Middle Class" = "#99FF99",  # light green
      "Upper Class" = "#FFCC99"    # light orange
    )
  ) +
  labs(
    title = "Socioeconomic Status by Educational Attainment\nand Political Interest",
    x = "Educational Attainment",
    y = "Proportion"
  ) +
  # Flip so education levels run down the vertical axis
  coord_flip() +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "top",
    strip.background = element_rect(fill = "#f0f0f0", color = NA),
    strip.text = element_text(face = "bold"),
    plot.margin = margin(20, 20, 20, 20)
  )

# Fit three linear models of the numeric political-interest score:
#   model1 - social class only
#   model2 - educational attainment only
#   model3 - both predictors together
model1 <- lm(
  formula = `Political Interest` ~ `Social Class`,
  data = gss_data
)
model2 <- lm(
  formula = `Political Interest` ~ `Educational Attainment`,
  data = gss_data
)
model3 <- lm(
  formula = `Political Interest` ~ `Social Class` + `Educational Attainment`,
  data = gss_data
)

# Side-by-side regression table for the three models (sjPlot), with custom
# column headers; `<br>` inserts a line break inside each model's header
tab_model(
  model1,
  model2,
  model3,
  string.pred = "Predictors",
  string.est = "Coefficient",
  string.ci = "CI",
  string.p = "P-Value",
  dv.labels = c(
    "Model 1<br>Social Class<br>VS<br>Political Interest",
    "Model 2<br>Educational Attainment<br>VS<br>Political Interest",
    "Model 3<br>Social Class + Educational Attainment<br>VS<br>Political Interest"
  )
)

	Model 1 Social Class VS Political Interest			Model 2 Educational Attainment VS Political Interest			Model 3 Social Class + Educational Attainment VS Political Interest
Predictors	Coefficient	CI	P-Value	Coefficient	CI	P-Value	Coefficient	CI	P-Value
(Intercept)	1.56	1.36 – 1.76	<0.001	1.47	1.31 – 1.64	<0.001	1.31	1.08 – 1.54	<0.001
Social Class	0.21	0.07 – 0.34	0.004				0.14	0.01 – 0.28	0.042
Educational Attainment				0.33	0.20 – 0.47	<0.001	0.31	0.17 – 0.44	<0.001
Observations	694			694			694
R² / R² adjusted	0.012 / 0.011			0.034 / 0.032			0.040 / 0.037

# Bar plot of the mean political-interest score within each social class.
# x and fill are mapped to the labelled factor `class_cleaned` rather than to
# the 0-3 numeric codes, so the axis and the fill legend both show class names
# (previously the legend showed 0/1/2/3). Colours are matched to classes by
# name, so they stay correct even if a class is absent from the data.
ggplot(
  gss_data,
  aes(x = class_cleaned, y = `Political Interest`, fill = class_cleaned)
) +
  # One bar per class whose height is the mean of the political-interest score
  stat_summary(fun = mean, geom = "bar", position = "dodge", width = 0.7) +
  scale_fill_manual(
    values = c(
      "Lower Class" = "#FF9999",
      "Working Class" = "#66B2FF",
      "Middle Class" = "#99FF99",
      "Upper Class" = "#FFCC99"
    )
  ) +
  labs(
    title = "Average Political Interest by Social Class",
    x = "Social Class",
    # Spell out the scale end-points so the numeric axis is interpretable
    y = "Political Interest\n(0 = Not at All Interested, 4 = Very Interested)",
    fill = "Social Class"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.title.y = element_text(size = 12)  # smaller font so the two-line label fits
  )