# Load the raw CCES sample export into a base data.frame.
# NOTE(review): the path is absolute and machine-specific; the script will
# only run where this exact file location exists.
df_raw <- read.csv(
  file = "/Users/alekhyakotha/Desktop/Data Visualization Course/CCES Sample.csv"
)

# Recode the raw numeric survey columns into labelled analysis variables.
#
# Two recoding styles are used below:
#   * factor(x, levels = ..., labels = ...): any code outside `levels`
#     becomes NA.
#   * case_when(...): codes are matched top to bottom; where a `TRUE ~`
#     fallback is present, every unmatched code *and* NA gets the fallback
#     label, otherwise unmatched rows become NA.
# NOTE(review): the numeric code meanings are taken from the labels/comments
# already in this script; verify them against the survey codebook.
df <- df_raw %>%
  mutate(
    # Demographics
    Gender = factor(gender, levels = 1:2, labels = c("Male", "Female")),
    Region = factor(region, levels = 1:4,
                    labels = c("Northeast", "Midwest", "South", "West")),
    Race = case_when(
      race == 1 ~ "White",
      race == 2 ~ "Black",
      race == 3 ~ "Hispanic",
      race == 4 ~ "Asian",
      TRUE      ~ "Other"
    ),
    Education = factor(educ, levels = 1:6,
                       labels = c("No HS", "High School", "Some College",
                                  "2-Year Degree", "4-Year Degree", "Post-Grad")),
    Employment = case_when(
      employ == 1 ~ "Full-time",
      employ == 2 ~ "Part-time",
      employ == 5 ~ "Retired",
      employ == 6 ~ "Not employed",
      employ == 7 ~ "Self-employed",
      employ == 4 ~ "Homemaker",
      TRUE        ~ "Other"
    ),
    MarStatus = case_when(
      marstat == 1 ~ "Married",
      marstat == 3 ~ "Divorced",
      marstat == 4 ~ "Separated",
      marstat == 5 ~ "Never Married",
      marstat == 6 ~ "Widowed",
      TRUE         ~ "Other"
    ),

    # Political identity
    # NOTE(review): confirm the label order for codes 2/3 and 5/6 against the
    # codebook; the published CCES pid7 coding is believed to put
    # "not very strong" before "lean" on each side -- TODO confirm.
    PID = factor(pid7, levels = 1:7,
                 labels = c("Strong Dem", "Lean Dem", "Weak Dem",
                            "Independent", "Weak Rep", "Lean Rep", "Strong Rep")),
    # Three-way party grouping; pid7 values outside 1:7 (and NA) stay NA
    # because there is no fallback branch.
    PID3 = case_when(
      pid7 %in% 1:3 ~ "Democrat",
      pid7 == 4     ~ "Independent",
      pid7 %in% 5:7 ~ "Republican"
    ),
    Ideology = factor(ideo5, levels = 1:5,
                      labels = c("Very Liberal", "Liberal", "Moderate",
                                 "Conservative", "Very Conservative")),

    # Income (midpoints in $thousands); codes outside 1:16 become NA
    Income_mid = case_when(
      faminc_new == 1  ~ 10,   faminc_new == 2  ~ 17.5,
      faminc_new == 3  ~ 25,   faminc_new == 4  ~ 37.5,
      faminc_new == 5  ~ 50,   faminc_new == 6  ~ 62.5,
      faminc_new == 7  ~ 75,   faminc_new == 8  ~ 87.5,
      faminc_new == 9  ~ 100,  faminc_new == 10 ~ 125,
      faminc_new == 11 ~ 150,  faminc_new == 12 ~ 175,
      faminc_new == 13 ~ 200,  faminc_new == 14 ~ 250,
      faminc_new == 15 ~ 350,  faminc_new == 16 ~ 500
    ),
    # Ordered income bands. The top band is an explicit `>= 100` test rather
    # than a `TRUE` fallback so that respondents with missing income stay NA
    # instead of being counted as high income.
    IncomeGroup = factor(
      case_when(
        Income_mid < 40   ~ "Low (<$40K)",
        Income_mid < 100  ~ "Middle ($40K–$99K)",
        Income_mid >= 100 ~ "High ($100K+)"
      ),
      levels = c("Low (<$40K)", "Middle ($40K–$99K)", "High ($100K+)")
    ),

    # Religion importance
    ReligImp = factor(pew_religimp, levels = 1:4,
                      labels = c("Very Important", "Somewhat Important",
                                 "Not Too Important", "Not at All Important")),

    # News interest
    NewsInt = factor(newsint, levels = 1:4,
                     labels = c("Most of the time", "Some of the time",
                                "Only now and then", "Hardly at all")),

    # Policy views (CC18_310a-d): 2=support, 3=oppose, 5=unsure, etc.
    # NOTE(review): question wording and response codes for the CC18_310* and
    # CC18_325* items are as stated in these comments -- verify with codebook.
    # Gun background checks (310b): 2=support, 3=oppose; other codes -> NA
    Gun_BG = case_when(
      CC18_310b == 2 ~ "Support",
      CC18_310b == 3 ~ "Oppose",
      CC18_310b == 5 ~ "Not sure",
      TRUE ~ NA_character_
    ),
    # Assault weapons ban (310c); other codes -> NA
    AssaultBan = case_when(
      CC18_310c == 2 ~ "Support",
      CC18_310c == 3 ~ "Oppose",
      CC18_310c == 5 ~ "Not sure",
      TRUE ~ NA_character_
    ),
    # CC18_308a: approval of Trump (1=Strongly approve, 4=Strongly disapprove)
    TrumpApproval = factor(CC18_308a, levels = 1:4,
                           labels = c("Strongly Approve", "Somewhat Approve",
                                      "Somewhat Disapprove", "Strongly Disapprove")),

    # CC18_325: immigration policy (1=yes, 2=no)
    # ifelse() keeps NA as NA; every non-missing code other than 1 is
    # labelled "Oppose".
    DACA       = ifelse(CC18_325a == 1, "Support DACA", "Oppose DACA"),
    BorderWall = ifelse(CC18_325b == 1, "Support Wall", "Oppose Wall"),

    # Union membership; codes outside 1:3 stay NA (no fallback branch)
    Union = case_when(
      union == 1 ~ "Self in union",
      union == 2 ~ "Household member in union",
      union == 3 ~ "No union"
    )
  )

Figure 1: Party Identification by Region (Stacked Bar Chart)

# Party share within each region. Rows missing either variable are dropped,
# so the plotted n can be smaller than the full sample.
fig1_data <- df %>%
  filter(!is.na(PID3), !is.na(Region)) %>%
  count(Region, PID3) %>%
  group_by(Region) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup()  # do not leave the grouping attached to the plotting data

# Party colours keyed by PID3 label
pal <- c("Democrat" = "#1f77b4", "Independent" = "#7f7f7f", "Republican" = "#d62728")

# 100% stacked bars, one per region; segments under 7% are left unlabelled
# so the text does not overflow thin slices.
p1 <- ggplot(fig1_data, aes(x = Region, y = pct, fill = PID3)) +
  geom_col(width = 0.7, color = "white", linewidth = 0.3) +
  geom_text(aes(label = ifelse(pct >= 0.07, paste0(round(pct*100), "%"), "")),
            position = position_stack(vjust = 0.5),
            color = "white", fontface = "bold", size = 3.5) +
  scale_y_continuous(labels = percent_format(), expand = c(0,0)) +
  scale_fill_manual(values = pal) +
  labs(
    title = "Party Identification by U.S. Region",
    # n is taken from the rows actually plotted rather than hard-coded
    subtitle = paste0(
      "2018 Cooperative Congressional Election Study (CCES) · n = ",
      format(sum(fig1_data$n), big.mark = ",")
    ),
    x = NULL, y = "Share of Respondents", fill = "Party ID",
    caption = "Source: CCES 2018"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title    = element_text(face = "bold", size = 15),
    plot.subtitle = element_text(color = "grey50"),
    legend.position = "top",
    panel.grid.major.x = element_blank(),
    panel.grid.minor   = element_blank()
  )

p1
Party identification varies across U.S. regions, with the South leaning more Republican and the Northeast more Democratic.

Party identification varies across U.S. regions, with the South leaning more Republican and the Northeast more Democratic.


Figure 2: Ideology Distribution by Party ID (Ridge Plot)

# Ridge-plot input: keep respondents with both ideology and party, expose
# ideology as its numeric level code (1-5), and re-level PID3 so the ridges
# are ordered Republican -> Independent -> Democrat along the y axis.
fig2_data <- df %>%
  filter(!(is.na(Ideology) | is.na(PID3))) %>%
  mutate(
    PID3 = factor(PID3, levels = rev(c("Democrat", "Independent", "Republican"))),
    Ideology_num = as.numeric(Ideology)
  )

# One density ridge per party; `quantiles = 2` draws the median line.
p2 <- ggplot(
  data = fig2_data,
  mapping = aes(x = Ideology_num, y = PID3, fill = PID3)
) +
  geom_density_ridges(
    color = "white",
    alpha = 0.85,
    scale = 1.4,
    rel_min_height = 0.01,
    quantile_lines = TRUE,
    quantiles = 2
  ) +
  labs(
    title = "Self-Reported Ideology by Party Identification",
    subtitle = "Ridge plots with median line · 2018 CCES",
    caption = "Source: CCES 2018",
    x = "Ideology Scale",
    y = NULL
  ) +
  scale_fill_manual(
    values = c(
      "Democrat"    = "#1f77b4",
      "Independent" = "#7f7f7f",
      "Republican"  = "#d62728"
    )
  ) +
  scale_x_continuous(
    breaks = 1:5,
    labels = c("Very\nLiberal", "Liberal", "Moderate",
               "Conservative", "Very\nConservative")
  ) +
  # theme_ridges() must come before theme() so the tweaks below win
  theme_ridges(grid = FALSE, center_axis_labels = TRUE) +
  theme(
    legend.position = "none",
    plot.subtitle = element_text(color = "grey50"),
    plot.title    = element_text(face = "bold", size = 15)
  )

p2
Democrats and Republicans cluster toward opposite ideological ends; Independents center on Moderate.

Democrats and Republicans cluster toward opposite ideological ends; Independents center on Moderate.


Figure 3: Trump Approval by Party ID (Grouped Heatmap)

# Row-normalised cross-tab: within each 7-point party group, the share of
# respondents at each Trump approval level.
fig3_data <- df %>%
  filter(!is.na(TrumpApproval), !is.na(PID)) %>%
  count(PID, TrumpApproval) %>%
  group_by(PID) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup()  # do not leave the grouping attached to the plotting data

p3 <- ggplot(fig3_data, aes(x = TrumpApproval, y = PID, fill = pct)) +
  geom_tile(color = "white", linewidth = 0.8) +
  # Label colour is mapped inside aes() so it always lines up with the row
  # being drawn. The plasma palette runs dark (low) to bright yellow (high),
  # so high-share tiles get dark text and low-share tiles get white text.
  geom_text(aes(label = paste0(round(pct*100), "%"),
                color = ifelse(pct > 0.45, "grey20", "white")),
            size = 3.5, fontface = "bold") +
  scale_color_identity() +  # use the colour names as-is, no legend
  scale_fill_viridis_c(option = "plasma", labels = percent_format(), name = "Share") +
  scale_x_discrete(position = "top") +
  labs(
    title = "Trump Approval Ratings by Party Identification",
    subtitle = "Percentage of each party group holding each approval level · 2018 CCES",
    x = NULL, y = "Party ID (Strong Dem → Strong Rep)",
    caption = "Source: CCES 2018"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title     = element_text(face = "bold", size = 15),
    plot.subtitle  = element_text(color = "grey50"),
    axis.text.x    = element_text(angle = 20, hjust = 0),
    panel.grid     = element_blank(),
    legend.position = "right"
  )

p3
A heatmap showing how Trump approval starkly divides along party lines.

A heatmap showing how Trump approval starkly divides along party lines.


Figure 4: Education & Income Distribution (Box Plot)

# Respondents with both an income midpoint and an education level; rows
# missing either are dropped, so n can be smaller than the full sample.
fig4_data <- df %>%
  filter(!is.na(Income_mid), !is.na(Education))

# Box plot of income midpoint ($K) per education level, with the group mean
# overlaid as a white diamond.
p4 <- ggplot(fig4_data, aes(x = Education, y = Income_mid, fill = Education)) +
  geom_boxplot(outlier.shape = 21, outlier.size = 1.5, outlier.alpha = 0.5,
               width = 0.6, alpha = 0.85) +
  stat_summary(fun = mean, geom = "point", shape = 23, size = 3,
               fill = "white", color = "black") +
  scale_fill_viridis_d(option = "turbo", begin = 0.1, end = 0.9) +
  scale_y_continuous(labels = dollar_format(suffix="K"), breaks = seq(0,500,50)) +
  labs(
    title = "Household Income Distribution by Education Level",
    # n is taken from the rows actually plotted rather than hard-coded
    subtitle = paste0(
      "Box plots with mean (◇) · 2018 CCES · n = ",
      format(nrow(fig4_data), big.mark = ",")
    ),
    x = "Highest Education Attained", y = "Household Income (midpoint, $K)",
    caption = "Source: CCES 2018"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title     = element_text(face = "bold", size = 15),
    plot.subtitle  = element_text(color = "grey50"),
    axis.text.x    = element_text(angle = 20, hjust = 1),
    legend.position = "none",
    panel.grid.major.x = element_blank()
  )

p4
Higher education levels associate with higher household income.

Higher education levels associate with higher household income.


Figure 5: Gun Policy Support by Party (Dumbbell Chart)

# Share of each party group answering code 2 (treated as "support") on the
# two gun-policy items. Missing responses are excluded from the denominator
# via na.rm = TRUE. Output is long: one row per party x policy.
# (An earlier `dumbbell_data` attempt was removed: it filtered to supporters
# before averaging, so its share was always 1, and its result was never used.)
gun_data <- df %>%
  filter(!is.na(PID3)) %>%
  group_by(PID3) %>%
  summarise(
    `Background Check` = mean(CC18_310b == 2, na.rm = TRUE),
    `Assault Weapons Ban` = mean(CC18_310c == 2, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  pivot_longer(-PID3, names_to = "Policy", values_to = "pct")

# Wide view (one column per party), kept for inspection; not used by the plot
dumbbell_wide <- gun_data %>%
  pivot_wider(names_from = PID3, values_from = pct)

# Dumbbell chart: a grey connector per policy with one labelled dot per party.
p5 <- ggplot(gun_data, aes(x = pct, y = Policy, color = PID3, group = Policy)) +
  geom_line(color = "grey70", linewidth = 1.5) +
  geom_point(size = 7, alpha = 0.9) +
  geom_text(aes(label = paste0(round(pct*100), "%")),
            color = "white", size = 2.8, fontface = "bold") +
  scale_color_manual(values = c("Democrat"="#1f77b4","Independent"="#7f7f7f","Republican"="#d62728")) +
  # Axis starts at 40% by default but widens (to the next lower 10%) when any
  # group falls below that, so no point is silently dropped by the limits.
  scale_x_continuous(
    labels = percent_format(),
    limits = c(min(0.4, floor(min(gun_data$pct, na.rm = TRUE) * 10) / 10), 1.0)
  ) +
  labs(
    title = "Support for Gun Control Policies by Party",
    subtitle = "Dumbbell chart showing % support among each party group · 2018 CCES",
    x = "Percentage Supporting Policy", y = NULL, color = "Party ID",
    caption = "Source: CCES 2018"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title     = element_text(face = "bold", size = 15),
    plot.subtitle  = element_text(color = "grey50"),
    panel.grid.minor   = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position = "top"
  )

p5
Both parties broadly support background checks, but sharply diverge on an assault-weapons ban.

Both parties broadly support background checks, but sharply diverge on an assault-weapons ban.


Figure 6: News Interest by Ideology (Faceted Bar Chart)

# Distribution of news-interest answers within each ideology group
# (rows missing either variable are excluded).
fig6_data <- df %>%
  filter(!(is.na(NewsInt) | is.na(Ideology))) %>%
  count(Ideology, NewsInt) %>%
  group_by(Ideology) %>%
  mutate(pct = n / sum(n))

# One bar panel per ideology group, laid out in a single row; bars are
# labelled with their rounded percentage just above the bar top.
p6 <- ggplot(
  data = fig6_data,
  mapping = aes(x = NewsInt, y = pct, fill = NewsInt)
) +
  geom_col(show.legend = FALSE, alpha = 0.9) +
  geom_text(
    aes(label = paste0(round(pct*100), "%")),
    size = 3,
    vjust = -0.3
  ) +
  facet_wrap(~Ideology, nrow = 1) +
  labs(
    title = "News Interest Level by Ideology",
    subtitle = "Faceted bar charts for each ideological group · 2018 CCES",
    caption = "Source: CCES 2018",
    x = NULL,
    y = "Share of Group"
  ) +
  # extra head-room (12%) above the tallest bar for the text labels
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.12)),
    labels = percent_format()
  ) +
  scale_fill_viridis_d(option = "cividis", direction = -1) +
  theme_minimal(base_size = 11) +
  theme(
    panel.grid.minor   = element_blank(),
    panel.grid.major.x = element_blank(),
    strip.text    = element_text(face = "bold"),
    axis.text.x   = element_text(angle = 35, hjust = 1, size = 8),
    plot.subtitle = element_text(color = "grey50"),
    plot.title    = element_text(face = "bold", size = 15)
  )

p6
News interest varies across the ideological spectrum; respondents at both ideological extremes follow news most closely.

News interest varies across the ideological spectrum; respondents at both ideological extremes follow news most closely.


Figure 7: Immigration Policy Support (Grouped Bar – DACA vs. Border Wall)

# Per-party share answering code 1 on each immigration item (missing answers
# are excluded via na.rm). The summary columns are named with their final
# display labels, so pivoting to long form yields the Policy labels directly.
imm_data <- df %>%
  filter(!is.na(PID3)) %>%
  group_by(PID3) %>%
  summarise(
    `Support DACA`        = mean(CC18_325a == 1, na.rm = TRUE),
    `Support Border Wall` = mean(CC18_325b == 1, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  pivot_longer(cols = -PID3, names_to = "Policy", values_to = "pct") %>%
  mutate(PID3 = factor(PID3, levels = c("Democrat", "Independent", "Republican")))

# Side-by-side bars per party; bars and their labels share the same dodge
# width so each label sits over its own bar.
p7 <- ggplot(data = imm_data, mapping = aes(x = PID3, y = pct, fill = Policy)) +
  geom_col(
    alpha = 0.9,
    width = 0.6,
    position = position_dodge(width = 0.7)
  ) +
  geom_text(
    aes(label = paste0(round(pct*100),"%")),
    fontface = "bold",
    size = 3.5,
    vjust = -0.4,
    position = position_dodge(width = 0.7)
  ) +
  labs(
    title = "Support for Immigration Policies by Party",
    subtitle = "Grouped bar chart: DACA protection vs. Border Wall construction · 2018 CCES",
    caption = "Source: CCES 2018",
    fill = "Policy",
    x = "Party Identification",
    y = "Percentage Supporting"
  ) +
  scale_y_continuous(
    limits = c(0,1),
    expand = expansion(mult = c(0,0.05)),
    labels = percent_format()
  ) +
  scale_fill_manual(
    values = c("Support DACA" = "#2ecc71", "Support Border Wall" = "#e74c3c")
  ) +
  theme_minimal(base_size = 13) +
  theme(
    panel.grid.minor   = element_blank(),
    panel.grid.major.x = element_blank(),
    legend.position = "top",
    plot.subtitle  = element_text(color = "grey50"),
    plot.title     = element_text(face = "bold", size = 15)
  )

p7
Stark partisan divide on immigration: DACA support is high among Democrats while wall support is high among Republicans.

Stark partisan divide on immigration: DACA support is high among Democrats while wall support is high among Republicans.


Figure 8: Interactive — Income, Education & Party ID (Plotly Scatter)

# Respondent-level scatter input. Education (as its level code) and the
# income midpoint are both discrete, so uniform random jitter is added to
# spread overlapping points. The jitter is drawn fresh on every run (no seed
# is set here), so point positions differ between renders.
scatter_data <- df %>%
  filter(!(is.na(Income_mid) | is.na(Education) | is.na(PID3) | is.na(Region))) %>%
  mutate(
    educ_jitter = as.numeric(Education) + runif(n(), -0.25, 0.25),
    inc_jitter  = Income_mid + runif(n(), -3, 3)
  )

# Interactive scatter: colour encodes party, marker shape encodes region,
# and the hover box shows the un-jittered respondent attributes.
p8 <- plot_ly(
  data = scatter_data,
  type = "scatter",
  mode = "markers",
  x = ~educ_jitter,
  y = ~inc_jitter,
  color = ~PID3,
  colors = c(
    "Democrat"    = "#1f77b4",
    "Independent" = "#7f7f7f",
    "Republican"  = "#d62728"
  ),
  symbol = ~Region,
  symbols = c("circle", "square", "diamond", "cross"),
  marker = list(
    size = 7,
    opacity = 0.7,
    line = list(width = 0.5, color = "white")
  ),
  hoverinfo = "text",
  text = ~paste0(
    "<b>Party:</b> ", PID3, "<br>",
    "<b>Region:</b> ", Region, "<br>",
    "<b>Education:</b> ", Education, "<br>",
    "<b>Income (approx):</b> $", Income_mid, "K<br>",
    "<b>Ideology:</b> ", Ideology
  )
) %>%
  layout(
    title = list(
      text = "<b>Income vs. Education by Party ID and Region</b><br><sup>2018 CCES · Hover for respondent details</sup>",
      font = list(size = 16)
    ),
    # x ticks sit on the integer education codes and carry short labels
    xaxis = list(
      title = "Education Level",
      tickvals = 1:6,
      ticktext = c("No HS", "High School", "Some College",
                   "2-Year", "4-Year", "Post-Grad"),
      gridcolor = "#eeeeee"
    ),
    yaxis = list(
      title = "Household Income (midpoint, $K)",
      gridcolor = "#eeeeee"
    ),
    legend = list(title = list(text = "<b>Party / Region</b>")),
    plot_bgcolor  = "white",
    paper_bgcolor = "white",
    # source note anchored to the bottom-right corner of the figure area
    annotations = list(
      list(
        text = "Source: CCES 2018",
        showarrow = FALSE,
        xref = "paper", yref = "paper",
        x = 1, y = -0.12,
        xanchor = "right",
        font = list(size = 10, color = "grey")
      )
    )
  )

p8

Interactive plot: explore how income and education relate to party identification across regions. Hover for details.


This portfolio presents eight data-driven visualizations built from the 2018 Cooperative Congressional Election Study (CCES), a nationally representative survey of American political attitudes and demographics. Figures span bar charts, ridge plots, heatmaps, box plots, dumbbell charts, faceted bars, grouped bars, and an interactive Plotly scatter plot — covering party identity, ideology, income, education, immigration, and gun policy.