# Load the raw CCES sample. The path is machine-specific.
df_raw <- read.csv("/Users/alekhyakotha/Desktop/Data Visualization Course/CCES Sample.csv")

# Build the analysis data frame: every raw survey code used by the figures is
# recoded into a labelled factor or character column. Raw columns are kept.
#
# NOTE(review): the code-to-label mappings below are reproduced as written and
# have not been checked here; confirm them against the CCES 2018 codebook
# (in particular the employ, marstat and pid7 2/3 and 5/6 label order, and
# which question numbers hold the gun-policy items).
df <- df_raw %>%
  mutate(
    # Demographics ----
    # factor() turns any code outside `levels` into NA.
    Gender = factor(gender, levels = 1:2, labels = c("Male", "Female")),
    Region = factor(region, levels = 1:4,
                    labels = c("Northeast", "Midwest", "South", "West")),
    # Any code other than 1-4, including a missing value, becomes "Other".
    Race = case_when(
      race == 1 ~ "White",
      race == 2 ~ "Black",
      race == 3 ~ "Hispanic",
      race == 4 ~ "Asian",
      TRUE      ~ "Other"
    ),
    Education = factor(educ, levels = 1:6,
                       labels = c("No HS", "High School", "Some College",
                                  "2-Year Degree", "4-Year Degree", "Post-Grad")),
    # Unlisted codes and missing values fall through to "Other".
    Employment = case_when(
      employ == 1 ~ "Full-time",
      employ == 2 ~ "Part-time",
      employ == 5 ~ "Retired",
      employ == 6 ~ "Not employed",
      employ == 7 ~ "Self-employed",
      employ == 4 ~ "Homemaker",
      TRUE        ~ "Other"
    ),
    # Unlisted codes and missing values fall through to "Other".
    MarStatus = case_when(
      marstat == 1 ~ "Married",
      marstat == 3 ~ "Divorced",
      marstat == 4 ~ "Separated",
      marstat == 5 ~ "Never Married",
      marstat == 6 ~ "Widowed",
      TRUE         ~ "Other"
    ),

    # Political identity ----
    PID = factor(pid7, levels = 1:7,
                 labels = c("Strong Dem", "Lean Dem", "Weak Dem",
                            "Independent", "Weak Rep", "Lean Rep", "Strong Rep")),
    # Three-way collapse of pid7; codes outside 1-7 stay NA (no fallback branch).
    PID3 = case_when(
      pid7 %in% 1:3 ~ "Democrat",
      pid7 == 4     ~ "Independent",
      pid7 %in% 5:7 ~ "Republican"
    ),
    Ideology = factor(ideo5, levels = 1:5,
                      labels = c("Very Liberal", "Liberal", "Moderate",
                                 "Conservative", "Very Conservative")),

    # Income (midpoints in $thousands) ----
    # Codes outside 1-16 have no branch and therefore give NA.
    Income_mid = case_when(
      faminc_new == 1  ~ 10,   faminc_new == 2  ~ 17.5,
      faminc_new == 3  ~ 25,   faminc_new == 4  ~ 37.5,
      faminc_new == 5  ~ 50,   faminc_new == 6  ~ 62.5,
      faminc_new == 7  ~ 75,   faminc_new == 8  ~ 87.5,
      faminc_new == 9  ~ 100,  faminc_new == 10 ~ 125,
      faminc_new == 11 ~ 150,  faminc_new == 12 ~ 175,
      faminc_new == 13 ~ 200,  faminc_new == 14 ~ 250,
      faminc_new == 15 ~ 350,  faminc_new == 16 ~ 500
    ),
    # Income bands. The NA branch must come first: without it a missing
    # Income_mid fails both comparisons and would be caught by the final
    # catch-all, mislabelling unknown incomes as "High".
    IncomeGroup = factor(
      case_when(
        is.na(Income_mid) ~ NA_character_,
        Income_mid < 40   ~ "Low (<$40K)",
        Income_mid < 100  ~ "Middle ($40K–$99K)",
        TRUE              ~ "High ($100K+)"
      ),
      levels = c("Low (<$40K)", "Middle ($40K–$99K)", "High ($100K+)")
    ),

    # Religion importance ----
    ReligImp = factor(pew_religimp, levels = 1:4,
                      labels = c("Very Important", "Somewhat Important",
                                 "Not Too Important", "Not at All Important")),

    # News interest ----
    NewsInt = factor(newsint, levels = 1:4,
                     labels = c("Most of the time", "Some of the time",
                                "Only now and then", "Hardly at all")),

    # Policy views (CC18_310a-d): 2=support, 3=oppose, 5=unsure, etc. ----
    # Any other code is set to NA.
    # Gun background checks (310b)
    Gun_BG = case_when(
      CC18_310b == 2 ~ "Support",
      CC18_310b == 3 ~ "Oppose",
      CC18_310b == 5 ~ "Not sure",
      TRUE ~ NA_character_
    ),
    # Assault weapons ban (310c)
    AssaultBan = case_when(
      CC18_310c == 2 ~ "Support",
      CC18_310c == 3 ~ "Oppose",
      CC18_310c == 5 ~ "Not sure",
      TRUE ~ NA_character_
    ),

    # CC18_308a: approval of Trump (1=Strongly approve, 4=Strongly disapprove).
    # Codes outside 1-4 become NA.
    TrumpApproval = factor(CC18_308a, levels = 1:4,
                           labels = c("Strongly Approve", "Somewhat Approve",
                                      "Somewhat Disapprove", "Strongly Disapprove")),

    # CC18_325: immigration policy (1=yes, 2=no).
    # Every non-missing code other than 1 is labelled as opposition; NA stays NA.
    DACA       = ifelse(CC18_325a == 1, "Support DACA", "Oppose DACA"),
    BorderWall = ifelse(CC18_325b == 1, "Support Wall", "Oppose Wall"),

    # Union membership; codes other than 1-3 stay NA.
    Union = case_when(
      union == 1 ~ "Self in union",
      union == 2 ~ "Household member in union",
      union == 3 ~ "No union"
    )
  )

Figure 1: Party Identification by Region (Stacked Bar Chart)

# Figure 1 data: share of each party group (PID3) within every region.
# Rows missing PID3 or Region are dropped first, so shares are computed on
# complete cases only. The result is ungrouped so later verbs are not
# silently applied per region.
fig1_data <- df %>%
  filter(!is.na(PID3), !is.na(Region)) %>%
  count(Region, PID3) %>%
  group_by(Region) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup()

# Party colour palette keyed by PID3 label.
pal <- c("Democrat" = "#1f77b4", "Independent" = "#7f7f7f", "Republican" = "#d62728")

# Stacked bar chart: one bar per region, segments sum to 100%.
p1 <- ggplot(fig1_data, aes(x = Region, y = pct, fill = PID3)) +
  geom_col(width = 0.7, color = "white", linewidth = 0.3) +
  # Segments below 7% get an empty label to avoid cramped text.
  geom_text(aes(label = ifelse(pct >= 0.07, paste0(round(pct * 100), "%"), "")),
            position = position_stack(vjust = 0.5),
            color = "white", fontface = "bold", size = 3.5) +
  scale_y_continuous(labels = percent_format(), expand = c(0, 0)) +
  scale_fill_manual(values = pal) +
  labs(
    title = "Party Identification by U.S. Region",
    # n is taken from the plotted rows rather than hard-coded, because the
    # NA filter above can remove respondents.
    subtitle = paste0(
      "2018 Cooperative Congressional Election Study (CCES) · n = ",
      format(sum(fig1_data$n), big.mark = ",")
    ),
    x = NULL, y = "Share of Respondents", fill = "Party ID",
    caption = "Source: CCES 2018"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title    = element_text(face = "bold", size = 15),
    plot.subtitle = element_text(color = "grey50"),
    legend.position = "top",
    panel.grid.major.x = element_blank(),
    panel.grid.minor   = element_blank()
  )

p1

Party identification varies across U.S. regions, with the South leaning more Republican and the Northeast more Democratic.

Figure 2: Ideology Distribution by Party ID (Ridge Plot)

# Figure 2 data: respondents with both an ideology and a party group.
# Ideology is converted to its numeric factor code for the density estimate,
# and PID3 is re-levelled to fix the order of the ridges on the y axis.
fig2_data <- df %>%
  filter(!is.na(Ideology), !is.na(PID3)) %>%
  mutate(
    Ideology_num = as.numeric(Ideology),
    PID3         = factor(PID3, levels = c("Republican", "Independent", "Democrat"))
  )

# Ridge plot: one ideology density per party group, with a median line
# (quantiles = 2 splits each density into two halves).
p2 <- ggplot(fig2_data, aes(x = Ideology_num, y = PID3, fill = PID3)) +
  geom_density_ridges(
    quantile_lines = TRUE,
    quantiles      = 2,
    scale          = 1.4,
    rel_min_height = 0.01,
    alpha          = 0.85,
    color          = "white"
  ) +
  scale_fill_manual(
    values = c("Democrat" = "#1f77b4", "Independent" = "#7f7f7f", "Republican" = "#d62728")
  ) +
  # Numeric codes 1-5 are relabelled with the ideology names.
  scale_x_continuous(
    breaks = 1:5,
    labels = c("Very\nLiberal", "Liberal", "Moderate", "Conservative", "Very\nConservative")
  ) +
  labs(
    title    = "Self-Reported Ideology by Party Identification",
    subtitle = "Ridge plots with median line · 2018 CCES",
    caption  = "Source: CCES 2018",
    x        = "Ideology Scale",
    y        = NULL
  ) +
  theme_ridges(grid = FALSE, center_axis_labels = TRUE) +
  theme(
    legend.position = "none",
    plot.title      = element_text(face = "bold", size = 15),
    plot.subtitle   = element_text(color = "grey50")
  )

p2

Democrats and Republicans cluster toward opposite ends of the ideology scale, while Independents center on Moderate.

Figure 3: Trump Approval by Party ID (Grouped Heatmap)

# Figure 3 data: within each 7-point party group (PID), the share of
# respondents at each Trump approval level. Rows missing either variable are
# dropped. `label_colour` picks a text colour per tile: the plasma fill scale
# runs from dark (low values) to bright yellow (high values), so high shares
# need dark text and low shares need white text.
fig3_data <- df %>%
  filter(!is.na(TrumpApproval), !is.na(PID)) %>%
  count(PID, TrumpApproval) %>%
  group_by(PID) %>%
  mutate(
    pct          = n / sum(n),
    label_colour = ifelse(pct > 0.45, "grey20", "white")
  ) %>%
  ungroup()

# Heatmap: rows are party groups, columns are approval levels, fill is the
# within-party share.
p3 <- ggplot(fig3_data, aes(x = TrumpApproval, y = PID, fill = pct)) +
  geom_tile(color = "white", linewidth = 0.8) +
  # Text colour is mapped from the data (not from an external vector) so it
  # stays aligned with the rows even if the data are reordered or filtered.
  geom_text(aes(label = paste0(round(pct * 100), "%"), colour = label_colour),
            size = 3.5, fontface = "bold") +
  # Use the colour strings in `label_colour` literally; no legend is drawn.
  scale_colour_identity() +
  scale_fill_viridis_c(option = "plasma", labels = percent_format(), name = "Share") +
  scale_x_discrete(position = "top") +
  labs(
    title = "Trump Approval Ratings by Party Identification",
    subtitle = "Percentage of each party group holding each approval level · 2018 CCES",
    x = NULL, y = "Party ID (Strong Dem → Strong Rep)",
    caption = "Source: CCES 2018"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title     = element_text(face = "bold", size = 15),
    plot.subtitle  = element_text(color = "grey50"),
    axis.text.x    = element_text(angle = 20, hjust = 0),
    panel.grid     = element_blank(),
    legend.position = "right"
  )

p3

A heatmap showing how Trump approval starkly divides along party lines.

Figure 4: Education & Income Distribution (Box Plot)

# Figure 4 data: respondents with both an income midpoint and an education
# level; everyone else is excluded from the plot.
fig4_data <- df %>%
  filter(!is.na(Income_mid), !is.na(Education))

# Box plot of household income (midpoint, $K) for each education level, with
# the group mean overlaid as a white diamond.
p4 <- ggplot(fig4_data, aes(x = Education, y = Income_mid, fill = Education)) +
  geom_boxplot(outlier.shape = 21, outlier.size = 1.5, outlier.alpha = 0.5,
               width = 0.6, alpha = 0.85) +
  stat_summary(fun = mean, geom = "point", shape = 23, size = 3,
               fill = "white", color = "black") +
  scale_fill_viridis_d(option = "turbo", begin = 0.1, end = 0.9) +
  scale_y_continuous(labels = dollar_format(suffix = "K"), breaks = seq(0, 500, 50)) +
  labs(
    title = "Household Income Distribution by Education Level",
    # n reflects the rows left after the NA filter instead of a fixed figure.
    subtitle = paste0(
      "Box plots with mean (◇) · 2018 CCES · n = ",
      format(nrow(fig4_data), big.mark = ",")
    ),
    x = "Highest Education Attained", y = "Household Income (midpoint, $K)",
    caption = "Source: CCES 2018"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title     = element_text(face = "bold", size = 15),
    plot.subtitle  = element_text(color = "grey50"),
    axis.text.x    = element_text(angle = 20, hjust = 1),
    legend.position = "none",
    panel.grid.major.x = element_blank()
  )

p4

Higher education levels associate with higher household income.

Figure 5: Gun Policy Support by Party (Dumbbell Chart)

# Share of each party group supporting each gun policy, among respondents with
# a recorded view on BOTH policies and a party group.
# Output: one row per PID3 x Policy with `pct` = proportion answering "Support".
#
# The rows are deliberately NOT filtered to View == "Support" before
# summarising: doing so would leave only supporters, making the share 1 for
# every group.
dumbbell_data <- df %>%
  filter(!is.na(Gun_BG), !is.na(AssaultBan), !is.na(PID3)) %>%
  pivot_longer(cols = c(Gun_BG, AssaultBan), names_to = "Policy", values_to = "View") %>%
  mutate(Policy = recode(Policy,
                         "Gun_BG"     = "Background Check Requirement",
                         "AssaultBan" = "Assault Weapons Ban")) %>%
  group_by(PID3, Policy) %>%
  summarise(pct = mean(View == "Support"), .groups = "drop")

# Re-compute cleanly
# For every party group, the proportion of non-missing responses equal to
# code 2 on each gun item, reshaped to one row per PID3 x Policy.
gun_data <- df %>%
  filter(!is.na(PID3)) %>%
  group_by(PID3) %>%
  summarise(
    `Background Check`    = mean(CC18_310b == 2, na.rm = TRUE),
    `Assault Weapons Ban` = mean(CC18_310c == 2, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  pivot_longer(
    cols      = -PID3,
    names_to  = "Policy",
    values_to = "pct"
  )

# Wide companion table: one row per policy, one column per party group.
dumbbell_wide <- pivot_wider(gun_data, names_from = PID3, values_from = pct)

# Figure 5: dumbbell chart. Each policy is one horizontal line; the party
# groups are coloured dots placed at their support share.
p5 <- ggplot(gun_data, aes(x = pct, y = Policy, color = PID3, group = Policy)) +
  # Grey connector between the party dots of one policy; grouping by Policy is
  # inherited from the plot-level mapping.
  geom_line(color = "grey70", linewidth = 1.5) +
  geom_point(size = 7, alpha = 0.9) +
  # Percentage printed inside each dot.
  geom_text(
    aes(label = paste0(round(pct * 100), "%")),
    color = "white", fontface = "bold", size = 2.8
  ) +
  # NOTE: a scale limit of 40%-100% means any share outside that range is
  # removed from the plot rather than clipped.
  scale_x_continuous(limits = c(0.4, 1.0), labels = percent_format()) +
  scale_color_manual(
    values = c("Democrat" = "#1f77b4", "Independent" = "#7f7f7f", "Republican" = "#d62728")
  ) +
  labs(
    title    = "Support for Gun Control Policies by Party",
    subtitle = "Dumbbell chart showing % support among each party group · 2018 CCES",
    caption  = "Source: CCES 2018",
    x        = "Percentage Supporting Policy",
    y        = NULL,
    color    = "Party ID"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    legend.position    = "top",
    panel.grid.major.y = element_blank(),
    panel.grid.minor   = element_blank(),
    plot.title         = element_text(face = "bold", size = 15),
    plot.subtitle      = element_text(color = "grey50")
  )

p5

Both parties broadly support background checks, but sharply diverge on an assault-weapons ban.

Figure 6: News Interest by Ideology (Faceted Bar Chart)

# Figure 6 data: within each ideology group, the share of respondents at each
# news-interest level. Rows missing either variable are excluded. The result
# stays grouped by Ideology.
fig6_data <- df %>%
  filter(!is.na(NewsInt), !is.na(Ideology)) %>%
  group_by(Ideology, NewsInt) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(pct = n / sum(n))

# Faceted bar chart: one panel per ideology group, one bar per interest level.
p6 <- ggplot(fig6_data, aes(x = NewsInt, y = pct, fill = NewsInt)) +
  geom_col(show.legend = FALSE, alpha = 0.9) +
  # Percentage label just above each bar.
  geom_text(aes(label = paste0(round(pct * 100), "%")), size = 3, vjust = -0.3) +
  facet_wrap(~Ideology, nrow = 1) +
  # 12% headroom at the top keeps the labels inside the panel.
  scale_y_continuous(
    labels = percent_format(),
    expand = expansion(mult = c(0, 0.12))
  ) +
  scale_fill_viridis_d(option = "cividis", direction = -1) +
  labs(
    title    = "News Interest Level by Ideology",
    subtitle = "Faceted bar charts for each ideological group · 2018 CCES",
    caption  = "Source: CCES 2018",
    x        = NULL,
    y        = "Share of Group"
  ) +
  theme_minimal(base_size = 11) +
  theme(
    strip.text         = element_text(face = "bold"),
    axis.text.x        = element_text(angle = 35, hjust = 1, size = 8),
    panel.grid.major.x = element_blank(),
    panel.grid.minor   = element_blank(),
    plot.title         = element_text(face = "bold", size = 15),
    plot.subtitle      = element_text(color = "grey50")
  )

p6

News interest varies across the ideological spectrum; respondents at both ideological extremes follow the news most closely.

Figure 7: Immigration Policy Support (Grouped Bar – DACA vs. Border Wall)

# Figure 7 data: for each party group, the proportion of non-missing answers
# equal to 1 on the two immigration items. The summary columns are named with
# their final display labels, so no separate relabelling step is needed.
imm_data <- df %>%
  filter(!is.na(PID3)) %>%
  group_by(PID3) %>%
  summarise(
    `Support DACA`        = mean(CC18_325a == 1, na.rm = TRUE),
    `Support Border Wall` = mean(CC18_325b == 1, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  pivot_longer(cols = -PID3, names_to = "Policy", values_to = "pct") %>%
  # Fix the left-to-right order of the party groups on the x axis.
  mutate(PID3 = factor(PID3, levels = c("Democrat", "Independent", "Republican")))

# Grouped bar chart: two side-by-side bars (one per policy) for each party.
p7 <- ggplot(imm_data, aes(x = PID3, y = pct, fill = Policy)) +
  geom_col(width = 0.6, alpha = 0.9, position = position_dodge(width = 0.7)) +
  # Labels use the same dodge width as the bars so they sit above them.
  geom_text(
    aes(label = paste0(round(pct * 100), "%")),
    position = position_dodge(width = 0.7),
    fontface = "bold", size = 3.5, vjust = -0.4
  ) +
  scale_y_continuous(
    limits = c(0, 1),
    labels = percent_format(),
    expand = expansion(mult = c(0, 0.05))
  ) +
  scale_fill_manual(
    values = c("Support DACA" = "#2ecc71", "Support Border Wall" = "#e74c3c")
  ) +
  labs(
    title    = "Support for Immigration Policies by Party",
    subtitle = "Grouped bar chart: DACA protection vs. Border Wall construction · 2018 CCES",
    caption  = "Source: CCES 2018",
    x        = "Party Identification",
    y        = "Percentage Supporting",
    fill     = "Policy"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    legend.position    = "top",
    panel.grid.major.x = element_blank(),
    panel.grid.minor   = element_blank(),
    plot.title         = element_text(face = "bold", size = 15),
    plot.subtitle      = element_text(color = "grey50")
  )

p7

Stark partisan divide on immigration: DACA support is high among Democrats while wall support is high among Republicans.

Figure 8: Interactive — Income, Education & Party ID (Plotly Scatter)

# Figure 8 data: complete cases on income, education, party and region.
# Both axes are discrete-valued, so uniform random jitter is added to spread
# overlapping points (+/- 0.25 education levels, +/- 3 $K). No seed is set
# here, so point positions differ between runs.
scatter_data <- df %>%
  filter(
    !is.na(Income_mid),
    !is.na(Education),
    !is.na(PID3),
    !is.na(Region)
  ) %>%
  mutate(
    educ_jitter = as.numeric(Education) + runif(n(), -0.25, 0.25),
    inc_jitter  = Income_mid + runif(n(), -3, 3)
  )

# Interactive scatter: colour encodes party, marker symbol encodes region,
# and the hover box shows the respondent's un-jittered values.
p8 <- plot_ly(
  scatter_data,
  type = "scatter", mode = "markers",
  x = ~educ_jitter,
  y = ~inc_jitter,
  color  = ~PID3,
  colors = c("Democrat" = "#1f77b4", "Independent" = "#7f7f7f", "Republican" = "#d62728"),
  symbol  = ~Region,
  symbols = c("circle", "square", "diamond", "cross"),
  marker = list(
    size    = 7,
    opacity = 0.7,
    line    = list(width = 0.5, color = "white")
  ),
  # Only this HTML text is shown on hover (hoverinfo = "text").
  hoverinfo = "text",
  text = ~paste0(
    "<b>Party:</b> ", PID3, "<br>",
    "<b>Region:</b> ", Region, "<br>",
    "<b>Education:</b> ", Education, "<br>",
    "<b>Income (approx):</b> $", Income_mid, "K<br>",
    "<b>Ideology:</b> ", Ideology
  )
) %>%
  layout(
    title = list(
      text = "<b>Income vs. Education by Party ID and Region</b><br><sup>2018 CCES · Hover for respondent details</sup>",
      font = list(size = 16)
    ),
    # Replace the numeric education codes on the axis with short labels.
    xaxis = list(
      title     = "Education Level",
      tickvals  = 1:6,
      ticktext  = c("No HS", "High School", "Some College", "2-Year", "4-Year", "Post-Grad"),
      gridcolor = "#eeeeee"
    ),
    yaxis = list(
      title     = "Household Income (midpoint, $K)",
      gridcolor = "#eeeeee"
    ),
    legend = list(title = list(text = "<b>Party / Region</b>")),
    plot_bgcolor  = "white",
    paper_bgcolor = "white",
    # Source note anchored to the bottom-right corner, below the plot area.
    annotations = list(
      list(
        text = "Source: CCES 2018", showarrow = FALSE,
        xref = "paper", yref = "paper", x = 1, y = -0.12,
        xanchor = "right",
        font = list(size = 10, color = "grey")
      )
    )
  )

p8

Interactive plot: explore how income and education relate to party identification across regions. Hover for details.

This portfolio presents eight data-driven visualizations built from the 2018 Cooperative Congressional Election Study (CCES), a nationally representative survey of American political attitudes and demographics. Figures span bar charts, ridge plots, heatmaps, box plots, dumbbell charts, faceted bars, grouped bars, and an interactive Plotly scatter plot — covering party identity, ideology, income, education, immigration, and gun policy.

American Political Identity: Visualizing the 2018 CCES Survey

Data Visualization Portfolio

2026-05-18