# Load the raw CCES sample.
# NOTE(review): absolute, machine-specific path; consider a project-relative
# path so the script runs on other machines.
df_raw <- read.csv("/Users/alekhyakotha/Desktop/Data Visualization Course/CCES Sample.csv")

# Recode the numeric survey codes into labelled analysis variables.
#
# How unmatched codes are handled:
#   * factor(x, levels = ...) turns any code outside `levels` into NA.
#   * case_when() without a TRUE branch turns unmatched codes (and NA) into NA.
#   * case_when() WITH a TRUE branch also routes NA into that branch, so
#     Race / Employment / MarStatus label missing values as "Other".
#
# NOTE(review): the code-to-label mappings are reproduced as written; verify
# each against the survey codebook before publishing.
df <- df_raw %>%
  mutate(
    # Demographics
    Gender = factor(gender, levels = 1:2, labels = c("Male", "Female")),
    Region = factor(
      region,
      levels = 1:4,
      labels = c("Northeast", "Midwest", "South", "West")
    ),
    Race = case_when(
      race == 1 ~ "White",
      race == 2 ~ "Black",
      race == 3 ~ "Hispanic",
      race == 4 ~ "Asian",
      TRUE ~ "Other"
    ),
    Education = factor(
      educ,
      levels = 1:6,
      labels = c(
        "No HS", "High School", "Some College",
        "2-Year Degree", "4-Year Degree", "Post-Grad"
      )
    ),
    Employment = case_when(
      employ == 1 ~ "Full-time",
      employ == 2 ~ "Part-time",
      employ == 5 ~ "Retired",
      employ == 6 ~ "Not employed",
      employ == 7 ~ "Self-employed",
      employ == 4 ~ "Homemaker",
      TRUE ~ "Other"
    ),
    # NOTE(review): code 2 is not mapped and falls into "Other"; confirm the
    # marstat coding against the codebook.
    MarStatus = case_when(
      marstat == 1 ~ "Married",
      marstat == 3 ~ "Divorced",
      marstat == 4 ~ "Separated",
      marstat == 5 ~ "Never Married",
      marstat == 6 ~ "Widowed",
      TRUE ~ "Other"
    ),

    # Political identity
    # NOTE(review): confirm the label order. Seven-point party-ID scales
    # commonly run Strong / Weak (not very strong) / Lean, which would make
    # code 2 "Weak Dem", 3 "Lean Dem", 5 "Lean Rep" and 6 "Weak Rep".
    PID = factor(
      pid7,
      levels = 1:7,
      labels = c(
        "Strong Dem", "Lean Dem", "Weak Dem",
        "Independent", "Weak Rep", "Lean Rep", "Strong Rep"
      )
    ),
    # Three-way collapse of pid7; codes outside 1:7 become NA.
    PID3 = case_when(
      pid7 %in% 1:3 ~ "Democrat",
      pid7 == 4 ~ "Independent",
      pid7 %in% 5:7 ~ "Republican"
    ),
    Ideology = factor(
      ideo5,
      levels = 1:5,
      labels = c(
        "Very Liberal", "Liberal", "Moderate",
        "Conservative", "Very Conservative"
      )
    ),

    # Income (midpoints in $thousands); codes outside 1:16 become NA.
    Income_mid = case_when(
      faminc_new == 1 ~ 10, faminc_new == 2 ~ 17.5,
      faminc_new == 3 ~ 25, faminc_new == 4 ~ 37.5,
      faminc_new == 5 ~ 50, faminc_new == 6 ~ 62.5,
      faminc_new == 7 ~ 75, faminc_new == 8 ~ 87.5,
      faminc_new == 9 ~ 100, faminc_new == 10 ~ 125,
      faminc_new == 11 ~ 150, faminc_new == 12 ~ 175,
      faminc_new == 13 ~ 200, faminc_new == 14 ~ 250,
      faminc_new == 15 ~ 350, faminc_new == 16 ~ 500
    ),
    # Missing income must stay missing. Without the explicit is.na() branch,
    # `NA < 40` and `NA < 100` never match and the TRUE default would put
    # every respondent with unknown income into the "High" group.
    IncomeGroup = factor(
      case_when(
        is.na(Income_mid) ~ NA_character_,
        Income_mid < 40 ~ "Low (<$40K)",
        Income_mid < 100 ~ "Middle ($40K–$99K)",
        TRUE ~ "High ($100K+)"
      ),
      levels = c("Low (<$40K)", "Middle ($40K–$99K)", "High ($100K+)")
    ),

    # Religion importance
    ReligImp = factor(
      pew_religimp,
      levels = 1:4,
      labels = c(
        "Very Important", "Somewhat Important",
        "Not Too Important", "Not at All Important"
      )
    ),

    # News interest
    NewsInt = factor(
      newsint,
      levels = 1:4,
      labels = c(
        "Most of the time", "Some of the time",
        "Only now and then", "Hardly at all"
      )
    ),

    # Policy views (CC18_310a-d): 2=support, 3=oppose, 5=unsure, etc.
    # NOTE(review): confirm in the codebook that CC18_310b / CC18_310c are the
    # gun-policy items and that codes 2 / 3 / 5 carry these meanings.
    # Gun background checks (310b): 2=support, 3=oppose
    Gun_BG = case_when(
      CC18_310b == 2 ~ "Support",
      CC18_310b == 3 ~ "Oppose",
      CC18_310b == 5 ~ "Not sure",
      TRUE ~ NA_character_
    ),
    # Assault weapons ban (310c)
    AssaultBan = case_when(
      CC18_310c == 2 ~ "Support",
      CC18_310c == 3 ~ "Oppose",
      CC18_310c == 5 ~ "Not sure",
      TRUE ~ NA_character_
    ),

    # CC18_308a: approval of Trump (1=Strongly approve, 4=Strongly disapprove)
    TrumpApproval = factor(
      CC18_308a,
      levels = 1:4,
      labels = c(
        "Strongly Approve", "Somewhat Approve",
        "Somewhat Disapprove", "Strongly Disapprove"
      )
    ),

    # CC18_325: immigration policy (1=yes, 2=no)
    # ifelse() keeps NA as NA, but every non-missing code other than 1 is
    # labelled "Oppose". NOTE(review): confirm no other codes occur.
    DACA = ifelse(CC18_325a == 1, "Support DACA", "Oppose DACA"),
    BorderWall = ifelse(CC18_325b == 1, "Support Wall", "Oppose Wall"),

    # Union membership; codes outside 1:3 become NA.
    Union = case_when(
      union == 1 ~ "Self in union",
      union == 2 ~ "Household member in union",
      union == 3 ~ "No union"
    )
  )
# Figure 1: share of each 3-category party ID within each region.
# Respondents with missing party ID or region are excluded, so the plotted
# sample can be smaller than the full data set.
fig1_data <- df %>%
  filter(!is.na(PID3), !is.na(Region)) %>%
  count(Region, PID3) %>%
  group_by(Region) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup()

# Party colour palette keyed by the PID3 labels.
pal <- c("Democrat" = "#1f77b4", "Independent" = "#7f7f7f", "Republican" = "#d62728")

p1 <- ggplot(fig1_data, aes(x = Region, y = pct, fill = PID3)) +
  geom_col(width = 0.7, color = "white", linewidth = 0.3) +
  # Only segments of at least 7% get a label; smaller ones stay blank.
  geom_text(
    aes(label = ifelse(pct >= 0.07, paste0(round(pct * 100), "%"), "")),
    position = position_stack(vjust = 0.5),
    color = "white", fontface = "bold", size = 3.5
  ) +
  scale_y_continuous(labels = percent_format(), expand = c(0, 0)) +
  scale_fill_manual(values = pal) +
  labs(
    title = "Party Identification by U.S. Region",
    # Report the number of respondents actually plotted (after the NA filter)
    # instead of a hard-coded sample size.
    subtitle = paste0(
      "2018 Cooperative Congressional Election Study (CCES) · n = ",
      format(sum(fig1_data$n), big.mark = ",")
    ),
    x = NULL, y = "Share of Respondents", fill = "Party ID",
    caption = "Source: CCES 2018"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold", size = 15),
    plot.subtitle = element_text(color = "grey50"),
    legend.position = "top",
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank()
  )
p1
Party identification varies across U.S. regions, with the South leaning more Republican and the Northeast more Democratic.
# Figure 2: ridge plot of the 5-point ideology scale for each party group.
# Rows missing either ideology or party ID are dropped. Ideology is converted
# to its numeric level code (1-5) for the x axis, and PID3 is ordered so that
# Republican is the first factor level on the y axis.
fig2_data <- df %>%
  filter(!(is.na(Ideology) | is.na(PID3))) %>%
  mutate(
    Ideology_num = as.numeric(Ideology),
    PID3 = factor(PID3, levels = c("Republican", "Independent", "Democrat"))
  )

p2 <- ggplot(
  data = fig2_data,
  mapping = aes(x = Ideology_num, y = PID3, fill = PID3)
) +
  # quantiles = 2 splits each ridge in two, i.e. draws a single median line.
  geom_density_ridges(
    color = "white",
    alpha = 0.85,
    scale = 1.4,
    rel_min_height = 0.01,
    quantile_lines = TRUE,
    quantiles = 2
  ) +
  scale_fill_manual(
    values = c(
      "Democrat" = "#1f77b4",
      "Independent" = "#7f7f7f",
      "Republican" = "#d62728"
    )
  ) +
  scale_x_continuous(
    breaks = 1:5,
    labels = c("Very\nLiberal", "Liberal", "Moderate", "Conservative", "Very\nConservative")
  ) +
  labs(
    title = "Self-Reported Ideology by Party Identification",
    subtitle = "Ridge plots with median line · 2018 CCES",
    caption = "Source: CCES 2018",
    x = "Ideology Scale",
    y = NULL
  ) +
  theme_ridges(grid = FALSE, center_axis_labels = TRUE) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 15),
    plot.subtitle = element_text(color = "grey50")
  )
p2
Democrats and Republicans cluster toward opposite ends of the ideology scale, while Independents center on Moderate.
# Figure 3: heatmap of Trump approval within each 7-point party-ID group.
# `pct` is the share of a party group (row) at each approval level, so each
# row of the heatmap sums to 100%.
fig3_data <- df %>%
  filter(!is.na(TrumpApproval), !is.na(PID)) %>%
  count(PID, TrumpApproval) %>%
  group_by(PID) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup()

p3 <- ggplot(fig3_data, aes(x = TrumpApproval, y = PID, fill = pct)) +
  geom_tile(color = "white", linewidth = 0.8) +
  # Label colour is a mapped aesthetic rather than a vector taken from
  # fig3_data$pct, so it always stays aligned with the rows being drawn.
  geom_text(
    aes(label = paste0(round(pct * 100), "%"), color = pct > 0.45),
    size = 3.5, fontface = "bold"
  ) +
  # The plasma palette runs from dark (low values) to light yellow (high
  # values), so high-share tiles need dark text and low-share tiles white.
  scale_color_manual(
    values = c("TRUE" = "grey20", "FALSE" = "white"),
    guide = "none"
  ) +
  scale_fill_viridis_c(option = "plasma", labels = percent_format(), name = "Share") +
  scale_x_discrete(position = "top") +
  labs(
    title = "Trump Approval Ratings by Party Identification",
    subtitle = "Percentage of each party group holding each approval level · 2018 CCES",
    x = NULL, y = "Party ID (Strong Dem → Strong Rep)",
    caption = "Source: CCES 2018"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold", size = 15),
    plot.subtitle = element_text(color = "grey50"),
    axis.text.x = element_text(angle = 20, hjust = 0),
    panel.grid = element_blank(),
    legend.position = "right"
  )
p3
This heatmap shows how starkly Trump approval divides along party lines.
# Figure 4: household income (bracket midpoints, $K) by education level.
# Respondents with missing income or education are excluded, so the plotted
# sample can be smaller than the full data set.
fig4_data <- df %>%
  filter(!is.na(Income_mid), !is.na(Education))

p4 <- ggplot(fig4_data, aes(x = Education, y = Income_mid, fill = Education)) +
  geom_boxplot(
    outlier.shape = 21, outlier.size = 1.5, outlier.alpha = 0.5,
    width = 0.6, alpha = 0.85
  ) +
  # White diamond marks the group mean on top of each box.
  stat_summary(
    fun = mean, geom = "point", shape = 23, size = 3,
    fill = "white", color = "black"
  ) +
  scale_fill_viridis_d(option = "turbo", begin = 0.1, end = 0.9) +
  scale_y_continuous(labels = dollar_format(suffix = "K"), breaks = seq(0, 500, 50)) +
  labs(
    title = "Household Income Distribution by Education Level",
    # Report the number of respondents actually plotted (after the NA filter)
    # instead of a hard-coded sample size.
    subtitle = paste0(
      "Box plots with mean (◇) · 2018 CCES · n = ",
      format(nrow(fig4_data), big.mark = ",")
    ),
    x = "Highest Education Attained", y = "Household Income (midpoint, $K)",
    caption = "Source: CCES 2018"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold", size = 15),
    plot.subtitle = element_text(color = "grey50"),
    axis.text.x = element_text(angle = 20, hjust = 1),
    legend.position = "none",
    panel.grid.major.x = element_blank()
  )
p4
Higher education levels associate with higher household income.
# Share of each party group that supports each gun policy, in long format
# (one row per PID3 x Policy, columns: PID3, Policy, pct).
#
# Only respondents with a non-missing party ID and a recoded answer on BOTH
# policies are kept, so "Not sure" answers count in the denominator.
#
# The share is computed before any filtering on the answer. Filtering to
# supporters first makes mean(View == "Support") always 1, and the previous
# second summarise() then replaced it with 1 / (size of the first party
# group), which is not a support share.
dumbbell_data <- df %>%
  filter(!is.na(Gun_BG), !is.na(AssaultBan), !is.na(PID3)) %>%
  pivot_longer(
    cols = c(Gun_BG, AssaultBan),
    names_to = "Policy",
    values_to = "View"
  ) %>%
  mutate(
    Policy = recode(
      Policy,
      "Gun_BG" = "Background Check Requirement",
      "AssaultBan" = "Assault Weapons Ban"
    )
  ) %>%
  group_by(PID3, Policy) %>%
  summarise(pct = mean(View == "Support"), .groups = "drop")
# Re-compute cleanly: for every party group, the proportion of non-missing
# responses coded 2 on each gun item, reshaped to one row per party x policy.
gun_data <- df %>%
  filter(!is.na(PID3)) %>%
  group_by(PID3) %>%
  summarise(
    `Background Check` = mean(CC18_310b == 2, na.rm = TRUE),
    `Assault Weapons Ban` = mean(CC18_310c == 2, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  pivot_longer(
    cols = c(`Background Check`, `Assault Weapons Ban`),
    names_to = "Policy",
    values_to = "pct"
  )

# Wide companion table: one row per policy, one column per party group.
dumbbell_wide <- pivot_wider(
  gun_data,
  names_from = PID3,
  values_from = pct
)
# Figure 5: dumbbell chart of gun-policy support by party.
# One grey segment per policy connects the party points; each point carries
# its rounded percentage as a white label.
p5 <- ggplot(gun_data, aes(x = pct, y = Policy, color = PID3, group = Policy)) +
  geom_line(aes(group = Policy), color = "grey70", linewidth = 1.5) +
  geom_point(size = 7, alpha = 0.9) +
  geom_text(
    aes(label = paste0(round(pct * 100), "%")),
    color = "white", size = 2.8, fontface = "bold"
  ) +
  scale_color_manual(
    values = c("Democrat" = "#1f77b4", "Independent" = "#7f7f7f", "Republican" = "#d62728")
  ) +
  # Use the full 0-100% range. Scale limits discard out-of-range values, so
  # the previous c(0.4, 1.0) would silently drop any group below 40% support
  # and break its connecting segment.
  scale_x_continuous(labels = percent_format(), limits = c(0, 1)) +
  labs(
    title = "Support for Gun Control Policies by Party",
    subtitle = "Dumbbell chart showing % support among each party group · 2018 CCES",
    x = "Percentage Supporting Policy", y = NULL, color = "Party ID",
    caption = "Source: CCES 2018"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold", size = 15),
    plot.subtitle = element_text(color = "grey50"),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position = "top"
  )
p5
Both parties broadly support background checks, but sharply diverge on an assault-weapons ban.
# Figure 6: news-interest distribution within each ideology group.
# `pct` is the share of an ideology group at each news-interest level; the
# result stays grouped by Ideology.
fig6_data <- df %>%
  filter(!(is.na(NewsInt) | is.na(Ideology))) %>%
  count(Ideology, NewsInt) %>%
  group_by(Ideology) %>%
  mutate(pct = prop.table(n))

p6 <- ggplot(
  data = fig6_data,
  mapping = aes(x = NewsInt, y = pct, fill = NewsInt)
) +
  geom_col(show.legend = FALSE, alpha = 0.9) +
  geom_text(
    mapping = aes(label = paste0(round(pct * 100), "%")),
    size = 3,
    vjust = -0.3
  ) +
  # One panel per ideology group, laid out in a single row.
  facet_wrap(~Ideology, nrow = 1) +
  # Extra headroom at the top of the y axis leaves space for the bar labels.
  scale_y_continuous(
    labels = percent_format(),
    expand = expansion(mult = c(0, 0.12))
  ) +
  scale_fill_viridis_d(option = "cividis", direction = -1) +
  labs(
    title = "News Interest Level by Ideology",
    subtitle = "Faceted bar charts for each ideological group · 2018 CCES",
    caption = "Source: CCES 2018",
    x = NULL,
    y = "Share of Group"
  ) +
  theme_minimal(base_size = 11) +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    strip.text = element_text(face = "bold"),
    axis.text.x = element_text(angle = 35, hjust = 1, size = 8),
    plot.title = element_text(face = "bold", size = 15),
    plot.subtitle = element_text(color = "grey50")
  )
p6
News interest varies across the ideological spectrum; respondents at both ideological extremes follow the news most closely.
# Figure 7: support for two immigration policies by party group.
# For each party, take the proportion of non-missing responses coded 1 on each
# item. The summary columns are named with their final display labels, so no
# separate relabelling step is needed after reshaping to long format.
imm_data <- df %>%
  filter(!is.na(PID3)) %>%
  group_by(PID3) %>%
  summarise(
    `Support DACA` = mean(CC18_325a == 1, na.rm = TRUE),
    `Support Border Wall` = mean(CC18_325b == 1, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  pivot_longer(-PID3, names_to = "Policy", values_to = "pct") %>%
  mutate(PID3 = factor(PID3, levels = c("Democrat", "Independent", "Republican")))

p7 <- ggplot(data = imm_data, mapping = aes(x = PID3, y = pct, fill = Policy)) +
  geom_col(
    position = position_dodge(width = 0.7),
    width = 0.6,
    alpha = 0.9
  ) +
  # Labels use the same dodge width as the bars so they sit above their bar.
  geom_text(
    mapping = aes(label = paste0(round(pct * 100), "%")),
    position = position_dodge(width = 0.7),
    fontface = "bold",
    size = 3.5,
    vjust = -0.4
  ) +
  scale_y_continuous(
    labels = percent_format(),
    limits = c(0, 1),
    expand = expansion(mult = c(0, 0.05))
  ) +
  scale_fill_manual(
    values = c("Support DACA" = "#2ecc71", "Support Border Wall" = "#e74c3c")
  ) +
  labs(
    title = "Support for Immigration Policies by Party",
    subtitle = "Grouped bar chart: DACA protection vs. Border Wall construction · 2018 CCES",
    caption = "Source: CCES 2018",
    x = "Party Identification",
    y = "Percentage Supporting",
    fill = "Policy"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    legend.position = "top",
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(face = "bold", size = 15),
    plot.subtitle = element_text(color = "grey50")
  )
p7
Stark partisan divide on immigration: DACA support is high among Democrats while wall support is high among Republicans.
# Figure 8: interactive scatter of income against education.
# Both variables take only a few distinct values, so uniform random jitter is
# added to spread the points out. The jitter is unseeded, so point positions
# differ between runs.
scatter_data <- df %>%
  filter(!(is.na(Income_mid) | is.na(Education) | is.na(PID3) | is.na(Region))) %>%
  mutate(
    educ_jitter = as.numeric(Education) + runif(n(), -0.25, 0.25),
    inc_jitter = Income_mid + runif(n(), -3, 3)
  )

# Marker layer: colour encodes party, symbol encodes region; the hover text
# shows the un-jittered respondent values.
p8 <- plot_ly(
  data = scatter_data,
  type = "scatter",
  mode = "markers",
  x = ~educ_jitter,
  y = ~inc_jitter,
  color = ~PID3,
  colors = c("Democrat" = "#1f77b4", "Independent" = "#7f7f7f", "Republican" = "#d62728"),
  symbol = ~Region,
  symbols = c("circle", "square", "diamond", "cross"),
  marker = list(
    size = 7,
    opacity = 0.7,
    line = list(width = 0.5, color = "white")
  ),
  hoverinfo = "text",
  text = ~paste0(
    "<b>Party:</b> ", PID3, "<br>",
    "<b>Region:</b> ", Region, "<br>",
    "<b>Education:</b> ", Education, "<br>",
    "<b>Income (approx):</b> $", Income_mid, "K<br>",
    "<b>Ideology:</b> ", Ideology
  )
)

# Titles, axes, legend, background and the source note.
p8 <- layout(
  p8,
  title = list(
    text = "<b>Income vs. Education by Party ID and Region</b><br><sup>2018 CCES · Hover for respondent details</sup>",
    font = list(size = 16)
  ),
  # Tick positions 1-6 are the numeric codes of the Education factor levels.
  xaxis = list(
    title = "Education Level",
    tickvals = 1:6,
    ticktext = c("No HS", "High School", "Some College", "2-Year", "4-Year", "Post-Grad"),
    gridcolor = "#eeeeee"
  ),
  yaxis = list(
    title = "Household Income (midpoint, $K)",
    gridcolor = "#eeeeee"
  ),
  legend = list(title = list(text = "<b>Party / Region</b>")),
  paper_bgcolor = "white",
  plot_bgcolor = "white",
  annotations = list(
    list(
      text = "Source: CCES 2018",
      showarrow = FALSE,
      xref = "paper", yref = "paper",
      x = 1, y = -0.12,
      xanchor = "right",
      font = list(size = 10, color = "grey")
    )
  )
)
p8
Interactive plot: explore how income and education relate to party identification across regions. Hover for details.
This portfolio presents eight data-driven visualizations built from the 2018 Cooperative Congressional Election Study (CCES), a nationally representative survey of American political attitudes and demographics. Figures span bar charts, ridge plots, heatmaps, box plots, dumbbell charts, faceted bars, grouped bars, and an interactive Plotly scatter plot — covering party identity, ideology, income, education, immigration, and gun policy.