library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: Hmisc
##
##
## Attaching package: 'Hmisc'
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:Hmisc':
##
## describe
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
haven,
survey
)
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Loading required package: survival
##
## Attaching package: 'survey'
##
## The following object is masked from 'package:Hmisc':
##
## deff
##
## The following object is masked from 'package:graphics':
##
## dotchart
theme_set(theme_bw())
options(
digits = 3
)
#multithreading
#library(future)
#plan(multisession(workers = 8))
#recode ideology functions
recode_ideology = function(x, levels) {
if (levels == 2) {
y = x %>% case_match(
c(1, 2, 3) ~ "left/liberal",
c(5, 6, 7) ~ "right/conservative",
.default = NA_character_
)
} else if (levels == 3) {
y = x %>% case_match(
c(1, 2, 3) ~ "left/liberal",
c(4) ~ "middle/moderate",
c(5, 6, 7) ~ "right/conservative",
.default = NA_character_
) %>% ordered(levels = c("left/liberal", "middle/moderate", "right/conservative"))
} else if (levels == 7) {
y = x %>% case_match(
1 ~ "far left",
2 ~ "left",
3 ~ "slightly left",
4 ~ "middle",
5 ~ "slightly right",
6 ~ "right",
7 ~ "far right"
) %>% ordered(levels = c("far left", "left", "slightly left", "middle", "slightly right", "right", "far right"))
} else {
stop("levels must be 2, 3, or 7")
}
y
}
d = read_sav("data/anes_timeseries_cdf_sav/anes_timeseries_cdf.sav")
d_vars = d %>% df_var_table()
d %<>% mutate(
feel_Black = VCF0206,
feel_White = VCF0207,
feel_Asian = VCF0227,
feel_Hispanic = VCF0217,
own_race = VCF0105a %>% as_factor() %>% as.character() %>% case_match(
"1. White non-Hispanic (1948-2012)" ~ "White",
"2. Black non-Hispanic (1948-2012)" ~ "Black",
"5. Hispanic (1966-2012)" ~ "Hispanic",
"3. Asian or Pacific Islander, non-Hispanic (1966-2012)" ~ "Asian",
.default = NA_character_
),
year = VCF0004 %>% as.numeric(),
#weights
w_full = VCF0009z,
#politics
ideology = VCF0803 %>% as_factor(),
ideology_2way = VCF0803 %>% recode_ideology(levels = 2),
ideology_3way = VCF0803 %>% recode_ideology(levels = 3),
ideology_7way = VCF0803 %>% recode_ideology(levels = 7)
)
#check encodings
d$own_race %>% table2()
#feelings for races by race as function of year
d_feel = d %>%
select(
feel_Black:feel_Hispanic,
own_race,
year,
w_full,
ideology_2way, ideology_3way, ideology_7way
)
#long format
d_feel_long = d_feel %>%
pivot_longer(
cols = feel_Black:feel_Hispanic,
names_to = "target_race",
values_to = "feeling"
) %>%
#filter missing feeling
filter(
!is.na(feeling)
) %>%
#recode 98-99 to NA
mutate(
feeling = case_match(
feeling,
98 ~ NA_real_,
99 ~ NA_real_,
97 ~ 98,
.default = as.numeric(feeling)
)
)
#clean
d_feel_long$target_race %<>% str_remove("feel_")
anes2020 = read_dta("data/anes_timeseries_2020_stata_20220210/anes_timeseries_2020_stata_20220210.dta")
anes2020_vars = anes2020 %>% df_var_table()
#remove out of bounds codes, meaning these are NAs and oher invalid data
anes2020_sub <- anes2020 %>%
filter(
between(V202477, 0, 100),
between(V202479, 0, 100),
between(V202480, 0, 100),
between(V202482, 0, 100)
)
#recode
anes2020_sub %<>% mutate(
own_race = V201549x %>% as_factor() %>% as.character() %>% case_match(
"1. White, non-Hispanic" ~ "White",
"2. Black, non-Hispanic" ~ "Black",
"3. Hispanic" ~ "Hispanic",
"4. Asian or Native Hawaiian/other Pacific Islander, non-Hispanic alone" ~ "Asian",
.default = NA_character_
),
feel_Asian = V202477,
feel_Hispanic = V202479,
feel_Black = V202480,
feel_White = V202482,
#politics
ideology = V201200 %>% as_factor(),
ideology_2way = V201200 %>% recode_ideology(levels = 2),
ideology_3way = V201200 %>% recode_ideology(levels = 3),
ideology_7way = V201200 %>% recode_ideology(levels = 7)
)
#race dist
anes2020_sub$own_race %>% table2()
#long format for feelings
anes2020_sub_long = anes2020_sub %>%
select(
own_race,
feel_Asian:feel_White,
ideology_2way, ideology_3way, ideology_7way
) %>%
pivot_longer(
cols = feel_Asian:feel_White,
names_to = "target_race",
values_to = "feeling"
) %>%
mutate(
target_race = target_race %>% str_remove("feel_"),
feeling = feeling %>% as.numeric()
)
anes2024 = read_dta("data/anes_timeseries_2024_stata_20250430/anes_timeseries_2024_stata_20250430.dta")
anes2024_vars = anes2024 %>% df_var_table()
#2024
anes2024_sub <- anes2024 %>%
filter(
between(V242514, 0, 100),
between(V242515, 0, 100),
between(V242516, 0, 100),
between(V242518, 0, 100)
)
#recode
anes2024_sub %<>% mutate(
own_race = V241501x %>% as_factor() %>% case_match(
"1. White, non-Hispanic" ~ "White",
"2. Black, non-Hispanic" ~ "Black",
"3. Hispanic" ~ "Hispanic",
"4. Asian or Native Hawaiian/other Pacific Islander, non-Hispanic" ~ "Asian",
.default = NA_character_
),
feel_Asian = V242514,
feel_Hispanic = V242515,
feel_Black = V242516,
feel_White = V242518,
#politics
ideology = V241177 %>% as_factor(),
ideology_3way = V241177 %>% case_match(
c(1, 2, 3) ~ "left/liberal",
c(4) ~ "middle/moderate",
c(5, 6, 7) ~ "right/conservative",
.default = NA_character_
),
ideology_2way = V241177 %>% recode_ideology(levels = 2),
ideology_3way = V241177 %>% recode_ideology(levels = 3),
ideology_7way = V241177 %>% recode_ideology(levels = 7)
)
#long format for feelings
anes2024_sub_long = anes2024_sub %>%
select(
V240107c, V240107d, V240107b,
own_race,
feel_Asian:feel_White,
ideology_2way, ideology_3way, ideology_7way
) %>%
pivot_longer(
cols = feel_Asian:feel_White,
names_to = "target_race",
values_to = "feeling"
) %>%
mutate(
target_race = target_race %>% str_remove("feel_"),
feeling = feeling %>% as.numeric()
)
#overall
d_feel_long %>%
GG_group_means("feeling", groupvar = "own_race", subgroupvar = "target_race")
## Missing values were removed.
#2020
d_feel_long %>%
filter(year == 2016) %>%
GG_group_means("feeling", groupvar = "own_race", subgroupvar = "target_race")
## Missing values were removed.
#survey weights
sur_design =
svydesign(
id = ~1,
weights = ~ w_full,
data = d_feel_long
)
#compute means
sur_means = svyby(~feeling, ~interaction(own_race, target_race), design = sur_design, svymean, na.rm = TRUE, level = .95)
#plot
sur_means %>%
separate_wider_delim(
cols = `interaction(own_race, target_race)`,
delim = ".",
names = c("own_race", "target_race")
) %>%
ggplot(aes(own_race, feeling, fill = target_race)) +
geom_col(position = "dodge") +
geom_errorbar(aes(ymin = feeling - 2*se, ymax = feeling + 2*se), width = 0.2, position = position_dodge(0.9))
#without weights
anes2020_sub_long %>%
GG_group_means(
"feeling",
groupvar = "own_race",
subgroupvar = "target_race"
) +
labs(
title = "How races rate each other",
subtitle = "2020 ANES",
y = "Feeling thermometer rating mean [0-100]",
x = "Race of rater",
fill = "Target race",
caption = "Made by Emil Kirkegaard, @KirkegaardEmil"
)
## Missing values were removed.
# Define the survey design
survey_design <- svydesign(
id = ~V240107c,
strata = ~V240107d,
weights = ~V240107b,
data = anes2024_sub_long,
nest = TRUE,
single.unit.method = "centered"
)
# Compute means by group interaction (own_race and target_race) with 83.4% confidence level
df <- svyby(
formula = ~feeling,
by = ~own_race + target_race, # This includes the interaction between own_race and target_race
design = survey_design,
FUN = svymean,
vartype = "ci",
level = 0.95
) %>%
as_tibble()
#plot
df %>%
ggplot(aes(own_race, feeling, fill = target_race)) +
geom_col(position = "dodge") +
geom_errorbar(aes(ymin = ci_l, ymax = ci_u), width = 0.2, position = position_dodge(0.9))
#without weights
anes2024_sub_long %>%
GG_group_means(
"feeling",
groupvar = "own_race",
subgroupvar = "target_race"
)
## Missing values were removed.
#bind data
d_all = bind_rows(
d_feel_long %>% select(-w_full),
anes2020_sub_long %>% mutate(year = 2020),
anes2024_sub_long %>% select(-V240107c, -V240107d, -V240107b) %>% mutate(year = 2024)
)
#calculate ethnocentrism by year
d_all %>%
plyr::ddply(c("own_race", "year"), function(dd) {
# if (dd$year[1]== 1988) browser()
self_row = dd$own_race == dd$target_race
tibble(
ethnocentrism = mean(dd$feeling[self_row], na.rm = T) - mean(dd$feeling[!self_row], na.rm = T),
)
}) %>%
#filter missing data which causes lines to disappear
miss_filter() %>%
#plot by year
ggplot(aes(year, ethnocentrism, color = own_race)) +
geom_line() +
scale_x_continuous(breaks = seq(1960, 2050, by = 5)) +
labs(
title = "Ethnocentrism by year",
subtitle = "ANES 1964-2024.",
y = "Feeling ethnocentrism [Rating of own race - rating of other races]",
x = "Year of survey",
color = "Race",
caption = "Made by Emil Kirkegaard, @KirkegaardEmil"
)
GG_save("figs/feeling_ethnocentrism.png")
#investigate missing data issue
d_feel_sum = d_all %>%
plyr::ddply(c("own_race", "year", "target_race"), function(dd) {
tibble(
n = nrow(dd),
feeling = mean(dd$feeling, na.rm = T)
)
})
#plot missing data
d_feel_sum %>%
ggplot(aes(year, target_race, fill = feeling)) +
geom_tile() +
scale_fill_continuous(low = "red", high = "blue") +
facet_wrap("own_race", labeller = "label_both")
#spellings match
unique(d_all$target_race) %in% unique(d_all$own_race)
## [1] TRUE TRUE TRUE TRUE
#plot again
d_feel_sum %>%
ggplot(aes(year, feeling, color = target_race)) +
geom_point() +
facet_wrap("own_race")
#ratings of each race by race with facets
d_all %>%
#calculate means:
group_by(year, own_race, target_race) %>%
summarise(
feeling = mean(feeling, na.rm = T),
n = n()
) %>%
#filter observations with less than 10 people
filter(n > 10) %>%
ggplot(aes(year, feeling, color = target_race)) +
scale_x_continuous(breaks = seq(1900, 3000, by=10)) +
geom_line() +
facet_wrap("own_race", labeller = "label_both") +
labs(
title = "Feeling thermometer ratings of races in USA",
y = "Feeling thermometer mean rating [0-100]",
color = "Target",
caption = "Made by Emil Kirkegaard, @KirkegaardEmil"
)
## `summarise()` has grouped output by 'year', 'own_race'. You can override using
## the `.groups` argument.
GG_save("figs/race_feelings.png")
#regress out 2002 apparent bias
d_all %>%
group_by(year) %>%
summarise(
feeling = mean(feeling, na.rm = T),
n = n()
) %>%
ggplot(aes(year, feeling)) +
geom_line() +
geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
d_all$year2002 = d_all$year == 2002
m_race_race_year = lm(feeling ~ year2002 * own_race, data = d_all)
m_race_race_year %>% summary()
##
## Call:
## lm(formula = feeling ~ year2002 * own_race, data = d_all)
##
## Residuals:
## Min 1Q Median 3Q Max
## -73.86 -17.23 2.77 17.77 35.24
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 66.569 0.411 161.88 <2e-16 ***
## year2002TRUE -3.809 2.412 -1.58 0.114
## own_raceBlack 7.291 0.439 16.61 <2e-16 ***
## own_raceHispanic 4.956 0.447 11.09 <2e-16 ***
## own_raceWhite 0.662 0.416 1.59 0.111
## year2002TRUE:own_raceBlack -5.834 2.614 -2.23 0.026 *
## year2002TRUE:own_raceHispanic -0.775 2.691 -0.29 0.773
## year2002TRUE:own_raceWhite 1.696 2.435 0.70 0.486
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20.6 on 149809 degrees of freedom
## (23253 observations deleted due to missingness)
## Multiple R-squared: 0.0137, Adjusted R-squared: 0.0137
## F-statistic: 297 on 7 and 149809 DF, p-value: <2e-16
#check ideology distribution
d_all$ideology_2way %>% table2()
d_all$ideology_3way %>% table2()
d_all$ideology_7way %>% table2()
#get ratings of races among whites by ideology
d_all %>%
filter(
!is.na(ideology_3way),
own_race == "White"
) %>%
ggplot(aes(year, feeling, color = target_race)) +
geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs", k = 5)) +
facet_wrap("ideology_3way") +
labs(
title = "White Americans' feelings towards races",
y = "Feeling thermometer mean rating [0-100]",
x = "Year of survey",
color = "Target race",
caption = "Made by Emil Kirkegaard, @KirkegaardEmil"
)
## Warning: Removed 6223 rows containing non-finite outside the scale range
## (`stat_smooth()`).
GG_save("figs/white_ratings_by_ideology3.png")
## Warning: Removed 6223 rows containing non-finite outside the scale range
## (`stat_smooth()`).
#7 way split
d_all %>%
filter(
!is.na(ideology_7way),
own_race == "White"
) %>%
ggplot(aes(year, feeling, color = target_race)) +
geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs", k = 5)) +
facet_wrap("ideology_7way") +
labs(
title = "White Americans' feelings towards races",
y = "Feeling thermometer mean rating [0-100]",
x = "Year of survey",
color = "Target race",
caption = "Made by Emil Kirkegaard, @KirkegaardEmil"
)
## Warning: Removed 6223 rows containing non-finite outside the scale range
## (`stat_smooth()`).
GG_save("figs/white_ratings_by_ideology7.png")
## Warning: Removed 6223 rows containing non-finite outside the scale range
## (`stat_smooth()`).
#actively dislike, below some rating
d_all %>%
filter(
!is.na(ideology_3way),
!is.na(feeling),
own_race == "White"
) %>%
mutate(
dislike = (feeling <= 30) %>% as.numeric()
) %>%
ggplot(aes(year, dislike, color = target_race)) +
geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs", k = 5)) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
facet_wrap("ideology_3way") +
labs(
title = "White Americans' feelings towards races",
subtitle = "Dislike defined as rating below 30",
y = "Feeling thermometer mean rating [0-100]",
x = "Year of survey",
color = "Target race",
caption = "Made by Emil Kirkegaard, @KirkegaardEmil"
)
GG_save("figs/white_dislike_by_ideology3.png")
#actively dislike, below some rating, 7 split
d_all %>%
filter(
!is.na(ideology_7way),
!is.na(feeling),
own_race == "White"
) %>%
mutate(
dislike = (feeling <= 30) %>% as.numeric()
) %>%
ggplot(aes(year, dislike, color = target_race)) +
geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs", k = 5)) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
facet_wrap("ideology_7way") +
labs(
title = "White Americans' feelings towards races",
subtitle = "Dislike defined as rating below 30",
y = "Feeling thermometer mean rating [0-100]",
x = "Year of survey",
color = "Target race",
caption = "Made by Emil Kirkegaard, @KirkegaardEmil"
)
GG_save("figs/white_dislike_by_ideology7.png")
#disliking, all races, no ideology
d_all %>%
filter(
!is.na(feeling),
!is.na(own_race)
) %>%
mutate(
dislike = (feeling <= 30) %>% as.numeric()
) %>%
ggplot(aes(year, dislike, color = target_race)) +
geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs", k = 5)) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
facet_wrap("own_race", labeller = "label_both") +
labs(
title = "Americans' dislikings of races",
subtitle = "Dislike defined as rating below 30",
y = "Feeling thermometer mean rating [0-100]",
x = "Year of survey",
color = "Target race",
caption = "Made by Emil Kirkegaard, @KirkegaardEmil"
)
GG_save("figs/dislike_by_race.png")
#jews
anes2024_sub %>%
mutate(
feel_jews = V242149 %>% case_match(
c(1:100) ~ V242149,
.default = NA
)
) %>%
filter(
!is.na(ideology_7way)
) %>%
GG_group_means("feel_jews", "ideology_7way", type = "point") +
labs(
title = "Feelings towards Jews among Americans",
subtitle = "ANES 2024, all Americans",
y = "Feeling thermometer mean rating [0-100]",
x = "Ideology self-placement",
caption = "Made by Emil Kirkegaard, @KirkegaardEmil"
)
## Missing values were removed.
GG_save("figs/jews~ideology7.png")
#versions
write_sessioninfo()
## R version 4.5.0 (2025-04-11)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0 LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Brussels
## tzcode source: system (glibc)
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] survey_4.4-2 survival_3.8-3 Matrix_1.7-3
## [4] haven_2.5.4 kirkegaard_2025-05-09 psych_2.5.3
## [7] assertthat_0.2.1 weights_1.0.4 Hmisc_5.2-3
## [10] magrittr_2.0.3 lubridate_1.9.4 forcats_1.0.0
## [13] stringr_1.5.1 dplyr_1.1.4 purrr_1.0.4
## [16] readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
## [19] ggplot2_3.5.2 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.1 farver_2.1.2 fastmap_1.2.0 digest_0.6.37
## [5] rpart_4.1.24 timechange_0.3.0 lifecycle_1.0.4 cluster_2.1.8.1
## [9] gdata_3.0.1 compiler_4.5.0 rlang_1.1.6 sass_0.4.10
## [13] tools_4.5.0 yaml_2.3.10 data.table_1.17.0 knitr_1.50
## [17] labeling_0.4.3 htmlwidgets_1.6.4 mnormt_2.1.1 plyr_1.8.9
## [21] withr_3.0.2 foreign_0.8-90 nnet_7.3-20 jomo_2.7-6
## [25] colorspace_2.1-1 mice_3.17.0 scales_1.3.0 gtools_3.9.5
## [29] iterators_1.0.14 MASS_7.3-65 cli_3.6.4 rmarkdown_2.29
## [33] ragg_1.4.0 reformulas_0.4.0 generics_0.1.3 rstudioapi_0.17.1
## [37] tzdb_0.5.0 DBI_1.2.3 minqa_1.2.8 cachem_1.1.0
## [41] splines_4.5.0 parallel_4.5.0 mitools_2.4 base64enc_0.1-3
## [45] vctrs_0.6.5 boot_1.3-31 glmnet_4.1-8 jsonlite_2.0.0
## [49] hms_1.1.3 mitml_0.4-5 Formula_1.2-5 htmlTable_2.4.3
## [53] systemfonts_1.2.2 foreach_1.5.2 jquerylib_0.1.4 glue_1.8.0
## [57] nloptr_2.2.1 pan_1.9 codetools_0.2-19 stringi_1.8.7
## [61] shape_1.4.6.1 gtable_0.3.6 lme4_1.1-37 munsell_0.5.1
## [65] pillar_1.10.2 htmltools_0.5.8.1 R6_2.6.1 textshaping_1.0.0
## [69] Rdpack_2.6.4 evaluate_1.0.3 lattice_0.22-5 rbibutils_2.3
## [73] backports_1.5.0 broom_1.0.8 bslib_0.9.0 Rcpp_1.0.14
## [77] gridExtra_2.3 nlme_3.1-168 checkmate_2.3.2 mgcv_1.9-1
## [81] xfun_0.52 pkgconfig_2.0.3
#write data to file for reuse
d_all %>% write_rds("data/data_for_reuse.rds")
#OSF
if (F) {
library(osfr)
#login
osf_auth(readr::read_lines("~/.config/osf_token"))
#the project we will use
osf_proj = osf_retrieve_node("https://osf.io/XXX/")
#upload all files in project
#overwrite existing (versioning)
osf_upload(
osf_proj,
path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"),
conflicts = "overwrite"
)
}