Init
Functions
Data
- Cumulative
  - Recode
- 2020
  - Recode
- 2024
  - Recode
Analysis
Meta

Init

library(kirkegaard)

## Loading required package: tidyverse

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +

load_packages(
  haven,
  survey
)

## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loading required package: survival
## 
## Attaching package: 'survey'
## 
## The following object is masked from 'package:Hmisc':
## 
##     deff
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart

theme_set(theme_bw())

options(
    digits = 3
)

#multithreading
#library(future)
#plan(multisession(workers = 8))

Functions

#recode ideology functions
recode_ideology = function(x, levels) {
  if (levels == 2) {
    y = x %>% case_match(
    c(1, 2, 3) ~ "left/liberal",
    c(5, 6, 7) ~ "right/conservative",
    .default = NA_character_
  )
  } else if (levels == 3) {
    y = x %>% case_match(
    c(1, 2, 3) ~ "left/liberal",
    c(4) ~ "middle/moderate",
    c(5, 6, 7) ~ "right/conservative",
    .default = NA_character_
  ) %>% ordered(levels = c("left/liberal", "middle/moderate", "right/conservative"))
  } else if (levels == 7) {
    y = x %>% case_match(
      1 ~ "far left",
      2 ~ "left",
      3 ~ "slightly left",
      4 ~ "middle",
      5 ~ "slightly right",
      6 ~ "right",
      7 ~ "far right"
    ) %>% ordered(levels = c("far left", "left", "slightly left", "middle", "slightly right", "right", "far right")) 
  } else {
    stop("levels must be 2, 3, or 7")
  }
  y
}

Data

Cumulative

d = read_sav("data/anes_timeseries_cdf_sav/anes_timeseries_cdf.sav")
d_vars = d %>% df_var_table()

Recode

d %<>% mutate(
  feel_Black = VCF0206,
  feel_White = VCF0207,
  feel_Asian = VCF0227,
  feel_Hispanic = VCF0217,
  own_race = VCF0105a %>% as_factor() %>% as.character() %>% case_match(
    "1. White non-Hispanic (1948-2012)" ~ "White",
    "2. Black non-Hispanic (1948-2012)" ~ "Black",
    "5. Hispanic (1966-2012)" ~ "Hispanic",
    "3. Asian or Pacific Islander, non-Hispanic (1966-2012)" ~ "Asian",
    .default = NA_character_
  ),
  year = VCF0004 %>% as.numeric(),
  
  #weights
  w_full = VCF0009z,
  
  #politics
  ideology = VCF0803 %>% as_factor(),
  ideology_2way = VCF0803 %>% recode_ideology(levels = 2),
  ideology_3way = VCF0803 %>% recode_ideology(levels = 3),
  ideology_7way = VCF0803 %>% recode_ideology(levels = 7)
)

#check encodings
d$own_race %>% table2()

#feelings for races by race as function of year
d_feel = d %>% 
  select(
    feel_Black:feel_Hispanic,
    own_race,
    year,
    w_full,
    ideology_2way, ideology_3way, ideology_7way
  )

#long format
d_feel_long = d_feel %>% 
  pivot_longer(
    cols = feel_Black:feel_Hispanic,
    names_to = "target_race",
    values_to = "feeling"
  ) %>% 
  #filter missing feeling
  filter(
    !is.na(feeling)
  ) %>% 
  #recode 98-99 to NA
  mutate(
    feeling = case_match(
      feeling,
      98 ~ NA_real_,
      99 ~ NA_real_,
      97 ~ 98,
      .default = as.numeric(feeling)
    )
  )

#clean
d_feel_long$target_race %<>% str_remove("feel_")

2020

anes2020 = read_dta("data/anes_timeseries_2020_stata_20220210/anes_timeseries_2020_stata_20220210.dta")
anes2020_vars = anes2020 %>% df_var_table()

Recode

#remove out of bounds codes, meaning these are NAs and oher invalid data
anes2020_sub <- anes2020 %>%
  filter(
    between(V202477, 0, 100),
    between(V202479, 0, 100),
    between(V202480, 0, 100),
    between(V202482, 0, 100)
  )

#recode
anes2020_sub %<>% mutate(
  own_race = V201549x %>% as_factor() %>% as.character() %>% case_match(
    "1. White, non-Hispanic" ~ "White",
    "2. Black, non-Hispanic" ~ "Black",
    "3. Hispanic" ~ "Hispanic",
    "4. Asian or Native Hawaiian/other Pacific Islander, non-Hispanic alone" ~ "Asian",
    .default = NA_character_
  ),
  
  feel_Asian = V202477,
  feel_Hispanic = V202479,
  feel_Black = V202480,
  feel_White = V202482,
  
  #politics
  ideology = V201200 %>% as_factor(),
  ideology_2way = V201200 %>% recode_ideology(levels = 2),
  ideology_3way = V201200 %>% recode_ideology(levels = 3),
  ideology_7way = V201200 %>% recode_ideology(levels = 7)
)

#race dist
anes2020_sub$own_race %>% table2()

#long format for feelings
anes2020_sub_long = anes2020_sub %>% 
  select(
    own_race,
    feel_Asian:feel_White,
    ideology_2way, ideology_3way, ideology_7way
  ) %>% 
  pivot_longer(
    cols = feel_Asian:feel_White,
    names_to = "target_race",
    values_to = "feeling"
  ) %>% 
  mutate(
    target_race = target_race %>% str_remove("feel_"),
    feeling = feeling %>% as.numeric()
  )

2024

anes2024 = read_dta("data/anes_timeseries_2024_stata_20250430/anes_timeseries_2024_stata_20250430.dta")
anes2024_vars = anes2024 %>% df_var_table()

Recode

#2024
anes2024_sub <- anes2024 %>%
  filter(
    between(V242514, 0, 100),
    between(V242515, 0, 100),
    between(V242516, 0, 100),
    between(V242518, 0, 100)
  )

#recode
anes2024_sub %<>% mutate(
  own_race = V241501x %>% as_factor() %>% case_match(
    "1. White, non-Hispanic" ~ "White",
    "2. Black, non-Hispanic" ~ "Black",
    "3. Hispanic" ~ "Hispanic",
    "4. Asian or Native Hawaiian/other Pacific Islander, non-Hispanic" ~ "Asian",
    .default = NA_character_
  ),
  
  feel_Asian = V242514,
  feel_Hispanic = V242515,
  feel_Black = V242516,
  feel_White = V242518,
  
  #politics
  ideology = V241177 %>% as_factor(),
  ideology_3way = V241177 %>% case_match(
    c(1, 2, 3) ~ "left/liberal",
    c(4) ~ "middle/moderate",
    c(5, 6, 7) ~ "right/conservative",
    .default = NA_character_
  ),
  ideology_2way = V241177 %>% recode_ideology(levels = 2),
  ideology_3way = V241177 %>% recode_ideology(levels = 3),
  ideology_7way = V241177 %>% recode_ideology(levels = 7)
)

#long format for feelings
anes2024_sub_long = anes2024_sub %>% 
  select(
    V240107c, V240107d, V240107b,
    own_race,
    feel_Asian:feel_White,
    ideology_2way, ideology_3way, ideology_7way
  ) %>% 
  pivot_longer(
    cols = feel_Asian:feel_White,
    names_to = "target_race",
    values_to = "feeling"
  ) %>% 
  mutate(
    target_race = target_race %>% str_remove("feel_"),
    feeling = feeling %>% as.numeric()
  )

Analysis

1964-2016

#overall
d_feel_long %>% 
  GG_group_means("feeling", groupvar = "own_race", subgroupvar = "target_race")

## Missing values were removed.

#2020
d_feel_long %>% 
  filter(year == 2016) %>% 
  GG_group_means("feeling", groupvar = "own_race", subgroupvar = "target_race")

## Missing values were removed.

#survey weights
sur_design = 
  svydesign(
    id = ~1,
    weights = ~ w_full,
    data = d_feel_long
  )

#compute means
sur_means = svyby(~feeling, ~interaction(own_race, target_race), design = sur_design, svymean, na.rm = TRUE, level = .95)

#plot
sur_means %>% 
  separate_wider_delim(
    cols = `interaction(own_race, target_race)`,
    delim = ".",
    names = c("own_race", "target_race")
  ) %>% 
  ggplot(aes(own_race, feeling, fill = target_race)) +
  geom_col(position = "dodge") +
  geom_errorbar(aes(ymin = feeling - 2*se, ymax = feeling + 2*se), width = 0.2, position = position_dodge(0.9))

2020

#without weights
anes2020_sub_long %>% 
  GG_group_means(
    "feeling",
    groupvar = "own_race",
    subgroupvar = "target_race"
  ) +
  labs(
    title = "How races rate each other",
    subtitle = "2020 ANES",
    y = "Feeling thermometer rating mean [0-100]",
    x = "Race of rater",
    fill = "Target race",
    caption = "Made by Emil Kirkegaard, @KirkegaardEmil"
  )

## Missing values were removed.

2024

# Define the survey design
survey_design <- svydesign(
  id = ~V240107c,
  strata = ~V240107d,
  weights = ~V240107b,
  data = anes2024_sub_long,
  nest = TRUE,
  single.unit.method = "centered"
)

# Compute means by group interaction (own_race and target_race) with 83.4% confidence level
df <- svyby(
  formula = ~feeling,
  by = ~own_race + target_race,  # This includes the interaction between own_race and target_race
  design = survey_design,
  FUN = svymean,
  vartype = "ci",
  level = 0.95
) %>% 
  as_tibble()

#plot
df %>% 
  ggplot(aes(own_race, feeling, fill = target_race)) +
  geom_col(position = "dodge") +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_u), width = 0.2, position = position_dodge(0.9))

#without weights
anes2024_sub_long %>% 
  GG_group_means(
    "feeling",
    groupvar = "own_race",
    subgroupvar = "target_race"
  )

## Missing values were removed.

All years

#bind data
d_all = bind_rows(
  d_feel_long %>% select(-w_full),
  anes2020_sub_long %>% mutate(year = 2020),
  anes2024_sub_long %>% select(-V240107c, -V240107d, -V240107b) %>% mutate(year = 2024)
)

#calculate ethnocentrism by year
d_all %>% 
  plyr::ddply(c("own_race", "year"), function(dd) {
    # if (dd$year[1]== 1988) browser()
    self_row = dd$own_race == dd$target_race
    tibble(
      ethnocentrism = mean(dd$feeling[self_row], na.rm = T) - mean(dd$feeling[!self_row], na.rm = T),
    )
  }) %>% 
  #filter missing data which causes lines to disappear
  miss_filter() %>% 
  #plot by year
  ggplot(aes(year, ethnocentrism, color = own_race)) +
  geom_line() +
  scale_x_continuous(breaks = seq(1960, 2050, by = 5)) +
  labs(
    title = "Ethnocentrism by year",
    subtitle = "ANES 1964-2024.",
    y = "Feeling ethnocentrism [Rating of own race - rating of other races]",
    x = "Year of survey",
    color = "Race",
    caption = "Made by Emil Kirkegaard, @KirkegaardEmil"
  )

GG_save("figs/feeling_ethnocentrism.png")

#investigate missing data issue
d_feel_sum = d_all %>% 
  plyr::ddply(c("own_race", "year", "target_race"), function(dd) {
    tibble(
      n = nrow(dd),
      feeling = mean(dd$feeling, na.rm = T)
    )
  })

#plot missing data
d_feel_sum %>% 
  ggplot(aes(year, target_race, fill = feeling)) +
  geom_tile() +
  scale_fill_continuous(low = "red", high = "blue") +
  facet_wrap("own_race", labeller = "label_both")

#spellings match
unique(d_all$target_race) %in% unique(d_all$own_race)

## [1] TRUE TRUE TRUE TRUE

#plot again
d_feel_sum %>% 
  ggplot(aes(year, feeling, color = target_race)) +
  geom_point() +
  facet_wrap("own_race")

#ratings of each race by race with facets
d_all %>% 
  #calculate means:
  group_by(year, own_race, target_race) %>% 
  summarise(
    feeling = mean(feeling, na.rm = T),
    n = n()
  ) %>% 
  #filter observations with less than 10 people
  filter(n > 10) %>%
  ggplot(aes(year, feeling, color = target_race)) +
  scale_x_continuous(breaks = seq(1900, 3000, by=10)) +
  geom_line() +
  facet_wrap("own_race", labeller = "label_both") +
  labs(
    title = "Feeling thermometer ratings of races in USA",
    y = "Feeling thermometer mean rating [0-100]",
    color = "Target",
    caption = "Made by Emil Kirkegaard, @KirkegaardEmil"
  )

## `summarise()` has grouped output by 'year', 'own_race'. You can override using
## the `.groups` argument.

GG_save("figs/race_feelings.png")

#regress out 2002 apparent bias
d_all %>% 
  group_by(year) %>% 
  summarise(
    feeling = mean(feeling, na.rm = T),
    n = n()
  ) %>% 
  ggplot(aes(year, feeling)) +
  geom_line() + 
  geom_smooth()

## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

d_all$year2002 = d_all$year == 2002
m_race_race_year = lm(feeling ~ year2002 * own_race, data = d_all)
m_race_race_year %>% summary()

## 
## Call:
## lm(formula = feeling ~ year2002 * own_race, data = d_all)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -73.86 -17.23   2.77  17.77  35.24 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     66.569      0.411  161.88   <2e-16 ***
## year2002TRUE                    -3.809      2.412   -1.58    0.114    
## own_raceBlack                    7.291      0.439   16.61   <2e-16 ***
## own_raceHispanic                 4.956      0.447   11.09   <2e-16 ***
## own_raceWhite                    0.662      0.416    1.59    0.111    
## year2002TRUE:own_raceBlack      -5.834      2.614   -2.23    0.026 *  
## year2002TRUE:own_raceHispanic   -0.775      2.691   -0.29    0.773    
## year2002TRUE:own_raceWhite       1.696      2.435    0.70    0.486    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.6 on 149809 degrees of freedom
##   (23253 observations deleted due to missingness)
## Multiple R-squared:  0.0137, Adjusted R-squared:  0.0137 
## F-statistic:  297 on 7 and 149809 DF,  p-value: <2e-16

White ideology and ratings

#check ideology distribution
d_all$ideology_2way %>% table2()

d_all$ideology_3way %>% table2()

d_all$ideology_7way %>% table2()

#get ratings of races among whites by ideology
d_all %>% 
  filter(
    !is.na(ideology_3way),
    own_race == "White"
    ) %>% 
  ggplot(aes(year, feeling, color = target_race)) +
  geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs", k = 5)) +
  facet_wrap("ideology_3way") +
  labs(
    title = "White Americans' feelings towards races",
    y = "Feeling thermometer mean rating [0-100]",
    x = "Year of survey",
    color = "Target race",
    caption = "Made by Emil Kirkegaard, @KirkegaardEmil"
  )

## Warning: Removed 6223 rows containing non-finite outside the scale range
## (`stat_smooth()`).

GG_save("figs/white_ratings_by_ideology3.png")

## Warning: Removed 6223 rows containing non-finite outside the scale range
## (`stat_smooth()`).

#7 way split
d_all %>% 
  filter(
    !is.na(ideology_7way),
    own_race == "White"
    ) %>% 
  ggplot(aes(year, feeling, color = target_race)) +
  geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs", k = 5)) +
  facet_wrap("ideology_7way") +
  labs(
    title = "White Americans' feelings towards races",
    y = "Feeling thermometer mean rating [0-100]",
    x = "Year of survey",
    color = "Target race",
    caption = "Made by Emil Kirkegaard, @KirkegaardEmil"
  )

## Warning: Removed 6223 rows containing non-finite outside the scale range
## (`stat_smooth()`).

GG_save("figs/white_ratings_by_ideology7.png")

## Warning: Removed 6223 rows containing non-finite outside the scale range
## (`stat_smooth()`).

#actively dislike, below some rating
d_all %>% 
  filter(
    !is.na(ideology_3way), 
    !is.na(feeling),
    own_race == "White"
    ) %>% 
  mutate(
    dislike = (feeling <= 30) %>% as.numeric()
  ) %>% 
  ggplot(aes(year, dislike, color = target_race)) +
  geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs", k = 5)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  facet_wrap("ideology_3way") +
  labs(
    title = "White Americans' feelings towards races",
    subtitle = "Dislike defined as rating below 30",
    y = "Feeling thermometer mean rating [0-100]",
    x = "Year of survey",
    color = "Target race",
    caption = "Made by Emil Kirkegaard, @KirkegaardEmil"
  )

GG_save("figs/white_dislike_by_ideology3.png")

#actively dislike, below some rating, 7 split
d_all %>% 
  filter(
    !is.na(ideology_7way), 
    !is.na(feeling),
    own_race == "White"
    ) %>% 
  mutate(
    dislike = (feeling <= 30) %>% as.numeric()
  ) %>% 
  ggplot(aes(year, dislike, color = target_race)) +
  geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs", k = 5)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  facet_wrap("ideology_7way") +
  labs(
    title = "White Americans' feelings towards races",
    subtitle = "Dislike defined as rating below 30",
    y = "Feeling thermometer mean rating [0-100]",
    x = "Year of survey",
    color = "Target race",
    caption = "Made by Emil Kirkegaard, @KirkegaardEmil"
  )

GG_save("figs/white_dislike_by_ideology7.png")

#disliking, all races, no ideology
d_all %>% 
  filter(
    !is.na(feeling),
    !is.na(own_race)
    ) %>% 
  mutate(
    dislike = (feeling <= 30) %>% as.numeric()
  ) %>% 
  ggplot(aes(year, dislike, color = target_race)) +
  geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs", k = 5)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  facet_wrap("own_race", labeller = "label_both") +
  labs(
    title = "Americans' dislikings of races",
    subtitle = "Dislike defined as rating below 30",
    y = "Feeling thermometer mean rating [0-100]",
    x = "Year of survey",
    color = "Target race",
    caption = "Made by Emil Kirkegaard, @KirkegaardEmil"
  )

GG_save("figs/dislike_by_race.png")

#jews
anes2024_sub %>% 
  mutate(
    feel_jews = V242149 %>% case_match(
      c(1:100) ~ V242149,
      .default = NA
    )
  ) %>% 
  filter(
    !is.na(ideology_7way)
  ) %>% 
  GG_group_means("feel_jews", "ideology_7way", type = "point") +
  labs(
    title = "Feelings towards Jews among Americans",
    subtitle = "ANES 2024, all Americans",
    y = "Feeling thermometer mean rating [0-100]",
    x = "Ideology self-placement",
    caption = "Made by Emil Kirkegaard, @KirkegaardEmil"
  )

## Missing values were removed.

GG_save("figs/jews~ideology7.png")

US race relations

15 May, 2025

Init

Functions

Data

Cumulative

Recode

2020

Recode

2024

Recode

Analysis

1964-2016

2020

2024

All years

White ideology and ratings

Meta