Boutwell et al (2017) found only minor differences in self-perceived discrimination between identified race/ethnicity groups in the USA. I predicted on Twitter that the minor difference seen would likely reflect the mean levels of cognitive ability (intelligence) among the groups, because there was a clear gradient with Asians reporting the lowest levels and Blacks the most. This document tests that prediction, as well as including skin tone as a predictor based on claims that this physical feature is the most used one for racial discrimination (colorism, see e.g. Hunter, 2007).
options(digits = 2, contrasts=c("contr.treatment", "contr.treatment"))
#libs
library(pacman)
p_load(kirkegaard, readr, purrr, foreign, dplyr, rms, lubridate, polycor)
data_frame(discrim = c(.19, .24, .27, .27, .32),
group = c("Asian", "White", "Hispanic", "American Indian", "Black"),
IQ = c(104, 100, 89, 86, 85)) %>%
GG_scatter("IQ", "discrim", case_names = "group")
Load the Add Health data. Merge it. Available from http://www.cpc.unc.edu/projects/addhealth/documentation/publicdata.
#ah files
(ah_files = dir("data", pattern = ".SAV"))
## [1] "Add Health, Peabody, Wave III.SAV"
## [2] "Add Health, Public Use Children and Parenting Data, Wave III.SAV"
## [3] "Add Health, Public Use Children and Parenting Data, Wave IV.SAV"
## [4] "Add Health, Public Use Completed Pregnancies Data, Wave III.SAV"
## [5] "Add Health, Public Use Current Pregnancies Data, Wave III.SAV"
## [6] "Add Health, Public Use Education Data, Wave III.SAV"
## [7] "Add Health, Public Use Education Data Weights, Wave III.SAV"
## [8] "Add Health, Public Use Glucose Homeostasis Data, Wave IV.SAV"
## [9] "Add Health, Public Use Graduation Data, Wave III.SAV"
## [10] "Add Health, Public Use Inflammation and Immune Function Data, Wave IV.SAV"
## [11] "Add Health, Public Use In-Home Data, Wave III.SAV"
## [12] "Add Health, Public Use In-Home Data, Wave IV.SAV"
## [13] "Add Health, Public Use Lipids Data, Wave IV.SAV"
## [14] "Add Health, Public Use Live Births Data, Wave III.SAV"
## [15] "Add Health, Public Use Live Births Data, Wave IV.SAV"
## [16] "Add Health, Public Use Pregnancy Data, Wave III.SAV"
## [17] "Add Health, Public Use Pregnancy Data, Wave IV.SAV"
## [18] "Add Health, Public Use Relationships Data, Wave III.SAV"
## [19] "Add Health, Public Use Relationships Data, Wave IV.SAV"
## [20] "Add Health, Public Use Relationships in Detail Data, Wave III.SAV"
## [21] "Add Health, Public Use Relationships (Time Segments) Data, Wave IV.SAV"
## [22] "Add Health, Public Use School Weights, Wave III.SAV"
## [23] "Add Health, Public Use Weights, Wave III.SAV"
## [24] "Add Health, Public Use Weights, Wave IV.SAV"
## [25] "National Longitudinal Study of Adolescent to Adult Health, Wave I Grand Sample Weights.SAV"
## [26] "National Longitudinal Study of Adolescent to Adult Health, Wave II Grand Sample Weights.SAV"
## [27] "National Longitudinal Study of Adolescent to Adult Health, Wave II In-Home Questionnaire Data.SAV"
## [28] "National Longitudinal Study of Adolescent to Adult Health, Wave I In-Home, In-School, and Parent Questionnaire Data.SAV"
## [29] "National Longitudinal Study of Adolescent to Adult Health, Wave II Public Use Contextual Database.SAV"
## [30] "National Longitudinal Study of Adolescent to Adult Health, Wave I Public Use Contextual Database.SAV"
## [31] "National Longitudinal Study of Adolescent to Adult Health, Wave I Public Use Network Data.SAV"
#data files to a list
ah_list = map(ah_files, function(x) {
#read and conver to df
#for some reason, the default is a list (?)
silence({read.spss("data/" + x) %>% as_data_frame})
})
#set nice names
ah_nice_names = ah_files %>%
#remove dataset overall name, with variations
str_replace(".+?Health\\, ", "") %>%
#remove public use
str_replace("Public Use ", "") %>%
#move wave to the back
(function(x) {
#remove SAV
x = str_replace(x, ".SAV", "")
#loop
map_chr(x, function(x) {
#is wave in front?
if (str_detect(x, "^Wave")) {
#which wave?
.wave = str_match(x, "(Wave [IV]+)")[, 2]
#remove from front
x = str_replace(x, "Wave [IV]+ ", "")
#add to end
x = x + ", " + .wave
return(x)
} else {
return(x)
}
})
})
#set nice names
names(ah_list) = ah_nice_names
#all have id for merging?
which(map_lgl(ah_list, ~ !"AID" %in% names(.)))
## School Weights, Wave III
## 22
#one doesn't, because it's school level data
#filter it
ah_list = ah_list[-which(map_lgl(ah_list, ~ !"AID" %in% names(.)))]
#merge to one dataset
#this causes memory leak that uses >60 GB memory!
# d = reduce(ah_list, dplyr::full_join, by = "AID")
#merge just the datasets we need
d = dplyr::full_join(ah_list$`Peabody, Wave III`,
ah_list$`In-Home, In-School, and Parent Questionnaire Data, Wave I`, by = "AID")
d = dplyr::full_join(d, ah_list$`In-Home Data, Wave IV`, by = "AID")
d = dplyr::full_join(d, ah_list$`In-Home Data, Wave III`, by = "AID")
#var table
var_table = data_frame(
var = names(d),
type = map_chr(d, typeof),
class = map_chr(d, ~class(.) %>% .[1])
)
Add Health provides little in terms of pre-coding the variables. We use the online explorer to find the variables we need.
#NA to F
NA_to_F = function(x) {
x[is.na(x)] = F
x
}
#discrim
d$discrimination = d$H4MH28
d$discrimination_binary = !d$H4MH28 %in% c("Never", "Rarely")
#tables
table2(d$discrimination)
table2(d$discrimination_binary)
#race ethnicity
#we have interview-rated already
d$OPRE = d$H4IR4 %>% plyr::revalue(c(
"Don't know" = NA,
"Refused" = NA,
"Black or African American" = "Black",
"Asian or Pacific Islander" = "Asian",
"American Indian or Alaska Native" = "American Indian"
))
## The following `from` values were not present in `x`: Don't know, Refused
#table
table2(d$OPRE)
#relevel
levels(d$OPRE)
## [1] "White" "Black" "American Indian" "Asian"
d$OPRE %<>% fct_relevel(c("White", "Black", "Asian", "American Indian"))
#self-rated is in 5 different variables
d_SPRE_w3 = d[c("H3OD4" + LETTERS[1:4], "H3OD2")] %>%
#recode to T/F
map_df(~str_detect(., "^(Yes|Marked)"))
colnames(d_SPRE_w3) = c("White", "Black", "American Indian", "Asian", "Hispanic")
#how many of each?
colSums(d_SPRE_w3, na.rm = T)
## White Black American Indian Asian
## 3376 1213 226 227
## Hispanic
## 522
#sometimes more than 1
#so we have to recode
#code the standard way
d$SPRE_w3 = plyr::adply(d_SPRE_w3, .margins = 1, function(x) {
#no data
if (all(is.na(x))) return(NA)
#recode missing data as F
x[is.na(x)] = F
#hispanic?
if (x$Hispanic) return("Hispanic")
#if only 1?
if (sum(x %>% unlist()) == 1) {
return(names(x)[which(x %>% unlist())])
}
#multi
"Multi-racial"
}, .expand = F, .id = NULL) %>% unlist()
#table
table2(d$SPRE_w3)
#set white as reference
d$SPRE_w3 %<>% fct_relevel(c("White", "Black", "Hispanic", "Asian", "American Indian", "Multi-racial"))
#alternative, in school
#self-rated is in 5 different variables
#http://www.cpc.unc.edu/projects/addhealth/documentation/ace/tool/variablecollection?VariableCollectionId=38
d_SPRE_s = d[c("S6" + LETTERS[1:5], "S4")] %>%
#recode to T/F
map_df(~str_detect(., "^(Yes|Marked)"))
colnames(d_SPRE_s) = c("White", "Black", "Asian", "American Indian", "Other", "Hispanic")
#how many of each?
colSums(d_SPRE_s, na.rm = T)
## White Black Asian American Indian
## 2868 1258 238 266
## Other Hispanic
## 354 561
#sometimes more than 1
#so we have to recode
#code the standard way
d$SPRE_s = plyr::adply(d_SPRE_s, .margins = 1, function(x) {
#no data
if (all(is.na(x))) return(NA)
#recode missing data as F
x[is.na(x)] = F
#hispanic?
if (x$Hispanic) return("Hispanic")
#if only 1?
if (sum(x %>% unlist()) == 1) {
return(names(x)[which(x %>% unlist())])
}
#multi
"Multi-racial"
}, .expand = F, .id = NULL) %>% unlist()
#table
table2(d$SPRE_s)
#factor order
d$SPRE_s %<>% fct_relevel(c("White", "Black", "Hispanic", "Asian", "American Indian", "Multi-racial"))
##Third alternative wave 1 home interview
#self-rated is in 5 different variables
d_SPRE_w1 = d[c("H1GI6" + LETTERS[1:5], "H1GI4")] %>%
#recode to T/F
map_df(~str_detect(., "^(Yes|Marked)"))
colnames(d_SPRE_w1) = c("White", "Black", "American Indian", "Asian", "Other", "Hispanic")
#how many of each?
colSums(d_SPRE_w1, na.rm = T)
## White Black American Indian Asian
## 4294 1619 236 270
## Other Hispanic
## 425 743
#sometimes more than 1
#so we have to recode
#code the standard way
d$SPRE_w1 = plyr::adply(d_SPRE_w1, .margins = 1, function(x) {
#no data
if (all(is.na(x))) return(NA)
#recode missing data as F
x[is.na(x)] = F
#hispanic?
if (x$Hispanic) return("Hispanic")
#if only 1?
if (sum(x %>% unlist()) == 1) {
return(names(x)[which(x %>% unlist())])
}
#multi
"Multi-racial"
}, .expand = F, .id = NULL) %>% unlist()
#table
table2(d$SPRE_w1)
#set white as reference
d$SPRE_w1 %<>% fct_relevel(c("White", "Black", "Hispanic", "Asian", "American Indian", "Multi-racial"))
#SPRE reliability?
table(d$SPRE_s, d$SPRE_w1) %>% prop.table(margin = 1) %>% round(2)
##
## White Black Hispanic Asian American Indian Multi-racial
## White 0.98 0.00 0.01 0.00 0.00 0.01
## Black 0.00 0.96 0.00 0.00 0.00 0.03
## Hispanic 0.11 0.11 0.73 0.01 0.01 0.02
## Asian 0.03 0.01 0.02 0.87 0.00 0.03
## American Indian 0.31 0.10 0.02 0.00 0.33 0.23
## Multi-racial 0.43 0.22 0.04 0.03 0.01 0.26
## Other 0.64 0.11 0.07 0.07 0.00 0.02
##
## Other
## White 0.00
## Black 0.01
## Hispanic 0.01
## Asian 0.04
## American Indian 0.02
## Multi-racial 0.01
## Other 0.09
table(d$SPRE_s, d$SPRE_w3) %>% prop.table(margin = 1) %>% round(2)
##
## White Black Hispanic Asian American Indian Multi-racial
## White 0.98 0.00 0.00 0.00 0.00 0.02
## Black 0.00 0.96 0.01 0.00 0.00 0.03
## Hispanic 0.12 0.11 0.73 0.02 0.00 0.02
## Asian 0.04 0.01 0.02 0.89 0.01 0.04
## American Indian 0.29 0.07 0.05 0.00 0.44 0.15
## Multi-racial 0.47 0.24 0.03 0.05 0.01 0.20
## Other 0.68 0.08 0.11 0.11 0.00 0.03
table(d$SPRE_w1, d$SPRE_w3) %>% prop.table(margin = 1) %>% round(2)
##
## White Black Hispanic Asian American Indian Multi-racial
## White 0.97 0.00 0.01 0.00 0.00 0.02
## Black 0.01 0.96 0.01 0.00 0.00 0.03
## Hispanic 0.05 0.01 0.92 0.01 0.01 0.01
## Asian 0.01 0.00 0.04 0.90 0.01 0.04
## American Indian 0.07 0.00 0.07 0.00 0.82 0.04
## Multi-racial 0.33 0.23 0.02 0.05 0.04 0.32
## Other 0.24 0.30 0.14 0.24 0.00 0.08
#using W3 as it is closest in time
d$SPRE = d$SPRE_w3
#congruency of self and other
d %$% table(OPRE, SPRE) %>% prop.table(margin = 1) %>% unclass() %>% write_clipboard()
## White Black Hispanic Asian American Indian Multi-racial
## White 0.84 0.00 0.12 0.01 0.00 0.03
## Black 0.01 0.91 0.03 0.00 0.01 0.05
## Asian 0.01 0.01 0.06 0.79 0.01 0.11
## American Indian 0.06 0.00 0.33 0.08 0.47 0.06
#IQ (Peabody)
#this automatically removed the non-integer values
d$IQ = d$PVTSTD3L %>% as.character() %>% as.integer()
## Warning in function_list[[k]](value): NAs introduced by coercion
#plot
GG_denhist(d, "IQ")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1801 rows containing non-finite values (stat_bin).
## Warning: Removed 1801 rows containing non-finite values (stat_density).
#recode below 60 to NA
d[(d$IQ < 60) %>% NA_to_F, "IQ"] = NA
#plot
GG_denhist(d, "IQ")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1850 rows containing non-finite values (stat_bin).
## Warning: Removed 1850 rows containing non-finite values (stat_density).
#skin tone/color
d$skin_brightness = d$H3IR17 %>% factor()
d$skin_brightness_numeric = d$H3IR17 %>% as.numeric()
#table
table2(d$skin_brightness)
levels(d$skin_brightness)
## [1] "Black" "Dark brown" "Medium brown" "Light brown"
## [5] "White"
#sex
d$sex = d$BIO_SEX3
#table
table2(d$sex)
#income
d$H4EC2 %>% table2()
d$income = d$H4EC2 %>% as.character() %>% plyr::mapvalues(from = c("Don't know", "Refused"), to = rep(NA, 2)) %>% as.numeric()
GG_denhist(d$income)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1658 rows containing non-finite values (stat_bin).
## Warning: Removed 1658 rows containing non-finite values (stat_density).
#log recode
d$income_log = d$income %>% {log10(. + 1)}
GG_denhist(d$income_log)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1658 rows containing non-finite values (stat_bin).
## Warning: Removed 1658 rows containing non-finite values (stat_density).
#deal with outliers
#convert to NA
d$income_log_alt1 = d$income_log
d$income_log_alt1[d$income_log_alt1 < 2] = NA
GG_denhist(d$income_log_alt1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2029 rows containing non-finite values (stat_bin).
## Warning: Removed 2029 rows containing non-finite values (stat_density).
#winsorize
d$income_log_alt2 = d$income_log %>% winsorise(Inf, 2)
GG_denhist(d$income_log_alt2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1658 rows containing non-finite values (stat_bin).
## Warning: Removed 1658 rows containing non-finite values (stat_density).
#age
#given as two variables
d$birth_date = (sprintf("%s %d", d$H4OD1M, d$H4OD1Y %>% as.integer()) %>% parse_date("%B %Y"))
## Warning: 1390 parsing failures.
## row # A tibble: 5 x 4 col row col expected actual expected <int> <int> <chr> <chr> actual 1 1 NA date like %B %Y NA NA row 2 4 NA date like %B %Y NA NA col 3 7 NA date like %B %Y NA NA expected 4 10 NA date like %B %Y NA NA actual 5 18 NA date like %B %Y NA NA
## ... ................. ... .................................... ........ .................................... ...... .................................... ... .................................... ... .................................... ........ .................................... ...... ....................................
## See problems(...) for more details.
#subtract 2008-june to get age
#because wave 4 was collected in 2008 june
d$age = (parse_date("June 2008", "%B %Y") - d$birth_date) %>% as.numeric() %>% divide_by(365.24)
GG_denhist(d$age)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1390 rows containing non-finite values (stat_bin).
## Warning: Removed 1390 rows containing non-finite values (stat_density).
#standardized units
#annoying to have to do this manually...
d2 = d[c("SPRE", "OPRE", "skin_brightness", "skin_brightness_numeric", "sex", "IQ", "income", "income_log", "income_log_alt1", "income_log_alt2", "age", "discrimination_binary")] %>% df_standardize()
## Skipped SPRE because it is a factor.
## Skipped OPRE because it is a factor.
## Skipped skin_brightness because it is a factor.
## Skipped sex because it is a factor.
## Skipped discrimination_binary because it is a logical.
#income codings IQ, sex
silence(polycor::hetcor(d2[-c(1:3)] %>% as.data.frame(),
use = "pairwise.complete.obs")) %>%
.$correlations %>%
write_clipboard()
## Skin brightness numeric Sex IQ Income
## Skin brightness numeric 1.00 0.02 0.33 0.07
## Sex 0.02 1.00 -0.05 -0.21
## IQ 0.33 -0.05 1.00 0.15
## Income 0.07 -0.21 0.15 1.00
## Income log 0.04 -0.22 0.14 0.42
## Income log alt1 0.10 -0.23 0.17 0.62
## Income log alt2 0.06 -0.24 0.16 0.51
## Age 0.00 -0.07 0.04 0.08
## Discrimination binary -0.10 -0.07 -0.14 -0.15
## Income log Income log alt1 Income log alt2 Age
## Skin brightness numeric 0.04 0.10 0.06 0.00
## Sex -0.22 -0.23 -0.24 -0.07
## IQ 0.14 0.17 0.16 0.04
## Income 0.42 0.62 0.51 0.08
## Income log 1.00 0.97 0.98 0.02
## Income log alt1 0.97 1.00 0.99 0.09
## Income log alt2 0.98 0.99 1.00 0.03
## Age 0.02 0.09 0.03 1.00
## Discrimination binary -0.09 -0.15 -0.11 0.01
## Discrimination binary
## Skin brightness numeric -0.10
## Sex -0.07
## IQ -0.14
## Income -0.15
## Income log -0.09
## Income log alt1 -0.15
## Income log alt2 -0.11
## Age 0.01
## Discrimination binary 1.00
#by subgroup
#blacks
silence(polycor::hetcor(d2 %>% filter(SPRE == "Black") %>% .[-c(1:3)] %>% as.data.frame(),
use = "pairwise.complete.obs")) %>%
.$correlations %>%
write_clipboard()
## Skin brightness numeric Sex IQ Income
## Skin brightness numeric 1.00 0.17 0.15 0.07
## Sex 0.17 1.00 -0.02 -0.19
## IQ 0.15 -0.02 1.00 0.22
## Income 0.07 -0.19 0.22 1.00
## Income log 0.05 -0.02 0.18 0.49
## Income log alt1 0.10 -0.14 0.27 0.69
## Income log alt2 0.06 -0.05 0.22 0.59
## Age -0.02 -0.09 -0.01 0.04
## Discrimination binary 0.00 -0.10 -0.09 -0.05
## Income log Income log alt1 Income log alt2 Age
## Skin brightness numeric 0.05 0.10 0.06 -0.02
## Sex -0.02 -0.14 -0.05 -0.09
## IQ 0.18 0.27 0.22 -0.01
## Income 0.49 0.69 0.59 0.04
## Income log 1.00 0.96 0.98 -0.01
## Income log alt1 0.96 1.00 0.98 0.05
## Income log alt2 0.98 0.98 1.00 0.00
## Age -0.01 0.05 0.00 1.00
## Discrimination binary -0.14 -0.13 -0.15 0.04
## Discrimination binary
## Skin brightness numeric 0.00
## Sex -0.10
## IQ -0.09
## Income -0.05
## Income log -0.14
## Income log alt1 -0.13
## Income log alt2 -0.15
## Age 0.04
## Discrimination binary 1.00
#hispanics
silence(polycor::hetcor(d2 %>% filter(SPRE == "Hispanic") %>% .[-c(1:3)] %>% as.data.frame(),
use = "pairwise.complete.obs")) %>%
.$correlations %>%
write_clipboard()
## Skin brightness numeric Sex IQ Income
## Skin brightness numeric 1.00 0.05 0.16 -0.04
## Sex 0.05 1.00 -0.05 -0.10
## IQ 0.16 -0.05 1.00 0.09
## Income -0.04 -0.10 0.09 1.00
## Income log -0.06 -0.19 0.08 0.38
## Income log alt1 0.02 -0.23 0.07 0.60
## Income log alt2 -0.06 -0.21 0.09 0.47
## Age 0.03 -0.10 0.09 0.03
## Discrimination binary -0.06 -0.10 -0.15 -0.12
## Income log Income log alt1 Income log alt2 Age
## Skin brightness numeric -0.06 0.02 -0.06 0.03
## Sex -0.19 -0.23 -0.21 -0.10
## IQ 0.08 0.07 0.09 0.09
## Income 0.38 0.60 0.47 0.03
## Income log 1.00 0.96 0.99 -0.01
## Income log alt1 0.96 1.00 0.98 0.10
## Income log alt2 0.99 0.98 1.00 0.00
## Age -0.01 0.10 0.00 1.00
## Discrimination binary -0.05 -0.03 -0.06 0.00
## Discrimination binary
## Skin brightness numeric -0.06
## Sex -0.10
## IQ -0.15
## Income -0.12
## Income log -0.05
## Income log alt1 -0.03
## Income log alt2 -0.06
## Age 0.00
## Discrimination binary 1.00
#plots
d2 %>% filter(SPRE == "Black") %>% GG_scatter("IQ", "skin_brightness_numeric")
d2 %>% filter(SPRE == "Hispanic") %>% GG_scatter("IQ", "skin_brightness_numeric", text_pos = "bl")
Now we try a number of models. The main finding from the primary study was that there were only slight differences by social race. Here we have two different codings for social race, namely based on self report (self-identified race/ethnicity, SPRE) and interview report (interview-identified race/ethnicity, OPRE). We also have a skin tone measurement coded as quasi-continuous and alternatively as an ordinal. Finally, we have IQ based on the Peabody verbal test.
First, we attempt to establish the validity of social race and skin tone. Second, we do the same for skin tone (two codings). Then we fit the main model with IQ included. Then we try a few variations on this to check for robustness.
#social race baseline
rms::lrm(discrimination_binary ~ OPRE + sex + age, data = d2)
## Frequencies of Missing Values Due to Each Variable
## discrimination_binary OPRE sex
## 0 1395 1622
## age
## 1390
##
## Logistic Regression Model
##
## rms::lrm(formula = discrimination_binary ~ OPRE + sex + age,
## data = d2)
##
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 4203 LR chi2 11.33 R2 0.004 C 0.533
## FALSE 3201 d.f. 5 g 0.119 Dxy 0.066
## TRUE 1002 Pr(> chi2) 0.0451 gr 1.126 gamma 0.073
## max |deriv| 2e-08 gp 0.022 tau-a 0.024
## Brier 0.181
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -1.2253 0.0588 -20.84 <0.0001
## OPRE=Black 0.2727 0.0832 3.28 0.0010
## OPRE=Asian 0.2218 0.1969 1.13 0.2598
## OPRE=American Indian 0.1381 0.3874 0.36 0.7215
## sex=Female -0.0229 0.0731 -0.31 0.7540
## age 0.0131 0.0362 0.36 0.7173
##
rms::lrm(discrimination_binary ~ SPRE + sex + age, data = d2)
## Frequencies of Missing Values Due to Each Variable
## discrimination_binary SPRE sex
## 0 1622 1622
## age
## 1390
##
## Logistic Regression Model
##
## rms::lrm(formula = discrimination_binary ~ SPRE + sex + age,
## data = d2)
##
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 4208 LR chi2 16.22 R2 0.006 C 0.536
## FALSE 3206 d.f. 7 g 0.135 Dxy 0.072
## TRUE 1002 Pr(> chi2) 0.0232 gr 1.144 gamma 0.079
## max |deriv| 1e-13 gp 0.025 tau-a 0.026
## Brier 0.181
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -1.2328 0.0619 -19.90 <0.0001
## SPRE=Black 0.3377 0.0870 3.88 0.0001
## SPRE=Hispanic 0.0456 0.1247 0.37 0.7143
## SPRE=Asian -0.0053 0.2160 -0.02 0.9806
## SPRE=American Indian -0.3262 0.4939 -0.66 0.5089
## SPRE=Multi-racial 0.0150 0.1980 0.08 0.9396
## sex=Female -0.0212 0.0731 -0.29 0.7723
## age 0.0146 0.0362 0.40 0.6865
##
#skin tone baseline
rms::lrm(discrimination_binary ~ skin_brightness_numeric + sex + age, data = d2)
## Frequencies of Missing Values Due to Each Variable
## discrimination_binary skin_brightness_numeric sex
## 0 1629 1622
## age
## 1390
##
## Logistic Regression Model
##
## rms::lrm(formula = discrimination_binary ~ skin_brightness_numeric +
## sex + age, data = d2)
##
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 4203 LR chi2 11.11 R2 0.004 C 0.531
## FALSE 3203 d.f. 3 g 0.117 Dxy 0.061
## TRUE 1000 Pr(> chi2) 0.0112 gr 1.124 gamma 0.067
## max |deriv| 1e-13 gp 0.022 tau-a 0.022
## Brier 0.181
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -1.1567 0.0542 -21.33 <0.0001
## skin_brightness_numeric -0.1184 0.0355 -3.34 0.0009
## sex=Female -0.0130 0.0731 -0.18 0.8586
## age 0.0156 0.0362 0.43 0.6673
##
rms::lrm(discrimination_binary ~ skin_brightness + sex + age, data = d2)
## Frequencies of Missing Values Due to Each Variable
## discrimination_binary skin_brightness sex
## 0 1629 1622
## age
## 1390
##
## Logistic Regression Model
##
## rms::lrm(formula = discrimination_binary ~ skin_brightness +
## sex + age, data = d2)
##
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 4203 LR chi2 12.55 R2 0.004 C 0.534
## FALSE 3203 d.f. 6 g 0.129 Dxy 0.068
## TRUE 1000 Pr(> chi2) 0.0508 gr 1.137 gamma 0.074
## max |deriv| 1e-13 gp 0.024 tau-a 0.025
## Brier 0.181
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -0.9503 0.1394 -6.82 <0.0001
## skin_brightness=Dark brown 0.0036 0.1878 0.02 0.9849
## skin_brightness=Medium brown 0.0046 0.1713 0.03 0.9786
## skin_brightness=Light brown -0.1624 0.1747 -0.93 0.3526
## skin_brightness=White -0.2926 0.1416 -2.07 0.0388
## sex=Female -0.0161 0.0732 -0.22 0.8263
## age 0.0149 0.0362 0.41 0.6799
##
#main model
rms::lrm(discrimination_binary ~ OPRE + skin_brightness_numeric + IQ + sex + age, data = d2)
## Frequencies of Missing Values Due to Each Variable
## discrimination_binary OPRE skin_brightness_numeric
## 0 1395 1629
## IQ sex age
## 1850 1622 1390
##
## Logistic Regression Model
##
## rms::lrm(formula = discrimination_binary ~ OPRE + skin_brightness_numeric +
## IQ + sex + age, data = d2)
##
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 4020 LR chi2 31.65 R2 0.012 C 0.559
## FALSE 3064 d.f. 7 g 0.230 Dxy 0.118
## TRUE 956 Pr(> chi2) <0.0001 gr 1.259 gamma 0.120
## max |deriv| 2e-13 gp 0.042 tau-a 0.043
## Brier 0.180
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -1.1531 0.0685 -16.83 <0.0001
## OPRE=Black 0.0435 0.1608 0.27 0.7866
## OPRE=Asian 0.0850 0.2075 0.41 0.6822
## OPRE=American Indian -0.1253 0.4137 -0.30 0.7620
## skin_brightness_numeric -0.0507 0.0697 -0.73 0.4666
## IQ -0.1744 0.0394 -4.42 <0.0001
## sex=Female -0.0463 0.0752 -0.62 0.5381
## age 0.0242 0.0372 0.65 0.5147
##
#SPRE istead of OPRE
rms::lrm(discrimination_binary ~ SPRE + skin_brightness_numeric + IQ + sex + age, data = d2)
## Frequencies of Missing Values Due to Each Variable
## discrimination_binary SPRE skin_brightness_numeric
## 0 1622 1629
## IQ sex age
## 1850 1622 1390
##
## Logistic Regression Model
##
## rms::lrm(formula = discrimination_binary ~ SPRE + skin_brightness_numeric +
## IQ + sex + age, data = d2)
##
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 4025 LR chi2 34.95 R2 0.013 C 0.563
## FALSE 3069 d.f. 9 g 0.241 Dxy 0.126
## TRUE 956 Pr(> chi2) <0.0001 gr 1.273 gamma 0.128
## max |deriv| 5e-14 gp 0.044 tau-a 0.046
## Brier 0.180
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -1.1641 0.0756 -15.41 <0.0001
## SPRE=Black 0.1692 0.1751 0.97 0.3340
## SPRE=Hispanic -0.0788 0.1370 -0.58 0.5650
## SPRE=Asian -0.1658 0.2329 -0.71 0.4764
## SPRE=American Indian -0.4326 0.5032 -0.86 0.3900
## SPRE=Multi-racial 0.0122 0.2047 0.06 0.9526
## skin_brightness_numeric -0.0058 0.0716 -0.08 0.9352
## IQ -0.1790 0.0400 -4.47 <0.0001
## sex=Female -0.0495 0.0752 -0.66 0.5104
## age 0.0262 0.0372 0.71 0.4802
##
#alternative specification of skin tone as categorical
rms::lrm(discrimination_binary ~ OPRE + skin_brightness + IQ + sex + age, data = d2)
## Frequencies of Missing Values Due to Each Variable
## discrimination_binary OPRE skin_brightness
## 0 1395 1629
## IQ sex age
## 1850 1622 1390
##
## Logistic Regression Model
##
## rms::lrm(formula = discrimination_binary ~ OPRE + skin_brightness +
## IQ + sex + age, data = d2)
##
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 4020 LR chi2 32.50 R2 0.012 C 0.560
## FALSE 3064 d.f. 10 g 0.233 Dxy 0.120
## TRUE 956 Pr(> chi2) 0.0003 gr 1.263 gamma 0.122
## max |deriv| 1e-13 gp 0.043 tau-a 0.044
## Brier 0.180
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -1.0449 0.2196 -4.76 <0.0001
## OPRE=Black 0.0060 0.1696 0.04 0.9715
## OPRE=Asian 0.0795 0.2182 0.36 0.7156
## OPRE=American Indian -0.1660 0.4189 -0.40 0.6918
## skin_brightness=Dark brown 0.0288 0.1944 0.15 0.8824
## skin_brightness=Medium brown 0.0413 0.1830 0.23 0.8216
## skin_brightness=Light brown -0.1356 0.2124 -0.64 0.5230
## skin_brightness=White -0.1418 0.2231 -0.64 0.5249
## IQ -0.1746 0.0395 -4.41 <0.0001
## sex=Female -0.0457 0.0753 -0.61 0.5443
## age 0.0234 0.0372 0.63 0.5287
##
#IQ x race interaction
rms::lrm(discrimination_binary ~ OPRE * IQ + skin_brightness_numeric + sex + age, data = d2)
## Frequencies of Missing Values Due to Each Variable
## discrimination_binary OPRE IQ
## 0 1395 1850
## skin_brightness_numeric sex age
## 1629 1622 1390
##
## Logistic Regression Model
##
## rms::lrm(formula = discrimination_binary ~ OPRE * IQ + skin_brightness_numeric +
## sex + age, data = d2)
##
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 4020 LR chi2 36.17 R2 0.013 C 0.562
## FALSE 3064 d.f. 10 g 0.237 Dxy 0.124
## TRUE 956 Pr(> chi2) <0.0001 gr 1.267 gamma 0.126
## max |deriv| 1e-06 gp 0.043 tau-a 0.045
## Brier 0.180
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -1.1547 0.0688 -16.77 <0.0001
## OPRE=Black 0.0608 0.1625 0.37 0.7081
## OPRE=Asian 0.0391 0.2170 0.18 0.8571
## OPRE=American Indian -0.8631 0.7014 -1.23 0.2184
## IQ -0.1619 0.0491 -3.30 0.0010
## skin_brightness_numeric -0.0497 0.0699 -0.71 0.4768
## sex=Female -0.0475 0.0753 -0.63 0.5282
## age 0.0238 0.0372 0.64 0.5222
## OPRE=Black * IQ 0.0120 0.0860 0.14 0.8890
## OPRE=Asian * IQ -0.1811 0.1753 -1.03 0.3017
## OPRE=American Indian * IQ -0.8903 0.5427 -1.64 0.1009
##
What do we see? Both codings of social race had some quite small validity for predicting self-perceived discrimination. This was also true for skin tone, coded both ways. The validity was quite small, around 1% explained variance. When we added IQ to the model, neither social race nor skin tone had any detectable validity, no matter how they were coded.
We use income because it is the most plausible target of discrimination. Education has rampant affirmative action which would pollute the pattern. Income has no direct affirmative action.
#OPRE models
lm(income_log_alt1 ~ OPRE + age + sex, data = d2) %>% MOD_summary() %>% write_clipboard()
## V1 V2 V3 V4 V5 V6
## 1 coefs
## 2 Predictor Beta SE CI lower CI upper
## 3 OPRE: White 0.00
## 4 OPRE: Black -0.20 0.04 -0.27 -0.12
## 5 OPRE: Asian 0.32 0.09 0.14 0.49
## 6 OPRE: American Indian -0.28 0.19 -0.65 0.08
## 7 age 0.08 0.02 0.05 0.11
## 8 sex: Male 0.00
## 9 sex: Female -0.34 0.03 -0.41 -0.28
## 10
## 11 meta
## 12 outcome N df R2 R2-adj R2-cv
## 13 income_log_alt1 3725 3719 0.051 0.05
## 14
## 15 etas
## 16 Predictor Eta Eta partial
## 17 OPRE 0.11 0.11
## 18 age 0.08 0.08
## 19 sex 0.17 0.17
lm(income_log_alt1 ~ OPRE + skin_brightness_numeric + age + sex, data = d2) %>% MOD_summary() %>% write_clipboard()
## V1 V2 V3 V4 V5 V6
## 1 coefs
## 2 Predictor Beta SE CI lower CI upper
## 3 OPRE: White 0.00
## 4 OPRE: Black 0.01 0.07 -0.13 0.15
## 5 OPRE: Asian 0.39 0.09 0.21 0.57
## 6 OPRE: American Indian -0.21 0.19 -0.57 0.16
## 7 skin brightness numeric 0.10 0.03 0.04 0.16
## 8 age 0.08 0.02 0.05 0.11
## 9 sex: Male 0.00
## 10 sex: Female -0.35 0.03 -0.41 -0.29
## 11
## 12 meta
## 13 outcome N df R2 R2-adj R2-cv
## 14 income_log_alt1 3720 3713 0.054 0.052
## 15
## 16 etas
## 17 Predictor Eta Eta partial
## 18 OPRE 0.07 0.07
## 19 skin brightness numeric 0.05 0.06
## 20 age 0.08 0.08
## 21 sex 0.17 0.18
lm(income_log_alt1 ~ OPRE + skin_brightness_numeric + age + sex + IQ, data = d2) %>% MOD_summary() %>% write_clipboard()
## V1 V2 V3 V4 V5 V6
## 1 coefs
## 2 Predictor Beta SE CI lower CI upper
## 3 OPRE: White 0.00
## 4 OPRE: Black 0.02 0.07 -0.13 0.16
## 5 OPRE: Asian 0.41 0.09 0.23 0.60
## 6 OPRE: American Indian -0.12 0.19 -0.50 0.26
## 7 skin brightness numeric 0.05 0.03 -0.01 0.11
## 8 age 0.07 0.02 0.04 0.10
## 9 sex: Male 0.00
## 10 sex: Female -0.35 0.03 -0.41 -0.28
## 11 IQ 0.15 0.02 0.12 0.19
## 12
## 13 meta
## 14 outcome N df R2 R2-adj R2-cv
## 15 income_log_alt1 3576 3568 0.074 0.072
## 16
## 17 etas
## 18 Predictor Eta Eta partial
## 19 OPRE 0.07 0.08
## 20 skin brightness numeric 0.02 0.03
## 21 age 0.07 0.08
## 22 sex 0.17 0.18
## 23 IQ 0.15 0.15
#SPRE models
lm(income_log_alt1 ~ SPRE + age + sex, data = d2) %>% MOD_summary() %>% write_clipboard()
## V1 V2 V3 V4 V5 V6
## 1 coefs
## 2 Predictor Beta SE CI lower CI upper
## 3 SPRE: White 0.00
## 4 SPRE: Black -0.23 0.04 -0.31 -0.16
## 5 SPRE: Hispanic 0.00 0.05 -0.11 0.11
## 6 SPRE: Asian 0.39 0.09 0.21 0.58
## 7 SPRE: American Indian -0.66 0.20 -1.04 -0.27
## 8 SPRE: Multi-racial -0.09 0.08 -0.25 0.08
## 9 age 0.08 0.02 0.05 0.11
## 10 sex: Male 0.00
## 11 sex: Female -0.35 0.03 -0.41 -0.28
## 12
## 13 meta
## 14 outcome N df R2 R2-adj R2-cv
## 15 income_log_alt1 3730 3722 0.057 0.055
## 16
## 17 etas
## 18 Predictor Eta Eta partial
## 19 SPRE 0.13 0.13
## 20 age 0.08 0.08
## 21 sex 0.17 0.18
lm(income_log_alt1 ~ SPRE + skin_brightness_numeric + age + sex, data = d2) %>% MOD_summary() %>% write_clipboard()
## V1 V2 V3 V4 V5 V6
## 1 coefs
## 2 Predictor Beta SE CI lower CI upper
## 3 SPRE: White 0.00
## 4 SPRE: Black -0.07 0.08 -0.22 0.08
## 5 SPRE: Hispanic 0.05 0.06 -0.07 0.16
## 6 SPRE: Asian 0.45 0.10 0.26 0.64
## 7 SPRE: American Indian -0.58 0.20 -0.97 -0.18
## 8 SPRE: Multi-racial -0.04 0.09 -0.21 0.13
## 9 skin brightness numeric 0.08 0.03 0.02 0.14
## 10 age 0.08 0.02 0.05 0.11
## 11 sex: Male 0.00
## 12 sex: Female -0.35 0.03 -0.41 -0.29
## 13
## 14 meta
## 15 outcome N df R2 R2-adj R2-cv
## 16 income_log_alt1 3725 3716 0.059 0.057
## 17
## 18 etas
## 19 Predictor Eta Eta partial
## 20 SPRE 0.10 0.10
## 21 skin brightness numeric 0.04 0.04
## 22 age 0.08 0.08
## 23 sex 0.17 0.18
lm(income_log_alt1 ~ SPRE + skin_brightness_numeric + age + sex + IQ, data = d2) %>% MOD_summary() %>% write_clipboard()
## V1 V2 V3 V4 V5 V6
## 1 coefs
## 2 Predictor Beta SE CI lower CI upper
## 3 SPRE: White 0.00
## 4 SPRE: Black 0.00 0.08 -0.15 0.16
## 5 SPRE: Hispanic 0.15 0.06 0.03 0.26
## 6 SPRE: Asian 0.52 0.10 0.32 0.71
## 7 SPRE: American Indian -0.48 0.20 -0.87 -0.09
## 8 SPRE: Multi-racial -0.05 0.09 -0.22 0.12
## 9 skin brightness numeric 0.04 0.03 -0.02 0.10
## 10 age 0.07 0.02 0.04 0.11
## 11 sex: Male 0.00
## 12 sex: Female -0.35 0.03 -0.41 -0.29
## 13 IQ 0.16 0.02 0.13 0.19
## 14
## 15 meta
## 16 outcome N df R2 R2-adj R2-cv
## 17 income_log_alt1 3581 3571 0.081 0.078
## 18
## 19 etas
## 20 Predictor Eta Eta partial
## 21 SPRE 0.11 0.11
## 22 skin brightness numeric 0.02 0.02
## 23 age 0.07 0.08
## 24 sex 0.17 0.18
## 25 IQ 0.15 0.15
#both
lm(income_log_alt1 ~ SPRE + OPRE + skin_brightness_numeric + age + sex + IQ, data = d2) %>% MOD_summary() %>% write_clipboard()
## V1 V2 V3 V4 V5 V6
## 1 coefs
## 2 Predictor Beta SE CI lower CI upper
## 3 SPRE: White 0.00
## 4 SPRE: Black -0.19 0.13 -0.43 0.06
## 5 SPRE: Hispanic 0.14 0.06 0.02 0.25
## 6 SPRE: Asian 0.44 0.17 0.12 0.77
## 7 SPRE: American Indian -0.58 0.23 -1.04 -0.12
## 8 SPRE: Multi-racial -0.12 0.09 -0.31 0.06
## 9 OPRE: White 0.00
## 10 OPRE: Black 0.23 0.12 -0.01 0.46
## 11 OPRE: Asian 0.11 0.16 -0.20 0.42
## 12 OPRE: American Indian 0.12 0.22 -0.32 0.56
## 13 skin brightness numeric 0.06 0.03 -0.01 0.12
## 14 age 0.07 0.02 0.04 0.10
## 15 sex: Male 0.00
## 16 sex: Female -0.35 0.03 -0.41 -0.29
## 17 IQ 0.16 0.02 0.13 0.19
## 18
## 19 meta
## 20 outcome N df R2 R2-adj R2-cv
## 21 income_log_alt1 3576 3563 0.081 0.078
## 22
## 23 etas
## 24 Predictor Eta Eta partial
## 25 SPRE 0.08 0.09
## 26 OPRE 0.03 0.03
## 27 skin brightness numeric 0.03 0.03
## 28 age 0.07 0.07
## 29 sex 0.17 0.18
## 30 IQ 0.15 0.15