About

This is the R notebook for the study:

A study of stereotype accuracy in the Netherlands: immigrant crime, occupational sex distribution, and provincial income inequality

Init

#options
options(
  digits = 3,
  scipen = 99,
  contrasts=c("contr.treatment", "contr.treatment")
  )

#devtools::install_github("deleetdk/kirkegaard")
#packages
library(kirkegaard)

## Loading required package: tidyverse

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +

load_packages(
  readxl, 
  rms, 
  mediation, 
  lavaan, 
  lavaanPlot, 
  glmnet, 
  mirt, 
  polycor,
  GGally,
  stringdist,
  BMA,
  tidymodels,
  patchwork,
  conflicted
  )

## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loading required package: mvtnorm
## 
## Attaching package: 'mvtnorm'
## 
## The following object is masked from 'package:kirkegaard':
## 
##     standardize
## 
## Loading required package: sandwich
## mediation: Causal Mediation Analysis
## Version: 4.5.0
## 
## 
## Attaching package: 'mediation'
## 
## The following object is masked from 'package:psych':
## 
##     mediate
## 
## This is lavaan 0.6-15
## lavaan is FREE software! Please report any bugs.
## 
## Attaching package: 'lavaan'
## 
## The following object is masked from 'package:psych':
## 
##     cor2cov
## 
## Loaded glmnet 4.1-7
## Loading required package: stats4
## Loading required package: lattice
## 
## Attaching package: 'polycor'
## 
## The following object is masked from 'package:psych':
## 
##     polyserial
## 
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## 
## Attaching package: 'stringdist'
## 
## The following object is masked from 'package:magrittr':
## 
##     extract
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## Loading required package: survival
## Loading required package: leaps
## Loading required package: robustbase
## 
## Attaching package: 'robustbase'
## 
## The following object is masked from 'package:survival':
## 
##     heart
## 
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.7-4)
## 
## 
## Attaching package: 'rrcov'
## 
## The following object is masked from 'package:lavaan':
## 
##     getCov
## 
## Registered S3 method overwritten by 'parsnip':
##   method          from 
##   print.nullmodel vegan
## ── Attaching packages ────────────────────────────────────── tidymodels 1.1.0 ──
## ✔ broom        1.0.5     ✔ rsample      1.1.1
## ✔ dials        1.2.0     ✔ tune         1.1.1
## ✔ infer        1.0.4     ✔ workflows    1.1.3
## ✔ modeldata    1.1.0     ✔ workflowsets 1.0.1
## ✔ parsnip      1.1.0     ✔ yardstick    1.2.0
## ✔ recipes      1.0.6     
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ psych::%+%()             masks ggplot2::%+%()
## ✖ scales::alpha()          masks psych::alpha(), ggplot2::alpha()
## ✖ recipes::averages()      masks kirkegaard::averages()
## ✖ scales::discard()        masks purrr::discard()
## ✖ recipes::discretize()    masks kirkegaard::discretize()
## ✖ Matrix::expand()         masks tidyr::expand()
## ✖ stringdist::extract()    masks magrittr::extract(), tidyr::extract()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ recipes::fixed()         masks stringr::fixed()
## ✖ assertthat::has_name()   masks tibble::has_name()
## ✖ kirkegaard::is_logical() masks purrr::is_logical()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ Matrix::pack()           masks tidyr::pack()
## ✖ MASS::select()           masks dplyr::select()
## ✖ magrittr::set_names()    masks purrr::set_names()
## ✖ yardstick::spec()        masks readr::spec()
## ✖ Hmisc::src()             masks dplyr::src()
## ✖ recipes::step()          masks stats::step()
## ✖ Hmisc::summarize()       masks dplyr::summarize()
## ✖ parsnip::translate()     masks Hmisc::translate()
## ✖ Matrix::unpack()         masks tidyr::unpack()
## ✖ recipes::update()        masks stats4::update(), lavaan::update(), Matrix::update(), stats::update()
## • Use tidymodels_prefer() to resolve common conflicts.
## 
## Attaching package: 'patchwork'
## 
## The following object is masked from 'package:MASS':
## 
##     area

#function overlaps
conflicts_prefer(kirkegaard::standardize)

## [conflicted] Will prefer kirkegaard::standardize over any other package.

conflicts_prefer(kirkegaard::`+`)

## [conflicted] Will prefer kirkegaard::`+` over any other package.

conflicts_prefer(dplyr::filter)

## [conflicted] Will prefer dplyr::filter over any other package.

conflicts_prefer(magrittr::set_names)

## [conflicted] Will prefer magrittr::set_names over any other package.

#theme
theme_set(theme_bw())

#main accuracy metrics
main_accu_metrics = c("pearson_r", "mean_abs_error", "mean_error", "sd_error")
main_accu_metrics2 = c("pearson_r", "mean_abs_error", "mean_error", "mean_error_abs", "sd_error", "sd_error_abs")

Functions

#log transform mean
#convert 0 to 0.1, to mirror the scale to 10
log(c(.1, 10))

## [1] -2.3  2.3

log(c(1/3, 3))

## [1] -1.1  1.1

log_transform_mean = function(x) {
  
  #is vector?
  if (is.vector(x)) {
      #convert
      logged = x %>% winsorise(lower = .1) %>% log()
      
      #take means
      logmean = mean(logged, na.rm = T)
      
      #deconvert
      backconverted_mean = exp(logmean)
      
      return(backconverted_mean)
  }
  
  #convert
  logged = x %>% map(winsorise, lower = 0.1) %>% map_df(log)
  
  #take means
  logmeans = colMeans(logged, na.rm = T)
  
  #deconvert
  backconverted_means = exp(logmeans)
  
  #add names if exist
  if (!is.null(colnames(x))) names(backconverted_means) = colnames(x)
  
  backconverted_means
}

#test
c(.3, .5, .7) %>% log_transform_mean()

## [1] 0.472

tibble(
  a = c(.3, .5, .7),
  b = c(1.5, 2, 2.5),
  c = c(0, 0, 1)
) %>% log_transform_mean()

##     a     b     c 
## 0.472 1.957 0.215

#set self to names
set_self_names = function(x) {
  names(x) = x %>% str_legalize()
  x
}

#z-score and then average
#this removes scale differences between people
z_mean = function(x) {
  # browser()
  x %>% 
    apply(standardize, MARGIN = 1) %>% 
    rowMeans(na.rm = T)
}

#ad hoc function to get best model from cv.glmnet
get_glmnet_coefs = function(x) {
  #get coefs at two criteria
  coefs_min <- coef(x, s = "lambda.min")
  coefs_1se <- coef(x, s = "lambda.1se")
  
  #get values
  d1 = data.frame(predictor = coefs_min@Dimnames[[1]][coefs_min@i + 1], beta_min = coefs_min@x)
  d2 = data.frame(predictor = coefs_1se@Dimnames[[1]][coefs_1se@i + 1], beta_1se = coefs_1se@x)
  
  #merge
  full_join(d1, d2, by = "predictor")
}

select = dplyr::select

#clean bad data
clean_bad_data = function(x) {
  mapvalues(x, from = c("CONSENT REVOKED", "DATA EXPIRED", "N/A"), to = rep(NA, 3), warn_missing = F)
}

#standardized country name
#also fix some manually inputted values, which are not caught properly
standardized_countries = function(x) {
  mapvalues(x, 
            c("NL", "suri", "Cuba, Holguin", "Nederlands Indie", "Onbekend", "Antillen", "?", "NEDERLAND", "Nederlands Indië"),
            c("Netherlands", "Suriname", "Cuba", "Indonesia", NA, "Netherlands Antillen", NA, "Netherlands", "Indonesia"), warn_missing = F) %>% 
    pu_translate() %>% 
    pu_translate(reverse = T)
}
c("Germany", "Duitsland", "NL", "Nederland", "Netherlands") %>% standardized_countries()

## [1] "Germany"     "Germany"     "Netherlands" "Netherlands" "Netherlands"

#convenience plot function
#remove the _ from labels on the plot automatically since otherwise the labels are too long
#add colors to the correlation matrix to make heatmap
ggpairs_easy = function(x) {
  #inner function, but we dont use this one anyway
  #https://stackoverflow.com/questions/45873483/ggpairs-plot-with-heatmap-of-correlation-values
  my_fn <- function(data, mapping, method="p", use="pairwise", ...){

              # grab data
              x <- eval_data_col(data, mapping$x)
              y <- eval_data_col(data, mapping$y)

              # calculate correlation
              corr <- cor(x, y, method=method, use=use)

              # calculate colour based on correlation value
              # Here I have set a correlation of minus one to blue, 
              # zero to white, and one to red 
              # Change this to suit: possibly extend to add as an argument of `my_fn`
              colFn <- colorRampPalette(c("blue", "white", "red"), interpolate ='spline')
              fill <- colFn(100)[findInterval(corr, seq(-1, 1, length=100))]

              ggally_cor(data = data, mapping = mapping, color = "black", title = F, ...) + 
                theme_void() +
                theme(panel.background = element_rect(fill=fill))
  }
  
  #alternative inner function
  #https://stackoverflow.com/questions/62196950/ggpairs-plot-with-heatmap-of-correlation-values-with-significance-stars-and-cust
  #I removed the adaptive size and stars default since we dont want those
  cor_fun <- function(data, mapping, method="pearson", use="pairwise", ndp=2, sz=5, stars=F, ...){
  
  # grab data
  x <- eval_data_col(data, mapping$x)
  y <- eval_data_col(data, mapping$y)
  
  # calculate correlation: for significance stars
  corr <- cor.test(x, y, method=method)
  est <- corr$estimate
  lb.size <- sz* abs(est)
  
  # get significance stars
  if(stars){
    #determine asterisks
    stars <- c("***", "**", "*", "")[findInterval(corr$p.value, c(0, 0.001, 0.01, 0.05, 1))]
    #force digits to be specific number
    lbl <- paste0(str_round(est, digits = ndp), stars)
  }else{
    #force digits to be specific number
    lbl <- str_round(est, digits = ndp)
  }
  
  # calculate correlation: for colored tiles
  corr <- cor(x, y, method=method, use=use)
  
  # calculate color based on correlation value
  # corr = -1 => blue, 
  # corr =  0 => white, 
  # corr = +1 => red, 
  colFn <- colorRampPalette(c("blue", "white", "red"), interpolate ='spline')
  fill <- colFn(100)[findInterval(corr, seq(-1, 1, length=100))]
  
  ggplot(data = data, mapping = mapping, ...) + 
    theme_void() +
    annotate("text",
             x=mean(x, na.rm=TRUE),
             y=mean(y, na.rm=TRUE),
             label=lbl,
             # size=lb.size,
             ...) +
    theme(panel.background = element_rect(fill=fill,  # to fill background of panel with color
                                          colour=NA), # to remove border of panel
          panel.grid.major = element_blank())
  }
  
  #do the call
  GGally::ggpairs(
    x,
    columnLabels = colnames(x) %>% str_clean(),
    upper = list(continuous = cor_fun)
    # lower = list(continuous = wrap("smooth", method = "loess"))
    ) +
    theme(
      #black font
      text = element_text(color = "black")
    )
}

#test this should give us scatter plots, distributions, and heatmap correlations
ggpairs_easy(iris[-5])

#extract BMA results, tidy
tidy_BMA_summary = function(x) {
  browser()
  xrows = nrow(x)
  x %>% 
    .[-c((xrows-3):xrows), 1:3] %>% 
    as.data.frame() %>% 
    df_as_num() %>% 
    set_colnames(c("PIP", "post_mean", "post_SD")) %>% 
    rownames_to_column(var = "term") %>% 
    mutate(
      term = str_replace(term, ".x$", "")
    )
}

#representative cases method
#try it on cars, what is the most representative car?
mpg %>% 
  select(displ, cyl, cty, hwy) %>% 
  calc_row_representativeness() %>% 
  arrange(mean)

#PCA plot should show about the same
prcomp(mpg %>% select(displ, cyl, cty, hwy), scale. = T) %>% tidy() %>% mutate(PC = "PC" + PC) %>% pivot_wider(names_from = PC, values_from = value) %>% 
  ggplot(aes(PC1, PC2)) +
  geom_point() +
  geom_text(aes(label = row, color = (row %in% c(42, 43)))) +
  scale_color_discrete("Is modal case? (42, 43)")

GG_save("figs/case_representativeness.png")

#combined plot function
accu_combo_plot = function(d, variables = main_accu_metrics, x_var = NULL) {
  #list of plots
  #do we have X?
  if (is.null(x_var)) {
    plots = purrr::map(variables, ~GG_denhist(d, var = ., vline = "median"))
  } else {
    plots = purrr::map(variables, ~GG_scatter(d, x_var = x_var, y_var = .))
  }
  
  #combine
  do.call(wrap_plots, args = plots)
}

#equal loadings FA
#a kind of extension of UWFA
elfa = function(d) {
 
  #make z scores
  d_z = df_standardize(d) %>% as.matrix()
  
  #optimize function
  optim_inner = function(par) {
    # 
    #composite scores
    comp = t(t(d_z) * c(par)) %>% rowMeans(na.rm = T)
    
    #correlations
    cors = wtd.cors(bind_cols(d_z, comp = comp))[names(d_z), "comp"]
    
    #absolute vals
    cors_abs = abs(cors)
    
    #sum of squares from mean, sum of abs pars minus 1
    sum((cors_abs-mean(cors_abs))^2) + abs(sum(abs(par)) - 1)
  }
  browser()
  #optimize
  empirical_weights = optim(
    par = runif(ncol(d)), #begin with random weights
    fn = optim_inner,
    method = "BFGS",
    control = list(maxit = 1e4)
  )
   
  #make composite
  comp = t(t(d_z) * c(empirical_weights$par)) %>% rowMeans(na.rm = T)
  
  #correlations
  cors = wtd.cors(bind_cols(d_z, comp = comp))[colnames(d_z), "comp"]
  
  #return
  list(
    scores = comp,
    loadings = tibble(
      variable = colnames(d_z),
      weights = c(empirical_weights$par),
      cor = cors
    ),
    fit = empirical_weights
  )
}

#test on iris data
#this was difficulter than expected
wtd.cors(iris[1:4])

##              Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length        1.000      -0.118        0.872       0.818
## Sepal.Width        -0.118       1.000       -0.428      -0.366
## Petal.Length        0.872      -0.428        1.000       0.963
## Petal.Width         0.818      -0.366        0.963       1.000

# elfa(iris[1:4])

df_remove_NA_vars = function(x, threshold = 1) {
  miss_col = miss_by_var(x, prop = T)
  
  x[miss_col <= threshold]
}

iris$nothing = NA
iris %>% df_remove_NA_vars()

Data

#putterman weil migration matrix
#manual changes: 
#Rep Congo Zaire to COD from ZAR
#Romania from ROM to ROU
#https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3
PW = read_excel("data/matrix version 1.1.xls")

#african countries ISOs
#https://en.wikipedia.org/wiki/Sub-Saharan_Africa
SSA_ISO = c(
  "AGO",
  "BDI",
  "COD",
  "COG",
  "CMR",
  "CAF",
  "TCD",
  "GNQ",
  "GAB",
  "KEN",
  "NGA",
  "NER",
  "RWA",
  "TZA",
  "UGA",
  "STP",
  "SDN",
  "ERI",
  "ETH",
  "SOM",
  "BWA",
  "COM",
  "LSO",
  "MDG",
  "MWI",
  "MUS",
  "MOZ",
  "NAM",
  "ZAF",
  "SWZ",
  "ZMB",
  "ZWE",
  "BEN",
  "MLI",
  "BFA",
  "CPV",
  "CIV",
  "GMB",
  "GHA",
  "GIN",
  "GNB",
  "LBR",
  "MRT",
  "SLE",
  "TGO"
)
length(SSA_ISO)

## [1] 45

#african percentage for each
PW$SSA = PW[str_to_lower(SSA_ISO)] %>% rowSums()

#data from prior study
origins_orig = read_rds("data/KirkegaardKruiger2020.rds")
kirkkruiger2020_cases = readxl::read_excel("data/dutch_survey.xlsx")

#add SSA origin
origins_orig = origins_orig %>% left_join(PW %>% select(wbcode, SSA) %>% mutate(ISO = str_to_upper(wbcode)) %>% select(-wbcode), by = c("ISO" = "ISO"))

#rename Tunisia to fix spelling
origins_orig$origin[origins_orig$ISO == "TUN"] = "Tunisia"

#add manual values for some
#Soviet, use Russia
origins_orig$SSA[origins_orig$ISO == "SUN"] = PW$SSA[PW$wbcode == "RUS"]
#Czechslovak, use average
origins_orig$SSA[origins_orig$ISO == "CSK"] = PW$SSA[PW$wbcode %in% c("CZE", "SVK")] %>% mean()

#read survey responses
prolific = read_csv("data/prolific 20200818191928-SurveyExport final.csv") %>% 
  mutate(
    source = "Prolific",
    ) %>% 
  #remove duplicated IDs
  arrange(`Wat is uw Prolific ID?`) %>% 
  filter(!duplicated(`Wat is uw Prolific ID?`)) %>% 
  #remove empty columns
  df_remove_NA_vars()

## New names:
## Rows: 441 Columns: 304
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (148): Status, Language, Referer, SessionID, User Agent, IP Address, Co... dbl
## (144): Response ID, Longitude, Latitude, Wat is de faculteit van 4?, Me... lgl
## (10): Contact ID, Legacy Comments, Comments, Tags, In welk land is uw ... dttm
## (2): Time Started, Date Submitted
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `In welk land is de moeder van uw moeder geboren?:Familie-achtergrond` -> `In
##   welk land is de moeder van uw moeder geboren?:Familie-achtergrond...31`
## • `In welk land is de vader van uw moeder geboren?:Familie-achtergrond` -> `In
##   welk land is de vader van uw moeder geboren?:Familie-achtergrond...32`
## • `In welk land is de moeder van uw vader geboren?:Familie-achtergrond` -> `In
##   welk land is de moeder van uw vader geboren?:Familie-achtergrond...33`
## • `In welk land is de vader van uw vader geboren?:Familie-achtergrond` -> `In
##   welk land is de vader van uw vader geboren?:Familie-achtergrond...34`
## • `In welk land is de moeder van uw moeder geboren?:Familie-achtergrond` -> `In
##   welk land is de moeder van uw moeder geboren?:Familie-achtergrond...37`
## • `In welk land is de vader van uw moeder geboren?:Familie-achtergrond` -> `In
##   welk land is de vader van uw moeder geboren?:Familie-achtergrond...38`
## • `In welk land is de moeder van uw vader geboren?:Familie-achtergrond` -> `In
##   welk land is de moeder van uw vader geboren?:Familie-achtergrond...39`
## • `In welk land is de vader van uw vader geboren?:Familie-achtergrond` -> `In
##   welk land is de vader van uw vader geboren?:Familie-achtergrond...40`
## • `lende:Selecteer de twee woorden die hetzelfde kunnen betekenen.` ->
##   `lende:Selecteer de twee woorden die hetzelfde kunnen betekenen....75`
## • `lende:Selecteer de twee woorden die hetzelfde kunnen betekenen.` ->
##   `lende:Selecteer de twee woorden die hetzelfde kunnen betekenen....114`

#survey meta
prolific_meta = read_csv("data/prolific_export_5e529eaebcdb1e2adaf9f5e6 final.csv") %>% 
  df_legalize_names() %>% 
  arrange(participant_id) %>% 
  filter(!duplicated(participant_id)) %>% 
  #remove empty columns
  df_remove_NA_vars() %>%
  mutate(
    sex = Sex
  )

## Rows: 473 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (11): session_id, participant_id, status, entered_code, Country of Birt...
## dbl   (5): time_taken, age, num_approvals, num_rejections, prolific_score
## dttm  (3): started_datetime, completed_date_time, reviewed_at_datetime
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

#ensure the same people
id_intersect = intersect(prolific$`Wat is uw Prolific ID?`, prolific_meta$participant_id)
prolific %<>% filter(`Wat is uw Prolific ID?` %in% id_intersect)
prolific_meta %<>% filter(participant_id %in% id_intersect)

assert_that(all(prolific$`Wat is uw Prolific ID?` == prolific_meta$participant_id))

## [1] TRUE

#merge in the metadata cols of itnerest
prolific_meta_cols = c("sex", "time_taken", "age", "Employment_Status", "Nationality", 
"Country_of_Birth", "Student_Status", "First_Language")
prolific = cbind(prolific, prolific_meta %>% select(!!prolific_meta_cols))

#fix prolific variables
#loop and fix the revoked data encodings to NA
for (v in colnames(prolific)) prolific[[v]] = prolific[[v]] %>% clean_bad_data()

#fix up messed up variables
prolific %<>% mutate(
  #country of birth, in Dutch, standardize to English
  #these need merging first since they are split for some reason
  Country_of_Birth = Country_of_Birth %>% standardized_countries(),
  
  Country_of_Birth_m = miss_fill(`In welk land is uw moeder geboren?\t:Familie-achtergrond`, `In welk land is uw moeder geboren?:Familie-achtergrond`) %>% standardized_countries(),
  
  Country_of_Birth_f = miss_fill(`In welk land is uw vader geboren?\t:Familie-achtergrond`, `In welk land is uw vader geboren?:Familie-achtergrond`) %>% standardized_countries(),
  
  Country_of_Birth_mm = miss_fill(`In welk land is de moeder van uw moeder geboren?:Familie-achtergrond...31`, `In welk land is de moeder van uw moeder geboren?:Familie-achtergrond...37`) %>% standardized_countries(),
  
    Country_of_Birth_mf = miss_fill(`In welk land is de moeder van uw vader geboren?:Familie-achtergrond...33`, `In welk land is de moeder van uw vader geboren?:Familie-achtergrond...39`) %>% standardized_countries(),
  
  Country_of_Birth_fm = miss_fill(`In welk land is de vader van uw moeder geboren?:Familie-achtergrond...32`, `In welk land is de vader van uw moeder geboren?:Familie-achtergrond...38`) %>% standardized_countries(),
  
  Country_of_Birth_ff = miss_fill(`In welk land is de vader van uw vader geboren?:Familie-achtergrond...40`, `In welk land is de vader van uw vader geboren?:Familie-achtergrond...34`) %>% standardized_countries()
  
)

## No exact match: Indonesie
## No exact match: nederland
## No exact match: Netherland
## No exact match: Nederland6
## No exact match: Kroatie
## No exact match: Nederlan
## No exact match: Bosnië
## No exact match: Schotland
## No exact match: Sovjet-Unie
## No exact match: Engeland
## No exact match: ethiopia
## No exact match: Macedonie
## Best fuzzy match found: Indonesie -> Indonesia with distance 1.00
## Best fuzzy match found: nederland -> Nederland with distance 1.00
## Best fuzzy match found: Netherland -> Netherlands with distance 1.00
## Best fuzzy match found: Nederland6 -> Nederland with distance 1.00
## Best fuzzy match found: Kroatie -> Kroatia with distance 1.00
## Best fuzzy match found: Nederlan -> Nederland with distance 1.00
## Best fuzzy match found: Bosnië -> Bosnia with distance 1.00
## Best fuzzy match found: Schotland -> Scotland with distance 1.00
## Best fuzzy match found: Sovjet-Unie -> Soviet Union with distance 4.00
## Best fuzzy match found: Engeland -> England with distance 1.00
## Best fuzzy match found: ethiopia -> Ethiopia with distance 1.00
## Best fuzzy match found: Macedonie -> Macedonia with distance 1.00
## No exact match: nederland
## No exact match: china
## No exact match: Indonesie
## No exact match: Bosnië
## No exact match: Nederland, Rotterdam
## No exact match: eritrea
## No exact match: Netherlands Antillen
## No exact match: Macedonie
## No exact match: suriname
## Best fuzzy match found: nederland -> Nederland with distance 1.00
## Best fuzzy match found: china -> China with distance 1.00
## Best fuzzy match found: Indonesie -> Indonesia with distance 1.00
## Best fuzzy match found: Bosnië -> Bosnia with distance 1.00
## Best fuzzy match found: Nederland, Rotterdam -> Nederlandene with distance 10.00
## Best fuzzy match found: eritrea -> Eritrea with distance 1.00
## Best fuzzy match found: Netherlands Antillen -> Netherlands Antilles with distance 1.00
## Best fuzzy match found: Macedonie -> Macedonia with distance 1.00
## Best fuzzy match found: suriname -> Suriname with distance 1.00
## No exact match: Indonesie
## No exact match: nederland
## No exact match: Kroatie
## No exact match: Bosnië
## No exact match: Indonesir
## No exact match: Schotland
## No exact match: Sovjet-Unie
## No exact match: Macedonie
## Best fuzzy match found: Indonesie -> Indonesia with distance 1.00
## Best fuzzy match found: nederland -> Nederland with distance 1.00
## Best fuzzy match found: Kroatie -> Kroatia with distance 1.00
## Best fuzzy match found: Bosnië -> Bosnia with distance 1.00
## Best fuzzy match found: Indonesir -> Indonesia with distance 1.00
## Best fuzzy match found: Schotland -> Scotland with distance 1.00
## Best fuzzy match found: Sovjet-Unie -> Soviet Union with distance 4.00
## Best fuzzy match found: Macedonie -> Macedonia with distance 1.00
## No exact match: nederland
## No exact match: Indonesie
## No exact match: Bosnië
## No exact match: Engeland
## No exact match: Belgie
## No exact match: Nederland, Rotterdam
## No exact match: -
## No exact match: eritrea
## No exact match: Denenarken
## No exact match: Netherlands Antillen
## No exact match: Macedonie
## No exact match: suriname
## No exact match: DE
## Best fuzzy match found: nederland -> Nederland with distance 1.00
## Best fuzzy match found: Indonesie -> Indonesia with distance 1.00
## Best fuzzy match found: Bosnië -> Bosnia with distance 1.00
## Best fuzzy match found: Engeland -> England with distance 1.00
## Best fuzzy match found: Belgie -> Belgio with distance 1.00
## Best fuzzy match found: Nederland, Rotterdam -> Nederlandene with distance 10.00
## Best fuzzy match found: - -> UK with distance 2.00
## Best fuzzy match found: eritrea -> Eritrea with distance 1.00
## Best fuzzy match found: Denenarken -> Denemarken with distance 1.00
## Best fuzzy match found: Netherlands Antillen -> Netherlands Antilles with distance 1.00
## Best fuzzy match found: Macedonie -> Macedonia with distance 1.00
## Best fuzzy match found: suriname -> Suriname with distance 1.00
## No exact match: Indonesie
## No exact match: nederland
## No exact match: Kroatie
## No exact match: Bosnië
## No exact match: Schotland
## No exact match: Antigua en Barbuda
## No exact match: Sovjet-Unie
## No exact match: Macedonie
## No exact match: suriname
## Best fuzzy match found: Indonesie -> Indonesia with distance 1.00
## Best fuzzy match found: nederland -> Nederland with distance 1.00
## Best fuzzy match found: Kroatie -> Kroatia with distance 1.00
## Best fuzzy match found: Bosnië -> Bosnia with distance 1.00
## Best fuzzy match found: Schotland -> Scotland with distance 1.00
## Best fuzzy match found: Antigua en Barbuda -> Antigua and Barbuda with distance 2.00
## Best fuzzy match found: Sovjet-Unie -> Soviet Union with distance 4.00
## Best fuzzy match found: Macedonie -> Macedonia with distance 1.00
## Best fuzzy match found: suriname -> Suriname with distance 1.00
## No exact match: nederland
## No exact match: Indonesie
## No exact match: Bosnië
## No exact match: Engeland
## No exact match: Nederland, Rotterdam
## No exact match: eritrea
## No exact match: Afghanistan,
## No exact match: Netherlands Antillen
## No exact match: Macedonie
## No exact match: suriname
## No exact match: Nederly
## Best fuzzy match found: nederland -> Nederland with distance 1.00
## Best fuzzy match found: Indonesie -> Indonesia with distance 1.00
## Best fuzzy match found: Bosnië -> Bosnia with distance 1.00
## Best fuzzy match found: Engeland -> England with distance 1.00
## Best fuzzy match found: Nederland, Rotterdam -> Nederlandene with distance 10.00
## Best fuzzy match found: eritrea -> Eritrea with distance 1.00
## Best fuzzy match found: Afghanistan, -> Afghanistan with distance 1.00
## Best fuzzy match found: Netherlands Antillen -> Netherlands Antilles with distance 1.00
## Best fuzzy match found: Macedonie -> Macedonia with distance 1.00
## Best fuzzy match found: suriname -> Suriname with distance 1.00
## Best fuzzy match found: Nederly -> Nederland with distance 3.00

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Country_of_Birth_mf = `%>%`(...)`.
## Caused by warning:
## ! There were multiple equally good matches for DE: UAE | UK. All with distance 2.00

#manually overwrite names for vocab
vocab_item_names = rep(str_glue("vocab_{1:20}"), each = 5) + rep(str_glue("_option_{1:5}"), times = 20)
colnames(prolific)[43:142] = vocab_item_names

#Survee data
survee = read_csv("data/20200814171141-SurveyExport.csv") %>% 
  mutate(source = "Survee") %>% 
  #fix data
  mutate(
    #fix column type, UK uses alpha-numeric postcodes
    Postal = Postal %>% as.character(),
    
    #sex and age
    age = `URL Variable: age`,
    sex = `URL Variable: gender` %>% mapvalues(1:2, c("Male", "Female")),
    
    #first language
    First_Language = `Wat is uw moedertaal?` %>% str_to_lower() %>% mapvalues(c("nederlands", "nederland", "fries", "marokkaans"), c("Dutch", "Dutch", "Frisian", "Moroccan")),
    
    #student status
    Student_Status = `Bent u op dit moment een student?` %>% mapvalues(c("Ja", "Nee"), c("Yes", "No")),
    
    #employment status
    Employment_Status = `Wat beschrijft u het beste?` %>% 
      mapvalues(c("Ik ben werkzoekende.", "Ik heb een deeltijdbaan.", "Ik heb een voltijdsbaan.", "Anders.", "Ik heb geen betaald werk (ik ben bijvoorbeeld huismoeder of -vader, gepensioneerd of gehandicapt)."),
                c("Unemployed (and job seeking)", "Part-Time", "Full-Time", "Other", "Not in paid work (e.g. homemaker', 'retired or disabled)")),
    
    #country of birth, in Dutch, standardize to English
    Country_of_Birth = `In welk land bent u geboren?:Familie-achtergrond` %>% standardized_countries(),
    Country_of_Birth_m = `In welk land is uw moeder geboren?\t:Familie-achtergrond` %>% standardized_countries(),
    Country_of_Birth_f = `In welk land is uw vader geboren?\t:Familie-achtergrond` %>% standardized_countries(),
    Country_of_Birth_mm = `In welk land is de moeder van uw moeder geboren?:Familie-achtergrond...75` %>% standardized_countries(),
    Country_of_Birth_fm = `In welk land is de moeder van uw vader geboren?:Familie-achtergrond...77` %>% standardized_countries(),
    Country_of_Birth_mf = `In welk land is de vader van uw moeder geboren?:Familie-achtergrond...76` %>% standardized_countries(),
    Country_of_Birth_ff = `In welk land is de vader van uw vader geboren?:Familie-achtergrond...78` %>% standardized_countries(),
    
    #time taken in seconds
    time_taken = (`Date Submitted` - `Time Started`) %>% multiply_by(60) %>% as.numeric()
  ) %>% 
  #remove empty columns
  df_remove_NA_vars()

## New names:
## Rows: 264 Columns: 350
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (148): Status, Language, Referer, SessionID, User Agent, IP Address, Co... dbl
## (149): Response ID, Longitude, Latitude, Postal, URL Variable: age, URL... lgl
## (51): Contact ID, Legacy Comments, Comments, Tags, Invite Status, Emai... dttm
## (2): Time Started, Date Submitted
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## No exact match: Azoren
## Best fuzzy match found: Azoren -> Azores with distance 1.00
## No exact match: Hongkong SAR
## Best fuzzy match found: Hongkong SAR -> Hongkong with distance 4.00
## No exact match: Hongkong SAR
## Best fuzzy match found: Hongkong SAR -> Hongkong with distance 4.00
## • `Country` -> `Country...16`
## • `City` -> `City...17`
## • `State/Region` -> `State/Region...18`
## • `City` -> `City...36`
## • `State/Region` -> `State/Region...37`
## • `Country` -> `Country...41`
## • `In welk land is de moeder van uw moeder geboren?:Familie-achtergrond` -> `In
##   welk land is de moeder van uw moeder geboren?:Familie-achtergrond...75`
## • `In welk land is de vader van uw moeder geboren?:Familie-achtergrond` -> `In
##   welk land is de vader van uw moeder geboren?:Familie-achtergrond...76`
## • `In welk land is de moeder van uw vader geboren?:Familie-achtergrond` -> `In
##   welk land is de moeder van uw vader geboren?:Familie-achtergrond...77`
## • `In welk land is de vader van uw vader geboren?:Familie-achtergrond` -> `In
##   welk land is de vader van uw vader geboren?:Familie-achtergrond...78`
## • `In welk land is de moeder van uw moeder geboren?:Familie-achtergrond` -> `In
##   welk land is de moeder van uw moeder geboren?:Familie-achtergrond...82`
## • `In welk land is de vader van uw moeder geboren?:Familie-achtergrond` -> `In
##   welk land is de vader van uw moeder geboren?:Familie-achtergrond...83`
## • `In welk land is de moeder van uw vader geboren?:Familie-achtergrond` -> `In
##   welk land is de moeder van uw vader geboren?:Familie-achtergrond...84`
## • `In welk land is de vader van uw vader geboren?:Familie-achtergrond` -> `In
##   welk land is de vader van uw vader geboren?:Familie-achtergrond...85`
## • `lende:Selecteer de twee woorden die hetzelfde kunnen betekenen.` ->
##   `lende:Selecteer de twee woorden die hetzelfde kunnen betekenen....120`
## • `lende:Selecteer de twee woorden die hetzelfde kunnen betekenen.` ->
##   `lende:Selecteer de twee woorden die hetzelfde kunnen betekenen....159`

#manually overwrite names for vocab
colnames(survee)[88:187] = vocab_item_names

#merge the data sources
survey = bind_rows(
  prolific,
  survee
)

#control questions
controls = survey %>% 
  select(contains("Gelieve de slider")) %>% 
  set_colnames("control_" + 1:4) %>% 
  map_df(as.numeric) %>% 
  score_items(c(87, 20, 2, 9))
controls$control_sum = rowSums(controls)
controls$prolific_id = c(prolific_meta$participant_id, rep(NA, nrow(survee)))
controls$survee_id = c(rep(NA, nrow(prolific)), survee$`URL Variable: id`)
controls$source = survey$source

#raw controls
controls_raw = survey %>% 
  select(contains("Gelieve de slider")) %>% 
  set_colnames("control_raw_" + 1:4) %>% 
  map_df(as.numeric)

#precision in controls?
controls_raw %>% 
  mutate(id = 1:n(),
         source = survey$source) %>% 
  pivot_longer(cols = starts_with("control_")) %>% 
  ggplot() +
  # geom_histogram() +
  stat_count(mapping = aes(x = value, y = ..prop.., group = 1), width = 1) +
  # geom_density(alpha = .5) +
  facet_wrap(c("source", "name"), nrow = 2)

## Warning: The dot-dot notation (`..prop..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(prop)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Warning: `position_stack()` requires non-overlapping x intervals
## `position_stack()` requires non-overlapping x intervals
## `position_stack()` requires non-overlapping x intervals
## `position_stack()` requires non-overlapping x intervals

GG_save("figs/controls_dist.png")

## Warning: `position_stack()` requires non-overlapping x intervals
## `position_stack()` requires non-overlapping x intervals
## `position_stack()` requires non-overlapping x intervals
## `position_stack()` requires non-overlapping x intervals

#compute deviance score
controls_deviance = survey %>% 
  select(contains("Gelieve de slider")) %>% 
  set_colnames("deviance_" + 1:4) %>% 
  map_df(as.numeric) %>% 
  {
    #convert to matrix, transpose, subtract, absolute, transpose back, add sum
    y = (t(as.matrix(.)) - c(87, 20, 2, 9)) %>% abs() %>% t() %>% as_tibble()
    y$deviance_sum = y %>% rowSums()
    y
  }

#controls merge
controls_merged = cbind(
  controls_raw,
  controls,
  controls_deviance,
  time_taken = survey$time_taken/60
)

#plot deviance
controls_merged %>% 
  ggplot(aes(time_taken, deviance_sum, color = source)) +
  geom_point() +
  # geom_count() +
  # geom_smooth(se = F) +
  scale_x_log10("Time taken in minutes (log10 scale)") +
  scale_y_continuous("Deviance score", breaks = seq(0, 200, 20)) +
  geom_hline(yintercept = 20, linetype = "dotted")

  # ggtitle("Time spent on survey vs. control question performance")
GG_save("figs/deviance_time.png")

#control scores by source
table(controls$source)

## 
## Prolific   Survee 
##      421      264

table(source = controls$source, controls_correct = controls$control_sum)

##           controls_correct
## source       0   1   2   3   4
##   Prolific   4   2   7  21 387
##   Survee    46   3  13  13 189

table(source = controls$source, controls_correct = controls$control_sum) %>% prop.table(margin = 1)

##           controls_correct
## source           0       1       2       3       4
##   Prolific 0.00950 0.00475 0.01663 0.04988 0.91924
##   Survee   0.17424 0.01136 0.04924 0.04924 0.71591

table(source = controls$source, controls_correct = controls$control_sum == 4) %>% prop.table(margin = 1)

##           controls_correct
## source      FALSE   TRUE
##   Prolific 0.0808 0.9192
##   Survee   0.2841 0.7159

ols(control_sum ~ source, data = controls)

## Linear Regression Model
## 
## ols(formula = control_sum ~ source, data = controls)
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs     685    LR chi2     77.61    R2       0.107    
## sigma1.0461    d.f.            1    R2 adj   0.106    
## d.f.    683    Pr(> chi2) 0.0000    g        0.353    
## 
## Residuals
## 
##     Min      1Q  Median      3Q     Max 
## -3.8646  0.1354  0.1354  0.8788  0.8788 
## 
## 
##               Coef    S.E.   t     Pr(>|t|)
## Intercept      3.8646 0.0510 75.80 <0.0001 
## source=Survee -0.7434 0.0821 -9.05 <0.0001

#using deviance < 20
table(source = controls$source, controls_correct = controls_deviance$deviance_sum < 20)

##           controls_correct
## source     FALSE TRUE
##   Prolific    10  411
##   Survee      60  204

table(source = controls$source, controls_correct = controls_deviance$deviance_sum < 20) %>% prop.table(margin = 1)

##           controls_correct
## source      FALSE   TRUE
##   Prolific 0.0238 0.9762
##   Survee   0.2273 0.7727

ols(deviance_sum ~ source, data = controls_merged)

## Linear Regression Model
## 
## ols(formula = deviance_sum ~ source, data = controls_merged)
## 
##                  Model Likelihood    Discrimination    
##                        Ratio Test           Indexes    
## Obs      685    LR chi2     71.21    R2       0.099    
## sigma20.7313    d.f.            1    R2 adj   0.097    
## d.f.     683    Pr(> chi2) 0.0000    g        6.679    
## 
## Residuals
## 
##     Min      1Q  Median      3Q     Max 
## -15.912 -15.912  -1.834  -1.834 116.688 
## 
## 
##               Coef    S.E.   t    Pr(>|t|)
## Intercept      1.8337 1.0104 1.81 0.0700  
## source=Survee 14.0780 1.6275 8.65 <0.0001

lrm(deviance_sum > 20 ~ source, data = controls_merged)

## Logistic Regression Model
## 
## lrm(formula = deviance_sum > 20 ~ source, data = controls_merged)
## 
##                        Model Likelihood      Discrimination    Rank Discrim.    
##                              Ratio Test             Indexes          Indexes    
## Obs           685    LR chi2      74.37      R2       0.213    C       0.763    
##  FALSE        615    d.f.             1      R2(1,685)0.102    Dxy     0.525    
##  TRUE          70    Pr(> chi2) <0.0001    R2(1,188.5)0.322    gamma   0.847    
## max |deriv| 4e-08                            Brier    0.082    tau-a   0.097    
## 
##               Coef    S.E.   Wald Z Pr(>|Z|)
## Intercept     -3.7160 0.3201 -11.61 <0.0001 
## source=Survee  2.4922 0.3521   7.08 <0.0001

#write controls to file for rejection and approval
controls_merged %>% writexl::write_xlsx("data/controls.xlsx")

#subset by quality
survey_orig = survey
survey = survey[controls_merged$deviance_sum < 20, ]

#variable table
survey_table = df_var_table(survey)

#true disposable incomes
prov_income = read_excel("data/Disposable standardised household income.xlsx", range = "A5:C18") %>% 
  df_legalize_names() %>% 
  mutate(prov_income = Disposable_standardised_household_income_excluding_students_x1000 * 1000) %>% 
  .[-1, ]

#occupation data
occu = read_excel("data/professions dataset.xlsx", range = "A4:G58") %>% 
  df_legalize_names() %>% 
  mutate(
    male_pct = Menpct * 100,
    short_english = str_replace(Survey_entry_English_translation, "\\(.+?\\)", "")
  )

Renaming etc.

#immigrant crime
stereo_immi = survey %>% 
  select(contains("Nederland kent vele groepen immigranten. Schuif voor elk land van herkomst de slider")) %>% 
  select(-contains("Gelieve de slider")) %>% 
  map_df(as.numeric)

#fix colnames to ISO
countries_ISO = stereo_immi %>% 
  names() %>% 
  str_match("[^:]+") %>% 
  as.vector() %>% 
  pu_translate()

## No exact match: Democratische Republiek van Congo

## No exact match: Filippijnen

## No exact match: Voormalig Joegoslavië

## No exact match: Kaapverdië

## No exact match: Nederlandse Antillen

## No exact match: Nieuw Zeeland

## Best fuzzy match found: Democratische Republiek van Congo -> Democratic Republic of Congo with distance 8.00

## Best fuzzy match found: Filippijnen -> Filipijnen with distance 1.00

## Best fuzzy match found: Voormalig Joegoslavië -> Joegoslavië with distance 10.00

## Best fuzzy match found: Kaapverdië -> Kapp Verde with distance 5.00

## Best fuzzy match found: Nederlandse Antillen -> Netherlands Antilles with distance 4.00

## Best fuzzy match found: Nieuw Zeeland -> Nieuw-Zeeland with distance 1.00

names(stereo_immi) = countries_ISO

#sort
countries_ISO = sort(countries_ISO)
stereo_immi = stereo_immi[countries_ISO]

#occupations and sex
stereo_occu = survey %>% 
  select(contains("Geef met behulp van de slider aan voor hoeveel procent")) %>% 
  select(-contains("Gelieve de slider"))

#fixnames
stereo_occu_names = names(stereo_occu) %>% 
  str_match("[^:]+") %>% 
  as.vector() %>% 
  str_legalize()
colnames(stereo_occu) = stereo_occu_names

#province incomes
stereo_prov = survey %>% 
  select(Groningen:Limburg) %>% 
  df_legalize_names()

#recoding shared across datasets
survey %<>% mutate(
  #education
  education = survey$`Wat is het hoogst onderwijsniveau waarvoor u een diploma behaald heeft?` %>% ordered(levels = c("Geen", "Basisonderwijs", "Vmbo, havo,- vwo-onderbouw of mbo1", "Havo, vwo, mbo 2-4", "Hbo, wo bachelor", "Hbo, wo master, doctor")),
  education_num = as.numeric(education),
  
  #voting
  vote2017 = survey$`Op welke politieke partij hebt u gestemd bij de tweedekamerverkiezingen in 2017?`,
  vote2020 = survey$`Op welke politieke partij zou u stemmen als er vandaag Tweede Kamerverkiezingen zouden worden gehouden?`
)


#subset of interest
d = survey %>% 
  select(education:vote2020, !!prolific_meta_cols, source) %>% 
  mutate(
    #basic
    age = age %>% mapvalues(from = 0, to = NA),
    male = (sex == "Male"),
    male_num = as.numeric(male),
    
    #time use
    time_taken_secs = time_taken,
    time_taken_min = time_taken / 60,
    time_taken = winsorise(time_taken_min, upper = quantile(time_taken_min, probs = .95)),
    
    #student
    student = (Student_Status == "Yes"),
    student_num = student %>% as.numeric(),
    
    #employment
    Employment_Status = Employment_Status %>% mapvalues(c("Ik ga binnen nu en dertig dagen met een nieuwe baan beginnen.", "Not in paid work (e.g. homemaker, retired or disabled)", "Not in paid work (e.g. homemaker', 'retired or disabled)", "Unemployed (and job seeking)"), c("Due to start a new job within the next month", "Not in paid work", "Not in paid work", "Unemployed and job seeking")),
    
    #birth variables
    Country_of_Birth = Country_of_Birth %>% fct_relevel("Netherlands"),
    Birth3 = case_when(
      .$Country_of_Birth == "Netherlands" ~ "Netherlands",
      .$Country_of_Birth %in% c("Belgium", "Canada", "France", "Ireland", "Portugal", "Spain", "United Kingdom") ~ "Western",
      TRUE ~ "Non-Western"
    ),
    Birth2 = case_when(
      .$Country_of_Birth == "Netherlands" ~ "Netherlands",
      TRUE ~ "Non-Netherlands"
    ),
    Birth2b = case_when(
      .$Country_of_Birth %in% c("Netherlands", "Belgium", "Canada", "France", "Ireland", "Portugal", "Spain", "United Kingdom") ~ "Western",
      TRUE ~ "Non-Western"
    ),
    
    #first language
    First_Language = First_Language %>% fct_relevel("Dutch"),
    First_Language2 = case_when(
      .$First_Language == "Dutch" ~ "Dutch",
      TRUE ~ "non-Dutch"
    ),
    Dutch_native_num = (First_Language2 == "Dutch") %>% as.numeric()
  )

#combine vote data to numerical for greater power
#do they relate well? they should
d %$% table(vote2017, vote2020)

##                                      vote2020
## vote2017                              50Plus CDA Christenunie D66 DENK FvD
##   50Plus                                   2   0            0   1    0   0
##   CDA                                      0  10            0   2    0   0
##   Christenunie                             0   1           13   0    0   1
##   D66                                      0   1            0  31    0   0
##   FvD                                      0   1            0   0    0  22
##   Groenlinks                               0   0            1   4    0   1
##   Ik heb blanco gestemd                    0   0            0   0    0   0
##   Ik heb niet gestemd                      0   1            2   8    0   4
##   Ik heb op een andere partij gestemd      1   0            0   0    0   1
##   PvDA                                     0   0            0   3    0   0
##   PvdD                                     0   0            0   3    0   1
##   PVV                                      1   0            0   1    0   6
##   SGP                                      0   0            1   0    0   0
##   SP                                       0   0            0   0    0   1
##   VVD                                      0   0            0   1    1   4
##                                      vote2020
## vote2017                              Groenlinks Ik zou blanco stemmen
##   50Plus                                       0                     0
##   CDA                                          0                     0
##   Christenunie                                 0                     0
##   D66                                         19                     3
##   FvD                                          0                     3
##   Groenlinks                                  87                     5
##   Ik heb blanco gestemd                        0                     6
##   Ik heb niet gestemd                         16                     4
##   Ik heb op een andere partij gestemd          1                     0
##   PvDA                                         0                     0
##   PvdD                                         2                     0
##   PVV                                          0                     0
##   SGP                                          0                     0
##   SP                                           3                     2
##   VVD                                          2                     0
##                                      vote2020
## vote2017                              Ik zou niet gaan stemmen
##   50Plus                                                     0
##   CDA                                                        1
##   Christenunie                                               0
##   D66                                                        6
##   FvD                                                        1
##   Groenlinks                                                 0
##   Ik heb blanco gestemd                                      1
##   Ik heb niet gestemd                                       51
##   Ik heb op een andere partij gestemd                        1
##   PvDA                                                       0
##   PvdD                                                       1
##   PVV                                                        1
##   SGP                                                        0
##   SP                                                         1
##   VVD                                                        1
##                                      vote2020
## vote2017                              Ik zou op een andere partij stemmen PvDA
##   50Plus                                                                1    0
##   CDA                                                                   1    0
##   Christenunie                                                          2    0
##   D66                                                                   6    5
##   FvD                                                                   1    0
##   Groenlinks                                                            1    5
##   Ik heb blanco gestemd                                                 2    0
##   Ik heb niet gestemd                                                   3    4
##   Ik heb op een andere partij gestemd                                   5    1
##   PvDA                                                                  0   27
##   PvdD                                                                  1    0
##   PVV                                                                   2    0
##   SGP                                                                   0    0
##   SP                                                                    1    2
##   VVD                                                                   4    1
##                                      vote2020
## vote2017                              PvdD PVV SGP SP VVD
##   50Plus                                 0   0   0  1   0
##   CDA                                    0   0   0  0   2
##   Christenunie                           0   0   0  1   1
##   D66                                    1   0   0  1   3
##   FvD                                    0   3   0  1   1
##   Groenlinks                             1   0   0  1   0
##   Ik heb blanco gestemd                  0   0   0  0   0
##   Ik heb niet gestemd                    3   3   0  4  15
##   Ik heb op een andere partij gestemd    1   1   0  1   2
##   PvDA                                   1   0   0  0   1
##   PvdD                                  20   0   0  2   0
##   PVV                                    0  24   0  0   0
##   SGP                                    0   0   1  0   0
##   SP                                     2   1   0 27   0
##   VVD                                    0   2   0  0  62

#collapse the Other options
d$vote2017b = d$vote2017 %>% mapvalues(c("Ik heb op een andere partij gestemd", "Ik heb niet gestemd", "Ik heb blanco gestemd"), rep("Other", 3))
d$vote2020b = d$vote2020 %>% mapvalues(c("Ik zou op een andere partij stemmen", "Ik zou niet gaan stemmen", "Ik zou blanco stemmen"), rep("Other", 3))

#make a variable for each unique value, then fill in data
for (v in unique(c(d$vote2017b, d$vote2020b))) {
  if (is.na(v)) next
  d[[str_glue("vote_{str_replace_all(v, ' ', '_')}") %>% as.character()]] = rowMeans(cbind(d$vote2017 == v, d$vote2020 == v), na.rm = T)
}

#names
vote_preds = d %>% names() %>% str_subset("^vote_") %>% 
  #not Other, which has no data
  setdiff("vote_Other")

IQ scoring

#items
#some are split into multiple columns because we used select 2
#we need to merge them first
vocab_items_raw = survey %>% 
  select(vocab_1_option_1:vocab_20_option_5)

#splits
vocab_groups = seq(1, 100, by = 5)
length(vocab_groups)

## [1] 20

#loop across and merge to a single string by item
vocab_items = matrix(nrow = nrow(d), ncol = length(vocab_groups))
collapse_func = function(x) na.omit(x) %>% sort() %>% str_c(collapse = ", ")
for (i in seq_along(vocab_groups)) {
  #col start
  i_start = vocab_groups[i]
  vocab_items[, i] = vocab_items_raw[, i_start:(i_start+4)] %>% apply(MARGIN = 1, FUN = collapse_func)
}

#science items
science_items = survey %>% 
  select(`Bijna alle planten zijn van het volgende type:`:`Wie bedacht de evolutietheorie?`)
  #minus one broken one
  # select(-contains("een onderdeel van de linguïstiek"))

IQ_items = cbind(
  vocab_items %>% set_colnames("vocab" + 1:20),
  science_items %>% set_colnames("science" + 1:20)
)

#correct answers
IQ_key = c(
  #vocab
  c("verhandelbaarheid, liquiditeit",
  "malversatie, verduistering",
  "palpatie, betasting",
  "xylografie, houtsnijkunst",
  "zengen, schroeien",
  "splitsing, bifurcatie",
  "cachet, zegel",
  "afwijzen, deballoteren",
  "abject, verwerpelijk",
  "trijp, fluweel",
  "delicaat, broos",
  "idiosyncrasie, eigenaardigheid",
  "poreus, doorlaatbaar",
  "chiromantie, handlezerij",
  "karbonade, kotelet",
  "appelleren, aanspreken",
  "behaaglijk, senang",
  "zeel, draagriem",
  "loot, stek",
  "heffing, recognitie") %>% 
    #order
    map_chr(~str_split(., pattern = ", ", simplify = T) %>% collapse_func())
    ,
  
  #science
  "Meercellige eukaryoten",
  "24",
  "nucleïne",
  "David Ricardo",
  "Marcus",
  "De Dodo-hypothese.", #6
  "Het Stroop-effect.",
  "De Gaussiaanse verdeling.",
  "Ongeveer 50%.",
  "klinkers en medeklinkers.", #10
  "Noam Chomsky",
  "Een vakgebied dat genetische effecten bestudeert die niet gecodeerd zijn in de DNA-sequentie van een organisme.",
  "Stephen Hawking",
  "Smedley Butler",
  "Ongeveer 300.000.000 m/s",
  "66 miljoen jaar",
  "Brazilië", #17
  "10",
  "Een soort celdeling die kiemcellen produceert.",
  "Charles Darwin"
)

#table by question
map2_df(IQ_items, names(IQ_items), function(x, n) {
  table2(x, include_NA = F) %>% 
    mutate(question = n)
})

#score to binary
IQ_items_scored = score_items(IQ_items, IQ_key)

#IRT
irt_g = mirt(IQ_items_scored, model = 1)

## 
Iteration: 1, Log-Lik: -13256.965, Max-Change: 0.93824
Iteration: 2, Log-Lik: -13037.983, Max-Change: 0.28136
Iteration: 3, Log-Lik: -13019.256, Max-Change: 0.06977
Iteration: 4, Log-Lik: -13016.894, Max-Change: 0.04022
Iteration: 5, Log-Lik: -13016.206, Max-Change: 0.02284
Iteration: 6, Log-Lik: -13015.954, Max-Change: 0.01492
Iteration: 7, Log-Lik: -13015.794, Max-Change: 0.00355
Iteration: 8, Log-Lik: -13015.784, Max-Change: 0.00183
Iteration: 9, Log-Lik: -13015.779, Max-Change: 0.00156
Iteration: 10, Log-Lik: -13015.773, Max-Change: 0.00088
Iteration: 11, Log-Lik: -13015.772, Max-Change: 0.00068
Iteration: 12, Log-Lik: -13015.771, Max-Change: 0.00053
Iteration: 13, Log-Lik: -13015.770, Max-Change: 0.00011
Iteration: 14, Log-Lik: -13015.770, Max-Change: 0.00011
Iteration: 15, Log-Lik: -13015.770, Max-Change: 0.00011
Iteration: 16, Log-Lik: -13015.770, Max-Change: 0.00039
Iteration: 17, Log-Lik: -13015.770, Max-Change: 0.00005

irt_BF = mirt(IQ_items_scored, model = mirt.model("g = 1-40
                                                  V = 1-20
                                                  S = 21-40"))

## 
Iteration: 1, Log-Lik: -13291.772, Max-Change: 0.82491
Iteration: 2, Log-Lik: -12939.444, Max-Change: 0.37916
Iteration: 3, Log-Lik: -12868.213, Max-Change: 0.16098
Iteration: 4, Log-Lik: -12844.866, Max-Change: 0.14082
Iteration: 5, Log-Lik: -12835.860, Max-Change: 0.09462
Iteration: 6, Log-Lik: -12832.071, Max-Change: 0.06528
Iteration: 7, Log-Lik: -12828.372, Max-Change: 0.02065
Iteration: 8, Log-Lik: -12827.965, Max-Change: 0.01470
Iteration: 9, Log-Lik: -12827.724, Max-Change: 0.01204
Iteration: 10, Log-Lik: -12827.256, Max-Change: 0.00809
Iteration: 11, Log-Lik: -12827.208, Max-Change: 0.00693
Iteration: 12, Log-Lik: -12827.168, Max-Change: 0.00735
Iteration: 13, Log-Lik: -12826.993, Max-Change: 0.00502
Iteration: 14, Log-Lik: -12826.971, Max-Change: 0.00494
Iteration: 15, Log-Lik: -12826.952, Max-Change: 0.00483
Iteration: 16, Log-Lik: -12826.852, Max-Change: 0.00407
Iteration: 17, Log-Lik: -12826.839, Max-Change: 0.00390
Iteration: 18, Log-Lik: -12826.827, Max-Change: 0.00346
Iteration: 19, Log-Lik: -12826.764, Max-Change: 0.00329
Iteration: 20, Log-Lik: -12826.755, Max-Change: 0.00295
Iteration: 21, Log-Lik: -12826.747, Max-Change: 0.00287
Iteration: 22, Log-Lik: -12826.707, Max-Change: 0.00306
Iteration: 23, Log-Lik: -12826.701, Max-Change: 0.00261
Iteration: 24, Log-Lik: -12826.695, Max-Change: 0.00256
Iteration: 25, Log-Lik: -12826.667, Max-Change: 0.00220
Iteration: 26, Log-Lik: -12826.663, Max-Change: 0.00214
Iteration: 27, Log-Lik: -12826.660, Max-Change: 0.00209
Iteration: 28, Log-Lik: -12826.641, Max-Change: 0.00182
Iteration: 29, Log-Lik: -12826.639, Max-Change: 0.00178
Iteration: 30, Log-Lik: -12826.636, Max-Change: 0.00174
Iteration: 31, Log-Lik: -12826.624, Max-Change: 0.00151
Iteration: 32, Log-Lik: -12826.622, Max-Change: 0.00147
Iteration: 33, Log-Lik: -12826.620, Max-Change: 0.00144
Iteration: 34, Log-Lik: -12826.612, Max-Change: 0.00125
Iteration: 35, Log-Lik: -12826.611, Max-Change: 0.00122
Iteration: 36, Log-Lik: -12826.610, Max-Change: 0.00119
Iteration: 37, Log-Lik: -12826.604, Max-Change: 0.00103
Iteration: 38, Log-Lik: -12826.603, Max-Change: 0.00101
Iteration: 39, Log-Lik: -12826.603, Max-Change: 0.00098
Iteration: 40, Log-Lik: -12826.599, Max-Change: 0.00085
Iteration: 41, Log-Lik: -12826.598, Max-Change: 0.00083
Iteration: 42, Log-Lik: -12826.598, Max-Change: 0.00081
Iteration: 43, Log-Lik: -12826.595, Max-Change: 0.00043
Iteration: 44, Log-Lik: -12826.595, Max-Change: 0.00044
Iteration: 45, Log-Lik: -12826.595, Max-Change: 0.00044
Iteration: 46, Log-Lik: -12826.593, Max-Change: 0.00042
Iteration: 47, Log-Lik: -12826.593, Max-Change: 0.00041
Iteration: 48, Log-Lik: -12826.593, Max-Change: 0.00039
Iteration: 49, Log-Lik: -12826.592, Max-Change: 0.00036
Iteration: 50, Log-Lik: -12826.592, Max-Change: 0.00035
Iteration: 51, Log-Lik: -12826.592, Max-Change: 0.00035
Iteration: 52, Log-Lik: -12826.591, Max-Change: 0.00033
Iteration: 53, Log-Lik: -12826.591, Max-Change: 0.00033
Iteration: 54, Log-Lik: -12826.591, Max-Change: 0.00032
Iteration: 55, Log-Lik: -12826.590, Max-Change: 0.00030
Iteration: 56, Log-Lik: -12826.590, Max-Change: 0.00030
Iteration: 57, Log-Lik: -12826.590, Max-Change: 0.00029
Iteration: 58, Log-Lik: -12826.590, Max-Change: 0.00027
Iteration: 59, Log-Lik: -12826.590, Max-Change: 0.00027
Iteration: 60, Log-Lik: -12826.589, Max-Change: 0.00026
Iteration: 61, Log-Lik: -12826.589, Max-Change: 0.00024
Iteration: 62, Log-Lik: -12826.589, Max-Change: 0.00024
Iteration: 63, Log-Lik: -12826.589, Max-Change: 0.00023
Iteration: 64, Log-Lik: -12826.589, Max-Change: 0.00021
Iteration: 65, Log-Lik: -12826.589, Max-Change: 0.00021
Iteration: 66, Log-Lik: -12826.589, Max-Change: 0.00021
Iteration: 67, Log-Lik: -12826.589, Max-Change: 0.00019
Iteration: 68, Log-Lik: -12826.589, Max-Change: 0.00018
Iteration: 69, Log-Lik: -12826.588, Max-Change: 0.00018
Iteration: 70, Log-Lik: -12826.588, Max-Change: 0.00016
Iteration: 71, Log-Lik: -12826.588, Max-Change: 0.00016
Iteration: 72, Log-Lik: -12826.588, Max-Change: 0.00016
Iteration: 73, Log-Lik: -12826.588, Max-Change: 0.00014
Iteration: 74, Log-Lik: -12826.588, Max-Change: 0.00014
Iteration: 75, Log-Lik: -12826.588, Max-Change: 0.00014
Iteration: 76, Log-Lik: -12826.588, Max-Change: 0.00013
Iteration: 77, Log-Lik: -12826.588, Max-Change: 0.00013
Iteration: 78, Log-Lik: -12826.588, Max-Change: 0.00012
Iteration: 79, Log-Lik: -12826.588, Max-Change: 0.00011
Iteration: 80, Log-Lik: -12826.588, Max-Change: 0.00011
Iteration: 81, Log-Lik: -12826.588, Max-Change: 0.00011
Iteration: 82, Log-Lik: -12826.588, Max-Change: 0.00010

#item data
item_data = tibble(
  item = names(IQ_items),
  pass_rate = colMeans(IQ_items_scored, na.rm=T),
  g_loading = irt_g@Fit$`F`[, 1],
  g_loading_BF = irt_BF@Fit$`F`[, 1],
  V_loading_BF = irt_BF@Fit$`F`[, 2],
  S_loading_BF = irt_BF@Fit$`F`[, 3]
)

item_data %>% print(n = Inf)

## # A tibble: 40 × 6
##    item      pass_rate g_loading g_loading_BF V_loading_BF S_loading_BF
##    <chr>         <dbl>     <dbl>        <dbl>        <dbl>        <dbl>
##  1 vocab1       0.234     0.195       0.309        -0.0916       0     
##  2 vocab2       0.426     0.455       0.255         0.419        0     
##  3 vocab3       0.390     0.642       0.534         0.353        0     
##  4 vocab4       0.278     0.339       0.299         0.176        0     
##  5 vocab5       0.341     0.563       0.359         0.512        0     
##  6 vocab6       0.527     0.469       0.506         0.0953       0     
##  7 vocab7       0.322     0.249       0.168         0.179        0     
##  8 vocab8       0.241     0.593       0.388         0.467        0     
##  9 vocab9       0.502     0.594       0.392         0.522        0     
## 10 vocab10      0.216     0.490       0.301         0.394        0     
## 11 vocab11      0.501     0.357       0.149         0.446        0     
## 12 vocab12      0.402     0.563       0.465         0.333        0     
## 13 vocab13      0.540     0.602       0.488         0.358        0     
## 14 vocab14      0.195     0.494       0.425         0.273        0     
## 15 vocab15      0.667     0.181       0.0248        0.282        0     
## 16 vocab16      0.580     0.469       0.342         0.346        0     
## 17 vocab17      0.486     0.595       0.303         0.645        0     
## 18 vocab18      0.254     0.388       0.188         0.408        0     
## 19 vocab19      0.356     0.361       0.00196       0.672        0     
## 20 vocab20      0.0797    0.0844     -0.133         0.305        0     
## 21 science1     0.361     0.199       0.240         0            0.206 
## 22 science2     0.322     0.423       0.554         0            0.472 
## 23 science3     0.293     0.259       0.274         0            0.446 
## 24 science4     0.114     0.389       0.315         0            0.380 
## 25 science5     0.0927    0.241       0.178         0            0.323 
## 26 science6     0.0764   -0.258      -0.214         0            0.336 
## 27 science7     0.233     0.224       0.269         0            0.136 
## 28 science8     0.333     0.547       0.662         0            0.186 
## 29 science9     0.104     0.321       0.436         0            0.113 
## 30 science10    0.350    -0.146      -0.183         0            0.0710
## 31 science11    0.247     0.356       0.457         0           -0.169 
## 32 science12    0.203     0.324       0.352         0            0.255 
## 33 science13    0.906     0.621       0.695         0            0.0547
## 34 science14    0.0894    0.0689      0.121         0            0.105 
## 35 science15    0.366     0.454       0.583         0            0.317 
## 36 science16    0.337     0.358       0.395         0            0.153 
## 37 science17    0.180    -0.0355      0.0671        0           -0.0513
## 38 science18    0.478     0.0825      0.107         0            0.351 
## 39 science19    0.259     0.277       0.269         0            0.201 
## 40 science20    0.885     0.465       0.436         0           -0.0814

#scores
irt_g_scores = fscores(irt_g, full.scores = T, full.scores.SE = T)
irt_BF_scores = fscores(irt_BF, full.scores = T, full.scores.SE = T)

#save scores
d$sumscore = IQ_items_scored %>% rowSums()
d$sumscore_z = d$sumscore %>% standardize()
d$g = irt_g_scores[, 1] %>% standardize()
d$g_BF = irt_BF_scores[, 1] %>% standardize()
d$V_BF = irt_BF_scores[, 2] %>% standardize()
d$S_BF = irt_BF_scores[, 3] %>% standardize()

#manual group factor scores
d$V_sum = IQ_items_scored[1:20] %>% rowSums()
d$S_sum = IQ_items_scored[21:40] %>% rowSums()

#residuals
d$V_resid = ols(V_sum ~ g, data = d) %>% resid() %>% standardize()
d$S_resid = ols(S_sum ~ g, data = d) %>% resid() %>% standardize()

#tilt scores
d$V_tilt1 = (standardize(d$V_sum) - standardize(d$S_sum)) %>% standardize()
d$V_tilt2 = (standardize(d$V_BF) - standardize(d$S_BF)) %>% standardize()
d$V_tilt3 = (standardize(d$V_resid) - standardize(d$S_resid)) %>% standardize()


#cog vars
cog_vars = c(
  "sumscore", "g", "g_BF",
  "V_BF", "V_sum", "V_resid",
  "S_BF", "S_sum", "S_resid",
  "V_tilt1", "V_tilt2", "V_tilt3"
)

#only important ones
cog_vars2 = c("g", "V_tilt")

#compare
d %>% select(!!cog_vars) %>% wtd.cors() %>% round(3)

##          sumscore     g   g_BF   V_BF  V_sum V_resid   S_BF  S_sum S_resid
## sumscore    1.000 0.960  0.852  0.565  0.879  -0.003  0.302  0.737   0.198
## g           0.960 1.000  0.876  0.633  0.918   0.000  0.142  0.603   0.000
## g_BF        0.852 0.876  1.000  0.192  0.658  -0.368  0.235  0.760   0.291
## V_BF        0.565 0.633  0.192  1.000  0.831   0.630 -0.225 -0.058  -0.552
## V_sum       0.879 0.918  0.658  0.831  1.000   0.397 -0.055  0.325  -0.286
## V_resid    -0.003 0.000 -0.368  0.630  0.397   1.000 -0.467 -0.572  -0.718
## S_BF        0.302 0.142  0.235 -0.225 -0.055  -0.467  1.000  0.679   0.743
## S_sum       0.737 0.603  0.760 -0.058  0.325  -0.572  0.679  1.000   0.798
## S_resid     0.198 0.000  0.291 -0.552 -0.286  -0.718  0.743  0.798   1.000
## V_tilt1     0.122 0.271 -0.090  0.767  0.581   0.836 -0.633 -0.581  -0.933
## V_tilt2     0.168 0.314 -0.028  0.783  0.566   0.701 -0.783 -0.471  -0.827
## V_tilt3    -0.108 0.001 -0.356  0.639  0.370   0.927 -0.654 -0.739  -0.926
##          V_tilt1 V_tilt2 V_tilt3
## sumscore   0.122   0.168  -0.108
## g          0.271   0.314   0.001
## g_BF      -0.090  -0.028  -0.356
## V_BF       0.767   0.783   0.639
## V_sum      0.581   0.566   0.370
## V_resid    0.836   0.701   0.927
## S_BF      -0.633  -0.783  -0.654
## S_sum     -0.581  -0.471  -0.739
## S_resid   -0.933  -0.827  -0.926
## V_tilt1    1.000   0.894   0.954
## V_tilt2    0.894   1.000   0.826
## V_tilt3    0.954   0.826   1.000

#chosen measure
#because single variable and uncorrelated with g
d$V_tilt = d$V_tilt3 

#score expected at random
IQ_key_options = c(
  #vocab all have 10 because choose(5, 2)
  rep(10, 20),
  
  #science varies
  8,
  8,
  8,
  10,
  8,
  6,
  6,
  6,
  8,
  8,
  8,
  8,
  8,
  10,
  10,
  8,
  7,
  7,
  8,
  9
) %>% 
  set_names(IQ_items)
length(IQ_key_options)

## [1] 40

#simulate and see score distribution
IQ_key_options %>% 
  map_df(function(x) {
    sample(c(T, F), size = nrow(d), replace = T, prob = c(1/x, 1-(1/x)))
  }) %>% 
  rowSums() %>% 
  {
    tibble(score = .)
  } %>% 
  ggplot(aes(score)) +
  geom_bar(stat = "count", alpha = .5) +
  scale_x_continuous(breaks = 0:40, limits = c(0, 40)) +
  geom_bar(data = d, aes(sumscore), stat = "count", fill = "red", alpha = .5)

## Warning: Removed 8 rows containing non-finite values (`stat_count()`).

## Warning: Removed 1 rows containing missing values (`geom_bar()`).

GG_save("figs/IQ_observed_vs_random.png")

## Warning: Removed 8 rows containing non-finite values (`stat_count()`).
## Removed 1 rows containing missing values (`geom_bar()`).

Aggregate stereotypes

Immigrant crime

#score and join into prior
origins = right_join(origins_orig, tibble(ISO = countries_ISO, 
                     crime_mean_estimate = stereo_immi %>% colMeans(na.rm = T),
                     crime_mean_estimate_z = stereo_immi %>% z_mean(),
                     crime_mean_estimate_logged = stereo_immi %>% log_transform_mean(),
                     one_estimates = map_dbl(stereo_immi, ~mean(.==1))
                     ),
              by = "ISO") %>% 
  #sort
  arrange(ISO)

#metrics
origins %>% 
  select(arrest_rate_RR, crime_mean_estimate) %>% 
  describe2()

mean(origins$arrest_rate_RR < 1)

## [1] 0.353

mean(origins$crime_mean_estimate < 1)

## [1] 0

#score aggregate stereotypes, by method
bind_rows(
    score_accuracy(stereo_immi %>% map_df(as.numeric),
                 origins$arrest_rate_RR,
                 aggregate = T,
                 methods = "all",
                 aggregate_function = mean),
    score_accuracy(stereo_immi %>% map_df(as.numeric),
                 origins$arrest_rate_RR,
                 aggregate = T,
                 methods = "all",
                 aggregate_function = mean,
                 trim = .10),
    score_accuracy(stereo_immi %>% map_df(as.numeric),
                 origins$arrest_rate_RR,
                 aggregate = T,
                 methods = "all",
                 aggregate_function = median),
    score_accuracy(stereo_immi %>% map_df(as.numeric),
                 origins$arrest_rate_RR,
                 aggregate = T,
                 methods = "all",
                 aggregate_function = log_transform_mean)
) %>% cbind(method = c("mean", "10% trim mean", "median", "log mean"))

#plot estimate distributions
stereo_immi %>% 
  mutate(
    id = 1:n()
  ) %>% 
  pivot_longer(cols = -id) %>% 
  ggplot(aes(value)) +
  geom_histogram() +
  facet_wrap("name")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_save("figs/crime_estimates_by_country.png")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#representative examples
reverse_lookup = function(x) {pu_translate(x, reverse = T)}
stereo_immi %>% 
  select(ANT, DNK, EGY, SOM) %>% 
  mutate(
    id = 1:n()
  ) %>% 
  pivot_longer(cols = -id) %>% 
  ggplot(aes(value)) +
  geom_histogram() +
  geom_vline(xintercept = 1, linetype = "dotted") +
  scale_x_continuous("Arrest rate RR estimate", breaks = 0:10) +
  facet_wrap("name", labeller = labeller(name = reverse_lookup))

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_save("figs/arrest_estimate_examples.png")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#correlation accuracy
GG_scatter(origins, "arrest_rate_RR", "crime_mean_estimate", case_names = "origin") +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  scale_x_continuous("Arrest rate in the Netherlands") +
  scale_y_continuous("Mean estimate of arrest rate in the Netherlands")

## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/crime_aggr_accuracy.png")

## `geom_smooth()` using formula = 'y ~ x'

#z scored
GG_scatter(origins, "arrest_rate_RR", "crime_mean_estimate_z", case_names = "origin") +
  scale_x_continuous("Arrest rate (RR) in the Netherlands") +
  scale_y_continuous("Mean z-score of estimate of arrest rate (RR) in the Netherlands")

## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/crime_aggr_accuracy_z.png")

## `geom_smooth()` using formula = 'y ~ x'

#log converted
GG_scatter(origins, "arrest_rate_RR", "crime_mean_estimate_logged", case_names = "origin") +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  scale_x_continuous("Arrest rate (RR) in the Netherlands") +
  scale_y_continuous("Mean estimate of arrest rate (RR) in the Netherlands (log converted)")

## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/crime_aggr_accuracy_logged.png")

## `geom_smooth()` using formula = 'y ~ x'

#compare
GG_scatter(origins, "crime_mean_estimate", "crime_mean_estimate_z")

## `geom_smooth()` using formula = 'y ~ x'

#stereotype by source
if (!"crime_mean_estimate_prolific" %in% names(origins)) {
  origins = origins %>% left_join(
    score_by(
  stereo_immi,
  moderator = d$source,
  long_format = T) %>% set_colnames(c("ISO", "crime_mean_estimate_prolific", "crime_mean_estimate_survee"))
  )
}

## Joining with `by = join_by(ISO)`

#compare visually
GG_scatter(origins, "crime_mean_estimate_prolific", "crime_mean_estimate_survee") +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  xlim(0, 5) + ylim(0, 5)

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 6 rows containing missing values (`geom_smooth()`).

GG_save("figs/stereotypes_immi_by_source.png")

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 6 rows containing missing values (`geom_smooth()`).

#numerically
describe2(origins %>% select(crime_mean_estimate, crime_mean_estimate_prolific, crime_mean_estimate_survee))

#regression
ols(arrest_rate_RR ~ crime_mean_estimate, data = origins)

## Linear Regression Model
## 
## ols(formula = arrest_rate_RR ~ crime_mean_estimate, data = origins)
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs      68    LR chi2     36.69    R2       0.417    
## sigma0.7611    d.f.            1    R2 adj   0.408    
## d.f.     66    Pr(> chi2) 0.0000    g        0.722    
## 
## Residuals
## 
##     Min      1Q  Median      3Q     Max 
## -1.5725 -0.4835 -0.0228  0.2920  2.2051 
## 
## 
##                     Coef    S.E.   t     Pr(>|t|)
## Intercept           -0.6203 0.3363 -1.84 0.0696  
## crime_mean_estimate  1.2665 0.1844  6.87 <0.0001

#add deltas for each country
origins %<>% mutate(
  crime_delta = crime_mean_estimate - arrest_rate_RR
)

#Muslim bias
GG_scatter(origins, "Muslim", "crime_delta", case_names = "origin") +
  scale_x_continuous("Muslim % in home country", labels = scales::percent, limits = c(-.06, 1)) +
  scale_y_continuous("Arrest rate (RR) delta")

## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/crime_aggr_delta_Muslim.png")

## `geom_smooth()` using formula = 'y ~ x'

#African bias, reviewer 1 request
GG_scatter(origins, "SSA", "crime_delta", case_names = "origin") +
  scale_x_continuous("Sub-Saharan African % in home country", labels = scales::percent, limits = c(-.06, 1)) +
  scale_y_continuous("Arrest rate (RR) delta")

## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/crime_aggr_delta_African.png")

## `geom_smooth()` using formula = 'y ~ x'

#both?
ols(crime_delta ~ Muslim + SSA, data = origins)

## Frequencies of Missing Values Due to Each Variable
## crime_delta      Muslim         SSA 
##           0           0           2 
## 
## Linear Regression Model
## 
## ols(formula = crime_delta ~ Muslim + SSA, data = origins)
## 
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs      66    LR chi2     16.94    R2       0.226    
## sigma0.6487    d.f.            2    R2 adj   0.202    
## d.f.     63    Pr(> chi2) 0.0002    g        0.334    
## 
## Residuals
## 
##      Min       1Q   Median       3Q      Max 
## -2.12053 -0.36159  0.05497  0.44589  1.28847 
## 
## 
##           Coef    S.E.   t     Pr(>|t|)
## Intercept  0.4427 0.1028  4.31 <0.0001 
## Muslim    -0.3785 0.2140 -1.77 0.0819  
## SSA       -0.9031 0.2412 -3.74 0.0004

#preferences
origins %>% 
  mutate(
    origin = fct_reorder(origin, net_opposition)
  ) %>% 
  ggplot(aes(net_opposition, origin, fill = net_opposition)) +
  geom_bar(stat = "identity") +
  scale_fill_gradient(low = "green", high = "red", guide = F) +
  scale_x_continuous("Net opposition\nhigher values indicate more opposition to immigration from this origin") +
  theme(
    axis.text.y = element_text(size = 7)
  )

## Warning: The `guide` argument in `scale_*()` cannot be `FALSE`. This was deprecated in
## ggplot2 3.3.4.
## ℹ Please use "none" instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

GG_save("figs/net_opposition.png")

#alternative plotting method
origins %>% 
  select(origin, starts_with("frac_"), net_opposition) %>% 
  mutate(
    origin = fct_reorder(origin, net_opposition)
  ) %>% 
  pivot_longer(cols = -c(origin, net_opposition)) %>% 
  mutate(
    group = str_replace(name, "frac_", "") %>% ordered(levels = c("none", "fewer", "same", "more")) %>% fct_rev()
  ) %>% 
  ggplot(aes(value, origin, fill = group)) +
  geom_bar(stat = "identity", position = "stack") +  
  scale_x_continuous("Preference") +
  theme(
    axis.text.y = element_text(size = 7)
  ) +
  scale_fill_ordinal("Preference")

GG_save("figs/opposition_ordinals.png")

#preferences and crime rates
GG_scatter(origins, "arrest_rate_RR", "net_opposition", case_names = "origin") +
  scale_x_continuous("Arrest rate (RR) in the Netherlands") +
  scale_y_continuous("Net opposition")

## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/crime_aggr_net_opposition.png")

## `geom_smooth()` using formula = 'y ~ x'

Mediation

#mediation of preferences?
cor_matrix(origins[c("IQ", "Muslim", "arrest_rate_RR", "crime_mean_estimate", "crime_delta", "net_opposition")], CI = .95)

##                     IQ                    Muslim               
## IQ                  "1"                   "-0.41 [-0.63 -0.19]"
## Muslim              "-0.41 [-0.63 -0.19]" "1"                  
## arrest_rate_RR      "-0.64 [-0.83 -0.46]" "0.43 [0.21 0.65]"   
## crime_mean_estimate "-0.58 [-0.78 -0.39]" "0.57 [0.37 0.76]"   
## crime_delta         "0.44 [0.23 0.66]"    "-0.18 [-0.42 0.05]" 
## net_opposition      "-0.71 [-0.88 -0.54]" "0.66 [0.48 0.84]"   
##                     arrest_rate_RR        crime_mean_estimate  
## IQ                  "-0.64 [-0.83 -0.46]" "-0.58 [-0.78 -0.39]"
## Muslim              "0.43 [0.21 0.65]"    "0.57 [0.37 0.76]"   
## arrest_rate_RR      "1"                   "0.65 [0.46 0.83]"   
## crime_mean_estimate "0.65 [0.46 0.83]"    "1"                  
## crime_delta         "-0.86 [-0.99 -0.74]" "-0.18 [-0.41 0.06]" 
## net_opposition      "0.55 [0.35 0.75]"    "0.77 [0.62 0.92]"   
##                     crime_delta           net_opposition       
## IQ                  "0.44 [0.23 0.66]"    "-0.71 [-0.88 -0.54]"
## Muslim              "-0.18 [-0.42 0.05]"  "0.66 [0.48 0.84]"   
## arrest_rate_RR      "-0.86 [-0.99 -0.74]" "0.55 [0.35 0.75]"   
## crime_mean_estimate "-0.18 [-0.41 0.06]"  "0.77 [0.62 0.92]"   
## crime_delta         "1"                   "-0.20 [-0.44 0.04]" 
## net_opposition      "-0.20 [-0.44 0.04]"  "1"

#models
origins_z = origins %>% df_standardize(messages = F, exclude_range_01 = F)

list(
  ols(net_opposition ~ arrest_rate_RR, data = origins_z),
  ols(net_opposition ~ arrest_rate_RR + crime_mean_estimate, data = origins_z)
) %>% summarize_models()

#formal mediation
set.seed(1)
crime_mediation = mediation::mediate(
  model.m = lm(crime_mean_estimate ~ arrest_rate_RR, data = origins_z),
  model.y = lm(net_opposition ~ crime_mean_estimate + arrest_rate_RR, data = origins_z),
  treat = "arrest_rate_RR", mediator = "crime_mean_estimate"
)
summary(crime_mediation)

## 
## Causal Mediation Analysis 
## 
## Quasi-Bayesian Confidence Intervals
## 
##                Estimate 95% CI Lower 95% CI Upper             p-value    
## ACME              0.462        0.287         0.66 <0.0000000000000002 ***
## ADE               0.087       -0.119         0.29                0.41    
## Total Effect      0.549        0.348         0.76 <0.0000000000000002 ***
## Prop. Mediated    0.837        0.543         1.30 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 68 
## 
## 
## Simulations: 1000

set.seed(1)
crime_mediation2 = mediation::mediate(
  model.m = lm(crime_mean_estimate ~ arrest_rate_RR + Muslim, data = origins_z),
  model.y = lm(net_opposition ~ crime_mean_estimate + arrest_rate_RR + Muslim, data = origins_z),
  treat = "arrest_rate_RR", mediator = "crime_mean_estimate"
)
summary(crime_mediation2)

## 
## Causal Mediation Analysis 
## 
## Quasi-Bayesian Confidence Intervals
## 
##                Estimate 95% CI Lower 95% CI Upper             p-value    
## ACME             0.2767       0.1412         0.43 <0.0000000000000002 ***
## ADE              0.0482      -0.1384         0.24                0.64    
## Total Effect     0.3249       0.1340         0.51 <0.0000000000000002 ***
## Prop. Mediated   0.8502       0.4488         1.80 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 68 
## 
## 
## Simulations: 1000

#path model
path_mod = "net_opposition ~ crime_mean_estimate + arrest_rate_RR + Muslim
crime_mean_estimate ~ arrest_rate_RR + Muslim
arrest_rate_RR ~~ Muslim"

#plot it automatically
sem_fit = sem(path_mod, data = origins_z)
sem_fit %>% summary()

## lavaan 0.6.15 ended normally after 14 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        10
## 
##   Number of observations                            68
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                         Estimate  Std.Err  z-value  P(>|z|)
##   net_opposition ~                                         
##     crime_men_stmt         0.556    0.101    5.519    0.000
##     arrest_rate_RR         0.052    0.092    0.559    0.576
##     Muslim                 0.320    0.085    3.751    0.000
##   crime_mean_estimate ~                                    
##     arrest_rate_RR         0.494    0.093    5.290    0.000
##     Muslim                 0.353    0.093    3.782    0.000
## 
## Covariances:
##                     Estimate  Std.Err  z-value  P(>|z|)
##   arrest_rate_RR ~~                                    
##     Muslim             0.425    0.130    3.267    0.001
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .net_opposition    0.328    0.056    5.831    0.000
##    .crime_men_stmt    0.475    0.081    5.831    0.000
##     arrest_rate_RR    0.985    0.169    5.831    0.000
##     Muslim            0.985    0.169    5.831    0.000

lavaanPlot(
  model = sem(path_mod, data = origins_z),
  coefs = T
  )

#save manually
#https://github.com/alishinski/lavaanPlot/issues/12

#using the other source without overlap
#formal mediation
set.seed(1)
crime_mediation = mediation::mediate(
  model.m = lm(crime_mean_estimate_survee ~ arrest_rate_RR, data = origins_z),
  model.y = lm(net_opposition ~ crime_mean_estimate_survee + arrest_rate_RR, data = origins_z),
  treat = "arrest_rate_RR", mediator = "crime_mean_estimate_survee"
)
summary(crime_mediation)

## 
## Causal Mediation Analysis 
## 
## Quasi-Bayesian Confidence Intervals
## 
##                Estimate 95% CI Lower 95% CI Upper             p-value    
## ACME             0.4746       0.2996         0.67 <0.0000000000000002 ***
## ADE              0.0748      -0.1246         0.27                0.45    
## Total Effect     0.5494       0.3506         0.76 <0.0000000000000002 ***
## Prop. Mediated   0.8585       0.5696         1.31 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 68 
## 
## 
## Simulations: 1000

set.seed(1)
crime_mediation2 = mediation::mediate(
  model.m = lm(crime_mean_estimate_survee ~ arrest_rate_RR + Muslim, data = origins_z),
  model.y = lm(net_opposition ~ crime_mean_estimate_survee + arrest_rate_RR + Muslim, data = origins_z),
  treat = "arrest_rate_RR", mediator = "crime_mean_estimate_survee"
)
summary(crime_mediation2)

## 
## Causal Mediation Analysis 
## 
## Quasi-Bayesian Confidence Intervals
## 
##                Estimate 95% CI Lower 95% CI Upper             p-value    
## ACME              0.296        0.157         0.46 <0.0000000000000002 ***
## ADE               0.029       -0.150         0.21                0.78    
## Total Effect      0.325        0.136         0.51 <0.0000000000000002 ***
## Prop. Mediated    0.911        0.503         1.85 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 68 
## 
## 
## Simulations: 1000

Nationalists

#reviewer 1 request, nationalist voters
score_by(
  stereo_immi,
  moderator = (d$vote_PVV + d$vote_FvD),
  extrapolate_to = c(0, 1),
  long_format = T) ->
  crime_estimate_nationalists

#same order!
assert_that(all(origins$ISO == crime_estimate_nationalists$name))

## [1] TRUE

origins$crime_mean_estimate_nationalist = crime_estimate_nationalists$moderator_at_1
origins$crime_mean_estimate_nonnationalist = crime_estimate_nationalists$moderator_at_0

#scatterplot
GG_scatter(origins, "crime_mean_estimate_nationalist", "crime_mean_estimate_nonnationalist", repel_names = T, case_names = origins$origin)

## `geom_smooth()` using formula = 'y ~ x'

## Warning: ggrepel: 38 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

GG_save("figs/nationalist_crime_rate_estimates.png")

## `geom_smooth()` using formula = 'y ~ x'

## Warning: ggrepel: 14 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

#stats
ols(crime_mean_estimate_nationalist ~ crime_mean_estimate_nonnationalist, data = origins)

## Linear Regression Model
## 
## ols(formula = crime_mean_estimate_nationalist ~ crime_mean_estimate_nonnationalist, 
##     data = origins)
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs      68    LR chi2    178.78    R2       0.928    
## sigma0.2792    d.f.            1    R2 adj   0.927    
## d.f.     66    Pr(> chi2) 0.0000    g        1.126    
## 
## Residuals
## 
##     Min      1Q  Median      3Q     Max 
## -0.5854 -0.1523 -0.0493  0.1874  0.6388 
## 
## 
##                                    Coef    S.E.   t     Pr(>|t|)
## Intercept                          -1.3316 0.1337 -9.96 <0.0001 
## crime_mean_estimate_nonnationalist  2.2648 0.0777 29.13 <0.0001

#accuracies
#score aggregate stereotypes, by method
bind_rows(
    nationalist = score_accuracy(origins$crime_mean_estimate_nationalist,
                 origins$arrest_rate_RR,
                 methods = "all"),
    nonnationalist = score_accuracy(origins$crime_mean_estimate_nonnationalist,
                 origins$arrest_rate_RR,
                 methods = "all"),
) %>% cbind(group = c("nationalist", "non-nationalist"))

describe2(origins$arrest_rate_RR)

#bias plots
origins$crime_delta_nationalist = origins$crime_mean_estimate_nationalist - origins$arrest_rate_RR
origins$crime_delta_nonnationalist = origins$crime_mean_estimate_nonnationalist - origins$arrest_rate_RR

#combo
GG_scatter(origins, "Muslim", "crime_delta_nationalist", case_names = "origin") +
  scale_x_continuous("Muslim % in home country", labels = scales::percent, limits = c(-.06, 1)) +
  scale_y_continuous("Arrest rate (RR) delta") +
  GG_scatter(origins, "SSA", "crime_delta_nationalist", case_names = "origin") +
  scale_x_continuous("Sub-Saharan African % in home country", labels = scales::percent, limits = c(-.06, 1)) +
  scale_y_continuous("Arrest rate (RR) delta") +
  patchwork::plot_annotation("Bias plots for nationalists")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/crime_aggr_bias_nationalist.png")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#non-nationalist
GG_scatter(origins, "Muslim", "crime_delta_nonnationalist", case_names = "origin") +
  scale_x_continuous("Muslim % in home country", labels = scales::percent, limits = c(-.06, 1)) +
  scale_y_continuous("Arrest rate (RR) delta") +
  GG_scatter(origins, "SSA", "crime_delta_nonnationalist", case_names = "origin") +
  scale_x_continuous("Sub-Saharan African % in home country", labels = scales::percent, limits = c(-.06, 1)) +
  scale_y_continuous("Arrest rate (RR) delta") +
  patchwork::plot_annotation("Bias plots for non-nationalists")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/crime_aggr_bias_nonnationalist.png")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

Occupations and sex

#score & join
occu = cbind(
  occu,
  survey_var = stereo_occu %>% colnames(),
  mean_estimate = stereo_occu %>% colMeans(),
  mean_estimate_z = stereo_occu %>% z_mean()
)

#stats
occu %>% select(male_pct, mean_estimate) %>% describe2()

#correlation accuracy
GG_scatter(occu, "male_pct", "mean_estimate", case_names = "short_english") +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed")

## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/occu_aggr_accuracy.png")

## `geom_smooth()` using formula = 'y ~ x'

GG_scatter(occu, "male_pct", "mean_estimate_z", case_names = "short_english")

## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/occu_aggr_accuracy_z.png")

## `geom_smooth()` using formula = 'y ~ x'

#regression
ols(male_pct ~ mean_estimate, data = occu)

## Linear Regression Model
## 
## ols(formula = male_pct ~ mean_estimate, data = occu)
## 
##                  Model Likelihood    Discrimination    
##                        Ratio Test           Indexes    
## Obs       54    LR chi2    116.81    R2       0.885    
## sigma10.4575    d.f.            1    R2 adj   0.883    
## d.f.      52    Pr(> chi2) 0.0000    g       32.255    
## 
## Residuals
## 
##     Min      1Q  Median      3Q     Max 
## -24.581  -6.067   1.824   5.981  23.147 
## 
## 
##               Coef     S.E.   t     Pr(>|t|)
## Intercept     -34.5753 5.0122 -6.90 <0.0001 
## mean_estimate   1.6033 0.0801 20.01 <0.0001

#stats
occu %>% 
  dplyr::select(male_pct, mean_estimate) %>% 
  describe2()

#aggregating method
bind_rows(
    score_accuracy(stereo_occu %>% map_df(as.numeric),
                 occu$male_pct,
                 methods = "all",
                 aggregate = T,
                 aggregate_function = mean),
    score_accuracy(stereo_occu %>% map_df(as.numeric),
                 occu$male_pct,
                 methods = "all",
                 aggregate = T,
                 aggregate_function = mean,
                 trim = .10),
    score_accuracy(stereo_occu %>% map_df(as.numeric),
                 occu$male_pct,
                 methods = "all",
                 aggregate = T,
                 aggregate_function = median)
) %>% cbind(method = c("mean", "trim_mean", "median"))

#add deltas for each country
occu %<>% mutate(
  delta = mean_estimate - male_pct
)

#by sex
occu_stereo_sex = score_by(
  stereo_occu,
  moderator = d$sex
)

#long
occu_stereo_sex_long = occu_stereo_sex %>% 
  pivot_longer(cols = -moderator) %>% 
  left_join(occu %>% select(survey_var, male_pct), by = c("name" = "survey_var"))

#plot
occu_stereo_sex_long %>% 
  ggplot(aes(male_pct, value, color = moderator)) +
  geom_point() +
  geom_smooth(method = lm, se = F) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  scale_color_discrete("Rater sex") + 
  ylab("Male % estimate") + 
  xlab("Male % true")

## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/occu_mean_by_sex.png")

## `geom_smooth()` using formula = 'y ~ x'

#metrics
bind_rows(
    score_accuracy(occu_stereo_sex[1, -1],
                 occu$male_pct,
                 methods = "all",
                 aggregate = T,
                 aggregate_function = mean),
    score_accuracy(occu_stereo_sex[2, -1],
             occu$male_pct,
             methods = "all",
             aggregate = T,
             aggregate_function = mean),
) %>% cbind(method = c("women", "men"))

#correlation between estimates
cor.test(occu_stereo_sex[1, -1] %>% unlist(),
         occu_stereo_sex[2, -1] %>% unlist())

## 
##  Pearson's product-moment correlation
## 
## data:  occu_stereo_sex[1, -1] %>% unlist() and occu_stereo_sex[2, -1] %>% unlist()
## t = 79, df = 52, p-value <0.0000000000000002
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.993 0.998
## sample estimates:
##   cor 
## 0.996

#reg
ols(Male ~ Female, data = occu_stereo_sex_long %>% 
  pivot_wider(names_from = "moderator", values_from = "value"))

## Linear Regression Model
## 
## ols(formula = Male ~ Female, data = occu_stereo_sex_long %>% 
##     pivot_wider(names_from = "moderator", values_from = "value"))
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs      54    LR chi2    259.30    R2       0.992    
## sigma1.6431    d.f.            1    R2 adj   0.992    
## d.f.     52    Pr(> chi2) 0.0000    g       19.956    
## 
## Residuals
## 
##     Min      1Q  Median      3Q     Max 
## -4.2902 -1.0561  0.1675  0.8546  4.1131 
## 
## 
##           Coef    S.E.   t     Pr(>|t|)
## Intercept -1.1921 0.7964 -1.50 0.1405  
## Female     0.9977 0.0126 79.24 <0.0001

occu_stereo_sex_long %>% 
  pivot_wider(names_from = "moderator", values_from = "value") %>% 
  GG_scatter("Male", "Female", case_names = "name")

## `geom_smooth()` using formula = 'y ~ x'

Province income

#score
prov_income %<>% 
  mutate(
    #average
    mean_estimate = stereo_prov %>% colMeans(),
    mean_estimate_z = stereo_prov %>% z_mean()
  )

#correlation accuracy
GG_scatter(prov_income, "prov_income", "mean_estimate", case_names = "Region") +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  ylim(25e3, NA) +
  xlab("Province mean disposable income") +
  ylab("Mean estimate of province mean disposable income")

## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/prov_aggr_accuracy.png")

## `geom_smooth()` using formula = 'y ~ x'

GG_scatter(prov_income, "prov_income", "mean_estimate_z", case_names = "Region")

## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/prov_aggr_accuracy_z.png")

## `geom_smooth()` using formula = 'y ~ x'

#regression
ols(prov_income ~ mean_estimate, data = prov_income)

## Linear Regression Model
## 
## ols(formula = prov_income ~ mean_estimate, data = prov_income)
## 
##                   Model Likelihood    Discrimination    
##                         Ratio Test           Indexes    
## Obs        12    LR chi2     15.45    R2       0.724    
## sigma856.7404    d.f.            1    R2 adj   0.697    
## d.f.       10    Pr(> chi2) 0.0001    g     1467.491    
## 
## Residuals
## 
##      Min       1Q   Median       3Q      Max 
## -1422.87  -400.28    33.84   439.59  1360.02 
## 
## 
##               Coef       S.E.      t    Pr(>|t|)
## Intercept     12448.0483 3202.6387 3.89 0.0030  
## mean_estimate     0.5385    0.1051 5.12 0.0004

#stats
prov_income %>% 
  select(prov_income, mean_estimate) %>%
  describe2()

#aggregation method
bind_rows(
    score_accuracy(stereo_prov %>% map_df(as.numeric),
                 prov_income$prov_income,
                 aggregate = T,
                 methods = "all",
                 aggregate_function = mean),
    score_accuracy(stereo_prov %>% map_df(as.numeric),
                 prov_income$prov_income,
                 methods = "all",
                 aggregate = T,
                 aggregate_function = mean,
                 trim = .10),
    score_accuracy(stereo_prov %>% map_df(as.numeric),
                 prov_income$prov_income,
                 methods = "all",
                 aggregate = T,
                 aggregate_function = median),
) %>% cbind(method = c("mean", "10% trimmed mean", "median"))

#add deltas for each country
prov_income %<>% mutate(
  income_delta = mean_estimate - prov_income
)

Individual stereotypes

Descriptives

#time taken
describe2(survey$time_taken/60)

describeBy(survey$time_taken/60, survey$source, mat = T)

#individual predictors
preds = c("g", "V_tilt", "age" , "male", "time_taken", "education_num", "First_Language2", "Birth3", "student", "Employment_Status", vote_preds)

#quasi-numeric
preds_quasinum = map_chr(preds, function(x) {
  if (!class(d[[x]])[1] %in% c("character", "factor")) return(x)
  NA_character_
}) %>% na.omit() %>% as.vector()

preds_num = map_chr(preds, function(x) {
  if (!class(d[[x]])[1] %in% c("character", "factor", "logical")) return(x)
  NA_character_
}) %>% na.omit() %>% as.vector()

#combined summary stats
map_df(d %>% select(!!preds_quasinum) %>% names(), function(v) {

  #results by group and combined
  bind_rows(
    describe2(d[[v]] %>% as.numeric()) %>% mutate(group1 = "combined"),
    describeBy(d[[v]] %>% as.numeric(), d$source, mat = T)
  ) %>% mutate(variable = v)
}) %>% select(variable, group1, n, mean, median, sd, mad, min, max, skew)

#gaps and p values
cohen.d(d %>% select(!!preds_num) %>% map_df(as.numeric), group = d$source %>% factor() %>% as.integer())

## Call: cohen.d(x = d %>% select(!!preds_num) %>% map_df(as.numeric), 
##     group = d$source %>% factor() %>% as.integer())
## Cohen d statistic of difference between two means
##                   lower effect upper
## g                 -0.60  -0.43 -0.26
## V_tilt             0.47   0.64  0.81
## age                1.02   1.20  1.38
## time_taken        -0.30  -0.13  0.03
## education_num     -0.59  -0.42 -0.25
## vote_PvdD         -0.35  -0.18 -0.01
## vote_Groenlinks   -0.62  -0.45 -0.28
## vote_SP            0.16   0.33  0.50
## vote_D66          -0.46  -0.29 -0.12
## vote_PvDA         -0.03   0.14  0.31
## vote_VVD           0.24   0.41  0.57
## vote_Christenunie -0.12   0.05  0.22
## vote_PVV           0.24   0.41  0.58
## vote_CDA           0.07   0.24  0.41
## vote_FvD          -0.11   0.06  0.23
## vote_SGP          -0.09   0.08  0.25
## vote_50Plus        0.04   0.20  0.37
## vote_DENK         -0.23  -0.06  0.11
## 
## Multivariate (Mahalanobis) distance between groups
## [1] 1.8
## r equivalent of difference between two means
##                 g            V_tilt               age        time_taken 
##             -0.20              0.29              0.49             -0.06 
##     education_num         vote_PvdD   vote_Groenlinks           vote_SP 
##             -0.19             -0.08             -0.21              0.15 
##          vote_D66         vote_PvDA          vote_VVD vote_Christenunie 
##             -0.13              0.07              0.19              0.02 
##          vote_PVV          vote_CDA          vote_FvD          vote_SGP 
##              0.19              0.11              0.03              0.04 
##       vote_50Plus         vote_DENK 
##              0.10             -0.03

#cor matrix
d[preds_quasinum] %>% hetcor(use = "pairwise.complete.obs")

## 
## Two-Step Estimates
## 
## Correlations/Type of Correlation:
##                          g  V_tilt      age       male time_taken education_num
## g                        1 Pearson  Pearson Polyserial    Pearson       Pearson
## V_tilt            0.000658       1  Pearson Polyserial    Pearson       Pearson
## age                  0.208   0.425        1 Polyserial    Pearson       Pearson
## male                0.0761  -0.221  -0.0743          1 Polyserial    Polyserial
## time_taken           0.174  0.0726    0.116      0.043          1       Pearson
## education_num        0.365 -0.0375  -0.0202     0.0289    -0.0136             1
## student             -0.107  -0.405    -0.75    0.00892    -0.0879        -0.188
## vote_PvdD           0.0306  0.0186 -0.00884     -0.109     0.0138         0.024
## vote_Groenlinks     0.0932 -0.0646   -0.137     -0.137    -0.0399        0.0614
## vote_SP            0.00728   0.137    0.173     -0.142    0.00979       -0.0817
## vote_D66             0.134 -0.0527   -0.065     0.0486    -0.0569         0.124
## vote_PvDA          -0.0142  0.0452  0.00797     0.0372    0.00498       -0.0355
## vote_VVD           0.00407  0.0608   0.0652      0.119    -0.0113         0.179
## vote_Christenunie   0.0121 -0.0515   0.0284   -0.00852    0.00986        0.0481
## vote_PVV            -0.141   0.104    0.153     0.0328     0.0545        -0.118
## vote_CDA           -0.0442   0.116    0.111    -0.0825    -0.0895      -0.00641
## vote_FvD           -0.0196  -0.026   0.0274      0.119  -0.000098       -0.0758
## vote_SGP           0.00357 -0.0531   0.0174     0.0179   -0.00705       -0.0306
## vote_50Plus        -0.0665 -0.0265   0.0563    -0.0958    -0.0353       -0.0809
## vote_DENK           -0.011  -0.025  -0.0292     -0.129    -0.0236        0.0198
##                      student  vote_PvdD vote_Groenlinks    vote_SP   vote_D66
## g                 Polyserial    Pearson         Pearson    Pearson    Pearson
## V_tilt            Polyserial    Pearson         Pearson    Pearson    Pearson
## age               Polyserial    Pearson         Pearson    Pearson    Pearson
## male              Polychoric Polyserial      Polyserial Polyserial Polyserial
## time_taken        Polyserial    Pearson         Pearson    Pearson    Pearson
## education_num     Polyserial    Pearson         Pearson    Pearson    Pearson
## student                    1 Polyserial      Polyserial Polyserial Polyserial
## vote_PvdD            -0.0254          1         Pearson    Pearson    Pearson
## vote_Groenlinks         0.23     -0.114               1    Pearson    Pearson
## vote_SP               -0.199    -0.0334          -0.133          1    Pearson
## vote_D66               0.097    -0.0684          -0.117      -0.11          1
## vote_PvDA             0.0361      -0.06          -0.125    -0.0646    -0.0586
## vote_VVD             -0.0938     -0.105          -0.219     -0.122     -0.154
## vote_Christenunie   -0.00797    -0.0462         -0.0926    -0.0423    -0.0767
## vote_PVV              -0.223    -0.0655          -0.142    -0.0679     -0.101
## vote_CDA              -0.214    -0.0428         -0.0925    -0.0499    -0.0374
## vote_FvD              -0.178    -0.0604          -0.147    -0.0648     -0.117
## vote_SGP            -0.00668    -0.0133         -0.0288    -0.0156    -0.0221
## vote_50Plus           -0.101    -0.0249         -0.0539   -0.00393    -0.0196
## vote_DENK              0.129   -0.00994         -0.0215    -0.0116    -0.0165
##                    vote_PvDA   vote_VVD vote_Christenunie   vote_PVV   vote_CDA
## g                    Pearson    Pearson           Pearson    Pearson    Pearson
## V_tilt               Pearson    Pearson           Pearson    Pearson    Pearson
## age                  Pearson    Pearson           Pearson    Pearson    Pearson
## male              Polyserial Polyserial        Polyserial Polyserial Polyserial
## time_taken           Pearson    Pearson           Pearson    Pearson    Pearson
## education_num        Pearson    Pearson           Pearson    Pearson    Pearson
## student           Polyserial Polyserial        Polyserial Polyserial Polyserial
## vote_PvdD            Pearson    Pearson           Pearson    Pearson    Pearson
## vote_Groenlinks      Pearson    Pearson           Pearson    Pearson    Pearson
## vote_SP              Pearson    Pearson           Pearson    Pearson    Pearson
## vote_D66             Pearson    Pearson           Pearson    Pearson    Pearson
## vote_PvDA                  1    Pearson           Pearson    Pearson    Pearson
## vote_VVD              -0.108          1           Pearson    Pearson    Pearson
## vote_Christenunie    -0.0529    -0.0715                 1    Pearson    Pearson
## vote_PVV              -0.075     -0.101           -0.0499          1    Pearson
## vote_CDA             -0.0489    -0.0555            -0.014    -0.0462          1
## vote_FvD             -0.0805    -0.0909           -0.0412    0.00639    -0.0359
## vote_SGP             -0.0153     -0.023            0.0477    -0.0144   -0.00941
## vote_50Plus          -0.0285     -0.043            -0.019   -0.00026    -0.0176
## vote_DENK            -0.0114     0.0468          -0.00757    -0.0107   -0.00701
##                     vote_FvD   vote_SGP vote_50Plus  vote_DENK
## g                    Pearson    Pearson     Pearson    Pearson
## V_tilt               Pearson    Pearson     Pearson    Pearson
## age                  Pearson    Pearson     Pearson    Pearson
## male              Polyserial Polyserial  Polyserial Polyserial
## time_taken           Pearson    Pearson     Pearson    Pearson
## education_num        Pearson    Pearson     Pearson    Pearson
## student           Polyserial Polyserial  Polyserial Polyserial
## vote_PvdD            Pearson    Pearson     Pearson    Pearson
## vote_Groenlinks      Pearson    Pearson     Pearson    Pearson
## vote_SP              Pearson    Pearson     Pearson    Pearson
## vote_D66             Pearson    Pearson     Pearson    Pearson
## vote_PvDA            Pearson    Pearson     Pearson    Pearson
## vote_VVD             Pearson    Pearson     Pearson    Pearson
## vote_Christenunie    Pearson    Pearson     Pearson    Pearson
## vote_PVV             Pearson    Pearson     Pearson    Pearson
## vote_CDA             Pearson    Pearson     Pearson    Pearson
## vote_FvD                   1    Pearson     Pearson    Pearson
## vote_SGP             -0.0155          1     Pearson    Pearson
## vote_50Plus          -0.0289   -0.00548           1    Pearson
## vote_DENK            -0.0115   -0.00219    -0.00408          1
## 
## Standard Errors/Numbers of Observations:
##                        g V_tilt    age   male time_taken education_num student
## g                    615    607    607    612        615           615     610
## V_tilt            0.0406    607    599    606        607           607     604
## age               0.0389 0.0335    607    604        607           607     602
## male              0.0506 0.0483 0.0507    612        612           612     610
## time_taken        0.0391 0.0404 0.0401 0.0509        615           615     610
## education_num      0.035 0.0406 0.0406 0.0507     0.0403           615     610
## student           0.0513 0.0438 0.0269 0.0649     0.0521          0.05     610
## vote_PvdD         0.0403 0.0406 0.0406 0.0515     0.0403        0.0403  0.0525
## vote_Groenlinks     0.04 0.0405 0.0399 0.0496     0.0403        0.0402  0.0474
## vote_SP           0.0404 0.0399 0.0394 0.0511     0.0404        0.0401  0.0593
## vote_D66          0.0396 0.0405 0.0404 0.0511     0.0402        0.0397  0.0502
## vote_PvDA         0.0403 0.0405 0.0406 0.0511     0.0404        0.0403  0.0511
## vote_VVD          0.0404 0.0405 0.0404 0.0508     0.0403        0.0391  0.0529
## vote_Christenunie 0.0403 0.0405 0.0406 0.0507     0.0404        0.0403  0.0521
## vote_PVV          0.0396 0.0402 0.0397 0.0511     0.0402        0.0398  0.0616
## vote_CDA          0.0403 0.0401 0.0401 0.0524       0.04        0.0404  0.0841
## vote_FvD          0.0403 0.0406 0.0406 0.0523     0.0404        0.0401  0.0582
## vote_SGP          0.0404 0.0405 0.0406 0.0546     0.0404        0.0403   0.054
## vote_50Plus       0.0402 0.0406 0.0405 0.0614     0.0403        0.0401  0.0749
## vote_DENK         0.0403 0.0406 0.0406   0.35     0.0403        0.0403   0.261
##                   vote_PvdD vote_Groenlinks vote_SP vote_D66 vote_PvDA vote_VVD
## g                       615             615     615      615       615      615
## V_tilt                  607             607     607      607       607      607
## age                     607             607     607      607       607      607
## male                    612             612     612      612       612      612
## time_taken              615             615     615      615       615      615
## education_num           615             615     615      615       615      615
## student                 610             610     610      610       610      610
## vote_PvdD               615             615     615      615       615      615
## vote_Groenlinks      0.0398             615     615      615       615      615
## vote_SP              0.0403          0.0396     615      615       615      615
## vote_D66             0.0402          0.0398  0.0399      615       615      615
## vote_PvDA            0.0402          0.0397  0.0402   0.0402       615      615
## vote_VVD             0.0399          0.0384  0.0398   0.0394    0.0399      615
## vote_Christenunie    0.0403            0.04  0.0403   0.0401    0.0402   0.0401
## vote_PVV             0.0402          0.0395  0.0402   0.0399    0.0401   0.0399
## vote_CDA             0.0403            0.04  0.0403   0.0403    0.0403   0.0402
## vote_FvD             0.0402          0.0395  0.0402   0.0398    0.0401     0.04
## vote_SGP             0.0403          0.0403  0.0403   0.0403    0.0403   0.0403
## vote_50Plus          0.0403          0.0402  0.0404   0.0403    0.0403   0.0403
## vote_DENK            0.0404          0.0403  0.0403   0.0403    0.0403   0.0403
##                   vote_Christenunie vote_PVV vote_CDA vote_FvD vote_SGP
## g                               615      615      615      615      615
## V_tilt                          607      607      607      607      607
## age                             607      607      607      607      607
## male                            612      612      612      612      612
## time_taken                      615      615      615      615      615
## education_num                   615      615      615      615      615
## student                         610      610      610      610      610
## vote_PvdD                       615      615      615      615      615
## vote_Groenlinks                 615      615      615      615      615
## vote_SP                         615      615      615      615      615
## vote_D66                        615      615      615      615      615
## vote_PvDA                       615      615      615      615      615
## vote_VVD                        615      615      615      615      615
## vote_Christenunie               615      615      615      615      615
## vote_PVV                     0.0403      615      615      615      615
## vote_CDA                     0.0403   0.0403      615      615      615
## vote_FvD                     0.0403   0.0404   0.0403      615      615
## vote_SGP                     0.0403   0.0403   0.0404   0.0403      615
## vote_50Plus                  0.0403   0.0404   0.0403   0.0403   0.0404
## vote_DENK                    0.0404   0.0403   0.0404   0.0403   0.0404
##                   vote_50Plus vote_DENK
## g                         615       615
## V_tilt                    607       607
## age                       607       607
## male                      612       612
## time_taken                615       615
## education_num             615       615
## student                   610       610
## vote_PvdD                 615       615
## vote_Groenlinks           615       615
## vote_SP                   615       615
## vote_D66                  615       615
## vote_PvDA                 615       615
## vote_VVD                  615       615
## vote_Christenunie         615       615
## vote_PVV                  615       615
## vote_CDA                  615       615
## vote_FvD                  615       615
## vote_SGP                  615       615
## vote_50Plus               615       615
## vote_DENK              0.0404       615
## 
## P-values for Tests of Bivariate Normality:
##                                              g                        V_tilt
## g                                                                           
## V_tilt                                   0.115                              
## age                            0.0000000000183                0.000000000046
## male                                     0.793                         0.748
## time_taken                         0.000000282                   0.000000126
## education_num     0.00000000000000000000000275 0.000000000000000000000000107
## student                                  0.792                         0.811
## vote_PvdD                            2.19e-294                     1.95e-290
## vote_Groenlinks                      9.65e-211                     3.56e-207
## vote_SP                              1.44e-279                     4.29e-277
## vote_D66                              1.5e-240                     2.03e-238
## vote_PvDA                            1.21e-281                     6.82e-279
## vote_VVD                                1e-236                     6.23e-234
## vote_Christenunie        1.44000000018514e-314         4.14000000000002e-310
## vote_PVV                             3.14e-288                     3.31e-288
## vote_CDA                 2.95999669079949e-319         1.71000023144258e-317
## vote_FvD                             2.35e-280                     1.66e-275
## vote_SGP                                     0                             0
## vote_50Plus                                  0                             0
## vote_DENK                                    0                             0
##                                                                                age
## g                                                                                 
## V_tilt                                                                            
## age                                                                               
## male                                                             0.000000000000211
## time_taken                                                  0.00000000000000000556
## education_num     0.00000000000000000000000000000000000000000000000000000000000179
## student                                                      0.0000000000000000238
## vote_PvdD                                                                7.07e-304
## vote_Groenlinks                                                          8.04e-226
## vote_SP                                                                  3.62e-290
## vote_D66                                                                 1.62e-253
## vote_PvDA                                                                2.01e-292
## vote_VVD                                                                 6.21e-249
## vote_Christenunie                                            4.94065645841247e-324
## vote_PVV                                                                 1.93e-297
## vote_CDA                                                                         0
## vote_FvD                                                                 1.96e-290
## vote_SGP                                                                         0
## vote_50Plus                                                                      0
## vote_DENK                                                                        0
##                                               male
## g                                                 
## V_tilt                                            
## age                                               
## male                                              
## time_taken                          0.000000000288
## education_num     0.000000000000000000000000000101
## student                                       <NA>
## vote_PvdD                                3.79e-302
## vote_Groenlinks                          2.99e-218
## vote_SP                                  3.81e-289
## vote_D66                                 1.48e-247
## vote_PvDA                                6.25e-290
## vote_VVD                                 2.49e-245
## vote_Christenunie            1.97626258336499e-322
## vote_PVV                                  2.7e-295
## vote_CDA                                         0
## vote_FvD                                 2.24e-288
## vote_SGP                                         0
## vote_50Plus                                      0
## vote_DENK                                        0
##                                               time_taken
## g                                                       
## V_tilt                                                  
## age                                                     
## male                                                    
## time_taken                                              
## education_num     0.000000000000000000000000000000000144
## student                                   0.000000000408
## vote_PvdD                                      2.52e-305
## vote_Groenlinks                                3.45e-221
## vote_SP                                        2.93e-290
## vote_D66                                        7.2e-251
## vote_PvDA                                       1.5e-292
## vote_VVD                                       5.98e-248
## vote_Christenunie                                      0
## vote_PVV                                       2.06e-297
## vote_CDA                                               0
## vote_FvD                                       2.59e-290
## vote_SGP                                               0
## vote_50Plus                                            0
## vote_DENK                                              0
##                                           education_num               student
## g                                                                            
## V_tilt                                                                       
## age                                                                          
## male                                                                         
## time_taken                                                                   
## education_num                                                                
## student           0.00000000000000000000000000000000737                      
## vote_PvdD                         1.23516411460312e-322             7.02e-301
## vote_Groenlinks                               2.12e-238             1.17e-217
## vote_SP                                       1.34e-308             1.44e-288
## vote_D66                                      1.61e-268             1.03e-246
## vote_PvDA                         1.16999999999998e-310             6.28e-290
## vote_VVD                                      2.38e-266             1.06e-243
## vote_Christenunie                                     0 2.91004665400494e-321
## vote_PVV                          2.76999880603478e-318             5.87e-297
## vote_CDA                                              0                     0
## vote_FvD                                      4.66e-309              2.1e-287
## vote_SGP                                              0                     0
## vote_50Plus                                           0                     0
## vote_DENK                                             0                     0
##                   vote_PvdD vote_Groenlinks vote_SP vote_D66 vote_PvDA vote_VVD
## g                                                                              
## V_tilt                                                                         
## age                                                                            
## male                                                                           
## time_taken                                                                     
## education_num                                                                  
## student                                                                        
## vote_PvdD                                                                      
## vote_Groenlinks           0                                                    
## vote_SP                   0               0                                    
## vote_D66                  0               0       0                            
## vote_PvDA                 0               0       0        0                   
## vote_VVD                  0               0       0        0         0         
## vote_Christenunie         0               0       0        0         0        0
## vote_PVV                  0               0       0        0         0        0
## vote_CDA                  0               0       0        0         0        0
## vote_FvD                  0               0       0        0         0        0
## vote_SGP                  0               0       0        0         0        0
## vote_50Plus               0               0       0        0         0        0
## vote_DENK                 0               0       0        0         0        0
##                   vote_Christenunie vote_PVV vote_CDA vote_FvD vote_SGP
## g                                                                      
## V_tilt                                                                 
## age                                                                    
## male                                                                   
## time_taken                                                             
## education_num                                                          
## student                                                                
## vote_PvdD                                                              
## vote_Groenlinks                                                        
## vote_SP                                                                
## vote_D66                                                               
## vote_PvDA                                                              
## vote_VVD                                                               
## vote_Christenunie                                                      
## vote_PVV                          0                                    
## vote_CDA                          0        0                           
## vote_FvD                          0        0        0                  
## vote_SGP                          0        0        0        0         
## vote_50Plus                       0        0        0        0        0
## vote_DENK                         0        0        0        0        0
##                   vote_50Plus
## g                            
## V_tilt                       
## age                          
## male                         
## time_taken                   
## education_num                
## student                      
## vote_PvdD                    
## vote_Groenlinks              
## vote_SP                      
## vote_D66                     
## vote_PvDA                    
## vote_VVD                     
## vote_Christenunie            
## vote_PVV                     
## vote_CDA                     
## vote_FvD                     
## vote_SGP                     
## vote_50Plus                  
## vote_DENK                   0

#model right side
model_rs_small = "g + V_tilt + age + male + education_num + time_taken"
model_rs_full = str_glue("{str_c(preds, collapse = ' + ')}")

Party stats

map_df(vote_preds, function(v) {
  score_by(
    d %>% select(g, V_tilt, male_num, age),
    moderator = d[[v]],
    extrapolate_to = 1
  ) %>% mutate(
    party = v,
    n = sum(d[[v]], na.rm = T)
  ) %>% 
    select(-moderator)
}) %>% arrange(-g)

Immigrant crime

#score
stereo_immi_accu = score_accuracy(stereo_immi,
                    origins$arrest_rate_RR,
                    methods = "all")

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

#which proportion of estimates were 1?
stereo_immi_ones = (stereo_immi == 1)
stereo_immi_ones %>% rowMeans() %>% describe2()

stereo_immi_ones %>% plyr::adply(.margins = 1, .id = NULL, .expand = F, .fun = function(x) {
  all(x == 1)
}) %>% unlist() %>% table2()

#distributions
describe2(stereo_immi_accu)

#combined dists plot
accu_combo_plot(stereo_immi_accu)

## Warning: Removed 17 rows containing non-finite values (`stat_bin()`).

## Warning: Removed 17 rows containing non-finite values (`stat_density()`).

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning: Removed 17 rows containing non-finite values (`stat_bin()`).
## Removed 17 rows containing non-finite values (`stat_density()`).

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_save("figs/crime_dists.png")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning: Removed 17 rows containing non-finite values (`stat_bin()`).
## Removed 17 rows containing non-finite values (`stat_density()`).

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#add to main
for (v in (d %>% names() %>% str_subset("^crime_"))) d[[v]] = NULL
d = cbind(d, stereo_immi_accu %>% df_add_affix(prefix = "crime_"))

#combined plot 2
accu_combo_plot(bind_cols(stereo_immi_accu, d["g"]), variables = main_accu_metrics2, x_var = "g")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/crime_g_scatter.png")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#heteroscedasticity, Reviewer 2 request
#correlation compare
d_HS = bind_cols(stereo_immi_accu, d["g"])
kirkegaard::combine_upperlower(
  .upper.tri = d_HS %>% cor(method = "pearson", use = "pairwise"),
  .lower.tri = d_HS %>% cor(method = "spearman", use = "pairwise")
)

##                pearson_r  rank_r mean_abs_error      sd sd_error sd_error_abs
## pearson_r             NA  0.9151         -0.179  0.1885   0.1885       0.0681
## rank_r             0.902      NA         -0.205  0.1866   0.1866       0.0483
## mean_abs_error    -0.302 -0.3348             NA  0.6107   0.6107       0.3858
## sd                 0.205  0.2140          0.592      NA   1.0000       0.4856
## sd_error           0.205  0.2139          0.592  1.0000       NA       0.4856
## sd_error_abs      -0.018 -0.0523          0.272 -0.0755  -0.0755           NA
## mean               0.139  0.1584          0.345  0.7492   0.7492       0.0152
## mean_error         0.139  0.1584          0.345  0.7492   0.7492       0.0152
## mean_error_abs    -0.177 -0.2233          0.704  0.1651   0.1651       0.4623
## g                  0.186  0.2117         -0.322 -0.2429  -0.2429      -0.0165
##                   mean mean_error mean_error_abs       g
## pearson_r      -0.0266    -0.0266         -0.169  0.2049
## rank_r         -0.0369    -0.0369         -0.201  0.2315
## mean_abs_error  0.8891     0.8891          0.963 -0.2996
## sd              0.6715     0.6715          0.455 -0.2337
## sd_error        0.6715     0.6715          0.455 -0.2337
## sd_error_abs    0.3222     0.3222          0.362 -0.0651
## mean                NA     1.0000          0.828 -0.2362
## mean_error      1.0000         NA          0.828 -0.2362
## mean_error_abs  0.0123     0.0123             NA -0.2420
## g              -0.1957    -0.1957         -0.154      NA

#HS testing
MAD_model = ols(mean_abs_error ~ g, data = bind_cols(stereo_immi_accu, d["g"]))
MAD_model

## Linear Regression Model
## 
## ols(formula = mean_abs_error ~ g, data = bind_cols(stereo_immi_accu, 
##     d["g"]))
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs     615    LR chi2     57.85    R2       0.090    
## sigma0.8597    d.f.            1    R2 adj   0.088    
## d.f.    613    Pr(> chi2) 0.0000    g        0.305    
## 
## Residuals
## 
##     Min      1Q  Median      3Q     Max 
## -1.0720 -0.4805 -0.2448  0.1050  5.0529 
## 
## 
##           Coef    S.E.   t     Pr(>|t|)
## Intercept  1.2336 0.0347 35.59 <0.0001 
## g         -0.2698 0.0347 -7.78 <0.0001

test_HS(resid = resid(MAD_model), x = d$g)

#plot
#first add quantiles
d_HS$mean_abs_error_10 = quantile_smooth(x = d_HS$g,
                                              y = d_HS$mean_abs_error,
                                              quantile = .10,
                                              method = "qgam")

## Estimating learning rate. Each dot corresponds to a loss evaluation. 
## qu = 0.1.................done

d_HS$mean_abs_error_90 = quantile_smooth(x = d_HS$g,
                                              y = d_HS$mean_abs_error,
                                              quantile = .90,
                                              method = "qgam")

## Estimating learning rate. Each dot corresponds to a loss evaluation. 
## qu = 0.9................done

d_HS %>% 
  ggplot(aes(g, mean_abs_error)) +
  geom_point(alpha = .2) +
  geom_smooth(method = lm, se = F) +
  geom_ribbon(mapping = aes(
    ymin = mean_abs_error_90,
    ymax = mean_abs_error_10
  ), alpha = .4)

## `geom_smooth()` using formula = 'y ~ x'

#correlation table
d %>% select(!!("crime_" + main_accu_metrics2), !!preds_num) %>% cor_matrix(p_val = T) %>% .[preds_num, "crime_" + main_accu_metrics2] %>% {set_colnames(., value = colnames(.) %>% str_replace("crime_", ""))}

##                   pearson_r  mean_abs_error mean_error mean_error_abs
## g                 "0.20***"  "-0.30***"     "-0.24***" "-0.24***"    
## V_tilt            "0.07"     "0.08"         "0.10"     "0.04"        
## age               "0.12**"   "0.22***"      "0.25***"  "0.20***"     
## time_taken        "0.02"     "-0.04"        "-0.04"    "-0.03"       
## education_num     "0.16***"  "-0.21***"     "-0.17***" "-0.16***"    
## vote_PvdD         "-0.10"    "-0.05"        "-0.07"    "-0.04"       
## vote_Groenlinks   "-0.16***" "-0.10*"       "-0.15***" "-0.07"       
## vote_SP           "0.04"     "0.10"         "0.08"     "0.09"        
## vote_D66          "0.04"     "-0.10"        "-0.10"    "-0.07"       
## vote_PvDA         "0.00"     "0.00"         "0.02"     "-0.01"       
## vote_VVD          "0.01"     "0.09"         "0.12**"   "0.09"        
## vote_Christenunie "-0.01"    "-0.06"        "-0.07"    "-0.04"       
## vote_PVV          "0.03"     "0.19***"      "0.18***"  "0.14***"     
## vote_CDA          "0.03"     "0.02"         "0.06"     "0.02"        
## vote_FvD          "0.13**"   "0.03"         "0.07"     "-0.01"       
## vote_SGP          "0.01"     "0.08"         "0.08"     "0.09"        
## vote_50Plus       "0.00"     "0.04"         "0.06"     "0.04"        
## vote_DENK         "-0.04"    "0.02"         "0.03"     "0.01"        
##                   sd_error   sd_error_abs
## g                 "-0.23***" "-0.07"     
## V_tilt            "0.20***"  "0.11*"     
## age               "0.24***"  "0.09"      
## time_taken        "0.00"     "0.00"      
## education_num     "-0.18***" "-0.07"     
## vote_PvdD         "-0.09"    "0.02"      
## vote_Groenlinks   "-0.20***" "-0.04"     
## vote_SP           "0.08"     "0.03"      
## vote_D66          "-0.18***" "-0.05"     
## vote_PvDA         "0.01"     "-0.06"     
## vote_VVD          "0.09"     "-0.02"     
## vote_Christenunie "-0.10"    "-0.04"     
## vote_PVV          "0.30***"  "0.19***"   
## vote_CDA          "0.06"     "0.01"      
## vote_FvD          "0.17***"  "0.02"      
## vote_SGP          "0.05"     "0.06"      
## vote_50Plus       "0.09"     "0.02"      
## vote_DENK         "0.05"     "0.02"

#regressions model full
crime_full_model = str_glue("crime_pearson_r ~ {model_rs_full}") %>% as.formula()
crime_small_model = str_glue("crime_pearson_r ~ {model_rs_small}") %>% as.formula()

#model fits
list(
  ols(crime_small_model, data = d %>% df_standardize(messages = F)),
  ols(crime_full_model, data = d %>% df_standardize(messages = F))
) %>% summarize_models() %>% print(n = Inf)

## # A tibble: 34 × 3
##    `Predictor/Model`                                                `1`    `2`  
##    <chr>                                                            <chr>  <chr>
##  1 Intercept                                                        -0.16… -0.4…
##  2 g                                                                0.13 … 0.17…
##  3 V_tilt                                                           0.08 … 0.06…
##  4 age                                                              0.07 … -0.0…
##  5 male                                                             0.28 … 0.23…
##  6 education_num                                                    0.12 … 0.15…
##  7 time_taken                                                       -0.01… -0.0…
##  8 First_Language2 = Dutch                                          <NA>   (ref)
##  9 First_Language2 = non-Dutch                                      <NA>   -0.0…
## 10 Birth3 = Netherlands                                             <NA>   (ref)
## 11 Birth3 = Non-Western                                             <NA>   -0.0…
## 12 Birth3 = Western                                                 <NA>   0.05…
## 13 student                                                          <NA>   -0.3…
## 14 Employment_Status = Due to start a new job within the next month <NA>   (ref)
## 15 Employment_Status = Full-Time                                    <NA>   0.50…
## 16 Employment_Status = Not in paid work                             <NA>   0.62…
## 17 Employment_Status = Other                                        <NA>   0.57…
## 18 Employment_Status = Part-Time                                    <NA>   0.73…
## 19 Employment_Status = Unemployed and job seeking                   <NA>   0.86…
## 20 vote_PvdD                                                        <NA>   -0.6…
## 21 vote_Groenlinks                                                  <NA>   -0.5…
## 22 vote_SP                                                          <NA>   0.01…
## 23 vote_D66                                                         <NA>   -0.1…
## 24 vote_PvDA                                                        <NA>   -0.0…
## 25 vote_VVD                                                         <NA>   -0.2…
## 26 vote_Christenunie                                                <NA>   -0.4…
## 27 vote_PVV                                                         <NA>   0.03…
## 28 vote_CDA                                                         <NA>   0.10…
## 29 vote_FvD                                                         <NA>   0.39…
## 30 vote_SGP                                                         <NA>   0.17…
## 31 vote_50Plus                                                      <NA>   0.15…
## 32 vote_DENK                                                        <NA>   -1.7…
## 33 R2 adj.                                                          0.070  0.133
## 34 N                                                                581    572

#lasso
#follow approach in https://rpubs.com/EmilOWK/VES_MMPI
#it cant handle missing data
d_nomiss = d %>% 
  #subset only variables for model
  .[formula.tools::get.vars(crime_full_model)] %>% 
  #remove NAs
  na.omit() %>% 
  #remove chr to fct
  map_df(function(x) {
    if (is.character(x)) return(factor(x))
    x
  }) %>% 
  #standardize
  df_standardize(messages = F)

#make a recipe
#convert logicals to 0/1
d_nomiss$male %<>% as.numeric()
d_nomiss$student %<>% as.numeric()

crime_recipe = recipe(crime_full_model, data = d_nomiss) %>% 
  # Added these: 
  step_dummy(all_nominal()) %>%
  step_zv(all_predictors())

#make a model
crime_model = 
  linear_reg(
    mixture = 1, #lasso
    penalty = tune() #tune penalty
    ) %>% 
  set_engine("glmnet")

#resampling method
set.seed(1)
crime_folds = vfold_cv(d_nomiss, v = 10)

#make workflow
crime_wf = 
  workflow() %>% 
  add_model(crime_model) %>% 
  add_recipe(crime_recipe)

#grid
grid = expand_grid(penalty = seq(0, .3, by = 0.001))

#fit
crime_fit = 
  crime_wf %>% 
  tune_grid(
    resamples = crime_folds,
    grid = grid,
    control = control_grid(
      save_pred = TRUE
      )
    )

## → A | warning: A correlation computation is required, but `estimate` is constant and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be returned.
## 
There were issues with some computations   A: x1

There were issues with some computations   A: x2

There were issues with some computations   A: x3

There were issues with some computations   A: x4

There were issues with some computations   A: x5

There were issues with some computations   A: x6

There were issues with some computations   A: x7

There were issues with some computations   A: x8

There were issues with some computations   A: x9

There were issues with some computations   A: x10

There were issues with some computations   A: x10

#best
crime_best = crime_fit %>% 
  select_best("rsq")

#metrics
collect_metrics(crime_fit) %>% 
  filter(.metric == "rsq") %>% 
  arrange(.metric)

collect_metrics(crime_fit) %>% 
  ggplot(aes(penalty, mean)) +
  geom_line() +
  facet_wrap(".metric")

## Warning: Removed 74 rows containing missing values (`geom_line()`).

#best model has ~0 penalty, so we don't need lasso here, but...
crime_wf %>% 
  finalize_workflow(crime_best) %>%
  fit(d_nomiss) %>%
  pull_workflow_fit() %>%
  tidy() %>% 
  filter(estimate != 0) %>% 
  print(n = Inf)

## Warning: `pull_workflow_fit()` was deprecated in workflows 0.2.3.
## ℹ Please use `extract_fit_parsnip()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## # A tibble: 17 × 3
##    term                                         estimate penalty
##    <chr>                                           <dbl>   <dbl>
##  1 (Intercept)                                   0.0744    0.024
##  2 g                                             0.146     0.024
##  3 V_tilt                                        0.0308    0.024
##  4 male                                          0.151     0.024
##  5 education_num                                 0.101     0.024
##  6 student                                      -0.290     0.024
##  7 vote_PvdD                                    -0.495     0.024
##  8 vote_Groenlinks                              -0.390     0.024
##  9 vote_VVD                                     -0.107     0.024
## 10 vote_Christenunie                            -0.135     0.024
## 11 vote_CDA                                      0.00152   0.024
## 12 vote_FvD                                      0.343     0.024
## 13 vote_DENK                                    -0.561     0.024
## 14 First_Language2_non.Dutch                    -0.0214    0.024
## 15 Birth3_Non.Western                           -0.0267    0.024
## 16 Employment_Status_Part.Time                   0.0765    0.024
## 17 Employment_Status_Unemployed.and.job.seeking  0.189     0.024

#BMA
set.seed(1)
BMA_fit = BMA::bic.glm(
  crime_full_model,
  data = d[formula.tools::get.vars(crime_full_model)] %>% 
    na.omit() %>% 
    df_standardize(messages = F), 
  glm.family = "gaussian"
  )

#summary
BMA_fit %>% summary()

## 
## Call:
## bic.glm.formula(f = crime_full_model, data = d[formula.tools::get.vars(crime_full_model)] %>%     na.omit() %>% df_standardize(messages = F), glm.family = "gaussian")
## 
## 
##   44  models were selected
##  Best  5  models (cumulative posterior probability =  0.445 ): 
## 
##                                                p!=0    EV        SD     
## Intercept                                      100     0.189863  0.08376
## g.x                                            100.0   0.193721  0.04406
## V_tilt.x                                         0.0   0.000000  0.00000
## age.x                                            1.0  -0.000472  0.00667
## maleTRUE.x                                      32.8   0.060031  0.09755
## time_taken.x                                     0.7  -0.000170  0.00387
## education_num.x                                 37.1   0.037768  0.05589
## First_Language2non-Dutch.x                       0.8  -0.000965  0.01677
## Birth3Non-Western.x                              1.8  -0.002988  0.03006
## Birth3Western.x                                  0.0   0.000000  0.00000
## studentTRUE.x                                  100.0  -0.334980  0.08570
## Employment_StatusFull-Time.x                     2.4  -0.003987  0.02966
## Employment_StatusNot in paid work.x              0.8  -0.000781  0.01469
## Employment_StatusOther.x                         0.9  -0.001087  0.01726
## Employment_StatusPart-Time.x                     1.5   0.001285  0.01526
## Employment_StatusUnemployed and job seeking.x   13.0   0.029788  0.08886
## vote_PvdD.x                                     87.1  -0.563552  0.29378
## vote_Groenlinks.x                              100.0  -0.456738  0.11822
## vote_SP.x                                        0.0   0.000000  0.00000
## vote_D66.x                                       0.0   0.000000  0.00000
## vote_PvDA.x                                      0.0   0.000000  0.00000
## vote_VVD.x                                       8.3  -0.018174  0.07169
## vote_Christenunie.x                              1.1  -0.003061  0.03935
## vote_PVV.x                                       0.0   0.000000  0.00000
## vote_CDA.x                                       0.0   0.000000  0.00000
## vote_FvD.x                                      38.8   0.171883  0.24440
## vote_SGP.x                                       0.0   0.000000  0.00000
## vote_50Plus.x                                    0.0   0.000000  0.00000
## vote_DENK.x                                      0.8  -0.012720  0.21913
##                                                                         
## nVar                                                                    
## BIC                                                                     
## post prob                                                               
##                                                model 1     model 2   
## Intercept                                          0.2448      0.2016
## g.x                                                0.2091      0.2104
## V_tilt.x                                            .           .    
## age.x                                               .           .    
## maleTRUE.x                                          .           .    
## time_taken.x                                        .           .    
## education_num.x                                     .           .    
## First_Language2non-Dutch.x                          .           .    
## Birth3Non-Western.x                                 .           .    
## Birth3Western.x                                     .           .    
## studentTRUE.x                                     -0.3429     -0.3243
## Employment_StatusFull-Time.x                        .           .    
## Employment_StatusNot in paid work.x                 .           .    
## Employment_StatusOther.x                            .           .    
## Employment_StatusPart-Time.x                        .           .    
## Employment_StatusUnemployed and job seeking.x       .           .    
## vote_PvdD.x                                       -0.6639     -0.6274
## vote_Groenlinks.x                                 -0.4796     -0.4426
## vote_SP.x                                           .           .    
## vote_D66.x                                          .           .    
## vote_PvDA.x                                         .           .    
## vote_VVD.x                                          .           .    
## vote_Christenunie.x                                 .           .    
## vote_PVV.x                                          .           .    
## vote_CDA.x                                          .           .    
## vote_FvD.x                                          .          0.4299
## vote_SGP.x                                          .           .    
## vote_50Plus.x                                       .           .    
## vote_DENK.x                                         .           .    
##                                                                      
## nVar                                                 4           5   
## BIC                                            -3019.5446  -3018.7595
## post prob                                          0.135       0.091 
##                                                model 3     model 4   
## Intercept                                          0.1386      0.1919
## g.x                                                0.2001      0.1750
## V_tilt.x                                            .           .    
## age.x                                               .           .    
## maleTRUE.x                                         0.1832       .    
## time_taken.x                                        .           .    
## education_num.x                                     .          0.1025
## First_Language2non-Dutch.x                          .           .    
## Birth3Non-Western.x                                 .           .    
## Birth3Western.x                                     .           .    
## studentTRUE.x                                     -0.3497     -0.2970
## Employment_StatusFull-Time.x                        .           .    
## Employment_StatusNot in paid work.x                 .           .    
## Employment_StatusOther.x                            .           .    
## Employment_StatusPart-Time.x                        .           .    
## Employment_StatusUnemployed and job seeking.x       .           .    
## vote_PvdD.x                                       -0.6198     -0.6425
## vote_Groenlinks.x                                 -0.4466     -0.4526
## vote_SP.x                                           .           .    
## vote_D66.x                                          .           .    
## vote_PvDA.x                                         .           .    
## vote_VVD.x                                          .           .    
## vote_Christenunie.x                                 .           .    
## vote_PVV.x                                          .           .    
## vote_CDA.x                                          .           .    
## vote_FvD.x                                          .          0.4642
## vote_SGP.x                                          .           .    
## vote_50Plus.x                                       .           .    
## vote_DENK.x                                         .           .    
##                                                                      
## nVar                                                 5           6   
## BIC                                            -3018.5217  -3018.2761
## post prob                                          0.081       0.071 
##                                                model 5   
## Intercept                                          0.2390
## g.x                                                0.1765
## V_tilt.x                                            .    
## age.x                                               .    
## maleTRUE.x                                          .    
## time_taken.x                                        .    
## education_num.x                                    0.0942
## First_Language2non-Dutch.x                          .    
## Birth3Non-Western.x                                 .    
## Birth3Western.x                                     .    
## studentTRUE.x                                     -0.3191
## Employment_StatusFull-Time.x                        .    
## Employment_StatusNot in paid work.x                 .    
## Employment_StatusOther.x                            .    
## Employment_StatusPart-Time.x                        .    
## Employment_StatusUnemployed and job seeking.x       .    
## vote_PvdD.x                                       -0.6805
## vote_Groenlinks.x                                 -0.4915
## vote_SP.x                                           .    
## vote_D66.x                                          .    
## vote_PvDA.x                                         .    
## vote_VVD.x                                          .    
## vote_Christenunie.x                                 .    
## vote_PVV.x                                          .    
## vote_CDA.x                                          .    
## vote_FvD.x                                          .    
## vote_SGP.x                                          .    
## vote_50Plus.x                                       .    
## vote_DENK.x                                         .    
##                                                          
## nVar                                                 5   
## BIC                                            -3018.1773
## post prob                                          0.068 
## 
##   1  observations deleted due to missingness.

Muslim bias

#compute the muslim bias metrics
assert_that(all(origins$ISO == names(stereo_immi)))

## [1] TRUE

muslim_bias_metrics = score_bias_metrics(
  estimates = stereo_immi,
  criterion = origins$arrest_rate_RR,
  bias_var = origins$Muslim
) %>% df_add_affix(prefix = "muslim_")

#summary
describe2(muslim_bias_metrics)

#combined dists plot
accu_combo_plot(muslim_bias_metrics, variables = c("muslim_bias_r", "muslim_bias_wmean", "muslim_resid_r"))

## Warning: Removed 17 rows containing non-finite values (`stat_bin()`).

## Warning: Removed 17 rows containing non-finite values (`stat_density()`).

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning: Removed 17 rows containing non-finite values (`stat_bin()`).
## Removed 17 rows containing non-finite values (`stat_density()`).

GG_save("figs/crime_muslim_bias_dists.png")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning: Removed 17 rows containing non-finite values (`stat_bin()`).
## Removed 17 rows containing non-finite values (`stat_density()`).

#representative example
calc_row_representativeness(muslim_bias_metrics) %>% 
  arrange(mean)

#values
muslim_bias_metrics[49, ]

case49 = tibble(
    estimate = stereo_immi[49, ] %>% unlist(),
    criterion = origins$arrest_rate_RR,
    error = estimate - criterion,
    Muslim = origins$Muslim,
    origin = origins$origin
    )

case49

#plot
GG_scatter(case49,
  x_var = "Muslim", y_var = "error", case_names = "origin", text_pos = "br"
  ) +
  scale_x_continuous("Muslim% (home country)", labels = scales::percent, limits = c(-.05, NA)) +
  scale_y_continuous("Error (estimate - crieterion)")

## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/muslim_bias_example_case49.png")

## `geom_smooth()` using formula = 'y ~ x'

#cors, add accuracy metrics
bind_cols(
  muslim_bias_metrics,
  stereo_immi_accu %>% select(!!main_accu_metrics2)
) %>% 
  cor_matrix(p_val = T)

##                       muslim_bias_r muslim_bias_r_abs muslim_bias_wmean
## muslim_bias_r         "1"           "-0.12**"         "0.94***"        
## muslim_bias_r_abs     "-0.12**"     "1"               "0.08"           
## muslim_bias_wmean     "0.94***"     "0.08"            "1"              
## muslim_bias_wmean_abs "0.30***"     "0.84***"         "0.52***"        
## muslim_resid_r        "0.59***"     "0.07"            "0.60***"        
## muslim_resid_r_abs    "0.34***"     "-0.13**"         "0.31***"        
## pearson_r             "0.37***"     "0.12**"          "0.42***"        
## mean_abs_error        "0.42***"     "-0.29***"        "0.41***"        
## mean_error            "0.52***"     "-0.42***"        "0.48***"        
## mean_error_abs        "0.30***"     "-0.20***"        "0.30***"        
## sd_error              "0.86***"     "-0.23***"        "0.82***"        
## sd_error_abs          "0.32***"     "0.41***"         "0.51***"        
##                       muslim_bias_wmean_abs muslim_resid_r muslim_resid_r_abs
## muslim_bias_r         "0.30***"             "0.59***"      "0.34***"         
## muslim_bias_r_abs     "0.84***"             "0.07"         "-0.13**"         
## muslim_bias_wmean     "0.52***"             "0.60***"      "0.31***"         
## muslim_bias_wmean_abs "1"                   "0.23***"      "0.01"            
## muslim_resid_r        "0.23***"             "1"            "0.42***"         
## muslim_resid_r_abs    "0.01"                "0.42***"      "1"               
## pearson_r             "0.21***"             "0.68***"      "0.17***"         
## mean_abs_error        "0.02"                "-0.05"        "0.01"            
## mean_error            "-0.08"               "0.07"         "0.06"            
## mean_error_abs        "0.04"                "-0.06"        "0.00"            
## sd_error              "0.24***"             "0.24***"      "0.14***"         
## sd_error_abs          "0.66***"             "0.11*"        "0.01"            
##                       pearson_r  mean_abs_error mean_error mean_error_abs
## muslim_bias_r         "0.37***"  "0.42***"      "0.52***"  "0.30***"     
## muslim_bias_r_abs     "0.12**"   "-0.29***"     "-0.42***" "-0.20***"    
## muslim_bias_wmean     "0.42***"  "0.41***"      "0.48***"  "0.30***"     
## muslim_bias_wmean_abs "0.21***"  "0.02"         "-0.08"    "0.04"        
## muslim_resid_r        "0.68***"  "-0.05"        "0.07"     "-0.06"       
## muslim_resid_r_abs    "0.17***"  "0.01"         "0.06"     "0.00"        
## pearson_r             "1"        "-0.18***"     "-0.03"    "-0.17***"    
## mean_abs_error        "-0.18***" "1"            "0.89***"  "0.96***"     
## mean_error            "-0.03"    "0.89***"      "1"        "0.83***"     
## mean_error_abs        "-0.17***" "0.96***"      "0.83***"  "1"           
## sd_error              "0.19***"  "0.61***"      "0.67***"  "0.46***"     
## sd_error_abs          "0.07"     "0.39***"      "0.32***"  "0.36***"     
##                       sd_error   sd_error_abs
## muslim_bias_r         "0.86***"  "0.32***"   
## muslim_bias_r_abs     "-0.23***" "0.41***"   
## muslim_bias_wmean     "0.82***"  "0.51***"   
## muslim_bias_wmean_abs "0.24***"  "0.66***"   
## muslim_resid_r        "0.24***"  "0.11*"     
## muslim_resid_r_abs    "0.14***"  "0.01"      
## pearson_r             "0.19***"  "0.07"      
## mean_abs_error        "0.61***"  "0.39***"   
## mean_error            "0.67***"  "0.32***"   
## mean_error_abs        "0.46***"  "0.36***"   
## sd_error              "1"        "0.49***"   
## sd_error_abs          "0.49***"  "1"

#merge to main
for (v in names(muslim_bias_metrics)) d[[v]] = NULL
d = cbind(
  d,
  muslim_bias_metrics
)

#scatterplots
GG_scatter(d, x_var = "muslim_bias_r", y_var = "crime_pearson_r", text_pos = "tl") +
  geom_smooth(method = "loess") +
  geom_vline(xintercept = 0, linetype = "dotted") +
  scale_x_continuous("Muslim bias (r metric)") +
  scale_y_continuous("Pearson r") +
  GG_scatter(d, x_var = "muslim_bias_r", y_var = "crime_mean_abs_error", text_pos = "tr") +
  geom_smooth(method = "loess") +
  geom_vline(xintercept = 0, linetype = "dotted") +
  scale_x_continuous("Muslim bias (r metric)") +
  scale_y_continuous("Mean absolute error")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/crime_muslim_bias_r_accu.png")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#how do they relate to predictors?
d %>% 
  select(
    #bias measures
    !!names(muslim_bias_metrics),
    #predictors
    !!preds_num
  ) %>% 
  cor_matrix(p_val = T) %>% 
  .[preds_num, names(muslim_bias_metrics)]

##                   muslim_bias_r muslim_bias_r_abs muslim_bias_wmean
## g                 "-0.20***"    "0.20***"         "-0.16***"       
## V_tilt            "0.13**"      "-0.05"           "0.14***"        
## age               "0.13***"     "-0.11*"          "0.12**"         
## time_taken        "-0.01"       "0.06"            "-0.01"          
## education_num     "-0.14***"    "0.12**"          "-0.12**"        
## vote_PvdD         "-0.14***"    "0.01"            "-0.13**"        
## vote_Groenlinks   "-0.22***"    "0.09"            "-0.20***"       
## vote_SP           "0.08"        "-0.04"           "0.08"           
## vote_D66          "-0.16***"    "0.05"            "-0.14***"       
## vote_PvDA         "0.02"        "-0.01"           "0.01"           
## vote_VVD          "0.09"        "-0.09"           "0.05"           
## vote_Christenunie "-0.09"       "0.03"            "-0.08"          
## vote_PVV          "0.23***"     "-0.03"           "0.22***"        
## vote_CDA          "0.05"        "-0.10"           "0.06"           
## vote_FvD          "0.24***"     "-0.01"           "0.21***"        
## vote_SGP          "0.06"        "0.03"            "0.07"           
## vote_50Plus       "0.11*"       "-0.04"           "0.09"           
## vote_DENK         "0.01"        "-0.03"           "-0.01"          
##                   muslim_bias_wmean_abs muslim_resid_r muslim_resid_r_abs
## g                 "0.07"                "0.02"         "-0.09"           
## V_tilt            "0.06"                "-0.03"        "0.02"            
## age               "0.00"                "-0.06"        "-0.03"           
## time_taken        "0.04"                "-0.03"        "-0.03"           
## education_num     "0.03"                "0.06"         "-0.04"           
## vote_PvdD         "-0.03"               "-0.14***"     "-0.05"           
## vote_Groenlinks   "-0.01"               "-0.12**"      "-0.08"           
## vote_SP           "0.01"                "0.03"         "0.05"            
## vote_D66          "-0.04"               "0.01"         "-0.04"           
## vote_PvDA         "-0.01"               "-0.02"        "0.01"            
## vote_VVD          "-0.05"               "0.01"         "0.04"            
## vote_Christenunie "-0.02"               "-0.06"        "-0.08"           
## vote_PVV          "0.10"                "0.02"         "0.09"            
## vote_CDA          "-0.02"               "0.03"         "0.05"            
## vote_FvD          "0.05"                "0.21***"      "0.01"            
## vote_SGP          "0.07"                "0.03"         "0.01"            
## vote_50Plus       "0.00"                "0.03"         "0.06"            
## vote_DENK         "-0.02"               "-0.04"        "-0.01"

#combo plots with intelligence
accu_combo_plot(d, variables = names(muslim_bias_metrics), x_var = "g")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/muslim_bias_intelligence.png")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#combined models
map(names(muslim_bias_metrics)[-1], function(v) {
  #make formula
  form = str_glue("{v} ~ {model_rs_full}") %>% as.formula()
  
  #fit
  fit = ols(form, data = d %>% df_standardize(messages = F))
  
  fit
}) %>% 
  set_names(names(muslim_bias_metrics[-1])) %>% 
  summarize_models() %>% 
  print(n=Inf)

## # A tibble: 34 × 6
##    `Predictor/Model`   muslim_bias_r_abs muslim_bias_wmean muslim_bias_wmean_abs
##    <chr>               <chr>             <chr>             <chr>                
##  1 Intercept           -0.39 (0.353)     0.04 (0.331)      -0.20 (0.362)        
##  2 g                   0.21 (0.048***)   -0.09 (0.045)     0.11 (0.049)         
##  3 V_tilt              0.02 (0.047)      0.08 (0.044)      0.07 (0.048)         
##  4 age                 -0.15 (0.056*)    -0.01 (0.052)     -0.09 (0.057)        
##  5 male                -0.12 (0.088)     0.02 (0.083)      -0.10 (0.090)        
##  6 time_taken          0.04 (0.042)      -0.01 (0.039)     0.03 (0.043)         
##  7 education_num       0.07 (0.049)      0.01 (0.046)      0.05 (0.051)         
##  8 First_Language2 = … (ref)             (ref)             (ref)                
##  9 First_Language2 = … 0.17 (0.162)      0.04 (0.152)      0.09 (0.166)         
## 10 Birth3 = Netherlan… (ref)             (ref)             (ref)                
## 11 Birth3 = Non-Weste… -0.22 (0.166)     -0.05 (0.155)     -0.09 (0.170)        
## 12 Birth3 = Western    0.09 (0.319)      -0.13 (0.299)     0.08 (0.327)         
## 13 student             -0.03 (0.112)     -0.14 (0.105)     -0.09 (0.115)        
## 14 Employment_Status … (ref)             (ref)             (ref)                
## 15 Employment_Status … 0.59 (0.335)      0.01 (0.314)      0.36 (0.343)         
## 16 Employment_Status … 0.71 (0.353)      0.33 (0.331)      0.56 (0.362)         
## 17 Employment_Status … 0.51 (0.351)      -0.06 (0.329)     0.29 (0.359)         
## 18 Employment_Status … 0.70 (0.335)      0.10 (0.314)      0.50 (0.343)         
## 19 Employment_Status … 0.63 (0.348)      0.05 (0.327)      0.41 (0.357)         
## 20 vote_PvdD           -0.22 (0.231)     -0.69 (0.216**)   -0.33 (0.236)        
## 21 vote_Groenlinks     -0.10 (0.151)     -0.48 (0.142***)  -0.25 (0.155)        
## 22 vote_SP             -0.25 (0.209)     0.18 (0.196)      -0.08 (0.215)        
## 23 vote_D66            -0.18 (0.189)     -0.41 (0.178)     -0.35 (0.194)        
## 24 vote_PvDA           -0.18 (0.205)     0.03 (0.193)      -0.14 (0.211)        
## 25 vote_VVD            -0.42 (0.165)     0.06 (0.155)      -0.30 (0.170)        
## 26 vote_Christenunie   -0.03 (0.281)     -0.61 (0.263)     -0.34 (0.287)        
## 27 vote_PVV            -0.07 (0.223)     0.69 (0.209***)   0.31 (0.228)         
## 28 vote_CDA            -0.84 (0.317*)    0.31 (0.297)      -0.30 (0.325)        
## 29 vote_FvD            -0.12 (0.212)     0.77 (0.199***)   0.13 (0.217)         
## 30 vote_SGP            0.49 (0.881)      1.50 (0.827)      1.45 (0.903)         
## 31 vote_50Plus         -0.37 (0.559)     1.01 (0.525)      0.02 (0.573)         
## 32 vote_DENK           -1.66 (1.961)     -0.44 (1.839)     -0.89 (2.009)        
## 33 R2 adj.             0.065             0.171             0.009                
## 34 N                   589               589               589                  
## # ℹ 2 more variables: muslim_resid_r <chr>, muslim_resid_r_abs <chr>

Occupations and sex

#score
stereo_occu_accu = score_accuracy(stereo_occu,
                                  occu$male_pct,
                                  methods = "all")

#distributions
stereo_occu_accu %>% describe2()

#plot
accu_combo_plot(stereo_occu_accu, variables = main_accu_metrics2)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_save("figs/occu_accu_dists.png")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#plot with intelligence
accu_combo_plot(bind_cols(stereo_occu_accu, d["g"]), variables = main_accu_metrics2, x_var = "g")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/occu_accu_g.png")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

accu_combo_plot(bind_cols(stereo_occu_accu, d["g"]) %>% filter(pearson_r > 0), variables = main_accu_metrics2, x_var = "g")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#add to main
for (v in (d %>% names() %>% str_subset("^occu_"))) d[[v]] = NULL
d = cbind(d, stereo_occu_accu %>% df_add_affix(prefix = "occu_"))

#correlation table
d %>% select(!!("occu_" + main_accu_metrics2), !!preds_num) %>% cor_matrix(p_val = T) %>% .[preds_num, "occu_" + main_accu_metrics2] %>% {set_colnames(., value = colnames(.) %>% str_replace("occu_", ""))}

##                   pearson_r mean_abs_error mean_error mean_error_abs sd_error 
## g                 "0.25***" "-0.20***"     "-0.15***" "-0.05"        "0.06"   
## V_tilt            "0.04"    "-0.09"        "0.11*"    "-0.06"        "0.14***"
## age               "0.00"    "0.01"         "0.05"     "0.07"         "0.05"   
## time_taken        "0.03"    "-0.05"        "0.00"     "-0.04"        "0.05"   
## education_num     "0.12**"  "-0.13**"      "-0.02"    "-0.12**"      "0.07"   
## vote_PvdD         "0.01"    "0.02"         "-0.05"    "-0.01"        "-0.09"  
## vote_Groenlinks   "-0.01"   "0.01"         "-0.03"    "0.00"         "0.00"   
## vote_SP           "0.01"    "-0.05"        "0.09"     "0.00"         "0.05"   
## vote_D66          "0.03"    "-0.01"        "-0.06"    "0.04"         "-0.06"  
## vote_PvDA         "0.04"    "-0.02"        "0.07"     "0.01"         "-0.02"  
## vote_VVD          "0.02"    "-0.04"        "0.02"     "-0.09"        "0.05"   
## vote_Christenunie "0.05"    "-0.05"        "-0.04"    "-0.02"        "-0.02"  
## vote_PVV          "-0.07"   "0.07"         "0.07"     "0.00"         "0.06"   
## vote_CDA          "-0.01"   "0.00"         "0.01"     "-0.05"        "-0.03"  
## vote_FvD          "0.05"    "-0.07"        "0.05"     "-0.04"        "0.08"   
## vote_SGP          "0.01"    "-0.02"        "-0.02"    "-0.01"        "0.07"   
## vote_50Plus       "0.01"    "0.01"         "-0.04"    "0.05"         "0.00"   
## vote_DENK         "0.01"    "-0.02"        "0.01"     "-0.04"        "0.01"   
##                   sd_error_abs
## g                 "-0.06"     
## V_tilt            "-0.15***"  
## age               "-0.04"     
## time_taken        "-0.05"     
## education_num     "-0.06"     
## vote_PvdD         "0.09"      
## vote_Groenlinks   "0.00"      
## vote_SP           "-0.06"     
## vote_D66          "0.05"      
## vote_PvDA         "0.02"      
## vote_VVD          "-0.04"     
## vote_Christenunie "0.01"      
## vote_PVV          "-0.06"     
## vote_CDA          "0.03"      
## vote_FvD          "-0.08"     
## vote_SGP          "-0.07"     
## vote_50Plus       "-0.01"     
## vote_DENK         "-0.01"

#regressions models
occu_small_model = str_glue("occu_pearson_r ~ {model_rs_small}") %>% as.formula()
occu_full_model = str_glue("occu_pearson_r ~ {model_rs_full}") %>% as.formula()

list(
  ols(occu_small_model, data = d %>% df_standardize(messages = F)),
  ols(occu_full_model, data = d %>% df_standardize(messages = F))
) %>% 
  summarize_models() %>% 
  print(n = Inf)

## # A tibble: 34 × 3
##    `Predictor/Model`                                                `1`    `2`  
##    <chr>                                                            <chr>  <chr>
##  1 Intercept                                                        0.04 … -0.3…
##  2 g                                                                0.27 … 0.25…
##  3 V_tilt                                                           0.07 … 0.06…
##  4 age                                                              -0.08… -0.1…
##  5 male                                                             -0.09… -0.0…
##  6 education_num                                                    0.02 … 0.03…
##  7 time_taken                                                       -0.01… 0.01…
##  8 First_Language2 = Dutch                                          <NA>   (ref)
##  9 First_Language2 = non-Dutch                                      <NA>   -0.6…
## 10 Birth3 = Netherlands                                             <NA>   (ref)
## 11 Birth3 = Non-Western                                             <NA>   0.31…
## 12 Birth3 = Western                                                 <NA>   0.15…
## 13 student                                                          <NA>   -0.0…
## 14 Employment_Status = Due to start a new job within the next month <NA>   (ref)
## 15 Employment_Status = Full-Time                                    <NA>   0.23…
## 16 Employment_Status = Not in paid work                             <NA>   0.31…
## 17 Employment_Status = Other                                        <NA>   0.50…
## 18 Employment_Status = Part-Time                                    <NA>   0.32…
## 19 Employment_Status = Unemployed and job seeking                   <NA>   0.39…
## 20 vote_PvdD                                                        <NA>   0.05…
## 21 vote_Groenlinks                                                  <NA>   -0.0…
## 22 vote_SP                                                          <NA>   0.11…
## 23 vote_D66                                                         <NA>   0.07…
## 24 vote_PvDA                                                        <NA>   0.31…
## 25 vote_VVD                                                         <NA>   0.20…
## 26 vote_Christenunie                                                <NA>   0.38…
## 27 vote_PVV                                                         <NA>   -0.0…
## 28 vote_CDA                                                         <NA>   0.20…
## 29 vote_FvD                                                         <NA>   0.44…
## 30 vote_SGP                                                         <NA>   0.33…
## 31 vote_50Plus                                                      <NA>   0.51…
## 32 vote_DENK                                                        <NA>   0.57…
## 33 R2 adj.                                                          0.062  0.068
## 34 N                                                                598    589

#lasso and BMA
d_nomiss = d %>% 
  #subset only variables for model
  .[formula.tools::get.vars(occu_full_model)] %>% 
  #remove NAs
  na.omit() %>% 
  #remove chr to fct
  map_df(function(x) {
    if (is.character(x)) return(factor(x))
    x
  }) %>% 
  #standardize
  df_standardize(messages = F)



#make a recipe
#convert logicals to 0/1
d_nomiss$male %<>% as.numeric()
d_nomiss$student %<>% as.numeric()
occu_recipe = recipe(occu_full_model, data = d_nomiss) %>% 
  step_dummy(all_nominal()) %>%
  step_zv(all_predictors())

#make a model
occu_model = 
  linear_reg(
    mixture = 1, #lasso
    penalty = tune() #tune penalty
    ) %>% 
  set_engine("glmnet")

#resampling method
set.seed(1)
occu_folds = vfold_cv(d_nomiss, v = 10)

#make workflow
occu_wf = 
  workflow() %>% 
  add_model(occu_model) %>% 
  add_recipe(occu_recipe)

#grid
grid = expand_grid(penalty = seq(0, .3, by = 0.001))

#fit
occu_fit = 
  occu_wf %>% 
  tune_grid(
    resamples = occu_folds,
    grid = grid,
    control = control_grid(
      save_pred = TRUE
      )
    )

## → A | warning: A correlation computation is required, but `estimate` is constant and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be returned.
## 
There were issues with some computations   A: x1

There were issues with some computations   A: x2

There were issues with some computations   A: x3

There were issues with some computations   A: x4

There were issues with some computations   A: x5

There were issues with some computations   A: x6

There were issues with some computations   A: x7

There were issues with some computations   A: x8

There were issues with some computations   A: x9

There were issues with some computations   A: x10

There were issues with some computations   A: x10

#best
occu_best = occu_fit %>% 
  select_best("rmse")

#plot
collect_metrics(occu_fit) %>% 
  ggplot(aes(penalty, mean)) +
  geom_line() +
  facet_wrap(".metric")

## Warning: Removed 31 rows containing missing values (`geom_line()`).

#some penalty may be needed
occu_wf %>% 
  finalize_workflow(occu_best) %>%
  fit(d_nomiss) %>%
  pull_workflow_fit() %>%
  tidy() %>% 
  filter(estimate != 0) %>% 
  print(n = Inf)

## # A tibble: 4 × 3
##   term                       estimate penalty
##   <chr>                         <dbl>   <dbl>
## 1 (Intercept)                0.0230     0.065
## 2 g                          0.165      0.065
## 3 vote_FvD                   0.000239   0.065
## 4 First_Language2_non.Dutch -0.283      0.065

#BMA
BMA_fit = BMA::bic.glm(
  occu_full_model,
  data = d[formula.tools::get.vars(occu_full_model)] %>% 
    na.omit() %>% 
    df_standardize(messages = F), 
  glm.family = "gaussian"
  )

#summary
BMA_fit %>% summary()

## 
## Call:
## bic.glm.formula(f = occu_full_model, data = d[formula.tools::get.vars(occu_full_model)] %>%     na.omit() %>% df_standardize(messages = F), glm.family = "gaussian")
## 
## 
##   14  models were selected
##  Best  5  models (cumulative posterior probability =  0.727 ): 
## 
##                                                p!=0    EV        SD     
## Intercept                                      100     0.038152  0.04743
## g.x                                            100.0   0.223790  0.04061
## V_tilt.x                                         2.4   0.000761  0.00787
## age.x                                            3.4  -0.001592  0.01132
## maleTRUE.x                                       4.8  -0.005406  0.02982
## time_taken.x                                     0.0   0.000000  0.00000
## education_num.x                                  0.0   0.000000  0.00000
## First_Language2non-Dutch.x                      95.9  -0.486116  0.18127
## Birth3Non-Western.x                             14.9   0.049557  0.13368
## Birth3Western.x                                  0.0   0.000000  0.00000
## studentTRUE.x                                    0.0   0.000000  0.00000
## Employment_StatusFull-Time.x                     2.5  -0.001646  0.01653
## Employment_StatusNot in paid work.x              0.0   0.000000  0.00000
## Employment_StatusOther.x                         3.6   0.005728  0.03873
## Employment_StatusPart-Time.x                     0.0   0.000000  0.00000
## Employment_StatusUnemployed and job seeking.x    0.0   0.000000  0.00000
## vote_PvdD.x                                      0.0   0.000000  0.00000
## vote_Groenlinks.x                                3.5  -0.004492  0.03161
## vote_SP.x                                        0.0   0.000000  0.00000
## vote_D66.x                                       0.0   0.000000  0.00000
## vote_PvDA.x                                      2.9   0.004984  0.04193
## vote_VVD.x                                       0.0   0.000000  0.00000
## vote_Christenunie.x                              4.0   0.013017  0.08210
## vote_PVV.x                                       2.9  -0.005665  0.04651
## vote_CDA.x                                       0.0   0.000000  0.00000
## vote_FvD.x                                       5.9   0.016744  0.08074
## vote_SGP.x                                       0.0   0.000000  0.00000
## vote_50Plus.x                                    2.2   0.007414  0.09377
## vote_DENK.x                                      0.0   0.000000  0.00000
##                                                                         
## nVar                                                                    
## BIC                                                                     
## post prob                                                               
##                                                model 1                 
## Intercept                                          0.039892137740372716
## g.x                                                0.221187184761164612
## V_tilt.x                                                   .           
## age.x                                                      .           
## maleTRUE.x                                                 .           
## time_taken.x                                               .           
## education_num.x                                            .           
## First_Language2non-Dutch.x                        -0.489509773522493086
## Birth3Non-Western.x                                        .           
## Birth3Western.x                                            .           
## studentTRUE.x                                              .           
## Employment_StatusFull-Time.x                               .           
## Employment_StatusNot in paid work.x                        .           
## Employment_StatusOther.x                                   .           
## Employment_StatusPart-Time.x                               .           
## Employment_StatusUnemployed and job seeking.x              .           
## vote_PvdD.x                                                .           
## vote_Groenlinks.x                                          .           
## vote_SP.x                                                  .           
## vote_D66.x                                                 .           
## vote_PvDA.x                                                .           
## vote_VVD.x                                                 .           
## vote_Christenunie.x                                        .           
## vote_PVV.x                                                 .           
## vote_CDA.x                                                 .           
## vote_FvD.x                                                 .           
## vote_SGP.x                                                 .           
## vote_50Plus.x                                              .           
## vote_DENK.x                                                .           
##                                                                        
## nVar                                                 2                 
## BIC                                            -3154.530843935569464520
## post prob                                          0.430               
##                                                model 2                 
## Intercept                                          0.023615840808100821
## g.x                                                0.228209231189108075
## V_tilt.x                                                   .           
## age.x                                                      .           
## maleTRUE.x                                                 .           
## time_taken.x                                               .           
## education_num.x                                            .           
## First_Language2non-Dutch.x                        -0.601194557400140273
## Birth3Non-Western.x                                0.332169078205226731
## Birth3Western.x                                            .           
## studentTRUE.x                                              .           
## Employment_StatusFull-Time.x                               .           
## Employment_StatusNot in paid work.x                        .           
## Employment_StatusOther.x                                   .           
## Employment_StatusPart-Time.x                               .           
## Employment_StatusUnemployed and job seeking.x              .           
## vote_PvdD.x                                                .           
## vote_Groenlinks.x                                          .           
## vote_SP.x                                                  .           
## vote_D66.x                                                 .           
## vote_PvDA.x                                                .           
## vote_VVD.x                                                 .           
## vote_Christenunie.x                                        .           
## vote_PVV.x                                                 .           
## vote_CDA.x                                                 .           
## vote_FvD.x                                                 .           
## vote_SGP.x                                                 .           
## vote_50Plus.x                                              .           
## vote_DENK.x                                                .           
##                                                                        
## nVar                                                 3                 
## BIC                                            -3152.411993364266891149
## post prob                                          0.149               
##                                                model 3                 
## Intercept                                          0.021841926057689497
## g.x                                                0.222633369029350658
## V_tilt.x                                                   .           
## age.x                                                      .           
## maleTRUE.x                                                 .           
## time_taken.x                                               .           
## education_num.x                                            .           
## First_Language2non-Dutch.x                        -0.488620762508526318
## Birth3Non-Western.x                                        .           
## Birth3Western.x                                            .           
## studentTRUE.x                                              .           
## Employment_StatusFull-Time.x                               .           
## Employment_StatusNot in paid work.x                        .           
## Employment_StatusOther.x                                   .           
## Employment_StatusPart-Time.x                               .           
## Employment_StatusUnemployed and job seeking.x              .           
## vote_PvdD.x                                                .           
## vote_Groenlinks.x                                          .           
## vote_SP.x                                                  .           
## vote_D66.x                                                 .           
## vote_PvDA.x                                                .           
## vote_VVD.x                                                 .           
## vote_Christenunie.x                                        .           
## vote_PVV.x                                                 .           
## vote_CDA.x                                                 .           
## vote_FvD.x                                         0.286186544660270659
## vote_SGP.x                                                 .           
## vote_50Plus.x                                              .           
## vote_DENK.x                                                .           
##                                                                        
## nVar                                                 3                 
## BIC                                            -3150.539785432079042948
## post prob                                          0.059               
##                                                model 4                 
## Intercept                                          0.101943095696314517
## g.x                                                0.225130479235725156
## V_tilt.x                                                   .           
## age.x                                                      .           
## maleTRUE.x                                        -0.112968177910601181
## time_taken.x                                               .           
## education_num.x                                            .           
## First_Language2non-Dutch.x                        -0.500159054409320292
## Birth3Non-Western.x                                        .           
## Birth3Western.x                                            .           
## studentTRUE.x                                              .           
## Employment_StatusFull-Time.x                               .           
## Employment_StatusNot in paid work.x                        .           
## Employment_StatusOther.x                                   .           
## Employment_StatusPart-Time.x                               .           
## Employment_StatusUnemployed and job seeking.x              .           
## vote_PvdD.x                                                .           
## vote_Groenlinks.x                                          .           
## vote_SP.x                                                  .           
## vote_D66.x                                                 .           
## vote_PvDA.x                                                .           
## vote_VVD.x                                                 .           
## vote_Christenunie.x                                        .           
## vote_PVV.x                                                 .           
## vote_CDA.x                                                 .           
## vote_FvD.x                                                 .           
## vote_SGP.x                                                 .           
## vote_50Plus.x                                              .           
## vote_DENK.x                                                .           
##                                                                        
## nVar                                                 3                 
## BIC                                            -3150.137815655873509968
## post prob                                          0.048               
##                                                model 5                 
## Intercept                                         -0.000000000000000157
## g.x                                                0.241232557811191739
## V_tilt.x                                                   .           
## age.x                                                      .           
## maleTRUE.x                                                 .           
## time_taken.x                                               .           
## education_num.x                                            .           
## First_Language2non-Dutch.x                                 .           
## Birth3Non-Western.x                                        .           
## Birth3Western.x                                            .           
## studentTRUE.x                                              .           
## Employment_StatusFull-Time.x                               .           
## Employment_StatusNot in paid work.x                        .           
## Employment_StatusOther.x                                   .           
## Employment_StatusPart-Time.x                               .           
## Employment_StatusUnemployed and job seeking.x              .           
## vote_PvdD.x                                                .           
## vote_Groenlinks.x                                          .           
## vote_SP.x                                                  .           
## vote_D66.x                                                 .           
## vote_PvDA.x                                                .           
## vote_VVD.x                                                 .           
## vote_Christenunie.x                                        .           
## vote_PVV.x                                                 .           
## vote_CDA.x                                                 .           
## vote_FvD.x                                                 .           
## vote_SGP.x                                                 .           
## vote_50Plus.x                                              .           
## vote_DENK.x                                                .           
##                                                                        
## nVar                                                 1                 
## BIC                                            -3149.825246696569593041
## post prob                                          0.041               
## 
##   1  observations deleted due to missingness.

Province income

#summary stats of true values
prov_income$prov_income %>% describe2()

#score
stereo_prov_accu = score_accuracy(stereo_prov,
                    prov_income$prov_income,
                    methods = "all")

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, use = "p"): the standard
## deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

## Warning in cor(criterion, df[x, ] %>% unlist, method = "spearman", use = "p"):
## the standard deviation is zero

#score without bad data
stereo_prov_accu_good = stereo_prov_accu %>% filter(mean > 1000, sd > 0)
prov_good_cases_idx = stereo_prov_accu %>% mutate(row = 1:n()) %>% filter(mean > 1000, sd > 0) %>% pull(row)

#distributions
stereo_prov_accu %>% describe2()

stereo_prov_accu_good %>% describe2()

#below 0 r?
(stereo_prov_accu$pearson_r<0) %>% describe2()

## Warning in psych::describe(as.data.frame(x), na.rm = TRUE, interp = FALSE, :
## You were trying to describe a non-numeric data.frame or vector which describe
## converted to numeric.

(stereo_prov_accu_good$pearson_r<0) %>% describe2()

## Warning in psych::describe(as.data.frame(x), na.rm = TRUE, interp = FALSE, :
## You were trying to describe a non-numeric data.frame or vector which describe
## converted to numeric.

#plot
accu_combo_plot(stereo_prov_accu, variables = main_accu_metrics2)

## Warning: Removed 30 rows containing non-finite values (`stat_bin()`).

## Warning: Removed 30 rows containing non-finite values (`stat_density()`).

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning: Removed 30 rows containing non-finite values (`stat_bin()`).
## Removed 30 rows containing non-finite values (`stat_density()`).

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_save("figs/prov_dists.png")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning: Removed 30 rows containing non-finite values (`stat_bin()`).
## Removed 30 rows containing non-finite values (`stat_density()`).

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#plot with intelligence
accu_combo_plot(bind_cols(stereo_prov_accu, d["g"]), variables = main_accu_metrics2, x_var = "g")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/prov_g_accu.png")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#without outlying values
accu_combo_plot(bind_cols(stereo_prov_accu, d["g"]) %>% filter(pearson_r > 0), variables = main_accu_metrics2, x_var = "g")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#good data
accu_combo_plot(bind_cols(stereo_prov_accu_good, d["g"][prov_good_cases_idx, , drop = F]), variables = main_accu_metrics2, x_var = "g")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#add to main
d = cbind(d, stereo_prov_accu %>% df_add_affix(prefix = "prov_"))

#regressions model full
prov_small_model = str_glue("prov_pearson_r ~ {model_rs_small}") %>% as.formula()
prov_full_model = str_glue("prov_pearson_r ~ {model_rs_full}") %>% as.formula()

list(
  ols(prov_small_model, data = d %>% df_standardize(messages = F)),
  ols(prov_full_model, data = d %>% df_standardize(messages = F))
) %>% 
  summarize_models() %>% 
  print(n = Inf)

## # A tibble: 34 × 3
##    `Predictor/Model`                                                `1`    `2`  
##    <chr>                                                            <chr>  <chr>
##  1 Intercept                                                        -0.15… -0.2…
##  2 g                                                                0.22 … 0.21…
##  3 V_tilt                                                           -0.02… -0.0…
##  4 age                                                              0.08 … 0.07…
##  5 male                                                             0.27 … 0.31…
##  6 education_num                                                    0.08 … 0.05…
##  7 time_taken                                                       -0.09… -0.0…
##  8 First_Language2 = Dutch                                          <NA>   (ref)
##  9 First_Language2 = non-Dutch                                      <NA>   0.12…
## 10 Birth3 = Netherlands                                             <NA>   (ref)
## 11 Birth3 = Non-Western                                             <NA>   0.17…
## 12 Birth3 = Western                                                 <NA>   0.12…
## 13 student                                                          <NA>   -0.1…
## 14 Employment_Status = Due to start a new job within the next month <NA>   (ref)
## 15 Employment_Status = Full-Time                                    <NA>   0.28…
## 16 Employment_Status = Not in paid work                             <NA>   0.29…
## 17 Employment_Status = Other                                        <NA>   0.11…
## 18 Employment_Status = Part-Time                                    <NA>   0.34…
## 19 Employment_Status = Unemployed and job seeking                   <NA>   0.15…
## 20 vote_PvdD                                                        <NA>   0.11…
## 21 vote_Groenlinks                                                  <NA>   -0.0…
## 22 vote_SP                                                          <NA>   -0.1…
## 23 vote_D66                                                         <NA>   -0.1…
## 24 vote_PvDA                                                        <NA>   -0.3…
## 25 vote_VVD                                                         <NA>   -0.0…
## 26 vote_Christenunie                                                <NA>   -0.1…
## 27 vote_PVV                                                         <NA>   -0.5…
## 28 vote_CDA                                                         <NA>   -0.0…
## 29 vote_FvD                                                         <NA>   -0.1…
## 30 vote_SGP                                                         <NA>   -1.0…
## 31 vote_50Plus                                                      <NA>   -0.1…
## 32 vote_DENK                                                        <NA>   -0.7…
## 33 R2 adj.                                                          0.090  0.086
## 34 N                                                                568    559

#lasso and BMA
d_nomiss = d %>% 
  #subset only variables for model
  .[formula.tools::get.vars(prov_full_model)] %>% 
  #remove NAs
  na.omit() %>% 
  #remove chr to fct
  map_df(function(x) {
    if (is.character(x)) return(factor(x))
    x
  }) %>% 
  #standardize
  df_standardize()

## Skipped male because it is a logical (boolean)
## Skipped First_Language2 because it is class factor
## Skipped Birth3 because it is class factor
## Skipped student because it is a logical (boolean)
## Skipped Employment_Status because it is class factor
## Skipped vote_PvdD because it is ranged from 0 to 1 (a proportion, maybe)
## Skipped vote_Groenlinks because it is ranged from 0 to 1 (a proportion, maybe)
## Skipped vote_SP because it is ranged from 0 to 1 (a proportion, maybe)
## Skipped vote_D66 because it is ranged from 0 to 1 (a proportion, maybe)
## Skipped vote_PvDA because it is ranged from 0 to 1 (a proportion, maybe)
## Skipped vote_VVD because it is ranged from 0 to 1 (a proportion, maybe)
## Skipped vote_Christenunie because it is ranged from 0 to 1 (a proportion, maybe)
## Skipped vote_PVV because it is ranged from 0 to 1 (a proportion, maybe)
## Skipped vote_CDA because it is ranged from 0 to 1 (a proportion, maybe)
## Skipped vote_FvD because it is ranged from 0 to 1 (a proportion, maybe)
## Skipped vote_SGP because it is ranged from 0 to 1 (a proportion, maybe)
## Skipped vote_50Plus because it is ranged from 0 to 1 (a proportion, maybe)
## Skipped vote_DENK because it is ranged from 0 to 1 (a proportion, maybe)

#make a recipe
#convert logicals to 0/1
d_nomiss$male %<>% as.numeric()
d_nomiss$student %<>% as.numeric()
prov_recipe = recipe(prov_full_model, data = d_nomiss) %>% 
  step_dummy(all_nominal()) %>%
  step_zv(all_predictors())

#make a model
prov_model = 
  linear_reg(
    mixture = 1, #lasso
    penalty = tune() #tune penalty
    ) %>% 
  set_engine("glmnet")

#resampling method
set.seed(1)
prov_folds = vfold_cv(d_nomiss, v = 10)

#make workflow
prov_wf = 
  workflow() %>% 
  add_model(prov_model) %>% 
  add_recipe(prov_recipe)

#grid
grid = expand_grid(penalty = seq(0, .5, by = 0.001))

#fit
prov_fit = 
  prov_wf %>% 
  tune_grid(
    resamples = prov_folds,
    grid = grid,
    control = control_grid(
      save_pred = TRUE
      )
    )

## → A | warning: A correlation computation is required, but `estimate` is constant and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be returned.
## 
There were issues with some computations   A: x1

There were issues with some computations   A: x2

There were issues with some computations   A: x3

There were issues with some computations   A: x4

There were issues with some computations   A: x5

There were issues with some computations   A: x6

There were issues with some computations   A: x7

There were issues with some computations   A: x8

There were issues with some computations   A: x9

There were issues with some computations   A: x10

There were issues with some computations   A: x10

#best
prov_best = prov_fit %>% 
  select_best("rmse")

#plot
collect_metrics(prov_fit) %>% 
  ggplot(aes(penalty, mean)) +
  geom_line() +
  facet_wrap(".metric")

## Warning: Removed 216 rows containing missing values (`geom_line()`).

#some penalty may be needed
prov_wf %>% 
  finalize_workflow(prov_best) %>%
  fit(d_nomiss) %>%
  pull_workflow_fit() %>%
  tidy() %>% 
  filter(estimate != 0) %>% 
  print(n = Inf)

## # A tibble: 7 × 3
##   term                    estimate penalty
##   <chr>                      <dbl>   <dbl>
## 1 (Intercept)              -0.0524   0.076
## 2 g                         0.152    0.076
## 3 male                      0.128    0.076
## 4 education_num             0.0284   0.076
## 5 student                  -0.0217   0.076
## 6 vote_PVV                 -0.128    0.076
## 7 Employment_Status_Other  -0.0248   0.076

#BMA
BMA_fit = BMA::bic.glm(
  prov_full_model,
  data = d[formula.tools::get.vars(prov_full_model)] %>% 
    na.omit() %>% 
    df_standardize(messages = F), 
  glm.family = "gaussian"
  )

#summary
BMA_fit %>% summary()

## 
## Call:
## bic.glm.formula(f = prov_full_model, data = d[formula.tools::get.vars(prov_full_model)] %>%     na.omit() %>% df_standardize(messages = F), glm.family = "gaussian")
## 
## 
##   48  models were selected
##  Best  5  models (cumulative posterior probability =  0.393 ): 
## 
##                                                p!=0    EV        SD     
## Intercept                                      100    -0.119949  0.07596
## g.x                                            100.0   0.232714  0.04434
## V_tilt.x                                         0.0   0.000000  0.00000
## age.x                                            5.2   0.003681  0.01863
## maleTRUE.x                                      97.4   0.265476  0.09182
## time_taken.x                                    27.4  -0.025650  0.04708
## education_num.x                                 11.2   0.009342  0.03013
## First_Language2non-Dutch.x                       3.6   0.007999  0.05072
## Birth3Non-Western.x                              3.0   0.006238  0.04462
## Birth3Western.x                                  0.8   0.002054  0.03563
## studentTRUE.x                                   20.6  -0.038626  0.08549
## Employment_StatusFull-Time.x                     3.6   0.004448  0.02823
## Employment_StatusNot in paid work.x              0.0   0.000000  0.00000
## Employment_StatusOther.x                        12.3  -0.032219  0.09855
## Employment_StatusPart-Time.x                     0.7   0.000497  0.00964
## Employment_StatusUnemployed and job seeking.x    1.9  -0.002950  0.02722
## vote_PvdD.x                                      0.9   0.001880  0.02840
## vote_Groenlinks.x                                0.0   0.000000  0.00000
## vote_SP.x                                        0.0   0.000000  0.00000
## vote_D66.x                                       0.0   0.000000  0.00000
## vote_PvDA.x                                      4.8  -0.013215  0.07073
## vote_VVD.x                                       1.1   0.001682  0.02097
## vote_Christenunie.x                              0.0   0.000000  0.00000
## vote_PVV.x                                      36.4  -0.173311  0.25931
## vote_CDA.x                                       0.7   0.001580  0.03243
## vote_FvD.x                                       0.0   0.000000  0.00000
## vote_SGP.x                                       1.0  -0.009005  0.12406
## vote_50Plus.x                                    0.0   0.000000  0.00000
## vote_DENK.x                                      0.0   0.000000  0.00000
##                                                                         
## nVar                                                                    
## BIC                                                                     
## post prob                                                               
##                                                model 1     model 2   
## Intercept                                         -0.1479     -0.1246
## g.x                                                0.2383      0.2250
## V_tilt.x                                            .           .    
## age.x                                               .           .    
## maleTRUE.x                                         0.2712      0.2742
## time_taken.x                                        .           .    
## education_num.x                                     .           .    
## First_Language2non-Dutch.x                          .           .    
## Birth3Non-Western.x                                 .           .    
## Birth3Western.x                                     .           .    
## studentTRUE.x                                       .           .    
## Employment_StatusFull-Time.x                        .           .    
## Employment_StatusNot in paid work.x                 .           .    
## Employment_StatusOther.x                            .           .    
## Employment_StatusPart-Time.x                        .           .    
## Employment_StatusUnemployed and job seeking.x       .           .    
## vote_PvdD.x                                         .           .    
## vote_Groenlinks.x                                   .           .    
## vote_SP.x                                           .           .    
## vote_D66.x                                          .           .    
## vote_PvDA.x                                         .           .    
## vote_VVD.x                                          .           .    
## vote_Christenunie.x                                 .           .    
## vote_PVV.x                                          .         -0.4582
## vote_CDA.x                                          .           .    
## vote_FvD.x                                          .           .    
## vote_SGP.x                                          .           .    
## vote_50Plus.x                                       .           .    
## vote_DENK.x                                         .           .    
##                                                                      
## nVar                                                 2           3   
## BIC                                            -2955.5799  -2954.6771
## post prob                                          0.129       0.082 
##                                                model 3     model 4   
## Intercept                                         -0.1508     -0.0459
## g.x                                                0.2561      0.2152
## V_tilt.x                                            .           .    
## age.x                                               .           .    
## maleTRUE.x                                         0.2764      0.2743
## time_taken.x                                      -0.0945       .    
## education_num.x                                     .           .    
## First_Language2non-Dutch.x                          .           .    
## Birth3Non-Western.x                                 .           .    
## Birth3Western.x                                     .           .    
## studentTRUE.x                                       .         -0.2026
## Employment_StatusFull-Time.x                        .           .    
## Employment_StatusNot in paid work.x                 .           .    
## Employment_StatusOther.x                            .           .    
## Employment_StatusPart-Time.x                        .           .    
## Employment_StatusUnemployed and job seeking.x       .           .    
## vote_PvdD.x                                         .           .    
## vote_Groenlinks.x                                   .           .    
## vote_SP.x                                           .           .    
## vote_D66.x                                          .           .    
## vote_PvDA.x                                         .           .    
## vote_VVD.x                                          .           .    
## vote_Christenunie.x                                 .           .    
## vote_PVV.x                                          .         -0.5261
## vote_CDA.x                                          .           .    
## vote_FvD.x                                          .           .    
## vote_SGP.x                                          .           .    
## vote_50Plus.x                                       .           .    
## vote_DENK.x                                         .           .    
##                                                                      
## nVar                                                 3           4   
## BIC                                            -2954.5015  -2954.0462
## post prob                                          0.075       0.060 
##                                                model 5   
## Intercept                                         -0.1486
## g.x                                                0.2063
## V_tilt.x                                            .    
## age.x                                               .    
## maleTRUE.x                                         0.2724
## time_taken.x                                        .    
## education_num.x                                    0.0893
## First_Language2non-Dutch.x                          .    
## Birth3Non-Western.x                                 .    
## Birth3Western.x                                     .    
## studentTRUE.x                                       .    
## Employment_StatusFull-Time.x                        .    
## Employment_StatusNot in paid work.x                 .    
## Employment_StatusOther.x                            .    
## Employment_StatusPart-Time.x                        .    
## Employment_StatusUnemployed and job seeking.x       .    
## vote_PvdD.x                                         .    
## vote_Groenlinks.x                                   .    
## vote_SP.x                                           .    
## vote_D66.x                                          .    
## vote_PvDA.x                                         .    
## vote_VVD.x                                          .    
## vote_Christenunie.x                                 .    
## vote_PVV.x                                          .    
## vote_CDA.x                                          .    
## vote_FvD.x                                          .    
## vote_SGP.x                                          .    
## vote_50Plus.x                                       .    
## vote_DENK.x                                         .    
##                                                          
## nVar                                                 3   
## BIC                                            -2953.4976
## post prob                                          0.046 
## 
##   1  observations deleted due to missingness.

General accuracy

#correlations among accuracy metrics
d_accu_all = d %>% 
  dplyr::select(matches("pearson"), 
                matches("mean_abs_error"), 
                matches("sd_error$"), 
                matches("sd_error_abs"), 
                matches("mean_error$"),
                matches("mean_error_abs")
                ) %>% as_tibble()

#cors
wtd.cors(d_accu_all) %>% GG_heatmap(reorder_vars = F, font_size = 3)

GG_save("figs/accu_metrics_heatmap.png")

#chosen metrics
domains = c("crime_", "occu_", "prov_")
d_accu = d %>% 
  dplyr::select(!!domains + "pearson_r", !!domains + "mean_abs_error")

#cors
wtd.cors(d_accu)

##                      crime_pearson_r occu_pearson_r prov_pearson_r
## crime_pearson_r                1.000          0.230          0.155
## occu_pearson_r                 0.230          1.000          0.121
## prov_pearson_r                 0.155          0.121          1.000
## crime_mean_abs_error          -0.179         -0.258         -0.127
## occu_mean_abs_error           -0.254         -0.915         -0.123
## prov_mean_abs_error           -0.151         -0.171         -0.254
##                      crime_mean_abs_error occu_mean_abs_error
## crime_pearson_r                    -0.179              -0.254
## occu_pearson_r                     -0.258              -0.915
## prov_pearson_r                     -0.127              -0.123
## crime_mean_abs_error                1.000               0.197
## occu_mean_abs_error                 0.197               1.000
## prov_mean_abs_error                 0.178               0.179
##                      prov_mean_abs_error
## crime_pearson_r                   -0.151
## occu_pearson_r                    -0.171
## prov_pearson_r                    -0.254
## crime_mean_abs_error               0.178
## occu_mean_abs_error                0.179
## prov_mean_abs_error                1.000

#factor analyze default
stereo_accu_fa = fa(d_accu)
stereo_accu_fa

## Factor Analysis using method =  minres
## Call: fa(r = d_accu)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                        MR1    h2   u2 com
## crime_pearson_r      -0.30 0.088 0.91   1
## occu_pearson_r       -0.94 0.892 0.11   1
## prov_pearson_r       -0.19 0.036 0.96   1
## crime_mean_abs_error  0.28 0.081 0.92   1
## occu_mean_abs_error   0.92 0.846 0.15   1
## prov_mean_abs_error   0.24 0.058 0.94   1
## 
##                 MR1
## SS loadings    2.00
## Proportion Var 0.33
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  15  with the objective function =  2.13 with Chi Square =  1304
## df of  the model are 9  and the objective function was  0.24 
## 
## The root mean square of the residuals (RMSR) is  0.08 
## The df corrected root mean square of the residuals is  0.11 
## 
## The harmonic n.obs is  600 with the empirical chi square  126  with prob <  0.00000000000000000000008 
## The total n.obs was  615  with Likelihood Chi Square =  144  with prob <  0.000000000000000000000000014 
## 
## Tucker Lewis Index of factoring reliability =  0.825
## RMSEA index =  0.156  and the 90 % confidence intervals are  0.134 0.179
## BIC =  86.4
## Fit based upon off diagonal values = 0.92
## Measures of factor score adequacy             
##                                                    MR1
## Correlation of (regression) scores with factors   0.96
## Multiple R square of scores with factors          0.92
## Minimum correlation of possible factor scores     0.85

#ranks
stereo_accu_fa_rank = fa(d_accu %>% df_rank())
stereo_accu_fa_rank

## Factor Analysis using method =  minres
## Call: fa(r = d_accu %>% df_rank())
## Standardized loadings (pattern matrix) based upon correlation matrix
##                        MR1    h2    u2 com
## crime_pearson_r      -0.32 0.101 0.899   1
## occu_pearson_r       -0.95 0.901 0.099   1
## prov_pearson_r       -0.18 0.034 0.966   1
## crime_mean_abs_error  0.32 0.102 0.898   1
## occu_mean_abs_error   0.87 0.752 0.248   1
## prov_mean_abs_error   0.31 0.098 0.902   1
## 
##                 MR1
## SS loadings    1.99
## Proportion Var 0.33
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  15  with the objective function =  1.9 with Chi Square =  1161
## df of  the model are 9  and the objective function was  0.24 
## 
## The root mean square of the residuals (RMSR) is  0.08 
## The df corrected root mean square of the residuals is  0.11 
## 
## The harmonic n.obs is  615 with the empirical chi square  127  with prob <  0.000000000000000000000058 
## The total n.obs was  615  with Likelihood Chi Square =  148  with prob <  0.0000000000000000000000000019 
## 
## Tucker Lewis Index of factoring reliability =  0.797
## RMSEA index =  0.159  and the 90 % confidence intervals are  0.137 0.182
## BIC =  90.6
## Fit based upon off diagonal values = 0.93
## Measures of factor score adequacy             
##                                                    MR1
## Correlation of (regression) scores with factors   0.96
## Multiple R square of scores with factors          0.92
## Minimum correlation of possible factor scores     0.84

#method variation
stereo_accu_fa_all = fa_all_methods(d_accu, messages = F)
stereo_accu_fa_all$scores = stereo_accu_fa_all$scores * -1

#z mean
stereo_accu_fa_all$scores$zmean = d_accu %>% 
  #reverse MAEs
  {
    t(.) %>% `*`(c(1, 1, 1, -1, -1, -1)) %>% t() %>% as.data.frame()
  } %>% 
  df_standardize() %>% 
  rowMeans(na.rm = T) %>% 
  multiply_by(1)

#compare scores
stereo_accu_fa_all$scores %>% GG_heatmap()

#best method
fa_best_method = stereo_accu_fa_all$scores %>% 
  wtd.cors() %>% 
  {
    diag(.) = NA
    .
  } %>% 
  colMeans(na.rm=T) %>% 
  sort(decreasing = T)

fa_best_method

##        tenBerge_pa      regression_pa       Thurstone_pa     regression_ols 
##              0.991              0.991              0.991              0.990 
##      Thurstone_ols       tenBerge_ols    tenBerge_minres  regression_minres 
##              0.990              0.990              0.990              0.990 
##   Thurstone_minres       Bartlett_wls       tenBerge_gls     regression_gls 
##              0.990              0.990              0.990              0.990 
##      Thurstone_gls        Bartlett_pa       Bartlett_ols    Bartlett_minres 
##              0.990              0.988              0.988              0.988 
##  regression_minchi   Thurstone_minchi    tenBerge_minchi       Bartlett_gls 
##              0.987              0.987              0.987              0.987 
##    Bartlett_minchi        tenBerge_ml      regression_ml       Thurstone_ml 
##              0.986              0.985              0.985              0.985 
##        Bartlett_ml regression_minrank  Thurstone_minrank   tenBerge_minrank 
##              0.985              0.983              0.983              0.983 
##   Bartlett_minrank       tenBerge_wls     regression_wls      Thurstone_wls 
##              0.983              0.979              0.979              0.979 
##              zmean 
##              0.812

#use z-means as primary and alternatively the EFA scores
#also composites by metric
d %<>% mutate(
  g_accu_UWFA = stereo_accu_fa_all$scores$zmean %>% 
    standardize() %>% 
    multiply_by(1),
  
  g_accu_EFA = stereo_accu_fa_all$scores[[names(fa_best_method)[1]]] %>% 
    standardize() %>% 
    multiply_by(1),
  
  g_accu_pearson = d_accu %>% 
  select(!!domains + "pearson_r") %>% 
  df_standardize() %>% 
  rowMeans(na.rm = T) %>% 
  multiply_by(1),
  
  g_accu_MAE = d_accu %>% 
  select(!!domains + "mean_abs_error") %>% 
  df_standardize() %>% 
  rowMeans(na.rm = T) %>% 
  multiply_by(-1)
)


#correlations for each domain
d %>% 
  select(g, 
         contains("g_accu"), 
         !!domains + "pearson_r", 
         !!domains + "mean_abs_error") %>% 
  cor_matrix()

##                           g g_accu_UWFA g_accu_EFA g_accu_pearson g_accu_MAE
## g                     1.000       0.394      0.257          0.352      0.353
## g_accu_UWFA           0.394       1.000      0.826          0.892      0.896
## g_accu_EFA            0.257       0.826      1.000          0.730      0.741
## g_accu_pearson        0.352       0.892      0.730          1.000      0.600
## g_accu_MAE            0.353       0.896      0.741          0.600      1.000
## crime_pearson_r       0.205       0.547      0.282          0.693      0.288
## occu_pearson_r        0.249       0.752      0.980          0.687      0.663
## prov_pearson_r        0.257       0.504      0.198          0.654      0.249
## crime_mean_abs_error -0.300      -0.546     -0.260         -0.295     -0.679
## occu_mean_abs_error  -0.204      -0.742     -0.964         -0.650     -0.679
## prov_mean_abs_error  -0.212      -0.529     -0.274         -0.273     -0.670
##                      crime_pearson_r occu_pearson_r prov_pearson_r
## g                              0.205          0.249          0.257
## g_accu_UWFA                    0.547          0.752          0.504
## g_accu_EFA                     0.282          0.980          0.198
## g_accu_pearson                 0.693          0.687          0.654
## g_accu_MAE                     0.288          0.663          0.249
## crime_pearson_r                1.000          0.230          0.155
## occu_pearson_r                 0.230          1.000          0.121
## prov_pearson_r                 0.155          0.121          1.000
## crime_mean_abs_error          -0.179         -0.258         -0.127
## occu_mean_abs_error           -0.254         -0.915         -0.123
## prov_mean_abs_error           -0.151         -0.171         -0.254
##                      crime_mean_abs_error occu_mean_abs_error
## g                                  -0.300              -0.204
## g_accu_UWFA                        -0.546              -0.742
## g_accu_EFA                         -0.260              -0.964
## g_accu_pearson                     -0.295              -0.650
## g_accu_MAE                         -0.679              -0.679
## crime_pearson_r                    -0.179              -0.254
## occu_pearson_r                     -0.258              -0.915
## prov_pearson_r                     -0.127              -0.123
## crime_mean_abs_error                1.000               0.197
## occu_mean_abs_error                 0.197               1.000
## prov_mean_abs_error                 0.178               0.179
##                      prov_mean_abs_error
## g                                 -0.212
## g_accu_UWFA                       -0.529
## g_accu_EFA                        -0.274
## g_accu_pearson                    -0.273
## g_accu_MAE                        -0.670
## crime_pearson_r                   -0.151
## occu_pearson_r                    -0.171
## prov_pearson_r                    -0.254
## crime_mean_abs_error               0.178
## occu_mean_abs_error                0.179
## prov_mean_abs_error                1.000

#plot main
GG_scatter(d, "g", "g_accu_UWFA") +
  xlab("Intelligence") +
  ylab("General factor of stereotype accuracy") +
  geom_smooth()

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

GG_save("figs/ga_g.png")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

#nonlinear pattern test
g_accu_linear = ols(g_accu_UWFA ~ g, data = d)
g_accu_linear

## Linear Regression Model
## 
## ols(formula = g_accu_UWFA ~ g, data = d)
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs     615    LR chi2    103.64    R2       0.155    
## sigma0.9199    d.f.            1    R2 adj   0.154    
## d.f.    613    Pr(> chi2) 0.0000    g        0.446    
## 
## Residuals
## 
##     Min      1Q  Median      3Q     Max 
## -4.4830 -0.3866  0.1349  0.6070  1.8700 
## 
## 
##           Coef   S.E.   t     Pr(>|t|)
## Intercept 0.0000 0.0371  0.00 1.0000  
## g         0.3938 0.0371 10.61 <0.0001

g_accu_rcs = ols(g_accu_UWFA ~ rcs(g), data = d)
g_accu_rcs

## Linear Regression Model
## 
## ols(formula = g_accu_UWFA ~ rcs(g), data = d)
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs     615    LR chi2    127.92    R2       0.188    
## sigma0.9042    d.f.            4    R2 adj   0.182    
## d.f.    610    Pr(> chi2) 0.0000    g        0.450    
## 
## Residuals
## 
##     Min      1Q  Median      3Q     Max 
## -4.5405 -0.4150  0.1410  0.5784  2.1654 
## 
## 
##           Coef     S.E.   t     Pr(>|t|)
## Intercept   0.7390 0.2504  2.95 0.0033  
## g           1.0146 0.1811  5.60 <0.0001 
## g'         -2.2087 0.9478 -2.33 0.0201  
## g''         9.6950 5.2077  1.86 0.0631  
## g'''      -13.9889 8.3482 -1.68 0.0943

lrtest(g_accu_linear, g_accu_rcs)

## 
## Model 1: g_accu_UWFA ~ g
## Model 2: g_accu_UWFA ~ rcs(g)
## 
## L.R. Chisq       d.f.          P 
## 24.2730556  3.0000000  0.0000219

#regressions model full
ga_small_model = str_glue("g_accu_UWFA ~ {model_rs_small}") %>% as.formula()
ga_full_model = str_glue("g_accu_UWFA ~ {model_rs_full}") %>% as.formula()

#models
list(
  ols(ga_small_model, data = d %>% df_standardize(messages = F)),
  ols(ga_full_model, data = d %>% df_standardize(messages = F))
) %>% 
  summarize_models() %>% 
  print(n = Inf)

## # A tibble: 34 × 3
##    `Predictor/Model`                                                `1`    `2`  
##    <chr>                                                            <chr>  <chr>
##  1 Intercept                                                        -0.08… -0.3…
##  2 g                                                                0.39 … 0.37…
##  3 V_tilt                                                           0.12 … 0.12…
##  4 age                                                              -0.12… -0.1…
##  5 male                                                             0.13 … 0.14…
##  6 education_num                                                    0.10 … 0.11…
##  7 time_taken                                                       -0.05… -0.0…
##  8 First_Language2 = Dutch                                          <NA>   (ref)
##  9 First_Language2 = non-Dutch                                      <NA>   -0.4…
## 10 Birth3 = Netherlands                                             <NA>   (ref)
## 11 Birth3 = Non-Western                                             <NA>   0.20…
## 12 Birth3 = Western                                                 <NA>   0.04…
## 13 student                                                          <NA>   -0.2…
## 14 Employment_Status = Due to start a new job within the next month <NA>   (ref)
## 15 Employment_Status = Full-Time                                    <NA>   0.31…
## 16 Employment_Status = Not in paid work                             <NA>   0.32…
## 17 Employment_Status = Other                                        <NA>   0.37…
## 18 Employment_Status = Part-Time                                    <NA>   0.46…
## 19 Employment_Status = Unemployed and job seeking                   <NA>   0.54…
## 20 vote_PvdD                                                        <NA>   -0.2…
## 21 vote_Groenlinks                                                  <NA>   -0.1…
## 22 vote_SP                                                          <NA>   -0.0…
## 23 vote_D66                                                         <NA>   -0.0…
## 24 vote_PvDA                                                        <NA>   -0.0…
## 25 vote_VVD                                                         <NA>   -0.0…
## 26 vote_Christenunie                                                <NA>   0.19…
## 27 vote_PVV                                                         <NA>   -0.4…
## 28 vote_CDA                                                         <NA>   -0.0…
## 29 vote_FvD                                                         <NA>   0.25…
## 30 vote_SGP                                                         <NA>   -0.1…
## 31 vote_50Plus                                                      <NA>   0.37…
## 32 vote_DENK                                                        <NA>   -0.3…
## 33 R2 adj.                                                          0.180  0.187
## 34 N                                                                598    589

#lasso and BMA
d_nomiss = d %>% 
  #subset only variables for model
  .[formula.tools::get.vars(ga_full_model)] %>% 
  #remove NAs
  na.omit() %>% 
  #remove chr to fct
  map_df(function(x) {
    if (is.character(x)) return(factor(x))
    x
  }) %>% 
  #standardize
  df_standardize(messages = F)

#make a recipe
#convert logicals to 0/1
d_nomiss$male %<>% as.numeric()
d_nomiss$student %<>% as.numeric()
ga_recipe = recipe(ga_full_model, data = d_nomiss) %>% 
  step_dummy(all_nominal()) %>%
  step_zv(all_predictors())

#make a model
ga_model = 
  linear_reg(
    mixture = 1, #lasso
    penalty = tune() #tune penalty
    ) %>% 
  set_engine("glmnet")

#resampling method
set.seed(1)
ga_folds = vfold_cv(d_nomiss, v = 10)

#make workflow
ga_wf = 
  workflow() %>% 
  add_model(ga_model) %>% 
  add_recipe(ga_recipe)

#grid
grid = expand_grid(penalty = seq(0, .5, by = 0.001))

#fit
ga_fit = 
  ga_wf %>% 
  tune_grid(
    resamples = ga_folds,
    grid = grid,
    control = control_grid(
      save_pred = TRUE
      )
    )

## → A | warning: A correlation computation is required, but `estimate` is constant and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be returned.
## 
There were issues with some computations   A: x1

There were issues with some computations   A: x2

There were issues with some computations   A: x3

There were issues with some computations   A: x4

There were issues with some computations   A: x5

There were issues with some computations   A: x6

There were issues with some computations   A: x7

There were issues with some computations   A: x8

There were issues with some computations   A: x9

There were issues with some computations   A: x10

There were issues with some computations   A: x10

#best
ga_best = ga_fit %>% 
  select_best("rmse")

#plot
collect_metrics(ga_fit) %>% 
  ggplot(aes(penalty, mean)) +
  geom_line() +
  facet_wrap(".metric")

## Warning: Removed 84 rows containing missing values (`geom_line()`).

#some penalty may be needed
ga_wf %>% 
  finalize_workflow(ga_best) %>%
  fit(d_nomiss) %>%
  pull_workflow_fit() %>%
  tidy() %>% 
  filter(estimate != 0)

#BMA
BMA_fit = BMA::bic.glm(
  ga_full_model,
  data = d[formula.tools::get.vars(ga_full_model)] %>% 
    na.omit() %>% 
    df_standardize(messages = F), 
  glm.family = "gaussian"
  )

#summary
BMA_fit %>% summary()

## 
## Call:
## bic.glm.formula(f = ga_full_model, data = d[formula.tools::get.vars(ga_full_model)] %>%     na.omit() %>% df_standardize(messages = F), glm.family = "gaussian")
## 
## 
##   64  models were selected
##  Best  5  models (cumulative posterior probability =  0.27 ): 
## 
##                                                p!=0    EV        SD     
## Intercept                                      100     0.032354  0.06020
## g.x                                            100.0   0.361003  0.05074
## V_tilt.x                                        27.1   0.027901  0.05127
## age.x                                           46.5  -0.061811  0.07640
## maleTRUE.x                                       3.1   0.003730  0.02480
## time_taken.x                                     1.2  -0.000478  0.00607
## education_num.x                                 67.6   0.077390  0.06351
## First_Language2non-Dutch.x                      55.2  -0.201015  0.20906
## Birth3Non-Western.x                              1.0   0.001930  0.02444
## Birth3Western.x                                  0.0   0.000000  0.00000
## studentTRUE.x                                   19.5  -0.045348  0.10205
## Employment_StatusFull-Time.x                     0.0   0.000000  0.00000
## Employment_StatusNot in paid work.x              0.8  -0.001187  0.01766
## Employment_StatusOther.x                         0.7  -0.000896  0.01485
## Employment_StatusPart-Time.x                     0.5   0.000319  0.00746
## Employment_StatusUnemployed and job seeking.x    3.8   0.007190  0.04273
## vote_PvdD.x                                      0.9  -0.002088  0.02851
## vote_Groenlinks.x                                1.5  -0.002108  0.02153
## vote_SP.x                                        0.0   0.000000  0.00000
## vote_D66.x                                       0.0   0.000000  0.00000
## vote_PvDA.x                                      0.0   0.000000  0.00000
## vote_VVD.x                                       0.0   0.000000  0.00000
## vote_Christenunie.x                              0.8   0.002202  0.03265
## vote_PVV.x                                      20.4  -0.079763  0.17829
## vote_CDA.x                                       0.0   0.000000  0.00000
## vote_FvD.x                                      12.3   0.043225  0.13072
## vote_SGP.x                                       0.0   0.000000  0.00000
## vote_50Plus.x                                    0.5   0.001612  0.04256
## vote_DENK.x                                      0.0   0.000000  0.00000
##                                                                         
## nVar                                                                    
## BIC                                                                     
## post prob                                                               
##                                                model 1                 
## Intercept                                          0.030902388047459307
## g.x                                                0.328592162032528168
## V_tilt.x                                                   .           
## age.x                                                      .           
## maleTRUE.x                                                 .           
## time_taken.x                                               .           
## education_num.x                                    0.121619268965273170
## First_Language2non-Dutch.x                        -0.379198053332367280
## Birth3Non-Western.x                                        .           
## Birth3Western.x                                            .           
## studentTRUE.x                                              .           
## Employment_StatusFull-Time.x                               .           
## Employment_StatusNot in paid work.x                        .           
## Employment_StatusOther.x                                   .           
## Employment_StatusPart-Time.x                               .           
## Employment_StatusUnemployed and job seeking.x              .           
## vote_PvdD.x                                                .           
## vote_Groenlinks.x                                          .           
## vote_SP.x                                                  .           
## vote_D66.x                                                 .           
## vote_PvDA.x                                                .           
## vote_VVD.x                                                 .           
## vote_Christenunie.x                                        .           
## vote_PVV.x                                                 .           
## vote_CDA.x                                                 .           
## vote_FvD.x                                                 .           
## vote_SGP.x                                                 .           
## vote_50Plus.x                                              .           
## vote_DENK.x                                                .           
##                                                                        
## nVar                                                 3                 
## BIC                                            -3133.476008451677444100
## post prob                                          0.102               
##                                                model 2                 
## Intercept                                         -0.000000000000000125
## g.x                                                0.347154060032085765
## V_tilt.x                                                   .           
## age.x                                                      .           
## maleTRUE.x                                                 .           
## time_taken.x                                               .           
## education_num.x                                    0.113409963472814110
## First_Language2non-Dutch.x                                 .           
## Birth3Non-Western.x                                        .           
## Birth3Western.x                                            .           
## studentTRUE.x                                              .           
## Employment_StatusFull-Time.x                               .           
## Employment_StatusNot in paid work.x                        .           
## Employment_StatusOther.x                                   .           
## Employment_StatusPart-Time.x                               .           
## Employment_StatusUnemployed and job seeking.x              .           
## vote_PvdD.x                                                .           
## vote_Groenlinks.x                                          .           
## vote_SP.x                                                  .           
## vote_D66.x                                                 .           
## vote_PvDA.x                                                .           
## vote_VVD.x                                                 .           
## vote_Christenunie.x                                        .           
## vote_PVV.x                                                 .           
## vote_CDA.x                                                 .           
## vote_FvD.x                                                 .           
## vote_SGP.x                                                 .           
## vote_50Plus.x                                              .           
## vote_DENK.x                                                .           
##                                                                        
## nVar                                                 2                 
## BIC                                            -3132.276431071458318911
## post prob                                          0.056               
##                                                model 3                 
## Intercept                                          0.051967796323829113
## g.x                                                0.320677719813810769
## V_tilt.x                                                   .           
## age.x                                                      .           
## maleTRUE.x                                                 .           
## time_taken.x                                               .           
## education_num.x                                    0.114342519311999444
## First_Language2non-Dutch.x                        -0.379733062103288999
## Birth3Non-Western.x                                        .           
## Birth3Western.x                                            .           
## studentTRUE.x                                              .           
## Employment_StatusFull-Time.x                               .           
## Employment_StatusNot in paid work.x                        .           
## Employment_StatusOther.x                                   .           
## Employment_StatusPart-Time.x                               .           
## Employment_StatusUnemployed and job seeking.x              .           
## vote_PvdD.x                                                .           
## vote_Groenlinks.x                                          .           
## vote_SP.x                                                  .           
## vote_D66.x                                                 .           
## vote_PvDA.x                                                .           
## vote_VVD.x                                                 .           
## vote_Christenunie.x                                        .           
## vote_PVV.x                                        -0.386932657930549484
## vote_CDA.x                                                 .           
## vote_FvD.x                                                 .           
## vote_SGP.x                                                 .           
## vote_50Plus.x                                              .           
## vote_DENK.x                                                .           
##                                                                        
## nVar                                                 4                 
## BIC                                            -3131.633491125055570592
## post prob                                          0.041               
##                                                model 4                 
## Intercept                                         -0.000000000000000159
## g.x                                                0.376451197778223945
## V_tilt.x                                           0.112638454999812648
## age.x                                             -0.130978485520925753
## maleTRUE.x                                                 .           
## time_taken.x                                               .           
## education_num.x                                    0.105288607568409256
## First_Language2non-Dutch.x                                 .           
## Birth3Non-Western.x                                        .           
## Birth3Western.x                                            .           
## studentTRUE.x                                              .           
## Employment_StatusFull-Time.x                               .           
## Employment_StatusNot in paid work.x                        .           
## Employment_StatusOther.x                                   .           
## Employment_StatusPart-Time.x                               .           
## Employment_StatusUnemployed and job seeking.x              .           
## vote_PvdD.x                                                .           
## vote_Groenlinks.x                                          .           
## vote_SP.x                                                  .           
## vote_D66.x                                                 .           
## vote_PvDA.x                                                .           
## vote_VVD.x                                                 .           
## vote_Christenunie.x                                        .           
## vote_PVV.x                                                 .           
## vote_CDA.x                                                 .           
## vote_FvD.x                                                 .           
## vote_SGP.x                                                 .           
## vote_50Plus.x                                              .           
## vote_DENK.x                                                .           
##                                                                        
## nVar                                                 4                 
## BIC                                            -3131.452652272517298115
## post prob                                          0.037               
##                                                model 5                 
## Intercept                                          0.093843159132286885
## g.x                                                0.410052909966997059
## V_tilt.x                                                   .           
## age.x                                             -0.158019908417521671
## maleTRUE.x                                                 .           
## time_taken.x                                               .           
## education_num.x                                            .           
## First_Language2non-Dutch.x                                 .           
## Birth3Non-Western.x                                        .           
## Birth3Western.x                                            .           
## studentTRUE.x                                     -0.252390962232497240
## Employment_StatusFull-Time.x                               .           
## Employment_StatusNot in paid work.x                        .           
## Employment_StatusOther.x                                   .           
## Employment_StatusPart-Time.x                               .           
## Employment_StatusUnemployed and job seeking.x              .           
## vote_PvdD.x                                                .           
## vote_Groenlinks.x                                          .           
## vote_SP.x                                                  .           
## vote_D66.x                                                 .           
## vote_PvDA.x                                                .           
## vote_VVD.x                                                 .           
## vote_Christenunie.x                                        .           
## vote_PVV.x                                                 .           
## vote_CDA.x                                                 .           
## vote_FvD.x                                                 .           
## vote_SGP.x                                                 .           
## vote_50Plus.x                                              .           
## vote_DENK.x                                                .           
##                                                                        
## nVar                                                 3                 
## BIC                                            -3131.256544912568642758
## post prob                                          0.034               
## 
##   1  observations deleted due to missingness.

Accuracy by source

Check for source variance.

#simple
d %>% 
  select(
    !!(d_accu %>% names()), g_accu_UWFA
  ) %>%
  describeBy(group = d$source, mat = T) %>% 
  select(-item, -vars, -n, -skew, -range, -kurtosis, -trimmed) %>%
  df_round()

#plot it, all in one plot
#reformat data to long first though
d %>% 
  select(g, crime_pearson_r, occu_pearson_r, prov_pearson_r, source) %>% 
  pivot_longer(cols = c(crime_pearson_r, occu_pearson_r, prov_pearson_r)) %>% 
  ggplot(aes(g, value, color = source)) +
  geom_point() +
  geom_smooth(method = lm) +
  facet_wrap("name")

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 47 rows containing non-finite values (`stat_smooth()`).

## Warning: Removed 47 rows containing missing values (`geom_point()`).

GG_save("figs/pearson_accu_side_by_side.png")

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 47 rows containing non-finite values (`stat_smooth()`).
## Removed 47 rows containing missing values (`geom_point()`).

#formal analysis
map(c(domains + "pearson_r", domains + "mean_abs_error", "g_accu_UWFA") %>% set_self_names(), function(y) {
  #build formula
  form = str_glue("standardize({y}) ~ {model_rs_full} + source")
  ols(as.formula(form), data = d)
}) %>% 
  summarize_models() %>% 
  print(n=Inf)

## # A tibble: 36 × 8
##    `Predictor/Model`               crime_pearson_r occu_pearson_r prov_pearson_r
##    <chr>                           <chr>           <chr>          <chr>         
##  1 Intercept                       -1.09 (0.465)   -0.24 (0.468)  -0.49 (0.471) 
##  2 g                               0.17 (0.047***) 0.21 (0.049**… 0.19 (0.050**…
##  3 V_tilt                          0.06 (0.047)    0.07 (0.047)   -0.01 (0.047) 
##  4 age                             0.00 (0.005)    0.00 (0.005)   0.01 (0.005)  
##  5 male                            0.23 (0.087*)   -0.10 (0.088)  0.30 (0.089**…
##  6 time_taken                      0.00 (0.004)    0.00 (0.004)   -0.01 (0.004) 
##  7 education_num                   0.16 (0.051**)  0.02 (0.052)   0.04 (0.053)  
##  8 First_Language2 = Dutch         (ref)           (ref)          (ref)         
##  9 First_Language2 = non-Dutch     -0.09 (0.158)   -0.58 (0.162*… 0.13 (0.168)  
## 10 Birth3 = Netherlands            (ref)           (ref)          (ref)         
## 11 Birth3 = Non-Western            -0.08 (0.165)   0.34 (0.166)   0.19 (0.171)  
## 12 Birth3 = Western                0.05 (0.325)    0.09 (0.319)   0.09 (0.314)  
## 13 student                         -0.35 (0.110**) -0.09 (0.112)  -0.12 (0.116) 
## 14 Employment_Status = Due to sta… (ref)           (ref)          (ref)         
## 15 Employment_Status = Full-Time   0.50 (0.343)    0.26 (0.335)   0.29 (0.330)  
## 16 Employment_Status = Not in pai… 0.61 (0.361)    0.39 (0.354)   0.34 (0.351)  
## 17 Employment_Status = Other       0.57 (0.359)    0.53 (0.351)   0.13 (0.346)  
## 18 Employment_Status = Part-Time   0.73 (0.343)    0.33 (0.335)   0.35 (0.329)  
## 19 Employment_Status = Unemployed… 0.86 (0.356)    0.42 (0.348)   0.17 (0.343)  
## 20 vote_PvdD                       -0.68 (0.232**) 0.03 (0.231)   0.11 (0.237)  
## 21 vote_Groenlinks                 -0.53 (0.149**… -0.01 (0.151)  -0.05 (0.151) 
## 22 vote_SP                         0.01 (0.209)    0.18 (0.211)   -0.12 (0.214) 
## 23 vote_D66                        -0.11 (0.187)   0.09 (0.189)   -0.14 (0.188) 
## 24 vote_PvDA                       -0.09 (0.202)   0.41 (0.207)   -0.30 (0.205) 
## 25 vote_VVD                        -0.26 (0.166)   0.32 (0.170)   0.01 (0.173)  
## 26 vote_Christenunie               -0.41 (0.275)   0.45 (0.281)   -0.06 (0.286) 
## 27 vote_PVV                        0.03 (0.220)    0.03 (0.225)   -0.51 (0.226) 
## 28 vote_CDA                        0.10 (0.308)    0.29 (0.318)   0.01 (0.340)  
## 29 vote_FvD                        0.39 (0.207)    0.47 (0.212)   -0.16 (0.210) 
## 30 vote_SGP                        0.17 (0.851)    0.44 (0.881)   -0.94 (0.866) 
## 31 vote_50Plus                     0.14 (0.542)    0.64 (0.560)   -0.07 (0.551) 
## 32 vote_DENK                       -1.79 (1.892)   0.38 (1.960)   -0.88 (1.923) 
## 33 source = Prolific               (ref)           (ref)          (ref)         
## 34 source = Survee                 0.01 (0.111)    -0.35 (0.113*… -0.22 (0.114) 
## 35 R2 adj.                         0.131           0.082          0.090         
## 36 N                               572             589            559           
## # ℹ 4 more variables: crime_mean_abs_error <chr>, occu_mean_abs_error <chr>,
## #   prov_mean_abs_error <chr>, g_accu_UWFA <chr>

#without source, just to get R2's
map(c(domains + "pearson_r", domains + "mean_abs_error", "g_accu_UWFA") %>% set_self_names(), function(y) {
  #build formula
  form = str_glue("standardize({y}) ~ {model_rs_full}")
  ols(as.formula(form), data = d)
}) %>% 
  summarize_models() %>% 
  .[30:31, ]

Party support and intelligence

#predict g from party support
g_party_model = str_glue("g ~ {str_c(vote_preds, collapse = ' + ')}") %>% formula()
ols(g_party_model, data = d)

## Linear Regression Model
## 
## ols(formula = g_party_model, data = d)
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs     615    LR chi2     34.44    R2       0.054    
## sigma0.9828    d.f.           13    R2 adj   0.034    
## d.f.    601    Pr(> chi2) 0.0010    g        0.254    
## 
## Residuals
## 
##      Min       1Q   Median       3Q      Max 
## -2.65246 -0.61514 -0.02736  0.64726  3.45667 
## 
## 
##                   Coef    S.E.   t     Pr(>|t|)
## Intercept         -0.2077 0.0980 -2.12 0.0345  
## vote_PvdD          0.3434 0.2230  1.54 0.1241  
## vote_Groenlinks    0.3943 0.1416  2.78 0.0055  
## vote_SP            0.2509 0.1998  1.26 0.2099  
## vote_D66           0.6664 0.1793  3.72 0.0002  
## vote_PvDA          0.1190 0.1993  0.60 0.5507  
## vote_VVD           0.2360 0.1565  1.51 0.1320  
## vote_Christenunie  0.2965 0.2708  1.09 0.2741  
## vote_PVV          -0.4001 0.2094 -1.91 0.0565  
## vote_CDA          -0.1149 0.2966 -0.39 0.6986  
## vote_FvD           0.1828 0.2100  0.87 0.3842  
## vote_SGP           0.2690 0.8858  0.30 0.7614  
## vote_50Plus       -0.6618 0.5569 -1.19 0.2352  
## vote_DENK         -0.3663 1.9712 -0.19 0.8526

#verbal tilt
V_party_model = str_glue("V_tilt ~ {str_c(vote_preds, collapse = ' + ')}") %>% formula()
ols(V_party_model, data = d)

## Frequencies of Missing Values Due to Each Variable
##            V_tilt         vote_PvdD   vote_Groenlinks           vote_SP 
##                 8                 0                 0                 0 
##          vote_D66         vote_PvDA          vote_VVD vote_Christenunie 
##                 0                 0                 0                 0 
##          vote_PVV          vote_CDA          vote_FvD          vote_SGP 
##                 0                 0                 0                 0 
##       vote_50Plus         vote_DENK 
##                 0                 0 
## 
## Linear Regression Model
## 
## ols(formula = V_party_model, data = d)
## 
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs     607    LR chi2     46.26    R2       0.073    
## sigma0.9731    d.f.           13    R2 adj   0.053    
## d.f.    593    Pr(> chi2) 0.0000    g        0.291    
## 
## Residuals
## 
##      Min       1Q   Median       3Q      Max 
## -4.34926 -0.64735  0.01147  0.68750  2.42648 
## 
## 
##                   Coef    S.E.   t     Pr(>|t|)
## Intercept         -0.2874 0.0977 -2.94 0.0034  
## vote_PvdD          0.3797 0.2211  1.72 0.0865  
## vote_Groenlinks    0.1646 0.1410  1.17 0.2435  
## vote_SP            0.8798 0.2005  4.39 <0.0001 
## vote_D66           0.1312 0.1796  0.73 0.4654  
## vote_PvDA          0.4867 0.1982  2.46 0.0144  
## vote_VVD           0.4720 0.1554  3.04 0.0025  
## vote_Christenunie -0.0149 0.2684 -0.06 0.9558  
## vote_PVV           0.7773 0.2108  3.69 0.0002  
## vote_CDA           1.1484 0.3050  3.76 0.0002  
## vote_FvD           0.1390 0.2083  0.67 0.5047  
## vote_SGP          -0.8193 0.8771 -0.93 0.3506  
## vote_50Plus       -0.1000 0.5515 -0.18 0.8562  
## vote_DENK         -1.1255 1.9517 -0.58 0.5644

Simulation

Accuracy metrics under random error model.

Completely random data

No signal, just random noise.

#criterion values
set.seed(1)
sim_criterion_values = rnorm(20)
sim_criterion_values %>% describe2()

sim_n = 1000

#generate ratings
sim1_ratings = matrix(rnorm(sim_n*length(sim_criterion_values)), nrow = sim_n)
sim1_scored = score_accuracy(sim1_ratings, criterion = sim_criterion_values, methods = "all")

#describe2
sim1_scored %>% describe2() %>% cbind(aggregate = score_accuracy(sim1_ratings, criterion = sim_criterion_values, methods = "all", aggregate = T) %>% unlist())

#correlations
sim1_scored %>% wtd.cors() %>% GG_heatmap(reorder_vars = F)

#pairwise
sim1_scored %>% ggpairs_easy()

GG_save("figs/sim1_pairs.png")

Random data, uniform distributions

#criterion values
set.seed(1)
sim_criterion_values = runif(100)
sim_criterion_values %>% describe2()

sim_n = 1000

#generate ratings
sim1b_ratings = matrix(runif(sim_n*length(sim_criterion_values)), nrow = sim_n)
sim1b_scored = score_accuracy(sim1b_ratings, criterion = sim_criterion_values, methods = "all")

#describe2
sim1b_scored %>% describe2() %>% cbind(aggregate = score_accuracy(sim1b_ratings, criterion = sim_criterion_values, methods = "all", aggregate = T) %>% unlist())

#correlations
sim1b_scored %>% wtd.cors() %>% GG_heatmap(reorder_vars = F)

#pairwise
sim1b_scored %>% ggpairs_easy()

GG_save("figs/sim1b_pairs.png")

True signal + random errors

No true individual differences

#generate ratings
set.seed(1)
sim2_ratings = t(t(matrix(rnorm(sim_n*length(sim_criterion_values)), nrow = sim_n)) + sim_criterion_values*.5)
sim2_scored = score_accuracy(sim2_ratings, criterion = sim_criterion_values, methods = "all")

#describe2
sim2_scored %>% describe2() %>% cbind(aggregate = score_accuracy(sim2_ratings, criterion = sim_criterion_values, methods = "all", aggregate = T) %>% unlist())

#correlations
sim2_scored %>% wtd.cors() %>% GG_heatmap(reorder_vars = F)

#pairwise
sim2_scored %>% ggpairs_easy()

GG_save("figs/sim2_pairs.png")

True signal + random errors + individuals vary in error SD

#generate ratings
set.seed(1)
sim3_ratings = map_dfc(1:sim_n, function(i) {
  tibble(
    a = sim_criterion_values*.5 + rnorm(20, mean = 0, sd = runif(1, min = 0, max = 2))
  )
}) %>% t() %>% as_tibble()

## New names:
## • `a` -> `a...1`
## • `a` -> `a...2`
## • `a` -> `a...3`
## • `a` -> `a...4`
## • `a` -> `a...5`
## • `a` -> `a...6`
## • `a` -> `a...7`
## • `a` -> `a...8`
## • `a` -> `a...9`
## • `a` -> `a...10`
## • `a` -> `a...11`
## • `a` -> `a...12`
## • `a` -> `a...13`
## • `a` -> `a...14`
## • `a` -> `a...15`
## • `a` -> `a...16`
## • `a` -> `a...17`
## • `a` -> `a...18`
## • `a` -> `a...19`
## • `a` -> `a...20`
## • `a` -> `a...21`
## • `a` -> `a...22`
## • `a` -> `a...23`
## • `a` -> `a...24`
## • `a` -> `a...25`
## • `a` -> `a...26`
## • `a` -> `a...27`
## • `a` -> `a...28`
## • `a` -> `a...29`
## • `a` -> `a...30`
## • `a` -> `a...31`
## • `a` -> `a...32`
## • `a` -> `a...33`
## • `a` -> `a...34`
## • `a` -> `a...35`
## • `a` -> `a...36`
## • `a` -> `a...37`
## • `a` -> `a...38`
## • `a` -> `a...39`
## • `a` -> `a...40`
## • `a` -> `a...41`
## • `a` -> `a...42`
## • `a` -> `a...43`
## • `a` -> `a...44`
## • `a` -> `a...45`
## • `a` -> `a...46`
## • `a` -> `a...47`
## • `a` -> `a...48`
## • `a` -> `a...49`
## • `a` -> `a...50`
## • `a` -> `a...51`
## • `a` -> `a...52`
## • `a` -> `a...53`
## • `a` -> `a...54`
## • `a` -> `a...55`
## • `a` -> `a...56`
## • `a` -> `a...57`
## • `a` -> `a...58`
## • `a` -> `a...59`
## • `a` -> `a...60`
## • `a` -> `a...61`
## • `a` -> `a...62`
## • `a` -> `a...63`
## • `a` -> `a...64`
## • `a` -> `a...65`
## • `a` -> `a...66`
## • `a` -> `a...67`
## • `a` -> `a...68`
## • `a` -> `a...69`
## • `a` -> `a...70`
## • `a` -> `a...71`
## • `a` -> `a...72`
## • `a` -> `a...73`
## • `a` -> `a...74`
## • `a` -> `a...75`
## • `a` -> `a...76`
## • `a` -> `a...77`
## • `a` -> `a...78`
## • `a` -> `a...79`
## • `a` -> `a...80`
## • `a` -> `a...81`
## • `a` -> `a...82`
## • `a` -> `a...83`
## • `a` -> `a...84`
## • `a` -> `a...85`
## • `a` -> `a...86`
## • `a` -> `a...87`
## • `a` -> `a...88`
## • `a` -> `a...89`
## • `a` -> `a...90`
## • `a` -> `a...91`
## • `a` -> `a...92`
## • `a` -> `a...93`
## • `a` -> `a...94`
## • `a` -> `a...95`
## • `a` -> `a...96`
## • `a` -> `a...97`
## • `a` -> `a...98`
## • `a` -> `a...99`
## • `a` -> `a...100`
## • `a` -> `a...101`
## • `a` -> `a...102`
## • `a` -> `a...103`
## • `a` -> `a...104`
## • `a` -> `a...105`
## • `a` -> `a...106`
## • `a` -> `a...107`
## • `a` -> `a...108`
## • `a` -> `a...109`
## • `a` -> `a...110`
## • `a` -> `a...111`
## • `a` -> `a...112`
## • `a` -> `a...113`
## • `a` -> `a...114`
## • `a` -> `a...115`
## • `a` -> `a...116`
## • `a` -> `a...117`
## • `a` -> `a...118`
## • `a` -> `a...119`
## • `a` -> `a...120`
## • `a` -> `a...121`
## • `a` -> `a...122`
## • `a` -> `a...123`
## • `a` -> `a...124`
## • `a` -> `a...125`
## • `a` -> `a...126`
## • `a` -> `a...127`
## • `a` -> `a...128`
## • `a` -> `a...129`
## • `a` -> `a...130`
## • `a` -> `a...131`
## • `a` -> `a...132`
## • `a` -> `a...133`
## • `a` -> `a...134`
## • `a` -> `a...135`
## • `a` -> `a...136`
## • `a` -> `a...137`
## • `a` -> `a...138`
## • `a` -> `a...139`
## • `a` -> `a...140`
## • `a` -> `a...141`
## • `a` -> `a...142`
## • `a` -> `a...143`
## • `a` -> `a...144`
## • `a` -> `a...145`
## • `a` -> `a...146`
## • `a` -> `a...147`
## • `a` -> `a...148`
## • `a` -> `a...149`
## • `a` -> `a...150`
## • `a` -> `a...151`
## • `a` -> `a...152`
## • `a` -> `a...153`
## • `a` -> `a...154`
## • `a` -> `a...155`
## • `a` -> `a...156`
## • `a` -> `a...157`
## • `a` -> `a...158`
## • `a` -> `a...159`
## • `a` -> `a...160`
## • `a` -> `a...161`
## • `a` -> `a...162`
## • `a` -> `a...163`
## • `a` -> `a...164`
## • `a` -> `a...165`
## • `a` -> `a...166`
## • `a` -> `a...167`
## • `a` -> `a...168`
## • `a` -> `a...169`
## • `a` -> `a...170`
## • `a` -> `a...171`
## • `a` -> `a...172`
## • `a` -> `a...173`
## • `a` -> `a...174`
## • `a` -> `a...175`
## • `a` -> `a...176`
## • `a` -> `a...177`
## • `a` -> `a...178`
## • `a` -> `a...179`
## • `a` -> `a...180`
## • `a` -> `a...181`
## • `a` -> `a...182`
## • `a` -> `a...183`
## • `a` -> `a...184`
## • `a` -> `a...185`
## • `a` -> `a...186`
## • `a` -> `a...187`
## • `a` -> `a...188`
## • `a` -> `a...189`
## • `a` -> `a...190`
## • `a` -> `a...191`
## • `a` -> `a...192`
## • `a` -> `a...193`
## • `a` -> `a...194`
## • `a` -> `a...195`
## • `a` -> `a...196`
## • `a` -> `a...197`
## • `a` -> `a...198`
## • `a` -> `a...199`
## • `a` -> `a...200`
## • `a` -> `a...201`
## • `a` -> `a...202`
## • `a` -> `a...203`
## • `a` -> `a...204`
## • `a` -> `a...205`
## • `a` -> `a...206`
## • `a` -> `a...207`
## • `a` -> `a...208`
## • `a` -> `a...209`
## • `a` -> `a...210`
## • `a` -> `a...211`
## • `a` -> `a...212`
## • `a` -> `a...213`
## • `a` -> `a...214`
## • `a` -> `a...215`
## • `a` -> `a...216`
## • `a` -> `a...217`
## • `a` -> `a...218`
## • `a` -> `a...219`
## • `a` -> `a...220`
## • `a` -> `a...221`
## • `a` -> `a...222`
## • `a` -> `a...223`
## • `a` -> `a...224`
## • `a` -> `a...225`
## • `a` -> `a...226`
## • `a` -> `a...227`
## • `a` -> `a...228`
## • `a` -> `a...229`
## • `a` -> `a...230`
## • `a` -> `a...231`
## • `a` -> `a...232`
## • `a` -> `a...233`
## • `a` -> `a...234`
## • `a` -> `a...235`
## • `a` -> `a...236`
## • `a` -> `a...237`
## • `a` -> `a...238`
## • `a` -> `a...239`
## • `a` -> `a...240`
## • `a` -> `a...241`
## • `a` -> `a...242`
## • `a` -> `a...243`
## • `a` -> `a...244`
## • `a` -> `a...245`
## • `a` -> `a...246`
## • `a` -> `a...247`
## • `a` -> `a...248`
## • `a` -> `a...249`
## • `a` -> `a...250`
## • `a` -> `a...251`
## • `a` -> `a...252`
## • `a` -> `a...253`
## • `a` -> `a...254`
## • `a` -> `a...255`
## • `a` -> `a...256`
## • `a` -> `a...257`
## • `a` -> `a...258`
## • `a` -> `a...259`
## • `a` -> `a...260`
## • `a` -> `a...261`
## • `a` -> `a...262`
## • `a` -> `a...263`
## • `a` -> `a...264`
## • `a` -> `a...265`
## • `a` -> `a...266`
## • `a` -> `a...267`
## • `a` -> `a...268`
## • `a` -> `a...269`
## • `a` -> `a...270`
## • `a` -> `a...271`
## • `a` -> `a...272`
## • `a` -> `a...273`
## • `a` -> `a...274`
## • `a` -> `a...275`
## • `a` -> `a...276`
## • `a` -> `a...277`
## • `a` -> `a...278`
## • `a` -> `a...279`
## • `a` -> `a...280`
## • `a` -> `a...281`
## • `a` -> `a...282`
## • `a` -> `a...283`
## • `a` -> `a...284`
## • `a` -> `a...285`
## • `a` -> `a...286`
## • `a` -> `a...287`
## • `a` -> `a...288`
## • `a` -> `a...289`
## • `a` -> `a...290`
## • `a` -> `a...291`
## • `a` -> `a...292`
## • `a` -> `a...293`
## • `a` -> `a...294`
## • `a` -> `a...295`
## • `a` -> `a...296`
## • `a` -> `a...297`
## • `a` -> `a...298`
## • `a` -> `a...299`
## • `a` -> `a...300`
## • `a` -> `a...301`
## • `a` -> `a...302`
## • `a` -> `a...303`
## • `a` -> `a...304`
## • `a` -> `a...305`
## • `a` -> `a...306`
## • `a` -> `a...307`
## • `a` -> `a...308`
## • `a` -> `a...309`
## • `a` -> `a...310`
## • `a` -> `a...311`
## • `a` -> `a...312`
## • `a` -> `a...313`
## • `a` -> `a...314`
## • `a` -> `a...315`
## • `a` -> `a...316`
## • `a` -> `a...317`
## • `a` -> `a...318`
## • `a` -> `a...319`
## • `a` -> `a...320`
## • `a` -> `a...321`
## • `a` -> `a...322`
## • `a` -> `a...323`
## • `a` -> `a...324`
## • `a` -> `a...325`
## • `a` -> `a...326`
## • `a` -> `a...327`
## • `a` -> `a...328`
## • `a` -> `a...329`
## • `a` -> `a...330`
## • `a` -> `a...331`
## • `a` -> `a...332`
## • `a` -> `a...333`
## • `a` -> `a...334`
## • `a` -> `a...335`
## • `a` -> `a...336`
## • `a` -> `a...337`
## • `a` -> `a...338`
## • `a` -> `a...339`
## • `a` -> `a...340`
## • `a` -> `a...341`
## • `a` -> `a...342`
## • `a` -> `a...343`
## • `a` -> `a...344`
## • `a` -> `a...345`
## • `a` -> `a...346`
## • `a` -> `a...347`
## • `a` -> `a...348`
## • `a` -> `a...349`
## • `a` -> `a...350`
## • `a` -> `a...351`
## • `a` -> `a...352`
## • `a` -> `a...353`
## • `a` -> `a...354`
## • `a` -> `a...355`
## • `a` -> `a...356`
## • `a` -> `a...357`
## • `a` -> `a...358`
## • `a` -> `a...359`
## • `a` -> `a...360`
## • `a` -> `a...361`
## • `a` -> `a...362`
## • `a` -> `a...363`
## • `a` -> `a...364`
## • `a` -> `a...365`
## • `a` -> `a...366`
## • `a` -> `a...367`
## • `a` -> `a...368`
## • `a` -> `a...369`
## • `a` -> `a...370`
## • `a` -> `a...371`
## • `a` -> `a...372`
## • `a` -> `a...373`
## • `a` -> `a...374`
## • `a` -> `a...375`
## • `a` -> `a...376`
## • `a` -> `a...377`
## • `a` -> `a...378`
## • `a` -> `a...379`
## • `a` -> `a...380`
## • `a` -> `a...381`
## • `a` -> `a...382`
## • `a` -> `a...383`
## • `a` -> `a...384`
## • `a` -> `a...385`
## • `a` -> `a...386`
## • `a` -> `a...387`
## • `a` -> `a...388`
## • `a` -> `a...389`
## • `a` -> `a...390`
## • `a` -> `a...391`
## • `a` -> `a...392`
## • `a` -> `a...393`
## • `a` -> `a...394`
## • `a` -> `a...395`
## • `a` -> `a...396`
## • `a` -> `a...397`
## • `a` -> `a...398`
## • `a` -> `a...399`
## • `a` -> `a...400`
## • `a` -> `a...401`
## • `a` -> `a...402`
## • `a` -> `a...403`
## • `a` -> `a...404`
## • `a` -> `a...405`
## • `a` -> `a...406`
## • `a` -> `a...407`
## • `a` -> `a...408`
## • `a` -> `a...409`
## • `a` -> `a...410`
## • `a` -> `a...411`
## • `a` -> `a...412`
## • `a` -> `a...413`
## • `a` -> `a...414`
## • `a` -> `a...415`
## • `a` -> `a...416`
## • `a` -> `a...417`
## • `a` -> `a...418`
## • `a` -> `a...419`
## • `a` -> `a...420`
## • `a` -> `a...421`
## • `a` -> `a...422`
## • `a` -> `a...423`
## • `a` -> `a...424`
## • `a` -> `a...425`
## • `a` -> `a...426`
## • `a` -> `a...427`
## • `a` -> `a...428`
## • `a` -> `a...429`
## • `a` -> `a...430`
## • `a` -> `a...431`
## • `a` -> `a...432`
## • `a` -> `a...433`
## • `a` -> `a...434`
## • `a` -> `a...435`
## • `a` -> `a...436`
## • `a` -> `a...437`
## • `a` -> `a...438`
## • `a` -> `a...439`
## • `a` -> `a...440`
## • `a` -> `a...441`
## • `a` -> `a...442`
## • `a` -> `a...443`
## • `a` -> `a...444`
## • `a` -> `a...445`
## • `a` -> `a...446`
## • `a` -> `a...447`
## • `a` -> `a...448`
## • `a` -> `a...449`
## • `a` -> `a...450`
## • `a` -> `a...451`
## • `a` -> `a...452`
## • `a` -> `a...453`
## • `a` -> `a...454`
## • `a` -> `a...455`
## • `a` -> `a...456`
## • `a` -> `a...457`
## • `a` -> `a...458`
## • `a` -> `a...459`
## • `a` -> `a...460`
## • `a` -> `a...461`
## • `a` -> `a...462`
## • `a` -> `a...463`
## • `a` -> `a...464`
## • `a` -> `a...465`
## • `a` -> `a...466`
## • `a` -> `a...467`
## • `a` -> `a...468`
## • `a` -> `a...469`
## • `a` -> `a...470`
## • `a` -> `a...471`
## • `a` -> `a...472`
## • `a` -> `a...473`
## • `a` -> `a...474`
## • `a` -> `a...475`
## • `a` -> `a...476`
## • `a` -> `a...477`
## • `a` -> `a...478`
## • `a` -> `a...479`
## • `a` -> `a...480`
## • `a` -> `a...481`
## • `a` -> `a...482`
## • `a` -> `a...483`
## • `a` -> `a...484`
## • `a` -> `a...485`
## • `a` -> `a...486`
## • `a` -> `a...487`
## • `a` -> `a...488`
## • `a` -> `a...489`
## • `a` -> `a...490`
## • `a` -> `a...491`
## • `a` -> `a...492`
## • `a` -> `a...493`
## • `a` -> `a...494`
## • `a` -> `a...495`
## • `a` -> `a...496`
## • `a` -> `a...497`
## • `a` -> `a...498`
## • `a` -> `a...499`
## • `a` -> `a...500`
## • `a` -> `a...501`
## • `a` -> `a...502`
## • `a` -> `a...503`
## • `a` -> `a...504`
## • `a` -> `a...505`
## • `a` -> `a...506`
## • `a` -> `a...507`
## • `a` -> `a...508`
## • `a` -> `a...509`
## • `a` -> `a...510`
## • `a` -> `a...511`
## • `a` -> `a...512`
## • `a` -> `a...513`
## • `a` -> `a...514`
## • `a` -> `a...515`
## • `a` -> `a...516`
## • `a` -> `a...517`
## • `a` -> `a...518`
## • `a` -> `a...519`
## • `a` -> `a...520`
## • `a` -> `a...521`
## • `a` -> `a...522`
## • `a` -> `a...523`
## • `a` -> `a...524`
## • `a` -> `a...525`
## • `a` -> `a...526`
## • `a` -> `a...527`
## • `a` -> `a...528`
## • `a` -> `a...529`
## • `a` -> `a...530`
## • `a` -> `a...531`
## • `a` -> `a...532`
## • `a` -> `a...533`
## • `a` -> `a...534`
## • `a` -> `a...535`
## • `a` -> `a...536`
## • `a` -> `a...537`
## • `a` -> `a...538`
## • `a` -> `a...539`
## • `a` -> `a...540`
## • `a` -> `a...541`
## • `a` -> `a...542`
## • `a` -> `a...543`
## • `a` -> `a...544`
## • `a` -> `a...545`
## • `a` -> `a...546`
## • `a` -> `a...547`
## • `a` -> `a...548`
## • `a` -> `a...549`
## • `a` -> `a...550`
## • `a` -> `a...551`
## • `a` -> `a...552`
## • `a` -> `a...553`
## • `a` -> `a...554`
## • `a` -> `a...555`
## • `a` -> `a...556`
## • `a` -> `a...557`
## • `a` -> `a...558`
## • `a` -> `a...559`
## • `a` -> `a...560`
## • `a` -> `a...561`
## • `a` -> `a...562`
## • `a` -> `a...563`
## • `a` -> `a...564`
## • `a` -> `a...565`
## • `a` -> `a...566`
## • `a` -> `a...567`
## • `a` -> `a...568`
## • `a` -> `a...569`
## • `a` -> `a...570`
## • `a` -> `a...571`
## • `a` -> `a...572`
## • `a` -> `a...573`
## • `a` -> `a...574`
## • `a` -> `a...575`
## • `a` -> `a...576`
## • `a` -> `a...577`
## • `a` -> `a...578`
## • `a` -> `a...579`
## • `a` -> `a...580`
## • `a` -> `a...581`
## • `a` -> `a...582`
## • `a` -> `a...583`
## • `a` -> `a...584`
## • `a` -> `a...585`
## • `a` -> `a...586`
## • `a` -> `a...587`
## • `a` -> `a...588`
## • `a` -> `a...589`
## • `a` -> `a...590`
## • `a` -> `a...591`
## • `a` -> `a...592`
## • `a` -> `a...593`
## • `a` -> `a...594`
## • `a` -> `a...595`
## • `a` -> `a...596`
## • `a` -> `a...597`
## • `a` -> `a...598`
## • `a` -> `a...599`
## • `a` -> `a...600`
## • `a` -> `a...601`
## • `a` -> `a...602`
## • `a` -> `a...603`
## • `a` -> `a...604`
## • `a` -> `a...605`
## • `a` -> `a...606`
## • `a` -> `a...607`
## • `a` -> `a...608`
## • `a` -> `a...609`
## • `a` -> `a...610`
## • `a` -> `a...611`
## • `a` -> `a...612`
## • `a` -> `a...613`
## • `a` -> `a...614`
## • `a` -> `a...615`
## • `a` -> `a...616`
## • `a` -> `a...617`
## • `a` -> `a...618`
## • `a` -> `a...619`
## • `a` -> `a...620`
## • `a` -> `a...621`
## • `a` -> `a...622`
## • `a` -> `a...623`
## • `a` -> `a...624`
## • `a` -> `a...625`
## • `a` -> `a...626`
## • `a` -> `a...627`
## • `a` -> `a...628`
## • `a` -> `a...629`
## • `a` -> `a...630`
## • `a` -> `a...631`
## • `a` -> `a...632`
## • `a` -> `a...633`
## • `a` -> `a...634`
## • `a` -> `a...635`
## • `a` -> `a...636`
## • `a` -> `a...637`
## • `a` -> `a...638`
## • `a` -> `a...639`
## • `a` -> `a...640`
## • `a` -> `a...641`
## • `a` -> `a...642`
## • `a` -> `a...643`
## • `a` -> `a...644`
## • `a` -> `a...645`
## • `a` -> `a...646`
## • `a` -> `a...647`
## • `a` -> `a...648`
## • `a` -> `a...649`
## • `a` -> `a...650`
## • `a` -> `a...651`
## • `a` -> `a...652`
## • `a` -> `a...653`
## • `a` -> `a...654`
## • `a` -> `a...655`
## • `a` -> `a...656`
## • `a` -> `a...657`
## • `a` -> `a...658`
## • `a` -> `a...659`
## • `a` -> `a...660`
## • `a` -> `a...661`
## • `a` -> `a...662`
## • `a` -> `a...663`
## • `a` -> `a...664`
## • `a` -> `a...665`
## • `a` -> `a...666`
## • `a` -> `a...667`
## • `a` -> `a...668`
## • `a` -> `a...669`
## • `a` -> `a...670`
## • `a` -> `a...671`
## • `a` -> `a...672`
## • `a` -> `a...673`
## • `a` -> `a...674`
## • `a` -> `a...675`
## • `a` -> `a...676`
## • `a` -> `a...677`
## • `a` -> `a...678`
## • `a` -> `a...679`
## • `a` -> `a...680`
## • `a` -> `a...681`
## • `a` -> `a...682`
## • `a` -> `a...683`
## • `a` -> `a...684`
## • `a` -> `a...685`
## • `a` -> `a...686`
## • `a` -> `a...687`
## • `a` -> `a...688`
## • `a` -> `a...689`
## • `a` -> `a...690`
## • `a` -> `a...691`
## • `a` -> `a...692`
## • `a` -> `a...693`
## • `a` -> `a...694`
## • `a` -> `a...695`
## • `a` -> `a...696`
## • `a` -> `a...697`
## • `a` -> `a...698`
## • `a` -> `a...699`
## • `a` -> `a...700`
## • `a` -> `a...701`
## • `a` -> `a...702`
## • `a` -> `a...703`
## • `a` -> `a...704`
## • `a` -> `a...705`
## • `a` -> `a...706`
## • `a` -> `a...707`
## • `a` -> `a...708`
## • `a` -> `a...709`
## • `a` -> `a...710`
## • `a` -> `a...711`
## • `a` -> `a...712`
## • `a` -> `a...713`
## • `a` -> `a...714`
## • `a` -> `a...715`
## • `a` -> `a...716`
## • `a` -> `a...717`
## • `a` -> `a...718`
## • `a` -> `a...719`
## • `a` -> `a...720`
## • `a` -> `a...721`
## • `a` -> `a...722`
## • `a` -> `a...723`
## • `a` -> `a...724`
## • `a` -> `a...725`
## • `a` -> `a...726`
## • `a` -> `a...727`
## • `a` -> `a...728`
## • `a` -> `a...729`
## • `a` -> `a...730`
## • `a` -> `a...731`
## • `a` -> `a...732`
## • `a` -> `a...733`
## • `a` -> `a...734`
## • `a` -> `a...735`
## • `a` -> `a...736`
## • `a` -> `a...737`
## • `a` -> `a...738`
## • `a` -> `a...739`
## • `a` -> `a...740`
## • `a` -> `a...741`
## • `a` -> `a...742`
## • `a` -> `a...743`
## • `a` -> `a...744`
## • `a` -> `a...745`
## • `a` -> `a...746`
## • `a` -> `a...747`
## • `a` -> `a...748`
## • `a` -> `a...749`
## • `a` -> `a...750`
## • `a` -> `a...751`
## • `a` -> `a...752`
## • `a` -> `a...753`
## • `a` -> `a...754`
## • `a` -> `a...755`
## • `a` -> `a...756`
## • `a` -> `a...757`
## • `a` -> `a...758`
## • `a` -> `a...759`
## • `a` -> `a...760`
## • `a` -> `a...761`
## • `a` -> `a...762`
## • `a` -> `a...763`
## • `a` -> `a...764`
## • `a` -> `a...765`
## • `a` -> `a...766`
## • `a` -> `a...767`
## • `a` -> `a...768`
## • `a` -> `a...769`
## • `a` -> `a...770`
## • `a` -> `a...771`
## • `a` -> `a...772`
## • `a` -> `a...773`
## • `a` -> `a...774`
## • `a` -> `a...775`
## • `a` -> `a...776`
## • `a` -> `a...777`
## • `a` -> `a...778`
## • `a` -> `a...779`
## • `a` -> `a...780`
## • `a` -> `a...781`
## • `a` -> `a...782`
## • `a` -> `a...783`
## • `a` -> `a...784`
## • `a` -> `a...785`
## • `a` -> `a...786`
## • `a` -> `a...787`
## • `a` -> `a...788`
## • `a` -> `a...789`
## • `a` -> `a...790`
## • `a` -> `a...791`
## • `a` -> `a...792`
## • `a` -> `a...793`
## • `a` -> `a...794`
## • `a` -> `a...795`
## • `a` -> `a...796`
## • `a` -> `a...797`
## • `a` -> `a...798`
## • `a` -> `a...799`
## • `a` -> `a...800`
## • `a` -> `a...801`
## • `a` -> `a...802`
## • `a` -> `a...803`
## • `a` -> `a...804`
## • `a` -> `a...805`
## • `a` -> `a...806`
## • `a` -> `a...807`
## • `a` -> `a...808`
## • `a` -> `a...809`
## • `a` -> `a...810`
## • `a` -> `a...811`
## • `a` -> `a...812`
## • `a` -> `a...813`
## • `a` -> `a...814`
## • `a` -> `a...815`
## • `a` -> `a...816`
## • `a` -> `a...817`
## • `a` -> `a...818`
## • `a` -> `a...819`
## • `a` -> `a...820`
## • `a` -> `a...821`
## • `a` -> `a...822`
## • `a` -> `a...823`
## • `a` -> `a...824`
## • `a` -> `a...825`
## • `a` -> `a...826`
## • `a` -> `a...827`
## • `a` -> `a...828`
## • `a` -> `a...829`
## • `a` -> `a...830`
## • `a` -> `a...831`
## • `a` -> `a...832`
## • `a` -> `a...833`
## • `a` -> `a...834`
## • `a` -> `a...835`
## • `a` -> `a...836`
## • `a` -> `a...837`
## • `a` -> `a...838`
## • `a` -> `a...839`
## • `a` -> `a...840`
## • `a` -> `a...841`
## • `a` -> `a...842`
## • `a` -> `a...843`
## • `a` -> `a...844`
## • `a` -> `a...845`
## • `a` -> `a...846`
## • `a` -> `a...847`
## • `a` -> `a...848`
## • `a` -> `a...849`
## • `a` -> `a...850`
## • `a` -> `a...851`
## • `a` -> `a...852`
## • `a` -> `a...853`
## • `a` -> `a...854`
## • `a` -> `a...855`
## • `a` -> `a...856`
## • `a` -> `a...857`
## • `a` -> `a...858`
## • `a` -> `a...859`
## • `a` -> `a...860`
## • `a` -> `a...861`
## • `a` -> `a...862`
## • `a` -> `a...863`
## • `a` -> `a...864`
## • `a` -> `a...865`
## • `a` -> `a...866`
## • `a` -> `a...867`
## • `a` -> `a...868`
## • `a` -> `a...869`
## • `a` -> `a...870`
## • `a` -> `a...871`
## • `a` -> `a...872`
## • `a` -> `a...873`
## • `a` -> `a...874`
## • `a` -> `a...875`
## • `a` -> `a...876`
## • `a` -> `a...877`
## • `a` -> `a...878`
## • `a` -> `a...879`
## • `a` -> `a...880`
## • `a` -> `a...881`
## • `a` -> `a...882`
## • `a` -> `a...883`
## • `a` -> `a...884`
## • `a` -> `a...885`
## • `a` -> `a...886`
## • `a` -> `a...887`
## • `a` -> `a...888`
## • `a` -> `a...889`
## • `a` -> `a...890`
## • `a` -> `a...891`
## • `a` -> `a...892`
## • `a` -> `a...893`
## • `a` -> `a...894`
## • `a` -> `a...895`
## • `a` -> `a...896`
## • `a` -> `a...897`
## • `a` -> `a...898`
## • `a` -> `a...899`
## • `a` -> `a...900`
## • `a` -> `a...901`
## • `a` -> `a...902`
## • `a` -> `a...903`
## • `a` -> `a...904`
## • `a` -> `a...905`
## • `a` -> `a...906`
## • `a` -> `a...907`
## • `a` -> `a...908`
## • `a` -> `a...909`
## • `a` -> `a...910`
## • `a` -> `a...911`
## • `a` -> `a...912`
## • `a` -> `a...913`
## • `a` -> `a...914`
## • `a` -> `a...915`
## • `a` -> `a...916`
## • `a` -> `a...917`
## • `a` -> `a...918`
## • `a` -> `a...919`
## • `a` -> `a...920`
## • `a` -> `a...921`
## • `a` -> `a...922`
## • `a` -> `a...923`
## • `a` -> `a...924`
## • `a` -> `a...925`
## • `a` -> `a...926`
## • `a` -> `a...927`
## • `a` -> `a...928`
## • `a` -> `a...929`
## • `a` -> `a...930`
## • `a` -> `a...931`
## • `a` -> `a...932`
## • `a` -> `a...933`
## • `a` -> `a...934`
## • `a` -> `a...935`
## • `a` -> `a...936`
## • `a` -> `a...937`
## • `a` -> `a...938`
## • `a` -> `a...939`
## • `a` -> `a...940`
## • `a` -> `a...941`
## • `a` -> `a...942`
## • `a` -> `a...943`
## • `a` -> `a...944`
## • `a` -> `a...945`
## • `a` -> `a...946`
## • `a` -> `a...947`
## • `a` -> `a...948`
## • `a` -> `a...949`
## • `a` -> `a...950`
## • `a` -> `a...951`
## • `a` -> `a...952`
## • `a` -> `a...953`
## • `a` -> `a...954`
## • `a` -> `a...955`
## • `a` -> `a...956`
## • `a` -> `a...957`
## • `a` -> `a...958`
## • `a` -> `a...959`
## • `a` -> `a...960`
## • `a` -> `a...961`
## • `a` -> `a...962`
## • `a` -> `a...963`
## • `a` -> `a...964`
## • `a` -> `a...965`
## • `a` -> `a...966`
## • `a` -> `a...967`
## • `a` -> `a...968`
## • `a` -> `a...969`
## • `a` -> `a...970`
## • `a` -> `a...971`
## • `a` -> `a...972`
## • `a` -> `a...973`
## • `a` -> `a...974`
## • `a` -> `a...975`
## • `a` -> `a...976`
## • `a` -> `a...977`
## • `a` -> `a...978`
## • `a` -> `a...979`
## • `a` -> `a...980`
## • `a` -> `a...981`
## • `a` -> `a...982`
## • `a` -> `a...983`
## • `a` -> `a...984`
## • `a` -> `a...985`
## • `a` -> `a...986`
## • `a` -> `a...987`
## • `a` -> `a...988`
## • `a` -> `a...989`
## • `a` -> `a...990`
## • `a` -> `a...991`
## • `a` -> `a...992`
## • `a` -> `a...993`
## • `a` -> `a...994`
## • `a` -> `a...995`
## • `a` -> `a...996`
## • `a` -> `a...997`
## • `a` -> `a...998`
## • `a` -> `a...999`
## • `a` -> `a...1000`

## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
## `.name_repair` is omitted as of tibble 2.0.0.
## ℹ Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

sim3_scored = score_accuracy(sim3_ratings, criterion = sim_criterion_values, methods = "all")

#describe2
sim3_scored %>% describe2() %>% cbind(aggregate = score_accuracy(sim3_ratings, criterion = sim_criterion_values, methods = "all", aggregate = T) %>% unlist())

#correlations
sim3_scored %>% wtd.cors() %>% GG_heatmap(reorder_vars = F)

#pairwise
sim3_scored %>% ggpairs_easy()

GG_save("figs/sim3_pairs.png")

True signal + random errors + individuals vary in error mean and SD

Most realistic scenario.

#generate ratings
set.seed(1)
sim4_ratings = map_dfc(1:sim_n, function(i) {
  tibble(
    a = sim_criterion_values*.5 + rnorm(20, mean = rnorm(1), sd = runif(1, min = 0, max = 2))
  )
}) %>% t() %>% as_tibble()

## New names:
## • `a` -> `a...1`
## • `a` -> `a...2`
## • `a` -> `a...3`
## • `a` -> `a...4`
## • `a` -> `a...5`
## • `a` -> `a...6`
## • `a` -> `a...7`
## • `a` -> `a...8`
## • `a` -> `a...9`
## • `a` -> `a...10`
## • `a` -> `a...11`
## • `a` -> `a...12`
## • `a` -> `a...13`
## • `a` -> `a...14`
## • `a` -> `a...15`
## • `a` -> `a...16`
## • `a` -> `a...17`
## • `a` -> `a...18`
## • `a` -> `a...19`
## • `a` -> `a...20`
## • `a` -> `a...21`
## • `a` -> `a...22`
## • `a` -> `a...23`
## • `a` -> `a...24`
## • `a` -> `a...25`
## • `a` -> `a...26`
## • `a` -> `a...27`
## • `a` -> `a...28`
## • `a` -> `a...29`
## • `a` -> `a...30`
## • `a` -> `a...31`
## • `a` -> `a...32`
## • `a` -> `a...33`
## • `a` -> `a...34`
## • `a` -> `a...35`
## • `a` -> `a...36`
## • `a` -> `a...37`
## • `a` -> `a...38`
## • `a` -> `a...39`
## • `a` -> `a...40`
## • `a` -> `a...41`
## • `a` -> `a...42`
## • `a` -> `a...43`
## • `a` -> `a...44`
## • `a` -> `a...45`
## • `a` -> `a...46`
## • `a` -> `a...47`
## • `a` -> `a...48`
## • `a` -> `a...49`
## • `a` -> `a...50`
## • `a` -> `a...51`
## • `a` -> `a...52`
## • `a` -> `a...53`
## • `a` -> `a...54`
## • `a` -> `a...55`
## • `a` -> `a...56`
## • `a` -> `a...57`
## • `a` -> `a...58`
## • `a` -> `a...59`
## • `a` -> `a...60`
## • `a` -> `a...61`
## • `a` -> `a...62`
## • `a` -> `a...63`
## • `a` -> `a...64`
## • `a` -> `a...65`
## • `a` -> `a...66`
## • `a` -> `a...67`
## • `a` -> `a...68`
## • `a` -> `a...69`
## • `a` -> `a...70`
## • `a` -> `a...71`
## • `a` -> `a...72`
## • `a` -> `a...73`
## • `a` -> `a...74`
## • `a` -> `a...75`
## • `a` -> `a...76`
## • `a` -> `a...77`
## • `a` -> `a...78`
## • `a` -> `a...79`
## • `a` -> `a...80`
## • `a` -> `a...81`
## • `a` -> `a...82`
## • `a` -> `a...83`
## • `a` -> `a...84`
## • `a` -> `a...85`
## • `a` -> `a...86`
## • `a` -> `a...87`
## • `a` -> `a...88`
## • `a` -> `a...89`
## • `a` -> `a...90`
## • `a` -> `a...91`
## • `a` -> `a...92`
## • `a` -> `a...93`
## • `a` -> `a...94`
## • `a` -> `a...95`
## • `a` -> `a...96`
## • `a` -> `a...97`
## • `a` -> `a...98`
## • `a` -> `a...99`
## • `a` -> `a...100`
## • `a` -> `a...101`
## • `a` -> `a...102`
## • `a` -> `a...103`
## • `a` -> `a...104`
## • `a` -> `a...105`
## • `a` -> `a...106`
## • `a` -> `a...107`
## • `a` -> `a...108`
## • `a` -> `a...109`
## • `a` -> `a...110`
## • `a` -> `a...111`
## • `a` -> `a...112`
## • `a` -> `a...113`
## • `a` -> `a...114`
## • `a` -> `a...115`
## • `a` -> `a...116`
## • `a` -> `a...117`
## • `a` -> `a...118`
## • `a` -> `a...119`
## • `a` -> `a...120`
## • `a` -> `a...121`
## • `a` -> `a...122`
## • `a` -> `a...123`
## • `a` -> `a...124`
## • `a` -> `a...125`
## • `a` -> `a...126`
## • `a` -> `a...127`
## • `a` -> `a...128`
## • `a` -> `a...129`
## • `a` -> `a...130`
## • `a` -> `a...131`
## • `a` -> `a...132`
## • `a` -> `a...133`
## • `a` -> `a...134`
## • `a` -> `a...135`
## • `a` -> `a...136`
## • `a` -> `a...137`
## • `a` -> `a...138`
## • `a` -> `a...139`
## • `a` -> `a...140`
## • `a` -> `a...141`
## • `a` -> `a...142`
## • `a` -> `a...143`
## • `a` -> `a...144`
## • `a` -> `a...145`
## • `a` -> `a...146`
## • `a` -> `a...147`
## • `a` -> `a...148`
## • `a` -> `a...149`
## • `a` -> `a...150`
## • `a` -> `a...151`
## • `a` -> `a...152`
## • `a` -> `a...153`
## • `a` -> `a...154`
## • `a` -> `a...155`
## • `a` -> `a...156`
## • `a` -> `a...157`
## • `a` -> `a...158`
## • `a` -> `a...159`
## • `a` -> `a...160`
## • `a` -> `a...161`
## • `a` -> `a...162`
## • `a` -> `a...163`
## • `a` -> `a...164`
## • `a` -> `a...165`
## • `a` -> `a...166`
## • `a` -> `a...167`
## • `a` -> `a...168`
## • `a` -> `a...169`
## • `a` -> `a...170`
## • `a` -> `a...171`
## • `a` -> `a...172`
## • `a` -> `a...173`
## • `a` -> `a...174`
## • `a` -> `a...175`
## • `a` -> `a...176`
## • `a` -> `a...177`
## • `a` -> `a...178`
## • `a` -> `a...179`
## • `a` -> `a...180`
## • `a` -> `a...181`
## • `a` -> `a...182`
## • `a` -> `a...183`
## • `a` -> `a...184`
## • `a` -> `a...185`
## • `a` -> `a...186`
## • `a` -> `a...187`
## • `a` -> `a...188`
## • `a` -> `a...189`
## • `a` -> `a...190`
## • `a` -> `a...191`
## • `a` -> `a...192`
## • `a` -> `a...193`
## • `a` -> `a...194`
## • `a` -> `a...195`
## • `a` -> `a...196`
## • `a` -> `a...197`
## • `a` -> `a...198`
## • `a` -> `a...199`
## • `a` -> `a...200`
## • `a` -> `a...201`
## • `a` -> `a...202`
## • `a` -> `a...203`
## • `a` -> `a...204`
## • `a` -> `a...205`
## • `a` -> `a...206`
## • `a` -> `a...207`
## • `a` -> `a...208`
## • `a` -> `a...209`
## • `a` -> `a...210`
## • `a` -> `a...211`
## • `a` -> `a...212`
## • `a` -> `a...213`
## • `a` -> `a...214`
## • `a` -> `a...215`
## • `a` -> `a...216`
## • `a` -> `a...217`
## • `a` -> `a...218`
## • `a` -> `a...219`
## • `a` -> `a...220`
## • `a` -> `a...221`
## • `a` -> `a...222`
## • `a` -> `a...223`
## • `a` -> `a...224`
## • `a` -> `a...225`
## • `a` -> `a...226`
## • `a` -> `a...227`
## • `a` -> `a...228`
## • `a` -> `a...229`
## • `a` -> `a...230`
## • `a` -> `a...231`
## • `a` -> `a...232`
## • `a` -> `a...233`
## • `a` -> `a...234`
## • `a` -> `a...235`
## • `a` -> `a...236`
## • `a` -> `a...237`
## • `a` -> `a...238`
## • `a` -> `a...239`
## • `a` -> `a...240`
## • `a` -> `a...241`
## • `a` -> `a...242`
## • `a` -> `a...243`
## • `a` -> `a...244`
## • `a` -> `a...245`
## • `a` -> `a...246`
## • `a` -> `a...247`
## • `a` -> `a...248`
## • `a` -> `a...249`
## • `a` -> `a...250`
## • `a` -> `a...251`
## • `a` -> `a...252`
## • `a` -> `a...253`
## • `a` -> `a...254`
## • `a` -> `a...255`
## • `a` -> `a...256`
## • `a` -> `a...257`
## • `a` -> `a...258`
## • `a` -> `a...259`
## • `a` -> `a...260`
## • `a` -> `a...261`
## • `a` -> `a...262`
## • `a` -> `a...263`
## • `a` -> `a...264`
## • `a` -> `a...265`
## • `a` -> `a...266`
## • `a` -> `a...267`
## • `a` -> `a...268`
## • `a` -> `a...269`
## • `a` -> `a...270`
## • `a` -> `a...271`
## • `a` -> `a...272`
## • `a` -> `a...273`
## • `a` -> `a...274`
## • `a` -> `a...275`
## • `a` -> `a...276`
## • `a` -> `a...277`
## • `a` -> `a...278`
## • `a` -> `a...279`
## • `a` -> `a...280`
## • `a` -> `a...281`
## • `a` -> `a...282`
## • `a` -> `a...283`
## • `a` -> `a...284`
## • `a` -> `a...285`
## • `a` -> `a...286`
## • `a` -> `a...287`
## • `a` -> `a...288`
## • `a` -> `a...289`
## • `a` -> `a...290`
## • `a` -> `a...291`
## • `a` -> `a...292`
## • `a` -> `a...293`
## • `a` -> `a...294`
## • `a` -> `a...295`
## • `a` -> `a...296`
## • `a` -> `a...297`
## • `a` -> `a...298`
## • `a` -> `a...299`
## • `a` -> `a...300`
## • `a` -> `a...301`
## • `a` -> `a...302`
## • `a` -> `a...303`
## • `a` -> `a...304`
## • `a` -> `a...305`
## • `a` -> `a...306`
## • `a` -> `a...307`
## • `a` -> `a...308`
## • `a` -> `a...309`
## • `a` -> `a...310`
## • `a` -> `a...311`
## • `a` -> `a...312`
## • `a` -> `a...313`
## • `a` -> `a...314`
## • `a` -> `a...315`
## • `a` -> `a...316`
## • `a` -> `a...317`
## • `a` -> `a...318`
## • `a` -> `a...319`
## • `a` -> `a...320`
## • `a` -> `a...321`
## • `a` -> `a...322`
## • `a` -> `a...323`
## • `a` -> `a...324`
## • `a` -> `a...325`
## • `a` -> `a...326`
## • `a` -> `a...327`
## • `a` -> `a...328`
## • `a` -> `a...329`
## • `a` -> `a...330`
## • `a` -> `a...331`
## • `a` -> `a...332`
## • `a` -> `a...333`
## • `a` -> `a...334`
## • `a` -> `a...335`
## • `a` -> `a...336`
## • `a` -> `a...337`
## • `a` -> `a...338`
## • `a` -> `a...339`
## • `a` -> `a...340`
## • `a` -> `a...341`
## • `a` -> `a...342`
## • `a` -> `a...343`
## • `a` -> `a...344`
## • `a` -> `a...345`
## • `a` -> `a...346`
## • `a` -> `a...347`
## • `a` -> `a...348`
## • `a` -> `a...349`
## • `a` -> `a...350`
## • `a` -> `a...351`
## • `a` -> `a...352`
## • `a` -> `a...353`
## • `a` -> `a...354`
## • `a` -> `a...355`
## • `a` -> `a...356`
## • `a` -> `a...357`
## • `a` -> `a...358`
## • `a` -> `a...359`
## • `a` -> `a...360`
## • `a` -> `a...361`
## • `a` -> `a...362`
## • `a` -> `a...363`
## • `a` -> `a...364`
## • `a` -> `a...365`
## • `a` -> `a...366`
## • `a` -> `a...367`
## • `a` -> `a...368`
## • `a` -> `a...369`
## • `a` -> `a...370`
## • `a` -> `a...371`
## • `a` -> `a...372`
## • `a` -> `a...373`
## • `a` -> `a...374`
## • `a` -> `a...375`
## • `a` -> `a...376`
## • `a` -> `a...377`
## • `a` -> `a...378`
## • `a` -> `a...379`
## • `a` -> `a...380`
## • `a` -> `a...381`
## • `a` -> `a...382`
## • `a` -> `a...383`
## • `a` -> `a...384`
## • `a` -> `a...385`
## • `a` -> `a...386`
## • `a` -> `a...387`
## • `a` -> `a...388`
## • `a` -> `a...389`
## • `a` -> `a...390`
## • `a` -> `a...391`
## • `a` -> `a...392`
## • `a` -> `a...393`
## • `a` -> `a...394`
## • `a` -> `a...395`
## • `a` -> `a...396`
## • `a` -> `a...397`
## • `a` -> `a...398`
## • `a` -> `a...399`
## • `a` -> `a...400`
## • `a` -> `a...401`
## • `a` -> `a...402`
## • `a` -> `a...403`
## • `a` -> `a...404`
## • `a` -> `a...405`
## • `a` -> `a...406`
## • `a` -> `a...407`
## • `a` -> `a...408`
## • `a` -> `a...409`
## • `a` -> `a...410`
## • `a` -> `a...411`
## • `a` -> `a...412`
## • `a` -> `a...413`
## • `a` -> `a...414`
## • `a` -> `a...415`
## • `a` -> `a...416`
## • `a` -> `a...417`
## • `a` -> `a...418`
## • `a` -> `a...419`
## • `a` -> `a...420`
## • `a` -> `a...421`
## • `a` -> `a...422`
## • `a` -> `a...423`
## • `a` -> `a...424`
## • `a` -> `a...425`
## • `a` -> `a...426`
## • `a` -> `a...427`
## • `a` -> `a...428`
## • `a` -> `a...429`
## • `a` -> `a...430`
## • `a` -> `a...431`
## • `a` -> `a...432`
## • `a` -> `a...433`
## • `a` -> `a...434`
## • `a` -> `a...435`
## • `a` -> `a...436`
## • `a` -> `a...437`
## • `a` -> `a...438`
## • `a` -> `a...439`
## • `a` -> `a...440`
## • `a` -> `a...441`
## • `a` -> `a...442`
## • `a` -> `a...443`
## • `a` -> `a...444`
## • `a` -> `a...445`
## • `a` -> `a...446`
## • `a` -> `a...447`
## • `a` -> `a...448`
## • `a` -> `a...449`
## • `a` -> `a...450`
## • `a` -> `a...451`
## • `a` -> `a...452`
## • `a` -> `a...453`
## • `a` -> `a...454`
## • `a` -> `a...455`
## • `a` -> `a...456`
## • `a` -> `a...457`
## • `a` -> `a...458`
## • `a` -> `a...459`
## • `a` -> `a...460`
## • `a` -> `a...461`
## • `a` -> `a...462`
## • `a` -> `a...463`
## • `a` -> `a...464`
## • `a` -> `a...465`
## • `a` -> `a...466`
## • `a` -> `a...467`
## • `a` -> `a...468`
## • `a` -> `a...469`
## • `a` -> `a...470`
## • `a` -> `a...471`
## • `a` -> `a...472`
## • `a` -> `a...473`
## • `a` -> `a...474`
## • `a` -> `a...475`
## • `a` -> `a...476`
## • `a` -> `a...477`
## • `a` -> `a...478`
## • `a` -> `a...479`
## • `a` -> `a...480`
## • `a` -> `a...481`
## • `a` -> `a...482`
## • `a` -> `a...483`
## • `a` -> `a...484`
## • `a` -> `a...485`
## • `a` -> `a...486`
## • `a` -> `a...487`
## • `a` -> `a...488`
## • `a` -> `a...489`
## • `a` -> `a...490`
## • `a` -> `a...491`
## • `a` -> `a...492`
## • `a` -> `a...493`
## • `a` -> `a...494`
## • `a` -> `a...495`
## • `a` -> `a...496`
## • `a` -> `a...497`
## • `a` -> `a...498`
## • `a` -> `a...499`
## • `a` -> `a...500`
## • `a` -> `a...501`
## • `a` -> `a...502`
## • `a` -> `a...503`
## • `a` -> `a...504`
## • `a` -> `a...505`
## • `a` -> `a...506`
## • `a` -> `a...507`
## • `a` -> `a...508`
## • `a` -> `a...509`
## • `a` -> `a...510`
## • `a` -> `a...511`
## • `a` -> `a...512`
## • `a` -> `a...513`
## • `a` -> `a...514`
## • `a` -> `a...515`
## • `a` -> `a...516`
## • `a` -> `a...517`
## • `a` -> `a...518`
## • `a` -> `a...519`
## • `a` -> `a...520`
## • `a` -> `a...521`
## • `a` -> `a...522`
## • `a` -> `a...523`
## • `a` -> `a...524`
## • `a` -> `a...525`
## • `a` -> `a...526`
## • `a` -> `a...527`
## • `a` -> `a...528`
## • `a` -> `a...529`
## • `a` -> `a...530`
## • `a` -> `a...531`
## • `a` -> `a...532`
## • `a` -> `a...533`
## • `a` -> `a...534`
## • `a` -> `a...535`
## • `a` -> `a...536`
## • `a` -> `a...537`
## • `a` -> `a...538`
## • `a` -> `a...539`
## • `a` -> `a...540`
## • `a` -> `a...541`
## • `a` -> `a...542`
## • `a` -> `a...543`
## • `a` -> `a...544`
## • `a` -> `a...545`
## • `a` -> `a...546`
## • `a` -> `a...547`
## • `a` -> `a...548`
## • `a` -> `a...549`
## • `a` -> `a...550`
## • `a` -> `a...551`
## • `a` -> `a...552`
## • `a` -> `a...553`
## • `a` -> `a...554`
## • `a` -> `a...555`
## • `a` -> `a...556`
## • `a` -> `a...557`
## • `a` -> `a...558`
## • `a` -> `a...559`
## • `a` -> `a...560`
## • `a` -> `a...561`
## • `a` -> `a...562`
## • `a` -> `a...563`
## • `a` -> `a...564`
## • `a` -> `a...565`
## • `a` -> `a...566`
## • `a` -> `a...567`
## • `a` -> `a...568`
## • `a` -> `a...569`
## • `a` -> `a...570`
## • `a` -> `a...571`
## • `a` -> `a...572`
## • `a` -> `a...573`
## • `a` -> `a...574`
## • `a` -> `a...575`
## • `a` -> `a...576`
## • `a` -> `a...577`
## • `a` -> `a...578`
## • `a` -> `a...579`
## • `a` -> `a...580`
## • `a` -> `a...581`
## • `a` -> `a...582`
## • `a` -> `a...583`
## • `a` -> `a...584`
## • `a` -> `a...585`
## • `a` -> `a...586`
## • `a` -> `a...587`
## • `a` -> `a...588`
## • `a` -> `a...589`
## • `a` -> `a...590`
## • `a` -> `a...591`
## • `a` -> `a...592`
## • `a` -> `a...593`
## • `a` -> `a...594`
## • `a` -> `a...595`
## • `a` -> `a...596`
## • `a` -> `a...597`
## • `a` -> `a...598`
## • `a` -> `a...599`
## • `a` -> `a...600`
## • `a` -> `a...601`
## • `a` -> `a...602`
## • `a` -> `a...603`
## • `a` -> `a...604`
## • `a` -> `a...605`
## • `a` -> `a...606`
## • `a` -> `a...607`
## • `a` -> `a...608`
## • `a` -> `a...609`
## • `a` -> `a...610`
## • `a` -> `a...611`
## • `a` -> `a...612`
## • `a` -> `a...613`
## • `a` -> `a...614`
## • `a` -> `a...615`
## • `a` -> `a...616`
## • `a` -> `a...617`
## • `a` -> `a...618`
## • `a` -> `a...619`
## • `a` -> `a...620`
## • `a` -> `a...621`
## • `a` -> `a...622`
## • `a` -> `a...623`
## • `a` -> `a...624`
## • `a` -> `a...625`
## • `a` -> `a...626`
## • `a` -> `a...627`
## • `a` -> `a...628`
## • `a` -> `a...629`
## • `a` -> `a...630`
## • `a` -> `a...631`
## • `a` -> `a...632`
## • `a` -> `a...633`
## • `a` -> `a...634`
## • `a` -> `a...635`
## • `a` -> `a...636`
## • `a` -> `a...637`
## • `a` -> `a...638`
## • `a` -> `a...639`
## • `a` -> `a...640`
## • `a` -> `a...641`
## • `a` -> `a...642`
## • `a` -> `a...643`
## • `a` -> `a...644`
## • `a` -> `a...645`
## • `a` -> `a...646`
## • `a` -> `a...647`
## • `a` -> `a...648`
## • `a` -> `a...649`
## • `a` -> `a...650`
## • `a` -> `a...651`
## • `a` -> `a...652`
## • `a` -> `a...653`
## • `a` -> `a...654`
## • `a` -> `a...655`
## • `a` -> `a...656`
## • `a` -> `a...657`
## • `a` -> `a...658`
## • `a` -> `a...659`
## • `a` -> `a...660`
## • `a` -> `a...661`
## • `a` -> `a...662`
## • `a` -> `a...663`
## • `a` -> `a...664`
## • `a` -> `a...665`
## • `a` -> `a...666`
## • `a` -> `a...667`
## • `a` -> `a...668`
## • `a` -> `a...669`
## • `a` -> `a...670`
## • `a` -> `a...671`
## • `a` -> `a...672`
## • `a` -> `a...673`
## • `a` -> `a...674`
## • `a` -> `a...675`
## • `a` -> `a...676`
## • `a` -> `a...677`
## • `a` -> `a...678`
## • `a` -> `a...679`
## • `a` -> `a...680`
## • `a` -> `a...681`
## • `a` -> `a...682`
## • `a` -> `a...683`
## • `a` -> `a...684`
## • `a` -> `a...685`
## • `a` -> `a...686`
## • `a` -> `a...687`
## • `a` -> `a...688`
## • `a` -> `a...689`
## • `a` -> `a...690`
## • `a` -> `a...691`
## • `a` -> `a...692`
## • `a` -> `a...693`
## • `a` -> `a...694`
## • `a` -> `a...695`
## • `a` -> `a...696`
## • `a` -> `a...697`
## • `a` -> `a...698`
## • `a` -> `a...699`
## • `a` -> `a...700`
## • `a` -> `a...701`
## • `a` -> `a...702`
## • `a` -> `a...703`
## • `a` -> `a...704`
## • `a` -> `a...705`
## • `a` -> `a...706`
## • `a` -> `a...707`
## • `a` -> `a...708`
## • `a` -> `a...709`
## • `a` -> `a...710`
## • `a` -> `a...711`
## • `a` -> `a...712`
## • `a` -> `a...713`
## • `a` -> `a...714`
## • `a` -> `a...715`
## • `a` -> `a...716`
## • `a` -> `a...717`
## • `a` -> `a...718`
## • `a` -> `a...719`
## • `a` -> `a...720`
## • `a` -> `a...721`
## • `a` -> `a...722`
## • `a` -> `a...723`
## • `a` -> `a...724`
## • `a` -> `a...725`
## • `a` -> `a...726`
## • `a` -> `a...727`
## • `a` -> `a...728`
## • `a` -> `a...729`
## • `a` -> `a...730`
## • `a` -> `a...731`
## • `a` -> `a...732`
## • `a` -> `a...733`
## • `a` -> `a...734`
## • `a` -> `a...735`
## • `a` -> `a...736`
## • `a` -> `a...737`
## • `a` -> `a...738`
## • `a` -> `a...739`
## • `a` -> `a...740`
## • `a` -> `a...741`
## • `a` -> `a...742`
## • `a` -> `a...743`
## • `a` -> `a...744`
## • `a` -> `a...745`
## • `a` -> `a...746`
## • `a` -> `a...747`
## • `a` -> `a...748`
## • `a` -> `a...749`
## • `a` -> `a...750`
## • `a` -> `a...751`
## • `a` -> `a...752`
## • `a` -> `a...753`
## • `a` -> `a...754`
## • `a` -> `a...755`
## • `a` -> `a...756`
## • `a` -> `a...757`
## • `a` -> `a...758`
## • `a` -> `a...759`
## • `a` -> `a...760`
## • `a` -> `a...761`
## • `a` -> `a...762`
## • `a` -> `a...763`
## • `a` -> `a...764`
## • `a` -> `a...765`
## • `a` -> `a...766`
## • `a` -> `a...767`
## • `a` -> `a...768`
## • `a` -> `a...769`
## • `a` -> `a...770`
## • `a` -> `a...771`
## • `a` -> `a...772`
## • `a` -> `a...773`
## • `a` -> `a...774`
## • `a` -> `a...775`
## • `a` -> `a...776`
## • `a` -> `a...777`
## • `a` -> `a...778`
## • `a` -> `a...779`
## • `a` -> `a...780`
## • `a` -> `a...781`
## • `a` -> `a...782`
## • `a` -> `a...783`
## • `a` -> `a...784`
## • `a` -> `a...785`
## • `a` -> `a...786`
## • `a` -> `a...787`
## • `a` -> `a...788`
## • `a` -> `a...789`
## • `a` -> `a...790`
## • `a` -> `a...791`
## • `a` -> `a...792`
## • `a` -> `a...793`
## • `a` -> `a...794`
## • `a` -> `a...795`
## • `a` -> `a...796`
## • `a` -> `a...797`
## • `a` -> `a...798`
## • `a` -> `a...799`
## • `a` -> `a...800`
## • `a` -> `a...801`
## • `a` -> `a...802`
## • `a` -> `a...803`
## • `a` -> `a...804`
## • `a` -> `a...805`
## • `a` -> `a...806`
## • `a` -> `a...807`
## • `a` -> `a...808`
## • `a` -> `a...809`
## • `a` -> `a...810`
## • `a` -> `a...811`
## • `a` -> `a...812`
## • `a` -> `a...813`
## • `a` -> `a...814`
## • `a` -> `a...815`
## • `a` -> `a...816`
## • `a` -> `a...817`
## • `a` -> `a...818`
## • `a` -> `a...819`
## • `a` -> `a...820`
## • `a` -> `a...821`
## • `a` -> `a...822`
## • `a` -> `a...823`
## • `a` -> `a...824`
## • `a` -> `a...825`
## • `a` -> `a...826`
## • `a` -> `a...827`
## • `a` -> `a...828`
## • `a` -> `a...829`
## • `a` -> `a...830`
## • `a` -> `a...831`
## • `a` -> `a...832`
## • `a` -> `a...833`
## • `a` -> `a...834`
## • `a` -> `a...835`
## • `a` -> `a...836`
## • `a` -> `a...837`
## • `a` -> `a...838`
## • `a` -> `a...839`
## • `a` -> `a...840`
## • `a` -> `a...841`
## • `a` -> `a...842`
## • `a` -> `a...843`
## • `a` -> `a...844`
## • `a` -> `a...845`
## • `a` -> `a...846`
## • `a` -> `a...847`
## • `a` -> `a...848`
## • `a` -> `a...849`
## • `a` -> `a...850`
## • `a` -> `a...851`
## • `a` -> `a...852`
## • `a` -> `a...853`
## • `a` -> `a...854`
## • `a` -> `a...855`
## • `a` -> `a...856`
## • `a` -> `a...857`
## • `a` -> `a...858`
## • `a` -> `a...859`
## • `a` -> `a...860`
## • `a` -> `a...861`
## • `a` -> `a...862`
## • `a` -> `a...863`
## • `a` -> `a...864`
## • `a` -> `a...865`
## • `a` -> `a...866`
## • `a` -> `a...867`
## • `a` -> `a...868`
## • `a` -> `a...869`
## • `a` -> `a...870`
## • `a` -> `a...871`
## • `a` -> `a...872`
## • `a` -> `a...873`
## • `a` -> `a...874`
## • `a` -> `a...875`
## • `a` -> `a...876`
## • `a` -> `a...877`
## • `a` -> `a...878`
## • `a` -> `a...879`
## • `a` -> `a...880`
## • `a` -> `a...881`
## • `a` -> `a...882`
## • `a` -> `a...883`
## • `a` -> `a...884`
## • `a` -> `a...885`
## • `a` -> `a...886`
## • `a` -> `a...887`
## • `a` -> `a...888`
## • `a` -> `a...889`
## • `a` -> `a...890`
## • `a` -> `a...891`
## • `a` -> `a...892`
## • `a` -> `a...893`
## • `a` -> `a...894`
## • `a` -> `a...895`
## • `a` -> `a...896`
## • `a` -> `a...897`
## • `a` -> `a...898`
## • `a` -> `a...899`
## • `a` -> `a...900`
## • `a` -> `a...901`
## • `a` -> `a...902`
## • `a` -> `a...903`
## • `a` -> `a...904`
## • `a` -> `a...905`
## • `a` -> `a...906`
## • `a` -> `a...907`
## • `a` -> `a...908`
## • `a` -> `a...909`
## • `a` -> `a...910`
## • `a` -> `a...911`
## • `a` -> `a...912`
## • `a` -> `a...913`
## • `a` -> `a...914`
## • `a` -> `a...915`
## • `a` -> `a...916`
## • `a` -> `a...917`
## • `a` -> `a...918`
## • `a` -> `a...919`
## • `a` -> `a...920`
## • `a` -> `a...921`
## • `a` -> `a...922`
## • `a` -> `a...923`
## • `a` -> `a...924`
## • `a` -> `a...925`
## • `a` -> `a...926`
## • `a` -> `a...927`
## • `a` -> `a...928`
## • `a` -> `a...929`
## • `a` -> `a...930`
## • `a` -> `a...931`
## • `a` -> `a...932`
## • `a` -> `a...933`
## • `a` -> `a...934`
## • `a` -> `a...935`
## • `a` -> `a...936`
## • `a` -> `a...937`
## • `a` -> `a...938`
## • `a` -> `a...939`
## • `a` -> `a...940`
## • `a` -> `a...941`
## • `a` -> `a...942`
## • `a` -> `a...943`
## • `a` -> `a...944`
## • `a` -> `a...945`
## • `a` -> `a...946`
## • `a` -> `a...947`
## • `a` -> `a...948`
## • `a` -> `a...949`
## • `a` -> `a...950`
## • `a` -> `a...951`
## • `a` -> `a...952`
## • `a` -> `a...953`
## • `a` -> `a...954`
## • `a` -> `a...955`
## • `a` -> `a...956`
## • `a` -> `a...957`
## • `a` -> `a...958`
## • `a` -> `a...959`
## • `a` -> `a...960`
## • `a` -> `a...961`
## • `a` -> `a...962`
## • `a` -> `a...963`
## • `a` -> `a...964`
## • `a` -> `a...965`
## • `a` -> `a...966`
## • `a` -> `a...967`
## • `a` -> `a...968`
## • `a` -> `a...969`
## • `a` -> `a...970`
## • `a` -> `a...971`
## • `a` -> `a...972`
## • `a` -> `a...973`
## • `a` -> `a...974`
## • `a` -> `a...975`
## • `a` -> `a...976`
## • `a` -> `a...977`
## • `a` -> `a...978`
## • `a` -> `a...979`
## • `a` -> `a...980`
## • `a` -> `a...981`
## • `a` -> `a...982`
## • `a` -> `a...983`
## • `a` -> `a...984`
## • `a` -> `a...985`
## • `a` -> `a...986`
## • `a` -> `a...987`
## • `a` -> `a...988`
## • `a` -> `a...989`
## • `a` -> `a...990`
## • `a` -> `a...991`
## • `a` -> `a...992`
## • `a` -> `a...993`
## • `a` -> `a...994`
## • `a` -> `a...995`
## • `a` -> `a...996`
## • `a` -> `a...997`
## • `a` -> `a...998`
## • `a` -> `a...999`
## • `a` -> `a...1000`

sim4_scored = score_accuracy(sim4_ratings, criterion = sim_criterion_values, methods = "all")

#describe2
sim4_scored %>% 
  describe2() %>% 
  cbind(
    aggregate = score_accuracy(sim4_ratings, criterion = sim_criterion_values, methods = "all", aggregate = T) %>% unlist()  
)

#correlations
sim4_scored %>% wtd.cors() %>% GG_heatmap(reorder_vars = F)

#pairwise
sim4_scored %>% ggpairs_easy()

GG_save("figs/sim4_pairs.png")

Dutch stereotype study notebook

About

Init

Functions

Data

Renaming etc.

IQ scoring

Aggregate stereotypes

Immigrant crime

Mediation

Nationalists

Occupations and sex

Province income

Individual stereotypes

Descriptives

Party stats

Immigrant crime

Muslim bias

Occupations and sex

Province income

General accuracy

Accuracy by source

Party support and intelligence

Simulation

Completely random data

Random data, uniform distributions

True signal + random errors

True signal + random errors + individuals vary in error SD

True signal + random errors + individuals vary in error mean and SD

Meta