Most participants were not born in the US

# Tally the US sample's answers to the "country_born" question.
# count() is the idiomatic shorthand for group_by() + summarise(n()); it
# returns an ungrouped tibble, so no implicit-ungrouping message is emitted.
us_demog %>%
  filter(demog_question == "country_born") %>%
  count(demog_response, name = "count_country_born")
## # A tibble: 2 x 2
##   demog_response count_country_born
##   <chr>                       <int>
## 1 No                             32
## 2 Yes                            19
# Subject IDs from us_demog whose "country_born" response is "No".
not_born_us <- us_demog %>%
  filter(
    demog_question == "country_born",
    demog_response == "No"
  ) %>%
  pull(subject)

Most participants are not currently in the US

# Tally the US sample's answers to the "current_in" question.
# count() is the idiomatic shorthand for group_by() + summarise(n()); it
# returns an ungrouped tibble, so no implicit-ungrouping message is emitted.
us_demog %>%
  filter(demog_question == "current_in") %>%
  count(demog_response, name = "current_in")
## # A tibble: 2 x 2
##   demog_response current_in
##   <chr>               <int>
## 1 No                     32
## 2 Yes                    19
# Subject IDs from us_demog whose "current_in" response is "No".
not_current_us <- us_demog %>%
  filter(
    demog_question == "current_in",
    demog_response == "No"
  ) %>%
  pull(subject)

The participants not born in the US and those not currently in the US are largely the same group

# IDs in not_born_us that do not appear in not_current_us.
# A vectorised membership test replaces the element-by-element loop that grew
# the result with c(); order and duplicates of not_born_us are preserved.
# Note: an empty result is now a zero-length vector rather than NULL.
non_overlap <- not_born_us[!(not_born_us %in% not_current_us)]
non_overlap
## [1] "SS1605203131564"

The information participants provided during the task is mostly consistent with the Prolific-provided demographics

(Code block hidden for IRB reasons; it first reads in the Prolific data.)

Most of the reported current-location information is consistent

# Cross-tabulate the `current_in` column against `prolific_current_in`,
# with the latter recoded to the strings "Yes" / "No" / "NA".
us_demog_prolific_d %>%
  mutate(
    prolific_current_in_clean = case_when(
      # Missing values are labelled with the literal string "NA".
      is.na(prolific_current_in) ~ "NA",
      prolific_current_in == "United States" ~ "Yes",
      TRUE ~ "No"
    )
  ) %>%
  group_by(current_in, prolific_current_in_clean) %>%
  count()
## # A tibble: 5 x 3
## # Groups:   current_in, prolific_current_in_clean [5]
##   current_in prolific_current_in_clean     n
##   <chr>      <chr>                     <int>
## 1 No         NA                            1
## 2 No         No                           31
## 3 Yes        NA                            4
## 4 Yes        No                            2
## 5 Yes        Yes                          13

Most of the country-of-birth information is consistent

# Cross-tabulate the `country_born` column against `prolific_born_country`,
# with the latter recoded to the strings "Yes" / "No" / "NA".
us_demog_prolific_d %>%
  mutate(
    prolific_born_country_clean = case_when(
      # Missing values are labelled with the literal string "NA".
      is.na(prolific_born_country) ~ "NA",
      prolific_born_country == "United States" ~ "Yes",
      TRUE ~ "No"
    )
  ) %>%
  group_by(country_born, prolific_born_country_clean) %>%
  count()
## # A tibble: 4 x 3
## # Groups:   country_born, prolific_born_country_clean [4]
##   country_born prolific_born_country_clean     n
##   <chr>        <chr>                       <int>
## 1 No           No                             32
## 2 Yes          NA                              5
## 3 Yes          No                              1
## 4 Yes          Yes                            13

Many “monolingual English speakers” do not have English as their first language

# Count participants per `prolific_english_first` value, folding missing
# values and the "DATA EXPIRED" placeholder into the literal string "NA".
us_demog_prolific_d %>%
  mutate(
    prolific_english_first_clean = case_when(
      # %in% matches NA against NA, so this covers both the missing and the
      # "DATA EXPIRED" cases in one test.
      prolific_english_first %in% c(NA_character_, "DATA EXPIRED") ~ "NA",
      TRUE ~ prolific_english_first
    )
  ) %>%
  group_by(prolific_english_first_clean) %>%
  count()
## # A tibble: 6 x 2
## # Groups:   prolific_english_first_clean [6]
##   prolific_english_first_clean     n
##   <chr>                        <int>
## 1 English                         27
## 2 Greek                            1
## 3 NA                              13
## 4 Polish                           8
## 5 Portuguese                       1
## 6 Spanish                          1

Mandarin participants check

All but one participant was born in China

# Tally the Mandarin sample's answers to the "country_born" question.
# count() is the idiomatic shorthand for group_by() + summarise(n()); it
# returns an ungrouped tibble, so no implicit-ungrouping message is emitted.
cn_demog %>%
  filter(demog_question == "country_born") %>%
  count(demog_response, name = "count_country_born")
## # A tibble: 2 x 2
##   demog_response count_country_born
##   <chr>                       <int>
## 1 否                              1
## 2 是                             33

All but one participant is not currently in the US

# Tally the Mandarin sample's answers to the "current_in" question.
# count() is the idiomatic shorthand for group_by() + summarise(n()); it
# returns an ungrouped tibble, so no implicit-ungrouping message is emitted.
cn_demog %>%
  filter(demog_question == "current_in") %>%
  count(demog_response, name = "current_in")
## # A tibble: 2 x 2
##   demog_response current_in
##   <chr>               <int>
## 1 否                      1
## 2 是                     33