# Count the rows of `us_demog` for each response to the "country_born"
# question; one output row per distinct `demog_response`.
us_demog %>%
  filter(demog_question == "country_born") %>%
  group_by(demog_response) %>%
  summarise(count_country_born = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
## demog_response count_country_born
## <chr> <int>
## 1 No 32
## 2 Yes 19
# `subject` values from `us_demog` whose "country_born" response is "No".
not_born_us <- us_demog %>%
  filter(
    demog_question == "country_born",
    demog_response == "No"
  ) %>%
  pull(subject)
# Count the rows of `us_demog` for each response to the "current_in"
# question; one output row per distinct `demog_response`.
us_demog %>%
  filter(demog_question == "current_in") %>%
  group_by(demog_response) %>%
  summarise(current_in = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
## demog_response current_in
## <chr> <int>
## 1 No 32
## 2 Yes 19
# `subject` values from `us_demog` whose "current_in" response is "No".
not_current_us <- us_demog %>%
  filter(
    demog_question == "current_in",
    demog_response == "No"
  ) %>%
  pull(subject)
# Subjects listed in `not_born_us` that do not appear in `not_current_us`.
# Vectorised `%in%` subsetting replaces the loop that grew the result with
# c() one element at a time. Order and duplicates are kept as before; when
# nothing qualifies the result is a zero-length vector instead of NULL.
non_overlap <- not_born_us[!(not_born_us %in% not_current_us)]
non_overlap
## [1] "SS1605203131564"
Hiding this code block because the IRB won’t like it. First, read in the Prolific data.
Second, match each subject ID with its Prolific ID; this requires working with the raw data (hiding the block because the IRB won’t like it).
Make us_demog wider so that it joins more easily with demog.
Join with the Prolific data.
# Cross-tabulate `current_in` against a recoded `prolific_current_in`:
# "United States" -> "Yes", missing -> the literal string "NA" (so it shows
# up as its own group), anything else -> "No".
us_demog_prolific_d %>%
  mutate(
    prolific_current_in_clean = case_when(
      is.na(prolific_current_in) ~ "NA",
      prolific_current_in == "United States" ~ "Yes",
      TRUE ~ "No"
    )
  ) %>%
  group_by(current_in, prolific_current_in_clean) %>%
  count()
## # A tibble: 5 x 3
## # Groups: current_in, prolific_current_in_clean [5]
## current_in prolific_current_in_clean n
## <chr> <chr> <int>
## 1 No NA 1
## 2 No No 31
## 3 Yes NA 4
## 4 Yes No 2
## 5 Yes Yes 13
# Cross-tabulate `country_born` against a recoded `prolific_born_country`:
# "United States" -> "Yes", missing -> the literal string "NA" (so it shows
# up as its own group), anything else -> "No".
us_demog_prolific_d %>%
  mutate(
    prolific_born_country_clean = case_when(
      is.na(prolific_born_country) ~ "NA",
      prolific_born_country == "United States" ~ "Yes",
      TRUE ~ "No"
    )
  ) %>%
  group_by(country_born, prolific_born_country_clean) %>%
  count()
## # A tibble: 4 x 3
## # Groups: country_born, prolific_born_country_clean [4]
## country_born prolific_born_country_clean n
## <chr> <chr> <int>
## 1 No No 32
## 2 Yes NA 5
## 3 Yes No 1
## 4 Yes Yes 13
# Tally `prolific_english_first`, folding missing values and the
# "DATA EXPIRED" placeholder into the literal string "NA"; every other
# value is kept as-is.
us_demog_prolific_d %>%
  mutate(
    prolific_english_first_clean = case_when(
      is.na(prolific_english_first) ~ "NA",
      prolific_english_first == "DATA EXPIRED" ~ "NA",
      TRUE ~ prolific_english_first
    )
  ) %>%
  group_by(prolific_english_first_clean) %>%
  count()
## # A tibble: 6 x 2
## # Groups: prolific_english_first_clean [6]
## prolific_english_first_clean n
## <chr> <int>
## 1 English 27
## 2 Greek 1
## 3 NA 13
## 4 Polish 8
## 5 Portuguese 1
## 6 Spanish 1
# Count the rows of `cn_demog` for each response to the "country_born"
# question; one output row per distinct `demog_response`.
cn_demog %>%
  filter(demog_question == "country_born") %>%
  group_by(demog_response) %>%
  summarise(count_country_born = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
## demog_response count_country_born
## <chr> <int>
## 1 否 1
## 2 是 33
# Count the rows of `cn_demog` for each response to the "current_in"
# question; one output row per distinct `demog_response`.
cn_demog %>%
  filter(demog_question == "current_in") %>%
  group_by(demog_response) %>%
  summarise(current_in = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
## demog_response current_in
## <chr> <int>
## 1 否 1
## 2 是 33