library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.8
## ✓ tidyr 1.2.0 ✓ stringr 1.4.0
## ✓ readr 2.1.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(here)
## here() starts at /Users/caoanjie/Desktop/projects/CCRR_analysis/study_2
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(lmerTest)
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(effectsize)
library(corrplot)
## corrplot 0.92 loaded
library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
# Load the processed response data; here() resolves the path from the project
# root. Per the readr report below, all 7 columns are read as character, so
# `resp` has to be converted (as.numeric / as.logical) before it is scored.
tidy_d <- read_csv(here("data/4_processed/with_human_coded_main.csv"))
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 41284 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): subject, culture, task_name, task_info, trial_info, resp_type, resp
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# RV: per-subject score = mean of that subject's responses on the "RV" task.
# A missing or non-numeric response makes the subject's score NA (no na.rm).
rv_df <- tidy_d %>%
  filter(task_name == "RV") %>%
  group_by(subject, culture) %>%
  # `resp` is read as character, so convert before averaging.
  # .groups = "drop" returns an ungrouped tibble with one row per
  # subject/culture; the default would leave the result grouped by `subject`,
  # and that grouping would be carried into full_df through the joins.
  summarise(rv_score = mean(as.numeric(resp)), .groups = "drop")
# Triads: per-subject score = share of "triads" trials of the "TD" task whose
# response parses as TRUE.
td_df <- tidy_d %>%
  filter(task_name == "TD", task_info == "triads") %>%
  group_by(subject, culture) %>%
  # `resp` is character (presumably "TRUE"/"FALSE" strings -- confirm against
  # the data); as.logical() then as.numeric() turns it into 1/0, so the mean
  # is a proportion. Anything as.logical() cannot parse becomes NA.
  # .groups = "drop" returns an ungrouped tibble instead of one still grouped
  # by `subject`.
  summarise(
    td_score = mean(as.numeric(as.logical(resp))),
    .groups = "drop"
  )
# SeI: per-subject count of "causal_historical" answers on the "critical"
# trials of the "SeI" task.
sei_ch_df <- tidy_d %>%
  filter(task_name == "SeI", task_info == "critical") %>%
  # Code the two expected answers as 1/0. case_when() has no fallback branch,
  # so any other value (or NA) is coded NA, which makes that subject's sum NA.
  mutate(resp_score = case_when(
    resp == "causal_historical" ~ 1,
    resp == "descriptivist" ~ 0
  )) %>%
  group_by(subject, culture) %>%
  # .groups = "drop" returns an ungrouped tibble instead of one still grouped
  # by `subject`.
  summarise(sei_ch_score = sum(resp_score), .groups = "drop")
# FD: per-subject score = mean of that subject's responses on the "FD" task.
# A missing or non-numeric response makes the subject's score NA (no na.rm).
fd_score <- tidy_d %>%
  filter(task_name == "FD") %>%
  group_by(subject, culture) %>%
  # `resp` is read as character, so convert before averaging.
  # .groups = "drop" returns an ungrouped tibble instead of one still grouped
  # by `subject`.
  summarise(fd_score = mean(as.numeric(resp)), .groups = "drop")
# CA: per-subject score = mean response on the "situational" rows of the "CA"
# task. Rows with a missing response are removed before averaging.
ca_df <- tidy_d %>%
  filter(
    task_name == "CA",
    task_info == "situational",
    !is.na(resp)
  ) %>%
  group_by(subject, culture) %>%
  # `resp` is read as character, so convert before averaging.
  # .groups = "drop" returns an ungrouped tibble instead of one still grouped
  # by `subject`.
  summarise(ca_score = mean(as.numeric(resp)), .groups = "drop")
# Build one wide table of task scores. Starting from the RV scores, each of
# the other score tables is left-joined in turn (TD, SeI, FD, CA) on
# subject + culture, so only subjects present in rv_df are kept and a score
# that is absent for a subject shows up as NA.
full_df <- list(rv_df, td_df, sei_ch_df, fd_score, ca_df) %>%
  reduce(left_join, by = c("subject", "culture"))
Big correlation matrices
# CN sample: move the subject id into the row names and drop `culture`, so
# that every remaining column is a task score.
cn_df <- full_df %>%
  filter(culture == "CN") %>%
  column_to_rownames(var = "subject") %>%
  select(!culture)

# Correlation matrix of the CN task scores; "complete.obs" uses only the
# subjects that have no missing score.
cn_res <- cor(x = cn_df, use = "complete.obs")
# US sample: move the subject id into the row names and drop `culture`, so
# that every remaining column is a task score.
us_df <- full_df %>%
  filter(culture == "US") %>%
  column_to_rownames(var = "subject") %>%
  select(!culture)

# Correlation matrix of the US task scores; "complete.obs" uses only the
# subjects that have no missing score.
us_res <- cor(x = us_df, use = "complete.obs")
CN
# CN plots
# Correlation plot of the CN matrix: upper triangle only, variables ordered
# by hierarchical clustering, black labels rotated 45 degrees.
corrplot(
  corr = cn_res,
  type = "upper",
  order = "hclust",
  tl.col = "black",
  tl.srt = 45
)

# Pairwise chart of the CN task scores, with histograms enabled.
chart.Correlation(R = cn_df, histogram = TRUE, pch = 19)

US
# US plots
# Correlation plot of the US matrix: upper triangle only, variables ordered
# by hierarchical clustering, black labels rotated 45 degrees.
corrplot(
  corr = us_res,
  type = "upper",
  order = "hclust",
  tl.col = "black",
  tl.srt = 45
)

# Pairwise chart of the US task scores, with histograms enabled.
chart.Correlation(R = us_df, histogram = TRUE, pch = 19)

Visualizing tasks
# RV score against situational causal-attribution score, one panel per
# culture, with a linear fit.
full_df %>%
  ggplot(aes(x = rv_score, y = ca_score)) +
  # Use geom_jitter() alone: adding geom_point() as well would draw every
  # subject twice (once at the exact position, once jittered).
  geom_jitter() +
  facet_wrap(~culture) +
  geom_smooth(method = "lm") +
  theme_classic() +
  labs(title = "RV - Causal Attribution (situational)")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 60 rows containing non-finite values (stat_smooth).
## Warning: Removed 60 rows containing missing values (geom_point).

# Triads score against situational causal-attribution score, one panel per
# culture, with a linear fit.
full_df %>%
  ggplot(aes(x = td_score, y = ca_score)) +
  # Use geom_jitter() alone: adding geom_point() as well would draw every
  # subject twice (once at the exact position, once jittered).
  geom_jitter() +
  facet_wrap(~culture) +
  geom_smooth(method = "lm") +
  theme_classic() +
  labs(title = "Triads - Causal Attribution (situational)")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 60 rows containing non-finite values (stat_smooth).
## Removed 60 rows containing missing values (geom_point).

# RV score against Triads score, one panel per culture, with a linear fit.
full_df %>%
  ggplot(aes(x = rv_score, y = td_score)) +
  # Use geom_jitter() alone: adding geom_point() as well would draw every
  # subject twice (once at the exact position, once jittered).
  geom_jitter() +
  facet_wrap(~culture) +
  geom_smooth(method = "lm") +
  theme_classic() +
  labs(title = "RV - Triads ")
## `geom_smooth()` using formula 'y ~ x'

Semantic intuition & free description
# Distribution of FD scores at each SeI causal-historical count (treated as
# a discrete category), one panel per culture.
full_df %>%
  ggplot(aes(
    x = as.factor(sei_ch_score),
    y = fd_score
  )) +
  geom_violin() +
  # Use geom_jitter() alone: adding geom_point() as well would draw every
  # subject twice (once at the exact position, once jittered).
  geom_jitter() +
  facet_wrap(~culture) +
  theme_classic()
