library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.8
## ✓ tidyr   1.2.0     ✓ stringr 1.4.0
## ✓ readr   2.1.2     ✓ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(here)

## here() starts at /Users/caoanjie/Desktop/projects/CCRR_analysis/study_2

library(lme4)

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

library(lmerTest)

## 
## Attaching package: 'lmerTest'

## The following object is masked from 'package:lme4':
## 
##     lmer

## The following object is masked from 'package:stats':
## 
##     step

library(effectsize)
library(corrplot)

## corrplot 0.92 loaded

library(PerformanceAnalytics)

## Loading required package: xts

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## 
## Attaching package: 'xts'

## The following objects are masked from 'package:dplyr':
## 
##     first, last

## 
## Attaching package: 'PerformanceAnalytics'

## The following object is masked from 'package:graphics':
## 
##     legend

# Read the processed long-format responses from the project-relative path.
# All seven columns (subject, culture, task_name, task_info, trial_info,
# resp_type, resp) are parsed as character, so scores are converted later.
tidy_d <- here("data/4_processed/with_human_coded_main.csv") %>%
  read_csv()

## Warning: One or more parsing issues, see `problems()` for details

## Rows: 41284 Columns: 7

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): subject, culture, task_name, task_info, trial_info, resp_type, resp
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# RV: per-subject mean of the numeric responses on the RV task.
# `.groups = "drop"` returns an ungrouped tibble, so grouping by subject does
# not leak into the joined table built from rv_df, and the summarise()
# grouping message is no longer emitted. The summarised result already holds
# exactly subject, culture and rv_score, so no extra select() is needed.
rv_df <- tidy_d %>%
  filter(task_name == "RV") %>%
  group_by(subject, culture) %>%
  summarise(rv_score = mean(as.numeric(resp)), .groups = "drop")

## `summarise()` has grouped output by 'subject'. You can override using the
## `.groups` argument.

# Triads: per-subject mean of the triads responses, with the character
# "TRUE"/"FALSE" values converted to 1/0 before averaging.
# `.groups = "drop"` returns an ungrouped tibble instead of one that is still
# grouped by subject.
td_df <- tidy_d %>%
  filter(task_name == "TD", task_info == "triads") %>%
  group_by(subject, culture) %>%
  summarise(td_score = mean(as.numeric(as.logical(resp))), .groups = "drop")

## `summarise()` has grouped output by 'subject'. You can override using the
## `.groups` argument.

# sei_ch_score: number of critical SeI trials answered "causal_historical".
# Any response other than the two coded labels becomes NA in case_when(), and
# since sum() is called without na.rm, that subject's score is then NA.
# `.groups = "drop"` returns an ungrouped tibble instead of one that is still
# grouped by subject.
sei_ch_df <- tidy_d %>%
  filter(task_name == "SeI", task_info == "critical") %>%
  mutate(
    resp_score = case_when(
      resp == "causal_historical" ~ 1,
      resp == "descriptivist" ~ 0
    )
  ) %>%
  group_by(subject, culture) %>%
  summarise(sei_ch_score = sum(resp_score), .groups = "drop")

## `summarise()` has grouped output by 'subject'. You can override using the
## `.groups` argument.

# fd score: per-subject mean of the numeric responses on the FD task.
# `.groups = "drop"` returns an ungrouped tibble instead of one that is still
# grouped by subject.
fd_score <- tidy_d %>%
  filter(task_name == "FD") %>%
  group_by(subject, culture) %>%
  summarise(fd_score = mean(as.numeric(resp)), .groups = "drop")

## `summarise()` has grouped output by 'subject'. You can override using the
## `.groups` argument.

# ca_score: per-subject mean of the numeric responses on the situational CA
# items. Rows with a missing response are removed before averaging.
# `.groups = "drop"` returns an ungrouped tibble instead of one that is still
# grouped by subject.
ca_df <- tidy_d %>%
  filter(
    task_name == "CA",
    task_info == "situational",
    !is.na(resp)
  ) %>%
  group_by(subject, culture) %>%
  summarise(ca_score = mean(as.numeric(resp)), .groups = "drop")

## `summarise()` has grouped output by 'subject'. You can override using the
## `.groups` argument.

# Assemble one wide table of task scores per subject. rv_df is the left-most
# table, so only subject/culture pairs present in rv_df are kept; scores
# missing from the other tables come through as NA.
full_df <- list(rv_df, td_df, sei_ch_df, fd_score, ca_df) %>%
  purrr::reduce(left_join, by = c("subject", "culture"))

big correlational matrices

# Build the score table for a single culture: keep that culture's rows, move
# `subject` into the row names, and drop the `culture` column so that only
# score columns remain.
culture_scores <- function(scores, culture_code) {
  scores %>%
    filter(.data$culture == culture_code) %>%
    column_to_rownames("subject") %>%
    select(-culture)
}

cn_df <- culture_scores(full_df, "CN")
# "complete.obs" restricts the correlations to subjects with no missing score.
cn_res <- cor(cn_df, use = "complete.obs")

us_df <- culture_scores(full_df, "US")
us_res <- cor(us_df, use = "complete.obs")

CN

# CN plots: upper-triangle correlation plot with hierarchically clustered
# ordering, followed by the pairwise correlation chart of the same scores.
corrplot(
  corr = cn_res,
  type = "upper",
  order = "hclust",
  tl.col = "black",
  tl.srt = 45
)

chart.Correlation(R = cn_df, histogram = TRUE, pch = 19)

US

# US plots: upper-triangle correlation plot with hierarchically clustered
# ordering, followed by the pairwise correlation chart of the same scores.
corrplot(
  corr = us_res,
  type = "upper",
  order = "hclust",
  tl.col = "black",
  tl.srt = 45
)

chart.Correlation(R = us_df, histogram = TRUE, pch = 19)

Visualizing tasks

# RV score against the situational causal-attribution score, one panel per
# culture, with a linear fit. Each subject is drawn once as a jittered point:
# stacking geom_point() and geom_jitter() would draw every subject twice (at
# the raw and at the jittered position). The fit formula is spelled out so
# ggplot2 does not have to report the default it picked.
full_df %>%
  ggplot(aes(x = rv_score, y = ca_score)) +
  geom_jitter() +
  facet_wrap(~culture) +
  geom_smooth(method = "lm", formula = y ~ x) +
  theme_classic() +
  labs(title = "RV - Causal Attribution (situational)")

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 60 rows containing non-finite values (stat_smooth).

## Warning: Removed 60 rows containing missing values (geom_point).
## Removed 60 rows containing missing values (geom_point).

# Triads score against the situational causal-attribution score, one panel
# per culture, with a linear fit. Each subject is drawn once as a jittered
# point: stacking geom_point() and geom_jitter() would draw every subject
# twice (at the raw and at the jittered position). The fit formula is spelled
# out so ggplot2 does not have to report the default it picked.
full_df %>%
  ggplot(aes(x = td_score, y = ca_score)) +
  geom_jitter() +
  facet_wrap(~culture) +
  geom_smooth(method = "lm", formula = y ~ x) +
  theme_classic() +
  labs(title = "Triads - Causal Attribution (situational)")

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 60 rows containing non-finite values (stat_smooth).
## Removed 60 rows containing missing values (geom_point).
## Removed 60 rows containing missing values (geom_point).

# RV score against Triads score, one panel per culture, with a linear fit.
# Each subject is drawn once as a jittered point: stacking geom_point() and
# geom_jitter() would draw every subject twice (at the raw and at the
# jittered position). The fit formula is spelled out so ggplot2 does not have
# to report the default it picked.
full_df %>%
  ggplot(aes(x = rv_score, y = td_score)) +
  geom_jitter() +
  facet_wrap(~culture) +
  geom_smooth(method = "lm", formula = y ~ x) +
  theme_classic() +
  labs(title = "RV - Triads ")

## `geom_smooth()` using formula 'y ~ x'

semantic intuition & free description

# Distribution of fd_score at each sei_ch_score level, one panel per culture.
# sei_ch_score is turned into a factor so every distinct score gets its own
# violin. Each subject is drawn once as a jittered point: stacking
# geom_point() and geom_jitter() would draw every subject twice (at the raw
# and at the jittered position).
full_df %>%
  ggplot(aes(x = as.factor(sei_ch_score), y = fd_score)) +
  geom_violin() +
  geom_jitter() +
  facet_wrap(~culture) +
  theme_classic()

04_exploratory

anjie

2022-06-25

big correlational matrices

CN

US

Visualizing tasks

semantic intuition & free description