library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.0      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(here)

## here() starts at /Users/caoanjie/Desktop/projects/CCRR_writeups

library(reshape2)

## 
## Attaching package: 'reshape2'

## The following object is masked from 'package:tidyr':
## 
##     smiths

library(psych)

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

library(corrplot)

## corrplot 0.92 loaded

library(psych)
library(ggplot2)
library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:psych':
## 
##     logit

## The following object is masked from 'package:dplyr':
## 
##     recode

## The following object is masked from 'package:purrr':
## 
##     some

d1 <- read_csv(here("data/03_processed_data/exp1/tidy_main.csv"))

## New names:
## • `` -> `...1`

## Rows: 37595 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): subject, culture, task_name, task_info, trial_info, resp_type
## dbl (2): ...1, resp
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

d2 <- read_csv(here("data/03_processed_data/exp2/tidy_main.csv"))

## Warning: One or more parsing issues, see `problems()` for details

## Rows: 40257 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): subject, culture, task_name, task_info, trial_info, resp_type, resp
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Now after establishing the reliability of multi-trial tasks, we would like to consider the correlations. Below are the tasks that we are going to consider

For study 1, this includes:

RMTS
Free Description (FD)
Ebbignhaus Illusion (EBB): only with trials in the illusional context.
Ravens (RV)

For study 2, this includes:

RMTS
Free Description (FD)
Change Detection (CD): only with trials in which changes occur in the background
Causal Attribution (CA): only with items on the scale for situational attribution
Triads (TD)
Ravens (RV)

study 1

d1_task_summary <- d1 %>% 
  filter(task_name == "RMTS"| task_name == "FD" | (task_name == "EBB" & task_info == "IL") 
         | task_name == "RV") %>% 
  group_by(subject, task_name) %>% 
  summarise(mean_resp = mean(resp)) %>% 
  ungroup() %>% 
  pivot_wider(names_from = task_name, 
              values_from = mean_resp) %>% 
  select(-subject)

## `summarise()` has grouped output by 'subject'. You can override using the
## `.groups` argument.

d1_corr_mat <- cor(d1_task_summary)
melted_d1_corr_mat <- melt(d1_corr_mat)
melted_d1_corr_mat %>% 
  ggplot(aes(x = Var1, y = Var2, fill = value)) + 
  geom_tile()

corrplot(d1_corr_mat, method = "number")

For study 2, this includes: - RMTS - Free Description (FD) - Change Detection (CD): only with trials in which changes occur in the background - Causal Attribution (CA): only with items on the scale for situational attribution - Triads (TD) - Ravens (RV)

study 2

since we are dealing with missing data, two approaches were considered

imputed with mean value

d2_task_summary <- d2 %>% 
  filter(task_name == "RMTS"| task_name == "FD" | (task_name == "CD" & task_info == "context") 
         | (task_name == "CA" & task_info == "situational") | 
           (task_name == "TD" & task_info == "triads")| task_name == "RV") %>% 
  mutate(resp = case_when(
    task_name == "TD" ~ as.numeric(as.logical(resp)), 
    TRUE ~ as.numeric(resp)
  )) %>% 
  group_by(subject, task_name) %>% 
  summarise(mean_resp = mean(resp, na.rm = TRUE)) %>% 
  ungroup() %>% 
  pivot_wider(names_from = task_name, 
              values_from = mean_resp) %>% 
  select(-subject)

## Warning in eval_tidy(pair$rhs, env = default_env): NAs introduced by coercion

## `summarise()` has grouped output by 'subject'. You can override using the
## `.groups` argument.

# have many missing data

#imputation mean 
d2_task_mean <- d2_task_summary %>% 
  pivot_longer(cols = everything(), 
               names_to = "task", 
               values_to = "value") %>% 
  group_by(task) %>% 
  summarise(mean = mean(value, na.rm = TRUE))

imputed_d2_task_summary <- d2_task_summary %>% 
  mutate(
  CD = case_when(
    is.na(CD) ~ filter(d2_task_mean, task == "CD")$mean, 
    TRUE ~ CD
  ), 
  RMTS = case_when(
    is.na(RMTS) ~ filter(d2_task_mean, task == "RMTS")$mean, 
    TRUE ~ RMTS
  ),
  RV = case_when(
    is.na(RV) ~ filter(d2_task_mean, task == "RV")$mean, 
    TRUE ~ RV
  ),
  TD = case_when(
    is.na(TD) ~ filter(d2_task_mean, task == "TD")$mean, 
    TRUE ~ TD
  ),
  CA = case_when(
    is.na(CA) ~ filter(d2_task_mean, task == "CA")$mean, 
    TRUE ~ CA
  ),
  FD = case_when(
    is.na(FD) ~ filter(d2_task_mean, task == "FD")$mean, 
    TRUE ~ FD
  )
  ) 


d2_corr_mat <- cor(imputed_d2_task_summary)
corrplot(d2_corr_mat, method = "number")

melted_d2_corr_mat <- melt(d2_corr_mat)
melted_d2_corr_mat %>% 
  ggplot(aes(x = Var1, y = Var2, fill = value)) + 
  geom_tile()

deletion

# or do deletion
deletion_d2_task_summary <- drop_na(d2_task_summary)

d2_corr_mat <- cor(deletion_d2_task_summary)
corrplot(d2_corr_mat, method = "number")

melted_d2_corr_mat <- melt(d2_corr_mat)
melted_d2_corr_mat %>% 
  ggplot(aes(x = Var1, y = Var2, fill = value)) + 
  geom_tile()

factor analysis

check factorability of the data

Bartlett’s Test of Sphericity Small values of the significance level indicate a factor analysis maybe useful but apparently it’s not the case

cortest.bartlett(d1_task_summary)

## R was not square, finding R from data

## $chisq
## [1] 49.30244
## 
## $p.value
## [1] 6.485699e-09
## 
## $df
## [1] 6

cortest.bartlett(d2_task_summary)

## R was not square, finding R from data

## $chisq
## [1] 140.4605
## 
## $p.value
## [1] 1.867108e-22
## 
## $df
## [1] 15

scree plots

Study 1

fafitfree <- fa(d1_task_summary, nfactors = ncol(d1_task_summary), rotate = "none")
n_factors <- length(fafitfree$e.values)
scree     <- data.frame(
  Factor_n =  as.factor(1:n_factors), 
  Eigenvalue = fafitfree$e.values)
ggplot(scree, aes(x = Factor_n, y = Eigenvalue, group = 1)) + 
  geom_point() + geom_line() +
  xlab("Number of factors") +
  ylab("Initial eigenvalue") +
  labs( title = "Scree Plot for Study 1", 
        subtitle = "(Based on the unreduced correlation matrix)")

Study 2

fafitfree <- fa(d2_task_summary, nfactors = ncol(d2_task_summary), rotate = "none")
n_factors <- length(fafitfree$e.values)
scree     <- data.frame(
  Factor_n =  as.factor(1:n_factors), 
  Eigenvalue = fafitfree$e.values)
ggplot(scree, aes(x = Factor_n, y = Eigenvalue, group = 1)) + 
  geom_point() + geom_line() +
  xlab("Number of factors") +
  ylab("Initial eigenvalue") +
  labs( title = "Scree Plot for study 2", 
        subtitle = "(Based on the unreduced correlation matrix)")

03_relations_between_tasks.rmd

anjie

2022-09-15