ˆ
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(here)
## here() starts at /Users/caoanjie/Desktop/projects/CCRR_writeups
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(corrplot)
## corrplot 0.92 loaded
library(psych)
library(ggplot2)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
d1 <- read_csv(here("data/03_processed_data/exp1/tidy_main.csv"))
## New names:
## • `` -> `...1`
## Rows: 37595 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): subject, culture, task_name, task_info, trial_info, resp_type
## dbl (2): ...1, resp
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
d2 <- read_csv(here("data/03_processed_data/exp2/tidy_main.csv"))
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 40257 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): subject, culture, task_name, task_info, trial_info, resp_type, resp
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Now after establishing the reliability of multi-trial tasks, we would like to consider the correlations. Below are the tasks that we are going to consider
For study 1, this includes:
For study 2, this includes:
d1_task_summary <- d1 %>%
filter(task_name == "RMTS"| task_name == "FD" | (task_name == "EBB" & task_info == "IL")
| task_name == "RV") %>%
group_by(subject, task_name) %>%
summarise(mean_resp = mean(resp)) %>%
ungroup() %>%
pivot_wider(names_from = task_name,
values_from = mean_resp) %>%
select(-subject)
## `summarise()` has grouped output by 'subject'. You can override using the
## `.groups` argument.
d1_corr_mat <- cor(d1_task_summary)
melted_d1_corr_mat <- melt(d1_corr_mat)
melted_d1_corr_mat %>%
ggplot(aes(x = Var1, y = Var2, fill = value)) +
geom_tile()
corrplot(d1_corr_mat, method = "number")
For study 2, this includes: - RMTS - Free Description (FD) - Change
Detection (CD): only with trials in which changes occur in the
background - Causal Attribution (CA): only with items on the scale for
situational attribution - Triads (TD) - Ravens (RV)
since we are dealing with missing data, two approaches were considered
d2_task_summary <- d2 %>%
filter(task_name == "RMTS"| task_name == "FD" | (task_name == "CD" & task_info == "context")
| (task_name == "CA" & task_info == "situational") |
(task_name == "TD" & task_info == "triads")| task_name == "RV") %>%
mutate(resp = case_when(
task_name == "TD" ~ as.numeric(as.logical(resp)),
TRUE ~ as.numeric(resp)
)) %>%
group_by(subject, task_name) %>%
summarise(mean_resp = mean(resp, na.rm = TRUE)) %>%
ungroup() %>%
pivot_wider(names_from = task_name,
values_from = mean_resp) %>%
select(-subject)
## Warning in eval_tidy(pair$rhs, env = default_env): NAs introduced by coercion
## `summarise()` has grouped output by 'subject'. You can override using the
## `.groups` argument.
# have many missing data
#imputation mean
d2_task_mean <- d2_task_summary %>%
pivot_longer(cols = everything(),
names_to = "task",
values_to = "value") %>%
group_by(task) %>%
summarise(mean = mean(value, na.rm = TRUE))
imputed_d2_task_summary <- d2_task_summary %>%
mutate(
CD = case_when(
is.na(CD) ~ filter(d2_task_mean, task == "CD")$mean,
TRUE ~ CD
),
RMTS = case_when(
is.na(RMTS) ~ filter(d2_task_mean, task == "RMTS")$mean,
TRUE ~ RMTS
),
RV = case_when(
is.na(RV) ~ filter(d2_task_mean, task == "RV")$mean,
TRUE ~ RV
),
TD = case_when(
is.na(TD) ~ filter(d2_task_mean, task == "TD")$mean,
TRUE ~ TD
),
CA = case_when(
is.na(CA) ~ filter(d2_task_mean, task == "CA")$mean,
TRUE ~ CA
),
FD = case_when(
is.na(FD) ~ filter(d2_task_mean, task == "FD")$mean,
TRUE ~ FD
)
)
d2_corr_mat <- cor(imputed_d2_task_summary)
corrplot(d2_corr_mat, method = "number")
melted_d2_corr_mat <- melt(d2_corr_mat)
melted_d2_corr_mat %>%
ggplot(aes(x = Var1, y = Var2, fill = value)) +
geom_tile()
# or do deletion
deletion_d2_task_summary <- drop_na(d2_task_summary)
d2_corr_mat <- cor(deletion_d2_task_summary)
corrplot(d2_corr_mat, method = "number")
melted_d2_corr_mat <- melt(d2_corr_mat)
melted_d2_corr_mat %>%
ggplot(aes(x = Var1, y = Var2, fill = value)) +
geom_tile()
Bartlett’s Test of Sphericity Small values of the significance level indicate a factor analysis maybe useful but apparently it’s not the case
cortest.bartlett(d1_task_summary)
## R was not square, finding R from data
## $chisq
## [1] 49.30244
##
## $p.value
## [1] 6.485699e-09
##
## $df
## [1] 6
cortest.bartlett(d2_task_summary)
## R was not square, finding R from data
## $chisq
## [1] 140.4605
##
## $p.value
## [1] 1.867108e-22
##
## $df
## [1] 15
fafitfree <- fa(d1_task_summary, nfactors = ncol(d1_task_summary), rotate = "none")
n_factors <- length(fafitfree$e.values)
scree <- data.frame(
Factor_n = as.factor(1:n_factors),
Eigenvalue = fafitfree$e.values)
ggplot(scree, aes(x = Factor_n, y = Eigenvalue, group = 1)) +
geom_point() + geom_line() +
xlab("Number of factors") +
ylab("Initial eigenvalue") +
labs( title = "Scree Plot for Study 1",
subtitle = "(Based on the unreduced correlation matrix)")
fafitfree <- fa(d2_task_summary, nfactors = ncol(d2_task_summary), rotate = "none")
n_factors <- length(fafitfree$e.values)
scree <- data.frame(
Factor_n = as.factor(1:n_factors),
Eigenvalue = fafitfree$e.values)
ggplot(scree, aes(x = Factor_n, y = Eigenvalue, group = 1)) +
geom_point() + geom_line() +
xlab("Number of factors") +
ylab("Initial eigenvalue") +
labs( title = "Scree Plot for study 2",
subtitle = "(Based on the unreduced correlation matrix)")