# PRE-LOADING LIBRARIES
library(ggplot2)
library(dplyr)
library(tidyr)
library(broom)
library(effectsize)
library(car)
library(moments)
library(emmeans)  # needed for emmeans() in the ANOVA section

# LOADING DATA
errorcorrectiondata <- read.csv(file.choose(), header = TRUE)

# DATA CLEANING
# Keep consenting, eligible, complete responses that passed the attention check
errorcorrectiondata <- errorcorrectiondata[
  errorcorrectiondata$Consent == 1 &
    errorcorrectiondata$eligibility_1 != 2 &
    errorcorrectiondata$eligibility_2 != 2 &
    errorcorrectiondata$Finished == 1, ]
errorcorrectiondata <- errorcorrectiondata[errorcorrectiondata$check == 1, ]
nrow(errorcorrectiondata)
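# Note (added): nrow() reports the analyzed sample size after all exclusions.
# To audit attrition per criterion, re-read the raw file into a separate
# object first (hypothetical name `raw`), e.g.:
# raw <- read.csv(file.choose(), header = TRUE)
# sum(raw$Consent != 1, na.rm = TRUE)   # excluded for non-consent
# sum(raw$Finished != 1, na.rm = TRUE)  # excluded as incomplete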
# PARTICIPANT DESCRIPTIVES
errorcorrectiondata$Age <- as.numeric(as.character(errorcorrectiondata$Age))
summary(errorcorrectiondata$Age)
sd(errorcorrectiondata$Age, na.rm = TRUE)
table(errorcorrectiondata$Gender)
errorcorrectiondata %>%
  filter(Country_Res_YN %in% c(1, 2, 3)) %>%
  group_by(Country_Res_YN) %>%
  summarise(N = n())
errorcorrectiondata %>%
  count(Country_Res) %>%
  arrange(desc(n))

# SCREENING TESTS
# Drop responses flagged as likely bots by reCAPTCHA
errorcorrectiondata <- errorcorrectiondata %>% filter(Q_RecaptchaScore > 0.5)
# Item-level screening: invalidate a CRT item if the participant reported
# using external resources, never clicked the item, was already familiar
# with it, or failed the dot check for that item
dot_correct <- c(3, 2, 4, 2, 1, 3)
for (i in 1:6) {
  i_col     <- paste0("crt", i, "_i")
  r_col     <- paste0("crt", i, "_r")
  ext_col   <- paste0("ext_resources_use", i)
  click_col <- paste0("crt", i, "_i_t_Click.Count")
  fam_col   <- paste0("familiarity", i)
  dot_col   <- paste0("dot_crt", i)
  invalid <-
    (!is.na(errorcorrectiondata[[ext_col]]) &
       errorcorrectiondata[[ext_col]] == 1) |
    (!is.na(errorcorrectiondata[[click_col]]) &
       errorcorrectiondata[[click_col]] == 0) |
    (!is.na(errorcorrectiondata[[fam_col]]) &
       errorcorrectiondata[[fam_col]] == 1) |
    (!is.na(errorcorrectiondata[[dot_col]]) &
       errorcorrectiondata[[dot_col]] != dot_correct[i])
  errorcorrectiondata[[i_col]][invalid] <- NA
  errorcorrectiondata[[r_col]][invalid] <- NA
}
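# Optional sanity check (added): responses remaining valid per CRT item
# after the exclusions above.
colSums(!is.na(errorcorrectiondata[, paste0("crt", 1:6, "_i")]))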
# CALCULATING VARIABLES
# Number correct among valid (non-invalidated) CRT items
errorcorrectiondata$initial_correct <- rowSums(
  errorcorrectiondata[, paste0("crt", 1:6, "_i")] == 1, na.rm = TRUE
)
errorcorrectiondata$revised_correct <- rowSums(
  errorcorrectiondata[, paste0("crt", 1:6, "_r")] == 1, na.rm = TRUE
)
errorcorrectiondata$initial_valid <-
  rowSums(!is.na(errorcorrectiondata[, paste0("crt", 1:6, "_i")]))
errorcorrectiondata$revised_valid <-
  rowSums(!is.na(errorcorrectiondata[, paste0("crt", 1:6, "_r")]))
errorcorrectiondata <- errorcorrectiondata %>%
  mutate(
    initial_accuracy = if_else(initial_valid > 0,
                               initial_correct / initial_valid, NA_real_),
    revised_accuracy = if_else(revised_valid > 0,
                               revised_correct / revised_valid, NA_real_),
    accuracy_diff    = revised_accuracy - initial_accuracy
  )

# Intellectual humility: reverse-score items 6, 8, and 9 (7-point scale),
# then average across the nine items
ih_items <- paste0("IH_", 1:9)
errorcorrectiondata[, ih_items] <- lapply(errorcorrectiondata[, ih_items],
                                          function(x) as.numeric(as.character(x)))
errorcorrectiondata <- errorcorrectiondata %>%
  mutate(
    IH_6 = ifelse(!is.na(IH_6), 8 - IH_6, NA),
    IH_8 = ifelse(!is.na(IH_8), 8 - IH_8, NA),
    IH_9 = ifelse(!is.na(IH_9), 8 - IH_9, NA)
  )
errorcorrectiondata$ih_score <- rowMeans(errorcorrectiondata[, ih_items],
                                         na.rm = TRUE)

# Condition label combines the feedback and justification factors
errorcorrectiondata$condition <- with(errorcorrectiondata,
                                      paste0(feedback, "_", justification))
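# Optional check (added): cell sizes for the 2 x 2 design.
table(errorcorrectiondata$condition)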
# PRELIMINARY ANALYSIS
# Skew & Kurtosis
vars <- c("accuracy_diff", "ih_score")
for (v in vars) {
  x <- errorcorrectiondata[[v]]
  x <- x[!is.na(x)]
  cat("Variable:", v, "\n")
  cat("  Skewness:", skewness(x), "\n")
  cat("  Kurtosis:", kurtosis(x), "\n")
}

# Outliers (boxplot rule with multiplier k = 2.2)
k <- 2.2
for (v in vars) {
  x <- errorcorrectiondata[[v]]
  x <- x[!is.na(x)]  # remove NAs
  Q1 <- quantile(x, 0.25)
  Q3 <- quantile(x, 0.75)
  IQR_val <- IQR(x)
  lower <- Q1 - k * IQR_val
  upper <- Q3 + k * IQR_val
  outliers <- x[x < lower | x > upper]
  cat("Variable:", v, "\n")
  cat("  Lower bound:", lower, "\n")
  cat("  Upper bound:", upper, "\n")
  if (length(outliers) == 0) {
    cat("  No outliers detected.\n")
  } else {
    cat("  Outliers detected:", outliers, "\n")
  }
}

# Count outliers (lower/upper carry over from the last loop iteration,
# i.e. the ih_score bounds)
outliers <- errorcorrectiondata$ih_score < lower |
  errorcorrectiondata$ih_score > upper
n_outliers <- sum(outliers, na.rm = TRUE)
cat("Number of ih_score outliers:", n_outliers, "\n")

# Set ih_score outliers to NA (conventional 1.5 x IQR fences)
k <- 1.5  # IQR multiplier
Q1 <- quantile(errorcorrectiondata$ih_score, 0.25, na.rm = TRUE)
Q3 <- quantile(errorcorrectiondata$ih_score, 0.75, na.rm = TRUE)
IQR_val <- Q3 - Q1
errorcorrectiondata <- errorcorrectiondata %>%
  mutate(ih_score = if_else(ih_score < (Q1 - k * IQR_val) |
                              ih_score > (Q3 + k * IQR_val),
                            NA_real_, ih_score))
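# Optional check (added): how many ih_score values are now missing after
# the fences were applied.
sum(is.na(errorcorrectiondata$ih_score))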
# ERROR CORRECTION HYPOTHESIS
# One-sample t-tests with Levels of Error Correction, by condition
t_test_results <- errorcorrectiondata %>%
  group_by(condition) %>%
  summarise(
    # run one-sample t-test
    t_test = list(t.test(accuracy_diff, mu = 0)),
    # compute Cohen's d directly from the raw data
    d_res = list(cohens_d(accuracy_diff, mu = 0)),
    .groups = "drop"
  ) %>%
  mutate(tidy_res = lapply(t_test, broom::tidy)) %>%
  tidyr::unnest(c(tidy_res, d_res))
t_test_results
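# Illustration (added): for a one-sample test against mu = 0, Cohen's d is
# mean / SD, so the d values above can be reproduced by hand for any
# condition, e.g. "F_J":
x_fj <- errorcorrectiondata$accuracy_diff[errorcorrectiondata$condition == "F_J"]
mean(x_fj, na.rm = TRUE) / sd(x_fj, na.rm = TRUE)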
# FEEDBACK AND JUSTIFICATION HYPOTHESIS
# 2 x 2 ANOVA with Levels of Error Correction
errorcorrectiondata <- errorcorrectiondata %>%
  mutate(feedback = factor(feedback), justification = factor(justification))
anova_data <- errorcorrectiondata %>%
  filter(!is.na(accuracy_diff) & !is.na(feedback) & !is.na(justification))
anova_model <- aov(accuracy_diff ~ feedback * justification, data = anova_data)
summary(anova_model)
effectsize::eta_squared(anova_model, partial = TRUE)
emm_feedback <- emmeans(anova_model, ~ feedback)
# Figure - Levels of Error Correction
just_colors <- c("NJ" = "grey70", "J" = "grey30")  # NJ = light grey, J = dark grey
plot_data <- anova_data %>%
  group_by(feedback, justification) %>%
  summarise(
    mean_acc = mean(accuracy_diff, na.rm = TRUE) * 100,
    # 95% t-based confidence interval half-width, in percentage points
    ci_acc = qt(0.975, df = n() - 1) *
      (sd(accuracy_diff, na.rm = TRUE) / sqrt(n())) * 100,
    .groups = "drop"
  )
ggplot(plot_data, aes(x = feedback, y = mean_acc, fill = justification)) +
  geom_col(position = position_dodge(0.8), width = 0.7, color = "black") +
  geom_errorbar(aes(ymin = mean_acc - ci_acc, ymax = mean_acc + ci_acc),
                width = 0.2, position = position_dodge(0.8), linewidth = 0.6) +
  labs(x = "Feedback", y = "Mean Accuracy Difference (%)",
       fill = "Justification") +
  theme_minimal(base_size = 14) +
  theme(
    axis.title = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    panel.grid.major.y = element_line(color = "grey90"),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    legend.position = "bottom"
  ) +
  scale_fill_manual(values = just_colors) +
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, 10),
                     expand = expansion(mult = c(0, 0.05)))
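# Note (added): the y-axis is fixed to 0-100%, so a condition with a negative
# mean accuracy difference would fall outside the plotted range.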
# INTELLECTUAL HUMILITY HYPOTHESIS
# Overall correlation
cor.test(errorcorrectiondata$ih_score, errorcorrectiondata$accuracy_diff,
         method = "pearson")

# Figure - Overall
ggplot(errorcorrectiondata, aes(x = ih_score, y = accuracy_diff * 100)) +
  geom_jitter(width = 0.1, height = 0.5, alpha = 0.6, size = 2, color = "black") +
  geom_smooth(method = "lm", se = TRUE, color = "black", fill = "grey70") +
  theme_minimal(base_size = 14) +
  theme(
    axis.text = element_text(size = 11, color = "black"),
    axis.title = element_text(size = 14, face = "bold"),
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    legend.position = "none"
  ) +
  labs(x = "Intellectual Humility Score", y = "Level of Error Correction (%)") +
  scale_x_continuous(breaks = 1:7, limits = c(1, 7)) +
  scale_y_continuous(limits = c(-50, 100),
                     expand = expansion(mult = c(0.02, 0.02)))
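# Note (added): geom_jitter adds small random displacement for display only;
# the correlation tests below use the raw, unjittered values.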
# Correlation by condition
conditions <- c("NF_NJ", "F_NJ", "NF_J", "F_J")
for (cond in conditions) {
  cat("Condition:", cond, "\n")
  subset_data <- subset(errorcorrectiondata, condition == cond)
  cor_test <- cor.test(subset_data$ih_score, subset_data$accuracy_diff,
                       method = "pearson")
  print(cor_test)
  cat("\n")
}

# Figure - Condition
ggplot(errorcorrectiondata, aes(x = ih_score, y = accuracy_diff * 100)) +
  geom_jitter(width = 0.05, height = 0.5, alpha = 0.6, size = 2, color = "black") +
  geom_smooth(method = "lm", se = TRUE, alpha = 0.2, color = "black") +
  theme_minimal(base_size = 14) +
  theme(
    axis.text = element_text(size = 11, color = "black"),
    axis.title = element_text(size = 14),
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    strip.text = element_text(size = 14, face = "bold"),
    legend.position = "none"
  ) +
  labs(x = "Intellectual Humility Score", y = "Level of Error Correction (%)") +
  scale_x_continuous(limits = c(1, 7), expand = c(0, 0)) +
  scale_y_continuous(
    breaks = seq(
      floor(min(errorcorrectiondata$accuracy_diff * 100, na.rm = TRUE) / 50) * 50,
      ceiling(max(errorcorrectiondata$accuracy_diff * 100, na.rm = TRUE) / 50) * 50,
      50
    ),
    expand = expansion(mult = c(0.02, 0.02))
  ) +
  facet_wrap(~condition, ncol = 2)

# Saving plot
ggsave("~/Desktop/IH_condition.pdf", plot = last_plot(), width = 7, height = 6,
       useDingbats = FALSE)
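# Note (added): useDingbats = FALSE is passed through to grDevices::pdf() and
# avoids plot points being rendered as Dingbats glyphs in some PDF viewers.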
# ANOVA ASSUMPTION CHECKS
# Homogeneity of Variance
leveneTest(accuracy_diff ~ interaction(feedback, justification),
           data = anova_data)
# Normality of Residuals
resid_anova <- residuals(anova_model)
hist(resid_anova)
qqnorm(resid_anova); qqline(resid_anova)
shapiro.test(resid_anova)
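# Note (added): with larger samples the Shapiro-Wilk test flags even trivial
# departures from normality, so the Q-Q plot is the more informative check.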
# DESCRIPTIVES
# Levels of Error Correction
descriptives_by_condition_acc <- errorcorrectiondata %>%
  group_by(condition) %>%
  summarise(
    N = sum(!is.na(accuracy_diff)),  # count of non-missing
    Mean = mean(accuracy_diff, na.rm = TRUE),
    SD = sd(accuracy_diff, na.rm = TRUE),
    Min = min(accuracy_diff, na.rm = TRUE),
    Max = max(accuracy_diff, na.rm = TRUE)
  )
descriptives_overall_acc <- errorcorrectiondata %>%
  summarise(
    N = sum(!is.na(accuracy_diff)),
    Mean = mean(accuracy_diff, na.rm = TRUE),
    SD = sd(accuracy_diff, na.rm = TRUE),
    Min = min(accuracy_diff, na.rm = TRUE),
    Max = max(accuracy_diff, na.rm = TRUE)
  )
descriptives_by_condition_acc
descriptives_overall_acc

# IH Scores
descriptives_by_condition <- errorcorrectiondata %>%
  group_by(condition) %>%
  summarise(
    N = n(),
    Mean = mean(ih_score, na.rm = TRUE),
    SD = sd(ih_score, na.rm = TRUE),
    Min = min(ih_score, na.rm = TRUE),
    Max = max(ih_score, na.rm = TRUE)
  )
descriptives_overall <- errorcorrectiondata %>%
  summarise(
    N = n(),
    Mean = mean(ih_score, na.rm = TRUE),
    SD = sd(ih_score, na.rm = TRUE),
    Min = min(ih_score, na.rm = TRUE),
    Max = max(ih_score, na.rm = TRUE)
  )
descriptives_by_condition
descriptives_overall