PSYCH 251: Pilot A Data Analysis

Pilot A Description

This is my re-submission for the pilot A assignment in PSYCH251 in regards to replicating Muenks et al. (2020) Study 1. In this re-submission I have modified the Qualtrics survey to include: a consent and debrief paragraph, the condition videos (thank you Mike!), and made all the responses required.

Qualtrics survey link: https://stanforduniversity.qualtrics.com/jfe/form/SV_7PNJTKIHxk8Gk7k (current estimated time is 8-10 minutes)

For Pilot A, I have re-ran the survey 19 times randomly selecting answers each round (so expect null findings here!). In the code below, I have: cleaned and reshaped the data, computed summary statistics, ran an ANCOVA (the main analysis used in the paper), and replicated figure 2 from the paper.

Using G*power, apriori analysis with 80% power suggests that I need 122 participants total in order to see the same effect sizes mentioned in the paper (ANCOVA test with 3 groups and 3 covariates).

##libraries

## Load all packages needed for reshaping data
library(tidyverse) # for piping, useful packages
library(ltm) # for Cronbach's alpha
library(effects) # for predicted data for plotting
library(ggplot2) # for graphs
library(sjPlot) # correlation & model output
library(car) # for comparing coefficients
library(emmeans) # for unpacking interactions
library(ggpubr) # for significance on plots
library(psych)
library(effectsize)

pilota <- read.csv("pilota_data.csv")

#renaming columns and values
df.pilota <- pilota %>% 
  rename(race_native= race_1,
         race_asian = race_2,
         race_black = race_3,
         race_hispanic = race_4,
         race_white = race_5,
         race_other = race_6) %>% 
  mutate(condition = recode_factor(condition,
                                   `0` = "control instructor",
                                   `1` = "growth instructor", 
                                   `2` = "fixed instructor"))

#exclusion critera (for actual data)

#gender
df.pilota <- df.pilota %>% 
  mutate (dc_gender = case_when(
    gender == 1 ~ 0,
    gender == 2 ~ 1,
    gender == 3 ~ NA,
    gender == 4 ~ NA,
    TRUE ~ NA
  ))

#urm

df.pilota <- df.pilota %>% 
  mutate (dc_urm = case_when(
    race_native == 1 ~ 1,
    race_asian == 1 ~ 0,
    race_black == 1 ~ 1,
    race_hispanic == 1 ~ 1,
    race_white == 1 ~ 0,
    race_other == 1 ~ 1,
    TRUE ~ NA
  ))

#personal mindset
df.pilota <- df.pilota %>%
  mutate(personalmindset = rowMeans(dplyr::select(., starts_with("personal_mindset_")), na.rm = TRUE))

#factorization
df.pilota <- df.pilota %>%
  mutate(
    condition = factor(condition),
    dc_gender = factor(dc_gender),
    dc_urm = factor(dc_urm))

#mean calculations
df.pilota <- df.pilota %>%
  mutate(
    instructormindset = rowMeans(dplyr::select(., starts_with("instructor_mindset_")), na.rm = TRUE),
    belonging = rowMeans(dplyr::select(., starts_with("belonging_")), na.rm = TRUE),
    evaluation = rowMeans(dplyr::select(., starts_with("evaluation_")), na.rm = TRUE),
    engagement = rowMeans(dplyr::select(., starts_with("engagement_")), na.rm = TRUE),
    interest = rowMeans(dplyr::select(., starts_with("interest_")), na.rm = TRUE),
    preformance = rowMeans(dplyr::select(., starts_with("preformance_")), na.rm = TRUE))

#function
run_ancova <- function(outcome_var, data) {
  formula <- as.formula(paste(outcome_var, 
                              "~ condition + dc_urm + dc_gender + personalmindset"))
  model <- lm(formula, data = data)
  return(model)
}

# List of outcome variables
outcomes <- c("belonging", "evaluation", "engagement", 
              "interest", "preformance")

# Run ANCOVA for each outcome
models <- lapply(outcomes, run_ancova, data = df.pilota)
names(models) <- outcomes

# View results for each
lapply(models, function(m) Anova(m, type = "II"))

## $belonging
## Anova Table (Type II tests)
## 
## Response: belonging
##                 Sum Sq Df F value   Pr(>F)   
## condition       0.7234  2  1.0393 0.383478   
## dc_urm          1.1848  1  3.4041 0.089846 . 
## dc_gender       3.3761  1  9.7003 0.008945 **
## personalmindset 0.1040  1  0.2988 0.594684   
## Residuals       4.1765 12                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $evaluation
## Anova Table (Type II tests)
## 
## Response: evaluation
##                 Sum Sq Df F value Pr(>F)
## condition       2.0208  2  1.6896 0.2257
## dc_urm          0.8113  1  1.3566 0.2668
## dc_gender       0.5416  1  0.9056 0.3601
## personalmindset 0.0159  1  0.0265 0.8733
## Residuals       7.1763 12               
## 
## $engagement
## Anova Table (Type II tests)
## 
## Response: engagement
##                 Sum Sq Df F value  Pr(>F)  
## condition       5.5747  2  4.3508 0.03794 *
## dc_urm          1.7968  1  2.8047 0.11983  
## dc_gender       3.6636  1  5.7186 0.03405 *
## personalmindset 1.2886  1  2.0114 0.18157  
## Residuals       7.6877 12                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $interest
## Anova Table (Type II tests)
## 
## Response: interest
##                  Sum Sq Df F value   Pr(>F)   
## condition       1.81336  2  5.5348 0.019808 * 
## dc_urm          0.82376  1  5.0286 0.044599 * 
## dc_gender       1.85818  1 11.3432 0.005591 **
## personalmindset 0.48416  1  2.9555 0.111247   
## Residuals       1.96577 12                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $preformance
## Anova Table (Type II tests)
## 
## Response: preformance
##                  Sum Sq Df F value Pr(>F)
## condition        3.4335  2  1.8458 0.2000
## dc_urm           1.0932  1  1.1753 0.2996
## dc_gender        2.7893  1  2.9989 0.1089
## personalmindset  0.3985  1  0.4285 0.5251
## Residuals       11.1612 12

# CREATE SUMMARY TABLE
results_summary <- data.frame()

for (i in 1:length(models)) {
  outcome_name <- names(models)[i]
  model <- models[[i]]
  
  # ANOVA results
  anova_result <- Anova(model, type = "II")
  condition_row <- anova_result["condition", ]
  
  # Partial eta squared
  eta_sq <- eta_squared(model, partial = TRUE)
  condition_eta <- eta_sq[eta_sq$Parameter == "condition", "Eta2_partial"]
  
  # Cohen's f from model (omnibus effect size)
  cohens_f <- effectsize::cohens_f(model, partial = TRUE)
  condition_f <- cohens_f[cohens_f$Parameter == "condition", "Cohens_f_partial"]
  
  # Model R-squared
  r_sq <- summary(model)$r.squared
  
  results_summary <- rbind(results_summary, data.frame(
    Outcome = outcome_name,
    F_value = round(condition_row$`F value`, 2),
    df1 = condition_row$Df,
    df2 = anova_result["Residuals", "Df"],
    p_value = condition_row$`Pr(>F)`,
    partial_eta_sq = round(condition_eta, 3),
    cohens_f = round(condition_f, 3),
    R_squared = round(r_sq, 3)
  ))
}

# Add corrections and significance
results_summary <- results_summary %>%
  mutate(
    p_bonferroni = p.adjust(p_value, method = "bonferroni"),
    p_formatted = case_when(
      p_value < 0.001 ~ "<.001",
      TRUE ~ sprintf("%.3f", p_value)
    ),
    p_bonf_formatted = case_when(
      p_bonferroni < 0.001 ~ "<.001",
      TRUE ~ sprintf("%.3f", p_bonferroni)
    ),
    Significant = ifelse(p_bonferroni < 0.05, "Yes", "No")
  )

print(results_summary)

##       Outcome F_value df1 df2    p_value partial_eta_sq cohens_f R_squared
## 1   belonging    1.04   2  12 0.38347846          0.335    0.710     0.577
## 2  evaluation    1.69   2  12 0.22567958          0.178    0.466     0.404
## 3  engagement    4.35   2  12 0.03793628          0.477    0.955     0.616
## 4    interest    5.53   2  12 0.01980813          0.431    0.870     0.670
## 5 preformance    1.85   2  12 0.20003079          0.302    0.658     0.424
##   p_bonferroni p_formatted p_bonf_formatted Significant
## 1   1.00000000       0.383            1.000          No
## 2   1.00000000       0.226            1.000          No
## 3   0.18968141       0.038            0.190          No
## 4   0.09904063       0.020            0.099          No
## 5   1.00000000       0.200            1.000          No

#data reshaping

df.graph <- df.pilota %>%
  dplyr::select(ResponseId, condition, belonging, evaluation, engagement, interest, preformance) %>% 
  pivot_longer(cols = -c(condition, ResponseId), 
               names_to = "variable", 
               values_to = "value") %>%
  group_by(condition, variable) %>%
  summarise(
    Mean = mean(value),
    SE = sd(value) / sqrt(n()),
    CI_lower = Mean - qt(0.975, df = n() - 1) * SE,
    CI_upper = Mean + qt(0.975, df = n() - 1) * SE,
    .groups = "drop"
  )

df.graph <- df.graph %>%
  mutate(variable = case_when(
    variable == "belonging" ~ "Belonging",
    variable == "evaluation" ~ "Evaluative Concerns",
    variable == "engagement" ~ "Course Engagement",
    variable == "interest" ~ "Course Interest",
    variable == "preformance" ~ "Course Performance",
    TRUE ~ variable
  ))
#plotting

graph.p <- ggplot(df.graph, aes(x = variable, y = Mean, fill = condition)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9), width = 0.8) +
  geom_errorbar(aes(ymin = CI_lower, ymax = CI_upper), 
                width = 0.25, position = position_dodge(width = 0.9)) +
  scale_fill_manual(values = c("grey20", "grey95", "grey60")) +
  scale_y_continuous(limits = c(0, 7), breaks = 1:7) +
  labs(y = "", x = "", fill = "") +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    panel.grid.major.x = element_blank(),
    panel.grid.minor.y = element_blank()
  ) +
  coord_cartesian(ylim = c(1, 7)) +
  geom_hline(yintercept = 0, color = "black")

print(graph.p)

PSYCH 251: Pilot A Data Analysis

Ramya Kumar

2025-11-05

Pilot A Description