#Loading packages
library(lme4)
## Loading required package: Matrix
library(lmerTest)
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(lavaan)
## This is lavaan 0.6-21
## lavaan is FREE software! Please report any bugs.
library(asherR)
library(reshape2)
#Loading data
source("../Scripts/S25_Chem1041_Data_Combination.R", chdir = T)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ tidyr::unpack() masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
##
## Attaching package: 'janitor'
##
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
##
##
## |-------------------------------------|
## | misty 0.8.1 (2026-03-06) |
## | Miscellaneous Functions T. Yanagida |
## |-------------------------------------|
##
##
## Attaching package: 'misty'
##
##
## The following object is masked from 'package:janitor':
##
## crosstab
##
##
## Rows: 983 Columns: 115
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (18): StartDate, EndDate, IPAddress, RecordedDate, ResponseId, Recipient...
## dbl (87): Status, Progress, Duration (in seconds), Finished, LocationLatitud...
## num (1): race_b
## lgl (9): RecipientLastName, RecipientFirstName, ExternalReference, userName...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 10125 Columns: 52
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): StartDate, EndDate, IPAddress, RecordedDate, ResponseId, Recipient...
## dbl (39): Status, Progress, Duration (in seconds), Finished, LocationLatitud...
## lgl (3): RecipientLastName, RecipientFirstName, ExternalReference
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 758 Columns: 123
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (18): StartDate, EndDate, IPAddress, RecordedDate, ResponseId, Recipient...
## dbl (96): Status, Progress, Duration (in seconds), Finished, LocationLatitud...
## lgl (9): RecipientLastName, RecipientFirstName, ExternalReference, userName...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 974 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): student, loginid, recipient_email, stud_id
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 888 Columns: 89
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (39): loginid, Recitation Assignment - WEEK 1 - PRACTICE (21920682), Rec...
## dbl (50): FINAL LETTER GRADE (22222814), Initial Knowledge Check Completion ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 861 Columns: 33
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (19): student, recipient_email, s25_enrollment_status, s25_program_code,...
## dbl (14): s25_last_term_enrolled, s25_uc_gpa_inst, s25_gpa, s25_college_code...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 861 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): recipient_email, f25_last_term_enrolled, f25_enrollment_status, f2...
## dbl (12): f25_uc_gpa_inst, f25_gpa, f25_college_code, f25_credits_taken, f25...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 861 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): recipient_email, s26_last_term_enrolled, s26_enrollment_status, s2...
## dbl (12): s26_uc_gpa_inst, s26_gpa, s26_college_code, s26_credits_taken, s26...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 847 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): stud_id
## dbl (13): te_code_coursework_b, te_code_performance_b, te_code_comprehension...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 655 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): stud_id
## dbl (24): mediadescribe_stem_code_f, mediadescribe_fiction_code_f, mediadesc...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Joining with `by = join_by(recipient_email, student, loginid, stud_id)`
## Joining with `by = join_by(stud_id)`
## Warning in psych::alpha(chem_1041_baseline[, c("bel1_b", "bel2_b", "bel3_b", : Some items were negatively correlated with the first principal component and were automatically reversed.
## This is indicated by a negative sign for the variable name.
## Warning in psych::alpha(chem_1041_diary[, c("sr1_eoc", "sr2_eoc", "sr3_eoc")], : Some items were negatively correlated with the first principal component and were automatically reversed.
## This is indicated by a negative sign for the variable name.
## Warning in psych::alpha(chem_1041_final[, c("bel1_f", "bel2_f", "bel3_f", : Some items were negatively correlated with the first principal component and were automatically reversed.
## This is indicated by a negative sign for the variable name.
## Warning in psych::alpha(chem_1041_final[, c("dr1_f", "dr2_f", "dr3_f", "dr4_f", : Some items were negatively correlated with the first principal component and were automatically reversed.
## This is indicated by a negative sign for the variable name.
#Pivoting grades to long and creating wave
library(dplyr)
library(tidyr)
chem_1041_grades_long <- chem_1041_grades %>%
select(stud_id,
homework_week_2_21905056,
homework_week_3_21905058,
homework_week_4_22032186,
homework_week_5_22046326,
homework_week_6_22056124,
homework_week_7_22063384,
homework_week_8_22078166,
homework_week_9_22078190,
homework_week_11_22111120,
homework_week_12_22127630,
homework_week_13_22142184,
homework_week_14_22149232,
homework_week_15_22172800) %>%
pivot_longer(
cols = -stud_id,
names_to = "hw_var",
values_to = "homework"
) %>%
mutate(
wave = case_when(
hw_var == "homework_week_2_21905056" ~ 1,
hw_var == "homework_week_3_21905058" ~ 2,
hw_var == "homework_week_4_22032186" ~ 3,
hw_var == "homework_week_5_22046326" ~ 4,
hw_var == "homework_week_6_22056124" ~ 5,
hw_var == "homework_week_7_22063384" ~ 6,
hw_var == "homework_week_8_22078166" ~ 7,
hw_var == "homework_week_9_22078190" ~ 8,
hw_var == "homework_week_11_22111120" ~ 9,
hw_var == "homework_week_12_22127630" ~ 10,
hw_var == "homework_week_13_22142184" ~ 11,
hw_var == "homework_week_14_22149232" ~ 12,
hw_var == "homework_week_15_22172800" ~ 13
)
) %>%
select(stud_id, wave, homework)
chem_1041_diary <- chem_1041_diary %>%
left_join(chem_1041_grades_long, by = c("stud_id", "wave"))
#Creating wave 0 variable
chem_1041_diary <- chem_1041_diary %>% mutate(wave_c = wave - 1)
#Creating weekly variable for forecasting
chem_1041_diary <- chem_1041_diary %>%
arrange(stud_id, wave) %>%
group_by(stud_id) %>%
mutate(
comp_eoc_next = lead(comp_eoc),
pred_error = comp_nw_eoc - comp_eoc_next,
# person mean (between-person component)
pred_error_pm = mean(pred_error, na.rm = TRUE),
# group-mean centered (within-person deviation)
pred_error_wc = pred_error - pred_error_pm
) %>%
ungroup()
chem_1041_diary <- chem_1041_diary %>% mutate(pred_comp_eoc_abs = abs(pred_error))
chem_1041_diary <- chem_1041_diary %>% mutate(
competence_calibration = case_when(
pred_error > 0 ~ "overconfident",
pred_error < 0 ~ "underconfident",
pred_error == 0 ~ "accurate"
))
#Creating a lagged homework variable
chem_1041_diary <- chem_1041_diary %>%
arrange(stud_id, wave) %>%
group_by(stud_id) %>%
mutate(homework_next = lead(homework)) %>%
ungroup()
#Joining Data
chem_bf <- left_join(chem_1041_baseline, chem_1041_final, by = "stud_id")
chem_bf <- left_join(chem_bf, chem_1041_grades, by = "stud_id")
#chem_bf_diary_only <- left_join(chem_1040_diary_wide, chem_bf, by = "stud_id")
chem_all <- left_join(chem_1041_diary, chem_bf, by = "stud_id")
# Extract the first row
# first_row <- chem_1041_final[1, ]
# names_column <- data.frame(Names = unlist(first_row))
# write.csv(chem_1041_final, "s25_chem_1041_final.csv")
# write.csv(chem_1041_baseline, "s25_chem_1041_baseline.csv")
# write.csv(chem_1041_diary, "s25_chem_1041_diary.csv")
# write.csv(chem_1041_grades, "s25_chem_1041_grades.csv")
# write.csv(chem_1041_diary_wide, "s25_chem_1041_diary_wide.csv")
#Model
library(lme4)
# #Performance during the current week
# model <- lmer(
# homework ~ pred_comp_eoc_wc + pred_comp_eoc_pm +
# urm + female + fg + hs_gpa_scaled_grandZ + wave_c + (1 + wave_c || stud_id),
# data = chem_all, control = lmerControl(optimizer = "bobyqa")
# )
# summary(model)
# #When a student is more overconfident than usual, they perform slightly better that same week. Students who are generally more overconfident across the semester perform worse overall.
#Performance during the following week
model2 <- lmer(
homework_next ~ pred_error_wc + pred_error_pm +
urm + female + fg + hs_gpa_scaled_grandZ + wave_c + (1 + wave_c | stud_id),
data = chem_all, control = lmerControl(optimizer = "bobyqa")
)
summary(model2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: homework_next ~ pred_error_wc + pred_error_pm + urm + female +
## fg + hs_gpa_scaled_grandZ + wave_c + (1 + wave_c | stud_id)
## Data: chem_all
## Control: lmerControl(optimizer = "bobyqa")
##
## REML criterion at convergence: 35223
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.6870 -0.0075 0.0953 0.3234 4.4934
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## stud_id (Intercept) 4.37541 2.0917
## wave_c 0.03394 0.1842 -0.18
## Residual 5.60266 2.3670
## Number of obs: 7293, groups: stud_id, 734
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 13.77313 0.14848 716.62165 92.763 < 2e-16 ***
## pred_error_wc -0.04228 0.03398 6342.80166 -1.244 0.21348
## pred_error_pm -0.34653 0.12474 656.38145 -2.778 0.00563 **
## urm -0.52926 0.24405 641.73263 -2.169 0.03047 *
## female 0.07590 0.17753 627.12812 0.428 0.66916
## fg 0.13525 0.24265 631.01059 0.557 0.57745
## hs_gpa_scaled_grandZ 2.48716 0.30270 644.41851 8.216 1.15e-15 ***
## wave_c -0.07542 0.01086 501.05439 -6.946 1.17e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) prd_rrr_w prd_rrr_p urm female fg hs___Z
## pred_rrr_wc -0.012
## pred_rrr_pm -0.172 0.000
## urm -0.139 0.004 -0.025
## female -0.708 0.002 0.142 -0.074
## fg -0.118 -0.003 -0.007 -0.213 -0.109
## hs_gp_scl_Z 0.039 0.002 -0.135 0.123 -0.185 0.111
## wave_c -0.288 0.030 0.009 0.003 0.002 0.004 -0.006
#No week-to-week effect. Between-person: Students who are more overconfident on average across the semester tend to perform worse in subsequent weeks. Chronic overconfidence is maladaptive even though momentary fluctuations are not.
#Calibration does not drive learning or adjustment across weeks, but more overconfident students overall perform worse, suggesting poor self-knowledge/regulation
#Overall bias test
psych::describe(chem_all$pred_error, na.rm = TRUE)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 7995 0.08 1.06 0 0.05 0.49 -6 6 12 0.32 3.54 0.01
#Is it signficant
bias_model <- lmer(
pred_error ~ 1 + (1 | stud_id),
data = chem_all
)
summary(bias_model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: pred_error ~ 1 + (1 | stud_id)
## Data: chem_all
##
## REML criterion at convergence: 22206.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.7568 -0.3883 -0.0170 0.3663 6.7021
##
## Random effects:
## Groups Name Variance Std.Dev.
## stud_id (Intercept) 0.3539 0.5949
## Residual 0.7950 0.8916
## Number of obs: 7995, groups: stud_id, 825
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.08950 0.02326 779.27219 3.848 0.000129 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Students on average significantly overestimate their competence week-to-week
#Does bias change over time
bias_time <- lmer(
pred_error ~ wave_c + (1 | stud_id),
data = chem_all
)
summary(bias_time)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: pred_error ~ wave_c + (1 | stud_id)
## Data: chem_all
##
## REML criterion at convergence: 22207
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.7769 -0.3801 -0.0241 0.3680 6.6782
##
## Random effects:
## Groups Name Variance Std.Dev.
## stud_id (Intercept) 0.3533 0.5944
## Residual 0.7942 0.8912
## Number of obs: 7995, groups: stud_id, 825
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.350e-01 2.772e-02 1.568e+03 4.873 1.21e-06 ***
## wave_c -8.933e-03 2.963e-03 7.313e+03 -3.015 0.00258 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## wave_c -0.545
#Early semester there is more overconfidence; later in the semester it is closer to accurate
#Who is most likely to be miscalibrated
library(lme4)
bias_model2 <- lmer(
pred_error ~ urm + female + fg + hs_gpa_scaled_grandZ + (1 | stud_id),
data = chem_all
)
summary(bias_model2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: pred_error ~ urm + female + fg + hs_gpa_scaled_grandZ + (1 |
## stud_id)
## Data: chem_all
##
## REML criterion at convergence: 20875.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.7290 -0.3890 -0.0231 0.3805 6.7774
##
## Random effects:
## Groups Name Variance Std.Dev.
## stud_id (Intercept) 0.3369 0.5804
## Residual 0.7894 0.8885
## Number of obs: 7541, groups: stud_id, 776
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.208982 0.039655 741.433445 5.270 1.79e-07 ***
## urm 0.049000 0.068474 761.884985 0.716 0.474460
## female -0.213184 0.049738 731.575478 -4.286 2.06e-05 ***
## fg 0.003283 0.068111 748.903493 0.048 0.961570
## hs_gpa_scaled_grandZ 0.309370 0.084232 766.377601 3.673 0.000257 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) urm female fg
## urm -0.150
## female -0.734 -0.072
## fg -0.123 -0.223 -0.110
## hs_gp_scl_Z 0.031 0.128 -0.176 0.116
#Women are less overconfient than men (or more underconfident relative to men)
#Higher achieving students are more overconfident on average