library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(readr)
library(stringr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(haven)
library(naniar)
library(tibble)
## Warning: package 'tibble' was built under R version 4.3.3
library(gt)
## Warning: package 'gt' was built under R version 4.3.3

BSPKORM8 : Process Data

load("~/Library/CloudStorage/GoogleDrive-ejmyoung@stanford.edu/My Drive/0.Research/Process Data/TIMSS data/TIMSS2023_IDB_R_G8/2_Data Files/R Data/bspkorm8.rdata")

data<-BSPKORM8

Data Cleaning

Process Data

1). Check the process data based on the handbook

data_booklet<-data |> 
  select(IDSTUD, ITPARTPT1, ITPARTPT2, FLAGPT1, FLAGPT2)

#check the values 
# table(data_booklet$ITPARTPT1)
# table(data_booklet$ITPARTPT2)
# table(data_booklet$FLAGPT1)
# table(data_booklet$FLAGPT2)

#data understanding
## FLAGPT : 0: No issues; 1: Deleted due to technical issue; 2: Deleted due to outlier
## ITPARTPT :1: Yes; 2: No
## Delete the ITPARTP 2 and FLAGPT 1/2 is appropriate. 
data<-data |> 
  filter(ITPARTPT1 == 1 & ITPARTPT2 ==1 
         & FLAGPT1 == 0 & FLAGPT2 == 0)
table(data$ITPARTPT1)
## 
##    1 
## 4231
table(data$ITPARTPT2)
## 
##    1 
## 4231
table(data$FLAGPT1)
## 
##    0 
## 4231
table(data$FLAGPT2)
## 
##    0 
## 4231

2). Filter Science Data

data_science<-data |> 
  select(!starts_with("M")) |> 
  select(!SENWGT) |> 
  select(!SCOPE)
ncol(data_science)
## [1] 744
colnames(data_science)
##   [1] "CTY"       "IDCNTRY"   "IDPOP"     "IDGRADER"  "IDGRADE"   "ITASSESS" 
##   [7] "IDBOOK"    "IDSCHOOL"  "IDCLASS"   "IDSTUD"    "ITSEX"     "BSDAGE"   
##  [13] "ITADMINI"  "ILRELIAB"  "ITLANG_SA" "LCID_SA"   "ITPARTPT1" "ITPARTPT2"
##  [19] "FLAGPT1"   "FLAGPT2"   "SE72002_F" "SE72002_R" "SE72002_S" "SE72403_F"
##  [25] "SE72403_R" "SE72403_S" "SE72021_F" "SE72021_R" "SE72021_S" "SE72082_F"
##  [31] "SE72082_R" "SE72082_S" "SE72066_F" "SE72066_R" "SE72066_S" "SE72063_F"
##  [37] "SE72063_R" "SE72063_S" "SE72102_F" "SE72102_R" "SE72102_S" "SE72141_F"
##  [43] "SE72141_R" "SE72141_S" "SE72921_F" "SE72921_R" "SE72921_S" "SE72234_F"
##  [49] "SE72234_R" "SE72234_S" "SE72251_F" "SE72251_R" "SE72251_S" "SE72284_F"
##  [55] "SE72284_R" "SE72284_S" "SE72345_F" "SE72345_R" "SE72345_S" "SE72349_F"
##  [61] "SE72349_R" "SE72349_S" "SE72363_F" "SE72363_R" "SE72363_S" "SE62099_F"
##  [67] "SE62099_R" "SE62099_S" "SE62095_F" "SE62095_R" "SE62095_S" "SE62106_F"
##  [73] "SE62106_R" "SE62106_S" "SE62064_F" "SE62064_R" "SE62064_S" "SE62132_F"
##  [79] "SE62132_R" "SE62132_S" "SE62163_F" "SE62163_R" "SE62163_S" "SE62153_F"
##  [85] "SE62153_R" "SE62153_S" "SE62018_F" "SE62018_R" "SE62018_S" "SE62143_F"
##  [91] "SE62143_R" "SE62143_S" "SE62276_F" "SE62276_R" "SE62276_S" "SE62050_F"
##  [97] "SE62050_R" "SE62050_S" "SE62205_F" "SE62205_R" "SE62205_S" "SE62190_F"
## [103] "SE62190_R" "SE62190_S" "SE62024_F" "SE62024_R" "SE62024_S" "SE82902_F"
## [109] "SE82902_R" "SE82902_S" "SE82025_F" "SE82025_R" "SE82025_S" "SE82273_F"
## [115] "SE82273_R" "SE82273_S" "SE82131_F" "SE82131_R" "SE82131_S" "SE82171_F"
## [121] "SE82171_R" "SE82171_S" "SE82250_F" "SE82250_R" "SE82250_S" "SE82275_F"
## [127] "SE82275_R" "SE82275_S" "SE82343_F" "SE82343_R" "SE82343_S" "SE82347_F"
## [133] "SE82347_R" "SE82347_S" "S82T01_F"  "S82T01_R"  "S82T01_S"  "S82T02_F" 
## [139] "S82T02_R"  "S82T02_S"  "S82T03_F"  "S82T03_R"  "S82T03_S"  "S82T04_F" 
## [145] "S82T04_R"  "S82T04_S"  "S82T05_F"  "S82T05_R"  "S82T05_S"  "S82T06_F" 
## [151] "S82T06_R"  "S82T06_S"  "S72S01_F"  "S72S01_R"  "S72S01_S"  "S72S02_F" 
## [157] "S72S02_R"  "S72S02_S"  "S72S03_F"  "S72S03_R"  "S72S03_S"  "S72S04_F" 
## [163] "S72S04_R"  "S72S04_S"  "S72S05_F"  "S72S05_R"  "S72S05_S"  "S72S06_F" 
## [169] "S72S06_R"  "S72S06_S"  "S72S07_F"  "S72S07_R"  "S72S07_S"  "S72S08_F" 
## [175] "S72S08_R"  "S72S08_S"  "S72S09_F"  "S72S09_R"  "S72S09_S"  "S72S10_F" 
## [181] "S72S10_R"  "S72S10_S"  "S72S11_F"  "S72S11_R"  "S72S11_S"  "S72S12_F" 
## [187] "S72S12_R"  "S72S12_S"  "S72S13_F"  "S72S13_R"  "S72S13_S"  "S72S14_F" 
## [193] "S72S14_R"  "S72S14_S"  "SE82092_F" "SE82092_R" "SE82092_S" "SE82068_F"
## [199] "SE82068_R" "SE82068_S" "SE82008_F" "SE82008_R" "SE82008_S" "SE82021_F"
## [205] "SE82021_R" "SE82021_S" "SE82071_F" "SE82071_R" "SE82071_S" "SE82175_F"
## [211] "SE82175_R" "SE82175_S" "SE82104_F" "SE82104_R" "SE82104_S" "SE82172_F"
## [217] "SE82172_R" "SE82172_S" "SE82280_F" "SE82280_R" "SE82280_S" "SE82205_F"
## [223] "SE82205_R" "SE82205_S" "SE82262_F" "SE82262_R" "SE82262_S" "SE82341_F"
## [229] "SE82341_R" "SE82341_S" "SE82323_F" "SE82323_R" "SE82323_S" "SE82099_F"
## [235] "SE82099_R" "SE82099_S" "SE82325_F" "SE82325_R" "SE82325_S" "SE82020_F"
## [241] "SE82020_R" "SE82020_S" "SE82064_F" "SE82064_R" "SE82064_S" "SE82001_F"
## [247] "SE82001_R" "SE82001_S" "SE82050_F" "SE82050_R" "SE82050_S" "SE82040_F"
## [253] "SE82040_R" "SE82040_S" "SE82060_F" "SE82060_R" "SE82060_S" "SE82143_F"
## [259] "SE82143_R" "SE82143_S" "SE82147_F" "SE82147_R" "SE82147_S" "SE82224_F"
## [265] "SE82224_R" "SE82224_S" "SE82203_F" "SE82203_R" "SE82203_S" "SE82223_F"
## [271] "SE82223_R" "SE82223_S" "SE82264_F" "SE82264_R" "SE82264_S" "SE82340_F"
## [277] "SE82340_R" "SE82340_S" "SE82327_F" "SE82327_R" "SE82327_S" "SE82233_F"
## [283] "SE82233_R" "SE82233_S" "SE72078_F" "SE72078_R" "SE72078_S" "SE72460_F"
## [289] "SE72460_R" "SE72460_S" "SE72000_F" "SE72000_R" "SE72000_S" "SE72906_F"
## [295] "SE72906_R" "SE72906_S" "SE72901_F" "SE72901_R" "SE72901_S" "SE72038_F"
## [301] "SE72038_R" "SE72038_S" "SE72120_F" "SE72120_R" "SE72120_S" "SE72143_F"
## [307] "SE72143_R" "SE72143_S" "SE72523_F" "SE72523_R" "SE72523_S" "SE72168_F"
## [313] "SE72168_R" "SE72168_S" "SE72205_F" "SE72205_R" "SE72205_S" "SE72293_F"
## [319] "SE72293_R" "SE72293_S" "SE72280_F" "SE72280_R" "SE72280_S" "SE72370_F"
## [325] "SE72370_R" "SE72370_S" "SE72329_F" "SE72329_R" "SE72329_S" "SE82042_F"
## [331] "SE82042_R" "SE82042_S" "SE82043_F" "SE82043_R" "SE82043_S" "SE82002_F"
## [337] "SE82002_R" "SE82002_S" "SE82073_F" "SE82073_R" "SE82073_S" "SE82070_F"
## [343] "SE82070_R" "SE82070_S" "SE82144_F" "SE82144_R" "SE82144_S" "SE82108_F"
## [349] "SE82108_R" "SE82108_S" "SE82137_F" "SE82137_R" "SE82137_S" "SE82200_F"
## [355] "SE82200_R" "SE82200_S" "SE82244_F" "SE82244_R" "SE82244_S" "SE82271_F"
## [361] "SE82271_R" "SE82271_S" "SE82346_F" "SE82346_R" "SE82346_S" "SE82335_F"
## [367] "SE82335_R" "SE82335_S" "SE62091_F" "SE62091_R" "SE62091_S" "SE62100_F"
## [373] "SE62100_R" "SE62100_S" "SE62097_F" "SE62097_R" "SE62097_S" "SE62101_F"
## [379] "SE62101_R" "SE62101_S" "SE62266_F" "SE62266_R" "SE62266_S" "SE62128_F"
## [385] "SE62128_R" "SE62128_S" "SE62047_F" "SE62047_R" "SE62047_S" "SE62042_F"
## [391] "SE62042_R" "SE62042_S" "SE62250_F" "SE62250_R" "SE62250_S" "SE62246_F"
## [397] "SE62246_R" "SE62246_S" "SE62056_F" "SE62056_R" "SE62056_S" "SE62235_F"
## [403] "SE62235_R" "SE62235_S" "SE62180_F" "SE62180_R" "SE62180_S" "SE62022_F"
## [409] "SE62022_R" "SE62022_S" "SE62243_F" "SE62243_R" "SE62243_S" "SE82007_F"
## [415] "SE82007_R" "SE82007_S" "SE82045_F" "SE82045_R" "SE82045_S" "SE82075_F"
## [421] "SE82075_R" "SE82075_S" "SE82027_F" "SE82027_R" "SE82027_S" "SE82080_F"
## [427] "SE82080_R" "SE82080_S" "SE82005_F" "SE82005_R" "SE82005_S" "SE82170_F"
## [433] "SE82170_R" "SE82170_S" "SE82138_F" "SE82138_R" "SE82138_S" "SE82281_F"
## [439] "SE82281_R" "SE82281_S" "SE82225_F" "SE82225_R" "SE82225_S" "SE82263_F"
## [445] "SE82263_R" "SE82263_S" "SE82307_F" "SE82307_R" "SE82307_S" "SE82333_F"
## [451] "SE82333_R" "SE82333_S" "SE82304_F" "SE82304_R" "SE82304_S" "SE82721_F"
## [457] "SE82721_R" "SE82721_S" "S82L01_F"  "S82L01_R"  "S82L01_S"  "S82L02_F" 
## [463] "S82L02_R"  "S82L02_S"  "S82L03_F"  "S82L03_R"  "S82L03_S"  "S82L05_F" 
## [469] "S82L05_R"  "S82L05_S"  "S82L06_F"  "S82L06_R"  "S82L06_S"  "S82L08_F" 
## [475] "S82L08_R"  "S82L08_S"  "S82L09_F"  "S82L09_R"  "S82L09_S"  "S82L10_F" 
## [481] "S82L10_R"  "S82L10_S"  "SE82077_F" "SE82077_R" "SE82077_S" "SE82103_F"
## [487] "SE82103_R" "SE82103_S" "SE82146_F" "SE82146_R" "SE82146_S" "SE82246_F"
## [493] "SE82246_R" "SE82246_S" "SE82331_F" "SE82331_R" "SE82331_S" "SE62279_F"
## [499] "SE62279_R" "SE62279_S" "SE62112_F" "SE62112_R" "SE62112_S" "SE62119_F"
## [505] "SE62119_R" "SE62119_S" "SE62093_F" "SE62093_R" "SE62093_S" "SE62089_F"
## [511] "SE62089_R" "SE62089_S" "SE62006_F" "SE62006_R" "SE62006_S" "SE62067_F"
## [517] "SE62067_R" "SE62067_S" "SE62247_F" "SE62247_R" "SE62247_S" "SE62177_F"
## [523] "SE62177_R" "SE62177_S" "SE62186_F" "SE62186_R" "SE62186_S" "SE62211_F"
## [529] "SE62211_R" "SE62211_S" "SE62036_F" "SE62036_R" "SE62036_S" "SE62033_F"
## [535] "SE62033_R" "SE62033_S" "SE62037_F" "SE62037_R" "SE62037_S" "SE62242_F"
## [541] "SE62242_R" "SE62242_S" "SE72033_F" "SE72033_R" "SE72033_S" "SE72440_F"
## [547] "SE72440_R" "SE72440_S" "SE72032_F" "SE72032_R" "SE72032_S" "SE72031_F"
## [553] "SE72031_R" "SE72031_S" "SE72086_F" "SE72086_R" "SE72086_S" "SE72005_F"
## [559] "SE72005_R" "SE72005_S" "SE72048_F" "SE72048_R" "SE72048_S" "SE72123_F"
## [565] "SE72123_R" "SE72123_S" "SE72116_F" "SE72116_R" "SE72116_S" "SE72920_F"
## [571] "SE72920_R" "SE72920_S" "SE72294_F" "SE72294_R" "SE72294_S" "SE72231_F"
## [577] "SE72231_R" "SE72231_S" "SE72261_F" "SE72261_R" "SE72261_S" "SE72220_F"
## [583] "SE72220_R" "SE72220_S" "SE72348_F" "SE72348_R" "SE72348_S" "SE72720_F"
## [589] "SE72720_R" "SE72720_S" "SE72011_F" "SE72011_R" "SE72011_S" "SE72905_F"
## [595] "SE72905_R" "SE72905_S" "SE72049_F" "SE72049_R" "SE72049_S" "SE72016_F"
## [601] "SE72016_R" "SE72016_S" "SE72451_F" "SE72451_R" "SE72451_S" "SE72074_F"
## [607] "SE72074_R" "SE72074_S" "SE72091_F" "SE72091_R" "SE72091_S" "SE72109_F"
## [613] "SE72109_R" "SE72109_S" "SE72140_F" "SE72140_R" "SE72140_S" "SE72132_F"
## [619] "SE72132_R" "SE72132_S" "SE72209_F" "SE72209_R" "SE72209_S" "SE72210_F"
## [625] "SE72210_R" "SE72210_S" "SE72249_F" "SE72249_R" "SE72249_S" "SE72323_F"
## [631] "SE72323_R" "SE72323_S" "SE72368_F" "SE72368_R" "SE72368_S" "SE72303_F"
## [637] "SE72303_R" "SE72303_S" "JKZONE"    "JKREP"     "TOTWGT"    "HOUWGT"   
## [643] "WGTFAC1"   "WGTADJ1"   "WGTFAC2"   "WGTADJ2"   "WGTFAC3"   "WGTADJ3"  
## [649] "BSMMAT01"  "BSMMAT02"  "BSMMAT03"  "BSMMAT04"  "BSMMAT05"  "BSMNUM01" 
## [655] "BSMNUM02"  "BSMNUM03"  "BSMNUM04"  "BSMNUM05"  "BSMALG01"  "BSMALG02" 
## [661] "BSMALG03"  "BSMALG04"  "BSMALG05"  "BSMGEO01"  "BSMGEO02"  "BSMGEO03" 
## [667] "BSMGEO04"  "BSMGEO05"  "BSMDAT01"  "BSMDAT02"  "BSMDAT03"  "BSMDAT04" 
## [673] "BSMDAT05"  "BSMKNO01"  "BSMKNO02"  "BSMKNO03"  "BSMKNO04"  "BSMKNO05" 
## [679] "BSMAPP01"  "BSMAPP02"  "BSMAPP03"  "BSMAPP04"  "BSMAPP05"  "BSMREA01" 
## [685] "BSMREA02"  "BSMREA03"  "BSMREA04"  "BSMREA05"  "BSSSCI01"  "BSSSCI02" 
## [691] "BSSSCI03"  "BSSSCI04"  "BSSSCI05"  "BSSBIO01"  "BSSBIO02"  "BSSBIO03" 
## [697] "BSSBIO04"  "BSSBIO05"  "BSSCHE01"  "BSSCHE02"  "BSSCHE03"  "BSSCHE04" 
## [703] "BSSCHE05"  "BSSPHY01"  "BSSPHY02"  "BSSPHY03"  "BSSPHY04"  "BSSPHY05" 
## [709] "BSSEAR01"  "BSSEAR02"  "BSSEAR03"  "BSSEAR04"  "BSSEAR05"  "BSSKNO01" 
## [715] "BSSKNO02"  "BSSKNO03"  "BSSKNO04"  "BSSKNO05"  "BSSAPP01"  "BSSAPP02" 
## [721] "BSSAPP03"  "BSSAPP04"  "BSSAPP05"  "BSSREA01"  "BSSREA02"  "BSSREA03" 
## [727] "BSSREA04"  "BSSREA05"  "BSSENV01"  "BSSENV02"  "BSSENV03"  "BSSENV04" 
## [733] "BSSENV05"  "BSMIBM01"  "BSMIBM02"  "BSMIBM03"  "BSMIBM04"  "BSMIBM05" 
## [739] "BSSIBM01"  "BSSIBM02"  "BSSIBM03"  "BSSIBM04"  "BSSIBM05"  "VERSION"
data_science |> 
  select(starts_with("SE")) |> 
  colnames()
##   [1] "SE72002_F" "SE72002_R" "SE72002_S" "SE72403_F" "SE72403_R" "SE72403_S"
##   [7] "SE72021_F" "SE72021_R" "SE72021_S" "SE72082_F" "SE72082_R" "SE72082_S"
##  [13] "SE72066_F" "SE72066_R" "SE72066_S" "SE72063_F" "SE72063_R" "SE72063_S"
##  [19] "SE72102_F" "SE72102_R" "SE72102_S" "SE72141_F" "SE72141_R" "SE72141_S"
##  [25] "SE72921_F" "SE72921_R" "SE72921_S" "SE72234_F" "SE72234_R" "SE72234_S"
##  [31] "SE72251_F" "SE72251_R" "SE72251_S" "SE72284_F" "SE72284_R" "SE72284_S"
##  [37] "SE72345_F" "SE72345_R" "SE72345_S" "SE72349_F" "SE72349_R" "SE72349_S"
##  [43] "SE72363_F" "SE72363_R" "SE72363_S" "SE62099_F" "SE62099_R" "SE62099_S"
##  [49] "SE62095_F" "SE62095_R" "SE62095_S" "SE62106_F" "SE62106_R" "SE62106_S"
##  [55] "SE62064_F" "SE62064_R" "SE62064_S" "SE62132_F" "SE62132_R" "SE62132_S"
##  [61] "SE62163_F" "SE62163_R" "SE62163_S" "SE62153_F" "SE62153_R" "SE62153_S"
##  [67] "SE62018_F" "SE62018_R" "SE62018_S" "SE62143_F" "SE62143_R" "SE62143_S"
##  [73] "SE62276_F" "SE62276_R" "SE62276_S" "SE62050_F" "SE62050_R" "SE62050_S"
##  [79] "SE62205_F" "SE62205_R" "SE62205_S" "SE62190_F" "SE62190_R" "SE62190_S"
##  [85] "SE62024_F" "SE62024_R" "SE62024_S" "SE82902_F" "SE82902_R" "SE82902_S"
##  [91] "SE82025_F" "SE82025_R" "SE82025_S" "SE82273_F" "SE82273_R" "SE82273_S"
##  [97] "SE82131_F" "SE82131_R" "SE82131_S" "SE82171_F" "SE82171_R" "SE82171_S"
## [103] "SE82250_F" "SE82250_R" "SE82250_S" "SE82275_F" "SE82275_R" "SE82275_S"
## [109] "SE82343_F" "SE82343_R" "SE82343_S" "SE82347_F" "SE82347_R" "SE82347_S"
## [115] "SE82092_F" "SE82092_R" "SE82092_S" "SE82068_F" "SE82068_R" "SE82068_S"
## [121] "SE82008_F" "SE82008_R" "SE82008_S" "SE82021_F" "SE82021_R" "SE82021_S"
## [127] "SE82071_F" "SE82071_R" "SE82071_S" "SE82175_F" "SE82175_R" "SE82175_S"
## [133] "SE82104_F" "SE82104_R" "SE82104_S" "SE82172_F" "SE82172_R" "SE82172_S"
## [139] "SE82280_F" "SE82280_R" "SE82280_S" "SE82205_F" "SE82205_R" "SE82205_S"
## [145] "SE82262_F" "SE82262_R" "SE82262_S" "SE82341_F" "SE82341_R" "SE82341_S"
## [151] "SE82323_F" "SE82323_R" "SE82323_S" "SE82099_F" "SE82099_R" "SE82099_S"
## [157] "SE82325_F" "SE82325_R" "SE82325_S" "SE82020_F" "SE82020_R" "SE82020_S"
## [163] "SE82064_F" "SE82064_R" "SE82064_S" "SE82001_F" "SE82001_R" "SE82001_S"
## [169] "SE82050_F" "SE82050_R" "SE82050_S" "SE82040_F" "SE82040_R" "SE82040_S"
## [175] "SE82060_F" "SE82060_R" "SE82060_S" "SE82143_F" "SE82143_R" "SE82143_S"
## [181] "SE82147_F" "SE82147_R" "SE82147_S" "SE82224_F" "SE82224_R" "SE82224_S"
## [187] "SE82203_F" "SE82203_R" "SE82203_S" "SE82223_F" "SE82223_R" "SE82223_S"
## [193] "SE82264_F" "SE82264_R" "SE82264_S" "SE82340_F" "SE82340_R" "SE82340_S"
## [199] "SE82327_F" "SE82327_R" "SE82327_S" "SE82233_F" "SE82233_R" "SE82233_S"
## [205] "SE72078_F" "SE72078_R" "SE72078_S" "SE72460_F" "SE72460_R" "SE72460_S"
## [211] "SE72000_F" "SE72000_R" "SE72000_S" "SE72906_F" "SE72906_R" "SE72906_S"
## [217] "SE72901_F" "SE72901_R" "SE72901_S" "SE72038_F" "SE72038_R" "SE72038_S"
## [223] "SE72120_F" "SE72120_R" "SE72120_S" "SE72143_F" "SE72143_R" "SE72143_S"
## [229] "SE72523_F" "SE72523_R" "SE72523_S" "SE72168_F" "SE72168_R" "SE72168_S"
## [235] "SE72205_F" "SE72205_R" "SE72205_S" "SE72293_F" "SE72293_R" "SE72293_S"
## [241] "SE72280_F" "SE72280_R" "SE72280_S" "SE72370_F" "SE72370_R" "SE72370_S"
## [247] "SE72329_F" "SE72329_R" "SE72329_S" "SE82042_F" "SE82042_R" "SE82042_S"
## [253] "SE82043_F" "SE82043_R" "SE82043_S" "SE82002_F" "SE82002_R" "SE82002_S"
## [259] "SE82073_F" "SE82073_R" "SE82073_S" "SE82070_F" "SE82070_R" "SE82070_S"
## [265] "SE82144_F" "SE82144_R" "SE82144_S" "SE82108_F" "SE82108_R" "SE82108_S"
## [271] "SE82137_F" "SE82137_R" "SE82137_S" "SE82200_F" "SE82200_R" "SE82200_S"
## [277] "SE82244_F" "SE82244_R" "SE82244_S" "SE82271_F" "SE82271_R" "SE82271_S"
## [283] "SE82346_F" "SE82346_R" "SE82346_S" "SE82335_F" "SE82335_R" "SE82335_S"
## [289] "SE62091_F" "SE62091_R" "SE62091_S" "SE62100_F" "SE62100_R" "SE62100_S"
## [295] "SE62097_F" "SE62097_R" "SE62097_S" "SE62101_F" "SE62101_R" "SE62101_S"
## [301] "SE62266_F" "SE62266_R" "SE62266_S" "SE62128_F" "SE62128_R" "SE62128_S"
## [307] "SE62047_F" "SE62047_R" "SE62047_S" "SE62042_F" "SE62042_R" "SE62042_S"
## [313] "SE62250_F" "SE62250_R" "SE62250_S" "SE62246_F" "SE62246_R" "SE62246_S"
## [319] "SE62056_F" "SE62056_R" "SE62056_S" "SE62235_F" "SE62235_R" "SE62235_S"
## [325] "SE62180_F" "SE62180_R" "SE62180_S" "SE62022_F" "SE62022_R" "SE62022_S"
## [331] "SE62243_F" "SE62243_R" "SE62243_S" "SE82007_F" "SE82007_R" "SE82007_S"
## [337] "SE82045_F" "SE82045_R" "SE82045_S" "SE82075_F" "SE82075_R" "SE82075_S"
## [343] "SE82027_F" "SE82027_R" "SE82027_S" "SE82080_F" "SE82080_R" "SE82080_S"
## [349] "SE82005_F" "SE82005_R" "SE82005_S" "SE82170_F" "SE82170_R" "SE82170_S"
## [355] "SE82138_F" "SE82138_R" "SE82138_S" "SE82281_F" "SE82281_R" "SE82281_S"
## [361] "SE82225_F" "SE82225_R" "SE82225_S" "SE82263_F" "SE82263_R" "SE82263_S"
## [367] "SE82307_F" "SE82307_R" "SE82307_S" "SE82333_F" "SE82333_R" "SE82333_S"
## [373] "SE82304_F" "SE82304_R" "SE82304_S" "SE82721_F" "SE82721_R" "SE82721_S"
## [379] "SE82077_F" "SE82077_R" "SE82077_S" "SE82103_F" "SE82103_R" "SE82103_S"
## [385] "SE82146_F" "SE82146_R" "SE82146_S" "SE82246_F" "SE82246_R" "SE82246_S"
## [391] "SE82331_F" "SE82331_R" "SE82331_S" "SE62279_F" "SE62279_R" "SE62279_S"
## [397] "SE62112_F" "SE62112_R" "SE62112_S" "SE62119_F" "SE62119_R" "SE62119_S"
## [403] "SE62093_F" "SE62093_R" "SE62093_S" "SE62089_F" "SE62089_R" "SE62089_S"
## [409] "SE62006_F" "SE62006_R" "SE62006_S" "SE62067_F" "SE62067_R" "SE62067_S"
## [415] "SE62247_F" "SE62247_R" "SE62247_S" "SE62177_F" "SE62177_R" "SE62177_S"
## [421] "SE62186_F" "SE62186_R" "SE62186_S" "SE62211_F" "SE62211_R" "SE62211_S"
## [427] "SE62036_F" "SE62036_R" "SE62036_S" "SE62033_F" "SE62033_R" "SE62033_S"
## [433] "SE62037_F" "SE62037_R" "SE62037_S" "SE62242_F" "SE62242_R" "SE62242_S"
## [439] "SE72033_F" "SE72033_R" "SE72033_S" "SE72440_F" "SE72440_R" "SE72440_S"
## [445] "SE72032_F" "SE72032_R" "SE72032_S" "SE72031_F" "SE72031_R" "SE72031_S"
## [451] "SE72086_F" "SE72086_R" "SE72086_S" "SE72005_F" "SE72005_R" "SE72005_S"
## [457] "SE72048_F" "SE72048_R" "SE72048_S" "SE72123_F" "SE72123_R" "SE72123_S"
## [463] "SE72116_F" "SE72116_R" "SE72116_S" "SE72920_F" "SE72920_R" "SE72920_S"
## [469] "SE72294_F" "SE72294_R" "SE72294_S" "SE72231_F" "SE72231_R" "SE72231_S"
## [475] "SE72261_F" "SE72261_R" "SE72261_S" "SE72220_F" "SE72220_R" "SE72220_S"
## [481] "SE72348_F" "SE72348_R" "SE72348_S" "SE72720_F" "SE72720_R" "SE72720_S"
## [487] "SE72011_F" "SE72011_R" "SE72011_S" "SE72905_F" "SE72905_R" "SE72905_S"
## [493] "SE72049_F" "SE72049_R" "SE72049_S" "SE72016_F" "SE72016_R" "SE72016_S"
## [499] "SE72451_F" "SE72451_R" "SE72451_S" "SE72074_F" "SE72074_R" "SE72074_S"
## [505] "SE72091_F" "SE72091_R" "SE72091_S" "SE72109_F" "SE72109_R" "SE72109_S"
## [511] "SE72140_F" "SE72140_R" "SE72140_S" "SE72132_F" "SE72132_R" "SE72132_S"
## [517] "SE72209_F" "SE72209_R" "SE72209_S" "SE72210_F" "SE72210_R" "SE72210_S"
## [523] "SE72249_F" "SE72249_R" "SE72249_S" "SE72323_F" "SE72323_R" "SE72323_S"
## [529] "SE72368_F" "SE72368_R" "SE72368_S" "SE72303_F" "SE72303_R" "SE72303_S"

3). Process Data in Booklet 9 (for trial)

#check the booklet 
count(data_science,IDBOOK) # booklet 9 n = 176
## # A tibble: 14 × 2
##    IDBOOK              n
##    <dbl+lbl>       <int>
##  1  1 [Booklet 01]   426
##  2  2 [Booklet 02]   430
##  3  3 [Booklet 03]   421
##  4  4 [Booklet 04]   420
##  5  5 [Booklet 05]   423
##  6  6 [Booklet 06]   429
##  7  7 [Booklet 07]   417
##  8  8 [Booklet 08]   182
##  9  9 [Booklet 09]   176
## 10 10 [Booklet 10]   181
## 11 11 [Booklet 11]   183
## 12 12 [Booklet 12]   180
## 13 13 [Booklet 13]   186
## 14 14 [Booklet 14]   177
#filter the booklet 9
booklet_9<-data_science |> 
  filter(IDBOOK ==9) 

nrow(booklet_9)
## [1] 176
#check the missing
vis_miss(booklet_9)

#delete when the variables are all NA
booklet_9 <- booklet_9 |> 
  select(where(~!all(is.na(.))))

#check the missing again
vis_miss(booklet_9)

#save the booklet_9 in csv file
write_csv(booklet_9, "booklet_9.csv")

items n = 30 “SE72000” “SE72038” “SE72078” “SE72120” “SE72143” “SE72168” “SE72205” “SE72280” “SE72293” “SE72329” “SE72370” “SE72460” “SE72523” “SE72901” “SE72906” “SE82001” “SE82020” “SE82040” “SE82050” “SE82060” “SE82064” “SE82143” “SE82147” “SE82203” “SE82223” “SE82224” “SE82233” “SE82264” “SE82327” “SE82340”

Score Data

load("~/Library/CloudStorage/GoogleDrive-ejmyoung@stanford.edu/My Drive/0.Research/Process Data/TIMSS data/TIMSS2023_IDB_R_G8/2_Data Files/R Data/bsakorm8.rdata")

score <-BSAKORM8
colnames(score)
##    [1] "CTY"       "IDCNTRY"   "IDPOP"     "IDGRADER"  "IDGRADE"   "ITASSESS" 
##    [7] "IDBOOK"    "IDSCHOOL"  "IDCLASS"   "IDSTUD"    "ITSEX"     "BSDAGE"   
##   [13] "ITADMINI"  "ILRELIAB"  "ITLANG_SA" "LCID_SA"   "ME72002"   "ME72188"  
##   [19] "ME72035"   "ME72055A"  "ME72055B"  "ME72055C"  "ME72055D"  "ME72055E" 
##   [25] "ME72055F"  "ME72055"   "ME72222"   "ME72090"   "ME72233"   "ME72106A" 
##   [31] "ME72106B"  "ME72106C"  "ME72128A"  "ME72128B"  "ME72119"   "ME72153A" 
##   [37] "ME72153B"  "ME72172"   "ME72001"   "ME72019"   "ME72189"   "ME72024"  
##   [43] "ME72043"   "ME72221"   "ME72220"   "ME72225A"  "ME72225B"  "ME72225"  
##   [49] "ME72110A"  "ME72110B"  "ME72150"   "ME72139"   "ME72229"   "ME72171"  
##   [55] "ME72211A"  "ME72211B"  "ME82214A"  "ME82214B"  "MQ82N01A"  "MQ82N01B" 
##   [61] "MQ82N02"   "MQ82N03"   "MQ82N04"   "MQ82N07"   "MQ82N08A"  "MQ82N08B" 
##   [67] "ME82711"   "ME82111"   "ME82320"   "ME82707"   "ME82802"   "MQ72D01"  
##   [73] "MQ72D02AA" "MQ72D02AB" "MQ72D02AC" "MQ72D02A"  "MQ72D02B"  "MQ72D03"  
##   [79] "MQ72D04A"  "MQ72D04B"  "MQ72D04"   "MQ72D05"   "MQ72D06A"  "MQ72D06B" 
##   [85] "MQ72D07A"  "MQ72D07B"  "MQ72D08A"  "MQ72D08B"  "MQ82C01"   "MQ82C02"  
##   [91] "MQ82C03"   "MQ82C04"   "MQ82C05"   "ME82808"   "ME82116"   "ME82322"  
##   [97] "ME82199A"  "ME82199B"  "ME82723"   "ME82106"   "ME82807"   "ME82309"  
##  [103] "ME82724"   "ME82403"   "ME82205"   "ME82512"   "ME82617"   "ME82201"  
##  [109] "ME82727"   "ME82310"   "ME82112"   "ME82203"   "ME82404"   "ME82206"  
##  [115] "ME82791"   "ME82103"   "ME62271"   "ME62152"   "ME62215A"  "ME62215B" 
##  [121] "ME62215"   "ME62143"   "ME62230"   "ME62095"   "ME62076"   "ME62030"  
##  [127] "ME62171"   "ME62301"   "ME62194"   "ME62344"   "ME62320"   "ME62296"  
##  [133] "ME82104"   "ME82109"   "ME82702A"  "ME82702B"  "ME82702C"  "ME82702"  
##  [139] "ME82623A"  "ME82623B"  "ME82623C"  "ME82623D"  "ME82623E"  "ME82623F" 
##  [145] "ME82623"   "ME82715"   "ME82790"   "ME82101"   "ME82601"   "ME82502A" 
##  [151] "ME82502B"  "ME82502"   "ME82402"   "ME82410"   "ME82725"   "ME82315"  
##  [157] "ME62001"   "ME62214"   "ME62146"   "ME62154"   "ME62067"   "ME62341"  
##  [163] "ME62242"   "ME62250A"  "ME62250B"  "ME62170A"  "ME62170B"  "ME62170"  
##  [169] "ME62192"   "ME62072"   "ME62048A"  "ME62048B"  "ME62048C"  "ME62048"  
##  [175] "ME62120"   "ME82613"   "ME82708A"  "ME82708B"  "ME82216"   "ME82105"  
##  [181] "ME82405"   "ME82792"   "ME82612"   "ME82313"   "ME82503"   "ME82407"  
##  [187] "ME82312A"  "ME82312B"  "ME82420"   "ME82726"   "ME82510"   "ME82804"  
##  [193] "ME82207"   "ME82219"   "ME82415"   "ME82314A"  "ME82314B"  "ME82314C" 
##  [199] "ME82314D"  "ME82314E"  "ME82314"   "ME82608"   "ME82119"   "ME82611"  
##  [205] "ME82411"   "ME82610"   "ME82801"   "ME82511"   "ME82603"   "ME72178A" 
##  [211] "ME72178B"  "ME72178C"  "ME72178D"  "ME72178E"  "ME72178"   "ME72234"  
##  [217] "ME72020A"  "ME72020B"  "ME72020C"  "ME72020D"  "ME72020"   "ME72027"  
##  [223] "ME72052A"  "ME72052B"  "ME72052"   "ME72067"   "ME72083A"  "ME72083B" 
##  [229] "ME72108A"  "ME72108B"  "ME72181"   "ME72126"   "ME72164A"  "ME72164B" 
##  [235] "ME72164C"  "ME72164D"  "ME72164E"  "ME72164"   "ME72185A"  "ME72185B" 
##  [241] "ME72005"   "ME72021"   "ME72026"   "ME72041A"  "ME72041B"  "ME72223"  
##  [247] "ME72094"   "ME72059"   "ME72080"   "ME72081A"  "ME72081B"  "ME72081C" 
##  [253] "ME72081D"  "ME72081"   "ME72140A"  "ME72140B"  "ME72140C"  "ME72140D" 
##  [259] "ME72140E"  "ME72140F"  "ME72140"   "ME72120"   "ME72131"   "ME72147"  
##  [265] "ME72154"   "ME72192"   "ME72161"   "ME72187"   "ME72022"   "ME72038"  
##  [271] "ME72045"   "ME72049"   "ME72069"   "ME72074"   "ME72013"   "ME72095A" 
##  [277] "ME72095B"  "ME72095"   "ME72109"   "ME72125"   "ME72196"   "ME72237"  
##  [283] "ME72232A"  "ME72232B"  "ME72232C"  "ME72232D"  "ME72232"   "ME72206"  
##  [289] "MP72178"   "MP72234"   "MP72020"   "MP72027"   "MP72052A"  "MP72052B" 
##  [295] "MP72052"   "MP72067"   "MP72083A"  "MP72083B"  "MP72108A"  "MP72108B" 
##  [301] "MP72181"   "MP72126"   "MP72164A"  "MP72164B"  "MP72164C"  "MP72164D" 
##  [307] "MP72164E"  "MP72164"   "MP72185A"  "MP72185B"  "MP72002"   "MP72188"  
##  [313] "MP72035"   "MP72055"   "MP72222"   "MP72090"   "MP72233"   "MP72106A" 
##  [319] "MP72106B"  "MP72106C"  "MP72128A"  "MP72128B"  "MP72119"   "MP72153A" 
##  [325] "MP72153B"  "MP72172"   "MP62329"   "MP62151"   "MP62346"   "MP62212"  
##  [331] "MP62056"   "MP62317"   "MP62350"   "MP62078"   "MP62284"   "MP62245"  
##  [337] "MP62287"   "MP62345A"  "MP62345BA" "MP62345BB" "MP62345BC" "MP62345BD"
##  [343] "MP62345B"  "MP62115"   "MP72187"   "MP72022"   "MP72038"   "MP72045"  
##  [349] "MP72049"   "MP72069"   "MP72074"   "MP72013"   "MP72095A"  "MP72095B" 
##  [355] "MP72095"   "MP72109"   "MP72125"   "MP72196"   "MP72237"   "MP72232A" 
##  [361] "MP72232B"  "MP72232C"  "MP72232D"  "MP72232"   "MP72206"   "MP62271"  
##  [367] "MP62152"   "MP62215"   "MP62143"   "MP62230"   "MP62095"   "MP62076"  
##  [373] "MP62030"   "MP62171"   "MP62301"   "MP62194"   "MP62344"   "MP62320"  
##  [379] "MP62296"   "MP72001"   "MP72019"   "MP72189"   "MP72024"   "MP72043"  
##  [385] "MP72221"   "MP72220"   "MP72225A"  "MP72225B"  "MP72225"   "MP72110A" 
##  [391] "MP72110B"  "MP72150"   "MP72139"   "MP72229"   "MP72171"   "MP72211A" 
##  [397] "MP72211B"  "MP62001"   "MP62214"   "MP62146"   "MP62154"   "MP62067"  
##  [403] "MP62341"   "MP62242"   "MP62250A"  "MP62250B"  "MP62170"   "MP62192"  
##  [409] "MP62072"   "MP62048A"  "MP62048B"  "MP62048C"  "MP62048"   "MP62120"  
##  [415] "MP72005"   "MP72021"   "MP72026"   "MP72041A"  "MP72041B"  "MP72223"  
##  [421] "MP72094"   "MP72059"   "MP72080"   "MP72081"   "MP72140A"  "MP72140B" 
##  [427] "MP72140C"  "MP72140D"  "MP72140E"  "MP72140F"  "MP72140"   "MP72120"  
##  [433] "MP72131"   "MP72147"   "MP72154"   "MP72192"   "MP72161"   "SE72002"  
##  [439] "SE72403A"  "SE72403B"  "SE72403C"  "SE72403D"  "SE72403"   "SE72021"  
##  [445] "SE72082"   "SE72066"   "SE72063"   "SE72102"   "SE72141A"  "SE72141B" 
##  [451] "SE72921"   "SE72234"   "SE72251"   "SE72284"   "SE72345A"  "SE72345B" 
##  [457] "SE72345C"  "SE72345D"  "SE72345E"  "SE72345F"  "SE72345G"  "SE72345"  
##  [463] "SE72349"   "SE72363"   "SE62099"   "SE62095"   "SE62106"   "SE62064"  
##  [469] "SE62132"   "SE62163"   "SE62153"   "SE62018A"  "SE62018B"  "SE62018C" 
##  [475] "SE62018D"  "SE62018E"  "SE62018"   "SE62143"   "SE62276"   "SE62050"  
##  [481] "SE62205"   "SE62190"   "SE62024A"  "SE62024B"  "SE82902"   "SE82025A" 
##  [487] "SE82025B"  "SE82025C"  "SE82025D"  "SE82025"   "SE82273A"  "SE82273B" 
##  [493] "SE82131"   "SE82171"   "SE82250"   "SE82275A"  "SE82275B"  "SE82275C" 
##  [499] "SE82275"   "SE82343A"  "SE82343B"  "SE82343C"  "SE82343"   "SE82347"  
##  [505] "SQ82T01A"  "SQ82T01B"  "SQ82T01C"  "SQ82T01D"  "SQ82T01E"  "SQ82T01"  
##  [511] "SQ82T02AA" "SQ82T02AB" "SQ82T02A"  "SQ82T02B"  "SQ82T03A"  "SQ82T03B" 
##  [517] "SQ82T04"   "SQ72S01"   "SQ72S02"   "SQ72S03"   "SQ72S04"   "SQ72S05"  
##  [523] "SQ72S06"   "SQ72S07A"  "SQ72S07B"  "SQ72S07C"  "SQ72S08"   "SQ72S09A" 
##  [529] "SQ72S09B"  "SQ72S09C"  "SQ72S09"   "SQ72S10"   "SQ72S11"   "SQ72S12"  
##  [535] "SQ72S13A"  "SQ72S13B"  "SE82092"   "SE82068A"  "SE82068B"  "SE82008A" 
##  [541] "SE82008B"  "SE82008C"  "SE82008D"  "SE82008E"  "SE82008F"  "SE82008G" 
##  [547] "SE82008H"  "SE82008"   "SE82021A"  "SE82021B"  "SE82021C"  "SE82021D" 
##  [553] "SE82021"   "SE82071"   "SE82175A"  "SE82175B"  "SE82175C"  "SE82175D" 
##  [559] "SE82175E"  "SE82175"   "SE82104A"  "SE82104B"  "SE82104C"  "SE82104D" 
##  [565] "SE82104"   "SE82172A"  "SE82172B"  "SE82172C"  "SE82172D"  "SE82172"  
##  [571] "SE82280"   "SE82205A"  "SE82205B"  "SE82205C"  "SE82205D"  "SE82205"  
##  [577] "SE82262"   "SE82341A"  "SE82341B"  "SE82341C"  "SE82341D"  "SE82341E" 
##  [583] "SE82341"   "SE82323A"  "SE82323B"  "SE82323C"  "SE82323"   "SE82099"  
##  [589] "SE82325"   "SE82020"   "SE82064A"  "SE82064B"  "SE82064C"  "SE82064D" 
##  [595] "SE82064"   "SE82001A"  "SE82001B"  "SE82001C"  "SE82001D"  "SE82001E" 
##  [601] "SE82001"   "SE82050"   "SE82040A"  "SE82040B"  "SE82040C"  "SE82040D" 
##  [607] "SE82040E"  "SE82040"   "SE82060A"  "SE82060B"  "SE82060"   "SE82143"  
##  [613] "SE82147"   "SE82224"   "SE82203"   "SE82223"   "SE82264"   "SE82340A" 
##  [619] "SE82340B"  "SE82340C"  "SE82340D"  "SE82340E"  "SE82340"   "SE82327A" 
##  [625] "SE82327B"  "SE82327C"  "SE82327D"  "SE82327"   "SE82233"   "SE72078"  
##  [631] "SE72460"   "SE72000A"  "SE72000B"  "SE72000C"  "SE72000D"  "SE72000E" 
##  [637] "SE72000"   "SE72906A"  "SE72906B"  "SE72906C"  "SE72906D"  "SE72906E" 
##  [643] "SE72906"   "SE72901"   "SE72038"   "SE72120"   "SE72143A"  "SE72143B" 
##  [649] "SE72143C"  "SE72143D"  "SE72143"   "SE72523"   "SE72168"   "SE72205"  
##  [655] "SE72293"   "SE72280A"  "SE72280B"  "SE72370"   "SE72329"   "SE82042"  
##  [661] "SE82043"   "SE82002A"  "SE82002B"  "SE82002C"  "SE82002D"  "SE82002E" 
##  [667] "SE82002"   "SE82073"   "SE82070"   "SE82144"   "SE82108"   "SE82137"  
##  [673] "SE82200AA" "SE82200AB" "SE82200AC" "SE82200A"  "SE82200BA" "SE82200BB"
##  [679] "SE82200BC" "SE82200B"  "SE82244A"  "SE82244B"  "SE82244C"  "SE82244"  
##  [685] "SE82271A"  "SE82271B"  "SE82271C"  "SE82271D"  "SE82271"   "SE82346A" 
##  [691] "SE82346B"  "SE82335"   "SE62091A"  "SE62091B"  "SE62100"   "SE62097"  
##  [697] "SE62101A"  "SE62101B"  "SE62101C"  "SE62101D"  "SE62101"   "SE62266"  
##  [703] "SE62128"   "SE62047A"  "SE62047B"  "SE62047C"  "SE62047"   "SE62042A" 
##  [709] "SE62042B"  "SE62042C"  "SE62042D"  "SE62042"   "SE62250"   "SE62246"  
##  [715] "SE62056"   "SE62235"   "SE62180"   "SE62022A"  "SE62022B"  "SE62022C" 
##  [721] "SE62022D"  "SE62022"   "SE62243A"  "SE62243B"  "SE62243C"  "SE62243D" 
##  [727] "SE62243"   "SE82007"   "SE82045"   "SE82075"   "SE82027"   "SE82080"  
##  [733] "SE82005A"  "SE82005B"  "SE82005C"  "SE82005D"  "SE82005E"  "SE82005"  
##  [739] "SE82170"   "SE82138"   "SE82281"   "SE82225"   "SE82263A"  "SE82263B" 
##  [745] "SE82263"   "SE82307"   "SE82333"   "SE82304"   "SE82721"   "SQ82L01A" 
##  [751] "SQ82L01B"  "SQ82L01C"  "SQ82L01D"  "SQ82L01E"  "SQ82L01F"  "SQ82L01"  
##  [757] "SQ82L02A"  "SQ82L02B"  "SQ82L04A"  "SQ82L04B"  "SQ82L05"   "SQ82L07A" 
##  [763] "SQ82L07B"  "SQ82L08"   "SQ82L09A"  "SQ82L09B"  "SE82077A"  "SE82077B" 
##  [769] "SE82077C"  "SE82077"   "SE82103"   "SE82146"   "SE82246A"  "SE82246B" 
##  [775] "SE82246C"  "SE82246D"  "SE82246"   "SE82331"   "SE62279"   "SE62112"  
##  [781] "SE62119"   "SE62093"   "SE62089"   "SE62006A"  "SE62006B"  "SE62006C" 
##  [787] "SE62006"   "SE62067"   "SE62247"   "SE62177"   "SE62186"   "SE62211A" 
##  [793] "SE62211B"  "SE62036"   "SE62033"   "SE62037"   "SE62242A"  "SE62242B" 
##  [799] "SE62242C"  "SE62242D"  "SE62242E"  "SE62242"   "SE72033A"  "SE72033B" 
##  [805] "SE72033C"  "SE72033D"  "SE72033E"  "SE72033"   "SE72440"   "SE72032"  
##  [811] "SE72031"   "SE72086"   "SE72005"   "SE72048"   "SE72123"   "SE72116"  
##  [817] "SE72920"   "SE72294"   "SE72231"   "SE72261A"  "SE72261B"  "SE72261C" 
##  [823] "SE72261D"  "SE72261E"  "SE72261"   "SE72220"   "SE72348"   "SE72720"  
##  [829] "SE72011"   "SE72905A"  "SE72905B"  "SE72905C"  "SE72905D"  "SE72905"  
##  [835] "SE72049"   "SE72016A"  "SE72016B"  "SE72016"   "SE72451"   "SE72074"  
##  [841] "SE72091"   "SE72109"   "SE72140"   "SE72132"   "SE72209"   "SE72210"  
##  [847] "SE72249"   "SE72323"   "SE72368"   "SE72303"   "SP72002"   "SP72403"  
##  [853] "SP72021"   "SP72082"   "SP72066"   "SP72063"   "SP72102"   "SP72141A" 
##  [859] "SP72141B"  "SP72921"   "SP72234"   "SP72251"   "SP72284"   "SP72345A" 
##  [865] "SP72345B"  "SP72345C"  "SP72345D"  "SP72345E"  "SP72345F"  "SP72345G" 
##  [871] "SP72345"   "SP72349"   "SP72363"   "SP72070"   "SP72400"   "SP72024"  
##  [877] "SP72462"   "SP72443"   "SP72903"   "SP72145"   "SP72100"   "SP72133"  
##  [883] "SP72137"   "SP72298"   "SP72215"   "SP72260"   "SP72265A"  "SP72265B" 
##  [889] "SP72265C"  "SP72265D"  "SP72265E"  "SP72265"   "SP72347"   "SP72351"  
##  [895] "SP72367"   "SP62099"   "SP62095"   "SP62106"   "SP62064"   "SP62132"  
##  [901] "SP62163"   "SP62153"   "SP62018A"  "SP62018B"  "SP62018C"  "SP62018D" 
##  [907] "SP62018E"  "SP62018"   "SP62143"   "SP62276"   "SP62050"   "SP62205"  
##  [913] "SP62190"   "SP62024A"  "SP62024B"  "SP72033"   "SP72440"   "SP72032"  
##  [919] "SP72031"   "SP72086"   "SP72005"   "SP72048"   "SP72123"   "SP72116"  
##  [925] "SP72920"   "SP72294"   "SP72231"   "SP72261A"  "SP72261B"  "SP72261C" 
##  [931] "SP72261D"  "SP72261E"  "SP72261"   "SP72220"   "SP72348"   "SP72720"  
##  [937] "SP62279"   "SP62112"   "SP62119"   "SP62093"   "SP62089"   "SP62006"  
##  [943] "SP62067"   "SP62247"   "SP62177"   "SP62186"   "SP62211A"  "SP62211B" 
##  [949] "SP62036"   "SP62033"   "SP62037"   "SP62242A"  "SP62242B"  "SP62242C" 
##  [955] "SP62242D"  "SP62242E"  "SP62242"   "SP72078"   "SP72460"   "SP72000"  
##  [961] "SP72906A"  "SP72906B"  "SP72906C"  "SP72906D"  "SP72906E"  "SP72906"  
##  [967] "SP72901"   "SP72038"   "SP72120"   "SP72143"   "SP72523"   "SP72168"  
##  [973] "SP72205"   "SP72293"   "SP72280A"  "SP72280B"  "SP72370"   "SP72329"  
##  [979] "SP62091A"  "SP62091B"  "SP62100"   "SP62097"   "SP62101"   "SP62266"  
##  [985] "SP62128"   "SP62047A"  "SP62047B"  "SP62047C"  "SP62047"   "SP62042"  
##  [991] "SP62250"   "SP62246"   "SP62056"   "SP62235"   "SP62180"   "SP62022A" 
##  [997] "SP62022B"  "SP62022C"  "SP62022D"  "SP62022"   "SP62243"   "SP72011"  
## [1003] "SP72905"   "SP72049"   "SP72016A"  "SP72016B"  "SP72016"   "SP72451"  
## [1009] "SP72074"   "SP72091"   "SP72109"   "SP72140"   "SP72132"   "SP72209"  
## [1015] "SP72210"   "SP72249"   "SP72323"   "SP72368"   "SP72303"   "BNRGCAL1" 
## [1021] "BNRGCAL2"  "JKZONE"    "JKREP"     "TOTWGT"    "HOUWGT"    "SENWGT"   
## [1027] "WGTFAC1"   "WGTADJ1"   "WGTFAC2"   "WGTADJ2"   "WGTFAC3"   "WGTADJ3"  
## [1033] "BSMMAT01"  "BSMMAT02"  "BSMMAT03"  "BSMMAT04"  "BSMMAT05"  "BSMNUM01" 
## [1039] "BSMNUM02"  "BSMNUM03"  "BSMNUM04"  "BSMNUM05"  "BSMALG01"  "BSMALG02" 
## [1045] "BSMALG03"  "BSMALG04"  "BSMALG05"  "BSMGEO01"  "BSMGEO02"  "BSMGEO03" 
## [1051] "BSMGEO04"  "BSMGEO05"  "BSMDAT01"  "BSMDAT02"  "BSMDAT03"  "BSMDAT04" 
## [1057] "BSMDAT05"  "BSMKNO01"  "BSMKNO02"  "BSMKNO03"  "BSMKNO04"  "BSMKNO05" 
## [1063] "BSMAPP01"  "BSMAPP02"  "BSMAPP03"  "BSMAPP04"  "BSMAPP05"  "BSMREA01" 
## [1069] "BSMREA02"  "BSMREA03"  "BSMREA04"  "BSMREA05"  "BSSSCI01"  "BSSSCI02" 
## [1075] "BSSSCI03"  "BSSSCI04"  "BSSSCI05"  "BSSBIO01"  "BSSBIO02"  "BSSBIO03" 
## [1081] "BSSBIO04"  "BSSBIO05"  "BSSCHE01"  "BSSCHE02"  "BSSCHE03"  "BSSCHE04" 
## [1087] "BSSCHE05"  "BSSPHY01"  "BSSPHY02"  "BSSPHY03"  "BSSPHY04"  "BSSPHY05" 
## [1093] "BSSEAR01"  "BSSEAR02"  "BSSEAR03"  "BSSEAR04"  "BSSEAR05"  "BSSKNO01" 
## [1099] "BSSKNO02"  "BSSKNO03"  "BSSKNO04"  "BSSKNO05"  "BSSAPP01"  "BSSAPP02" 
## [1105] "BSSAPP03"  "BSSAPP04"  "BSSAPP05"  "BSSREA01"  "BSSREA02"  "BSSREA03" 
## [1111] "BSSREA04"  "BSSREA05"  "BSSENV01"  "BSSENV02"  "BSSENV03"  "BSSENV04" 
## [1117] "BSSENV05"  "BSMIBM01"  "BSMIBM02"  "BSMIBM03"  "BSMIBM04"  "BSMIBM05" 
## [1123] "BSSIBM01"  "BSSIBM02"  "BSSIBM03"  "BSSIBM04"  "BSSIBM05"  "ME82NUM"  
## [1129] "ME82ALG"   "ME82GEO"   "ME82DAT"   "ME82KNO"   "ME82APP"   "ME82REA"  
## [1135] "SE82BIO"   "SE82CHE"   "SE82PHY"   "SE82EAR"   "SE82KNO"   "SE82APP"  
## [1141] "SE82REA"   "SE82ENV"   "SE82NEN"   "VERSION"   "SCOPE"
score_9<-score |> 
  filter(IDBOOK ==9)

#only select the items taht are in score_9
score_9_clear<-score_9 |> 
  select(IDCNTRY, IDPOP, IDGRADER, IDGRADE, ITASSESS, IDBOOK, IDSCHOOL, IDCLASS, IDSTUD, ITSEX, BSDAGE, ITADMINI, ILRELIAB, ITLANG_SA,LCID_SA,   "SE72000", "SE72000A", "SE72000B", "SE72000C", "SE72000D", "SE72000E",
  "SE72038", "SE72078", "SE72120", "SE72143", "SE72143A", "SE72143B",
  "SE72143C", "SE72143D", "SE72168", "SE72205", "SE72280A", "SE72280B",
  "SE72293", "SE72370", "SE72460", "SE72523", "SE72901", "SE82001",
  "SE82001A", "SE82001B", "SE82001C", "SE82001D", "SE82001E", "SE82020",
  "SE82040", "SE82040A", "SE82040B", "SE82040C", "SE82040D", "SE82040E",
  "SE82050", "SE82060", "SE82060A", "SE82060B", "SE82064", "SE82064A",
  "SE82064B", "SE82064C", "SE82064D", "SE82143", "SE82147", "SE82203",
  "SE82223", "SE82224", "SE82233", "SE82264", "SE82327", "SE82327A",
  "SE82327B", "SE82327C", "SE82327D", "SE82340", "SE82340A", "SE82340B",
  "SE82340C", "SE82340D", "SE82340E", "SE72329", "SE72906A", "SE72906B",
  "SE72906C", "SE72906D", "SE72906E", "SE72906")

nrow(score_9_clear)
## [1] 177
vis_miss(score_9_clear) 

#pick only variables that we needed 
score_cleansing<- score_9_clear |> 
  select(IDSTUD, starts_with("SE"))

#make the long form of score_9_clear 
score_cleansing <- score_cleansing %>%
  pivot_longer(-IDSTUD, names_to = "ItemID", values_to = "score") 
## Warning: `SE72000` and `SE72000B` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 3 and 5
## Warning: `SE72000` and `SE72000C` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 5
## Warning: `SE72000` and `SE72000D` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 5 and 6
## Warning: `SE72000` and `SE72000E` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 4 and 5
## Warning: `SE72000` and `SE72038` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE72078` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 10
## Warning: `SE72000` and `SE72120` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE72143` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 10
## Warning: `SE72000` and `SE72143A` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1, 5, and 6
## Warning: `SE72000` and `SE72143B` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 3, 5, and 6
## Warning: `SE72000` and `SE72143C` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 4, 5, and 6
## Warning: `SE72000` and `SE72143D` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2, 5, and 6
## Warning: `SE72000` and `SE72168` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 4 and 6
## Warning: `SE72000` and `SE72205` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 3 and 6
## Warning: `SE72000` and `SE72280A` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 10
## Warning: `SE72000` and `SE72280B` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE72293` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 10
## Warning: `SE72000` and `SE72370` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 3 and 6
## Warning: `SE72000` and `SE72460` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE72901` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE82001` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82001A` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 4 and 5
## Warning: `SE72000` and `SE82001B` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 3 and 5
## Warning: `SE72000` and `SE82001D` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 5
## Warning: `SE72000` and `SE82001E` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 5 and 6
## Warning: `SE72000` and `SE82020` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE82040` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82040A` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82040B` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE82040C` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE82040D` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82040E` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82050` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 3 and 6
## Warning: `SE72000` and `SE82060` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1, 2, and 6
## Warning: `SE72000` and `SE82060A` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82060B` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82064` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82064A` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 4 and 6
## Warning: `SE72000` and `SE82064B` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82064C` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE82064D` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 3 and 6
## Warning: `SE72000` and `SE82143` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 4 and 6
## Warning: `SE72000` and `SE82147` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82203` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 3 and 6
## Warning: `SE72000` and `SE82223` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82224` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 3 and 6
## Warning: `SE72000` and `SE82233` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE82264` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82327` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82327A` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82327B` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82327C` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 3 and 6
## Warning: `SE72000` and `SE82327D` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82340` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82340A` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE82340B` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82340C` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE82340D` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82340E` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE72329` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 4 and 6
## Warning: `SE72000` and `SE72906A` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE72906B` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE72906C` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE72906D` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE72906E` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE72906` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 10
score_cleansing <- score_cleansing |>
  mutate(ItemID = case_when
         (ItemID == "SE72280A" ~ "SE72280",
          TRUE ~ ItemID)) |> 
  filter(!str_ends(ItemID, "[A-E]$")) 

In the case of Korean Studetns, “SE72906” variables are all missing in the score one. So it was eventually deleted and only 29 items are included from booklet 9.

Item Information Data

item_info <- read_csv("/Users/eunjungmyoung/Library/CloudStorage/GoogleDrive-ejmyoung@stanford.edu/My Drive/0.Research/Process Data/TIMSS data/TIMSS_Process_Data/item_information.csv")
## Rows: 70 Columns: 20
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (17): Item ID, Screen ID, Subject, Block, Block Sequence, Secure Status,...
## dbl  (3): Grade, Cycle, Maximum Points
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
item_info <- item_info |> 
  rename("ItemID" = `Item ID`)
unique(item_info$ItemID)
##  [1] "SE72000"  "SE72000A" "SE72000B" "SE72000C" "SE72000D" "SE72000E"
##  [7] "SE72038"  "SE72078"  "SE72120"  "SE72143"  "SE72143A" "SE72143B"
## [13] "SE72143C" "SE72143D" "SE72168"  "SE72205"  "SE72280A" "SE72280B"
## [19] "SE72293"  "SE72329"  "SE72370"  "SE72460"  "SE72523"  "SE72901" 
## [25] "SE72906"  "SE72906A" "SE72906B" "SE72906C" "SE72906D" "SE72906E"
## [31] "SE82001"  "SE82001A" "SE82001B" "SE82001C" "SE82001D" "SE82001E"
## [37] "SE82020"  "SE82040"  "SE82040A" "SE82040B" "SE82040C" "SE82040D"
## [43] "SE82040E" "SE82050"  "SE82060"  "SE82060A" "SE82060B" "SE82064" 
## [49] "SE82064A" "SE82064B" "SE82064C" "SE82064D" "SE82143"  "SE82147" 
## [55] "SE82203"  "SE82223"  "SE82224"  "SE82233"  "SE82264"  "SE82327" 
## [61] "SE82327A" "SE82327B" "SE82327C" "SE82327D" "SE82340"  "SE82340A"
## [67] "SE82340B" "SE82340C" "SE82340D" "SE82340E"
head(item_info)
## # A tibble: 6 × 20
##   ItemID  `Screen ID` Subject Grade Cycle Block `Block Sequence` `Secure Status`
##   <chr>   <chr>       <chr>   <dbl> <dbl> <chr> <chr>            <chr>          
## 1 SE72000 SE72000     S           8     7 B_SE2 03               Secured        
## 2 SE7200… SE72000     S           8     7 B_SE2 03A              Secured        
## 3 SE7200… SE72000     S           8     7 B_SE2 03B              Secured        
## 4 SE7200… SE72000     S           8     7 B_SE2 03C              Secured        
## 5 SE7200… SE72000     S           8     7 B_SE2 03D              Secured        
## 6 SE7200… SE72000     S           8     7 B_SE2 03E              Secured        
## # ℹ 12 more variables: `Content Domain` <chr>, `Topic Area` <chr>, Topic <chr>,
## #   `Cognitive Domain` <chr>, `Cognitive Area` <chr>,
## #   `Environmental Knowledge` <chr>, `Maximum Points` <dbl>, `Item Type` <chr>,
## #   `Response Options` <chr>, Key <chr>, `Scaling Status` <chr>, Label <chr>
#change the value "SE72280A" into "SE72280"
item_info<-item_info |> 
  mutate(ItemID = case_when
         (ItemID == "SE72280A" ~ "SE72280",
          TRUE ~ ItemID))
unique(item_info$ItemID)
##  [1] "SE72000"  "SE72000A" "SE72000B" "SE72000C" "SE72000D" "SE72000E"
##  [7] "SE72038"  "SE72078"  "SE72120"  "SE72143"  "SE72143A" "SE72143B"
## [13] "SE72143C" "SE72143D" "SE72168"  "SE72205"  "SE72280"  "SE72280B"
## [19] "SE72293"  "SE72329"  "SE72370"  "SE72460"  "SE72523"  "SE72901" 
## [25] "SE72906"  "SE72906A" "SE72906B" "SE72906C" "SE72906D" "SE72906E"
## [31] "SE82001"  "SE82001A" "SE82001B" "SE82001C" "SE82001D" "SE82001E"
## [37] "SE82020"  "SE82040"  "SE82040A" "SE82040B" "SE82040C" "SE82040D"
## [43] "SE82040E" "SE82050"  "SE82060"  "SE82060A" "SE82060B" "SE82064" 
## [49] "SE82064A" "SE82064B" "SE82064C" "SE82064D" "SE82143"  "SE82147" 
## [55] "SE82203"  "SE82223"  "SE82224"  "SE82233"  "SE82264"  "SE82327" 
## [61] "SE82327A" "SE82327B" "SE82327C" "SE82327D" "SE82340"  "SE82340A"
## [67] "SE82340B" "SE82340C" "SE82340D" "SE82340E"
#Filter the value that ends with A, B, C, D,E
item_info_clear <- item_info |> 
  filter(!str_ends(ItemID, "[A-E]$")) 

unique(item_info_clear$ItemID)
##  [1] "SE72000" "SE72038" "SE72078" "SE72120" "SE72143" "SE72168" "SE72205"
##  [8] "SE72280" "SE72293" "SE72329" "SE72370" "SE72460" "SE72523" "SE72901"
## [15] "SE72906" "SE82001" "SE82020" "SE82040" "SE82050" "SE82060" "SE82064"
## [22] "SE82143" "SE82147" "SE82203" "SE82223" "SE82224" "SE82233" "SE82264"
## [29] "SE82327" "SE82340"

Clean the item information data: item format/ cognitive domain

colnames(item_info_clear)
##  [1] "ItemID"                  "Screen ID"              
##  [3] "Subject"                 "Grade"                  
##  [5] "Cycle"                   "Block"                  
##  [7] "Block Sequence"          "Secure Status"          
##  [9] "Content Domain"          "Topic Area"             
## [11] "Topic"                   "Cognitive Domain"       
## [13] "Cognitive Area"          "Environmental Knowledge"
## [15] "Maximum Points"          "Item Type"              
## [17] "Response Options"        "Key"                    
## [19] "Scaling Status"          "Label"
#What I need : ItemID, Cognitive Domain, "Item Type"
item_info_cleansing <- item_info_clear |> 
  select(ItemID, `Cognitive Domain`, `Item Type`)

#Recode the Item Type CR = 1, MC = 0
item_info_cleansing <- item_info_cleansing |> 
  mutate(`Item Type` = case_when
         (`Item Type` == "CR" ~ 1,
          `Item Type` == "MC" ~ 0,
          TRUE ~ NA_real_))

#Create new variables of Applying and Reasoning 
item_info_cleansing <- item_info_cleansing |>
  mutate(Applying = case_when(
    `Cognitive Domain` == "Applying" ~ 1,TRUE ~ 0),
    Reasoning = case_when(
      `Cognitive Domain` == "Reasoning" ~ 1,TRUE ~ 0)) |> 
  select(-`Cognitive Domain`)

Rubric Data

rubric <-read_csv("/Users/eunjungmyoung/Library/CloudStorage/GoogleDrive-ejmyoung@stanford.edu/My Drive/0.Research/Process Data/TIMSS data/TIMSS_Process_Data/rubric.csv")
## Rows: 70 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Variable, Value Scheme Detailed, Correct Answer, Missing Scheme Det...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
rubric<-rubric |> 
  rename("ItemID" = Variable,
         "Correct Answer" = `Correct Answer`) |> 
  select(ItemID, `Correct Answer`)

#delete the variables with A, B, C, D, E
rubric <- rubric |> 
  mutate(ItemID = case_when
         (ItemID == "SE72280A" ~ "SE72280",
          TRUE ~ ItemID)) |> 
  filter(!str_ends(ItemID, "[A-E]$"))

unique(rubric$ItemID)
##  [1] "SE72000" "SE72038" "SE72078" "SE72120" "SE72143" "SE72168" "SE72205"
##  [8] "SE72280" "SE72293" "SE72370" "SE72460" "SE72523" "SE72901" "SE82001"
## [15] "SE82020" "SE82040" "SE82050" "SE82060" "SE82064" "SE82143" "SE82147"
## [22] "SE82203" "SE82223" "SE82224" "SE82233" "SE82264" "SE82327" "SE82340"
## [29] "SE72329" "SE72906"
#merge the score_cleansing with the rubric with "Item ID")
recode_response <- score_cleansing |> 
  full_join(rubric, by = c("ItemID" = "ItemID")) 
#change the score and `Correct Answer` to numeric
recode_response <- recode_response |> 
  mutate(score = as.numeric(score),
         `Correct Answer` = as.numeric(`Correct Answer`))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Correct Answer = as.numeric(`Correct Answer`)`.
## Caused by warning:
## ! NAs introduced by coercion
#mutate the new variable : "Recoded Score" as 2 when the "score" aznd the `Correct Answer` value is identical, otherwise 0
recode_response <- recode_response |> 
  mutate(`Recoded Score` = ifelse(score == `Correct Answer`, 2, 0)) 

colnames(recode_response)
## [1] "IDSTUD"         "ItemID"         "score"          "Correct Answer"
## [5] "Recoded Score"
#manually code the partially correct response 
#SE72078 , #SE72523 #SE82060
#SE72078 : score 10, 11 ~ 2, if else ~ 0, 
#SE72523 : score 10 ~ 1, score 20 ~ 2, ifelse ~ 0,
#SE82060 : score 0 ~ 0, score 1 ~ 1, score 2 ~2
recode_response<- recode_response |> 
  mutate(`Recoded Score` = case_when
         (ItemID == "SE72078" & score %in% c(10,11) ~ 2,
          ItemID == "SE72078" & !(score %in% c(10,11)) ~ 0,
          ItemID == "SE72523" & score == 10 ~ 1,
          ItemID == "SE72523" & score == 20 ~ 2,
          ItemID == "SE72523" & !(score %in% c(10,20)) ~ 0,
          ItemID == "SE82060" & score == 0 ~ 0,
          ItemID == "SE82060" & score == 1 ~ 1,
          ItemID == "SE82060" & score == 2 ~ 2,
          TRUE ~ `Recoded Score`)) |> 
  select(-`Correct Answer`)
head(recode_response)
## # A tibble: 6 × 4
##   IDSTUD    ItemID  score `Recoded Score`
##   <dbl+lbl> <chr>   <dbl>           <dbl>
## 1 50010526  SE72000    79               0
## 2 50010526  SE72038     1               2
## 3 50010526  SE72078    10               2
## 4 50010526  SE72120     4               0
## 5 50010526  SE72143    10               2
## 6 50010526  SE72168     4               2
nrow(recode_response) #177 students with 64 items 
## [1] 5310
table(recode_response$`Recoded Score`)
## 
##    0    1    2 
## 1697   80 3356
Final_grade<-recode_response |> 
  select(IDSTUD, ItemID, `Recoded Score`) |>
  rename("Grade" = `Recoded Score`)

Calculate Item Difficulty following the CTT

#Classical Test Theory (CTT) approach;  Treat difficulty as the mean proportion score (average score ÷ maximum possible score). Example: Suppose 100 students answer an item scored 0–2: 40 students scored 0 / 30 students scored 1 / 30 students scored 2 , Average score = (40×0 + 30×1 + 30×2) / 100 = 90 / 100 = 0.90 // Maximum possible score = 2 // Difficulty index = 0.90 / 2 = 0.45 ->  Interpretation: On average, students scored 45% of the maximum possible score. The closer to 1.0, the “easier” the item.
item_difficulty <- Final_grade |> 
  group_by(ItemID) |> 
  summarise(Average_Score = mean(Grade, na.rm = TRUE),
            Max_Possible_Score = 2,
            Difficulty = Average_Score / Max_Possible_Score) |> 
  arrange(Difficulty) |> 
  select(ItemID, Difficulty) |> 
  #round digits for two
  mutate(Difficulty = round(Difficulty, 2))

Analysis

1. Trial with F

Merging all the data to create Final_F dataset

#Check the "Booklet with F"
booklet_9_F <- booklet_9 %>%
  select(IDSTUD, ends_with("_F"))

#pivot longer the booklet_9_F to have "IDSTUD", "ItemID", "F", the IDSTUD should be maintained
booklet_9_F_long <- booklet_9_F %>%
  group_by(IDSTUD) %>%
  pivot_longer(-IDSTUD, names_to = "ItemID", values_to = "F") 

#get rid of the _F from the ItemID
booklet_9_F_long <- booklet_9_F_long %>%
  mutate(ItemID = str_remove(ItemID, "_F$"))
nrow(booklet_9_F_long) # 176 students with the 30 items 
## [1] 5280
unique(booklet_9_F_long$ItemID)
##  [1] "SE82020" "SE82064" "SE82001" "SE82050" "SE82040" "SE82060" "SE82143"
##  [8] "SE82147" "SE82224" "SE82203" "SE82223" "SE82264" "SE82340" "SE82327"
## [15] "SE82233" "SE72078" "SE72460" "SE72000" "SE72906" "SE72901" "SE72038"
## [22] "SE72120" "SE72143" "SE72523" "SE72168" "SE72205" "SE72293" "SE72280"
## [29] "SE72370" "SE72329"
#Merge the "Booklet_9_F_long" with "item_info_cleansing" with "ItemID"
item_info_F <- booklet_9_F_long |> 
  full_join(item_info_cleansing, by = c("ItemID" = "ItemID"))

colnames(item_info_F)
## [1] "IDSTUD"    "ItemID"    "F"         "Item Type" "Applying"  "Reasoning"
#Merge the Final grade with item_info_F with "IDSTUD" and "ItemID"
item_F_grade <- item_info_F |> 
  full_join(Final_grade, by = c("IDSTUD" = "IDSTUD", "ItemID" = "ItemID"))

colnames(item_F_grade)
## [1] "IDSTUD"    "ItemID"    "F"         "Item Type" "Applying"  "Reasoning"
## [7] "Grade"
#Merge the item difficulty
head(item_difficulty)
## # A tibble: 6 × 2
##   ItemID  Difficulty
##   <chr>        <dbl>
## 1 SE72000       0.24
## 2 SE72038       0.46
## 3 SE72120       0.46
## 4 SE82233       0.46
## 5 SE72280       0.49
## 6 SE72901       0.49
Final_F <- item_F_grade |> 
  left_join(item_difficulty, by = c("ItemID" = "ItemID"))

head(Final_F)
## # A tibble: 6 × 8
## # Groups:   IDSTUD [1]
##   IDSTUD    ItemID  F         `Item Type` Applying Reasoning Grade Difficulty
##   <dbl+lbl> <chr>   <dbl+lbl>       <dbl>    <dbl>     <dbl> <dbl>      <dbl>
## 1 50010526  SE82020 1                   0        0         0     2       0.89
## 2 50010526  SE82064 1                   1        1         0     2       0.81
## 3 50010526  SE82001 1                   1        0         0     2       0.74
## 4 50010526  SE82050 1                   0        0         0     2       0.77
## 5 50010526  SE82040 1                   1        1         0     2       0.59
## 6 50010526  SE82060 1                   1        0         1     2       0.73
#delete the missing
Final_F <- Final_F |> 
  filter(!is.na(F) & !is.na(Grade) & !is.na(Difficulty))
#number of IDSTUD
n_distinct(Final_F$IDSTUD) # 176 students
## [1] 176
#number of the itemID
n_distinct(Final_F$ItemID) # 29 items -> 2906 is missing
## [1] 29
#save the data
write_csv(Final_F, "Final_F.csv")

Analysis with Negative Binomial GLMM

# install.packages(c("glmmTMB","performance","emmeans","broom.mixed"))
library(glmmTMB)
## Warning: package 'glmmTMB' was built under R version 4.3.3
library(performance)
## Warning: package 'performance' was built under R version 4.3.3
library(emmeans)
## Warning: package 'emmeans' was built under R version 4.3.3
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(broom.mixed)
## Warning: package 'broom.mixed' was built under R version 4.3.3
## Registered S3 method overwritten by 'future':
##   method               from      
##   all.equal.connection parallelly
library(clubSandwich)
## Warning: package 'clubSandwich' was built under R version 4.3.3
## Registered S3 method overwritten by 'clubSandwich':
##   method    from    
##   bread.mlm sandwich
library(DHARMa)
## Warning: package 'DHARMa' was built under R version 4.3.3
## This is DHARMa 0.4.7. For overview type '?DHARMa'. For recent changes, type news(package = 'DHARMa')
# install.packages("glmmTMB")
# install.packages("performance")
# install.packages("emmeans")
# install.packages("broom.mixed")
# install.packages("clubSandwich")
# install.packages("DHARMa")

df<-Final_F |> 
  rename("Type" = `Item Type`) |> 
  rename("Frequency" = F)
# Expected columns:
# IDSTUD, ItemID, F, Type, Applying, Reasoning, Grade, Difficulty
# - F: count outcome (1..9)
# - Type: 0=MC, 1=CR
# - Applying, Reasoning: 0/1 dummies (Knowing is reference when both 0)
# - Grade: 0/1/2 (control)
# - Difficulty: CTT p-correct (closer to 1 = easier)

# Factors for random effects
df$IDSTUD <- factor(df$IDSTUD)
df$ItemID <- factor(df$ItemID)

# Make a "hardness" score so larger = harder (and scale it)
df$diff_hard <- 1 - pmin(pmax(df$Difficulty, 1e-6), 1-1e-6)

# Ensure predictors are numeric 0/1; Grade as numeric (or factor if you prefer nonlinearity)
df$Type      <- as.numeric(df$Type)
df$Applying  <- as.numeric(df$Applying)
df$Reasoning <- as.numeric(df$Reasoning)
df$Grade     <- as.numeric(df$Grade)
df$Frequency <- as.integer(df$Frequency)
df$diff_hard <- as.numeric(df$diff_hard)

Model 1 to 6

# Model 1d: Type only (+ difficulty)
m1d <- glmmTMB(
  Frequency ~ Type + diff_hard + (1|IDSTUD) + (1|ItemID),
  data = df, family = nbinom2())
summary(m1d)
##  Family: nbinom2  ( log )
## Formula:          Frequency ~ Type + diff_hard + (1 | IDSTUD) + (1 | ItemID)
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##   13051.4   13090.7   -6519.7   13039.4      5098 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance  Std.Dev. 
##  IDSTUD (Intercept) 1.405e-01 3.748e-01
##  ItemID (Intercept) 2.225e-09 4.717e-05
## Number of obs: 5104, groups:  IDSTUD, 176; ItemID, 29
## 
## Dispersion parameter for nbinom2 family (): 2.93e+08 
## 
## Conditional model:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.355266   0.039686   8.952   <2e-16 ***
## Type        -0.003896   0.022506  -0.173    0.863    
## diff_hard    0.060518   0.070374   0.860    0.390    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Model 2d: + Grade (control)
m2d <- glmmTMB(
  Frequency ~ Type + Grade + diff_hard + (1|IDSTUD) + (1|ItemID),
  data = df, family = nbinom2())
## Warning in finalizeTMB(TMBStruc, obj, fit, h, data.tmb.old): Model convergence
## problem; false convergence (8). See vignette('troubleshooting'),
## help('diagnose')
summary(m2d)
##  Family: nbinom2  ( log )
## Formula:          
## Frequency ~ Type + Grade + diff_hard + (1 | IDSTUD) + (1 | ItemID)
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##   13053.0   13098.8   -6519.5   13039.0      5097 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance  Std.Dev. 
##  IDSTUD (Intercept) 1.395e-01 3.735e-01
##  ItemID (Intercept) 1.118e-09 3.344e-05
## Number of obs: 5104, groups:  IDSTUD, 176; ItemID, 29
## 
## Dispersion parameter for nbinom2 family (): 5.14e+07 
## 
## Conditional model:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.336386   0.048767   6.898 5.28e-12 ***
## Type        -0.003948   0.022506  -0.175    0.861    
## Grade        0.009491   0.014284   0.664    0.506    
## diff_hard    0.079178   0.075749   1.045    0.296    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Model 3d: Domain only (+ difficulty)
m3d <- glmmTMB(
  Frequency ~ Applying + Reasoning + diff_hard + (1|IDSTUD) + (1|ItemID),data = df, family = nbinom2())
## Warning in finalizeTMB(TMBStruc, obj, fit, h, data.tmb.old): Model convergence
## problem; non-positive-definite Hessian matrix. See vignette('troubleshooting')
# Model 4d: Domain + Grade (+ difficulty)
m4d <- glmmTMB(
  Frequency ~ Applying + Reasoning + Grade + diff_hard + (1|IDSTUD) + (1|ItemID), data = df, family = nbinom2())

# Model 5d: Type × Domain (+ difficulty)
m5d <- glmmTMB(
  Frequency ~ Type * Applying + Type * Reasoning + diff_hard +
       (1|IDSTUD) + (1|ItemID), data = df, family = nbinom2())
## Warning in finalizeTMB(TMBStruc, obj, fit, h, data.tmb.old): Model convergence
## problem; non-positive-definite Hessian matrix. See vignette('troubleshooting')
## Warning in finalizeTMB(TMBStruc, obj, fit, h, data.tmb.old): Model convergence
## problem; false convergence (8). See vignette('troubleshooting'),
## help('diagnose')
# Model 6d: Full (Type × Domain) + Grade (+ difficulty)
# You can also add interactions with difficulty if desired (see commented lines).
m6d <- glmmTMB(
  Frequency ~ Type * Applying + Type * Reasoning + Grade + diff_hard +
       # Type:diff_hard + Applying:diff_hard + Reasoning:diff_hard +
       (1|IDSTUD) + (1|ItemID),
  data = df, family = nbinom2())
## Warning in finalizeTMB(TMBStruc, obj, fit, h, data.tmb.old): Model convergence
## problem; non-positive-definite Hessian matrix. See vignette('troubleshooting')
# ---- 3) Model checks & summaries ----
# Compare fit
AIC(m1d, m2d, m3d, m4d, m5d, m6d)
##     df      AIC
## m1d  6 13051.43
## m2d  7 13052.99
## m3d  7       NA
## m4d  8 13054.12
## m5d 10       NA
## m6d 11       NA
summary(m6d)
##  Family: nbinom2  ( log )
## Formula:          
## Frequency ~ Type * Applying + Type * Reasoning + Grade + diff_hard +  
##     (1 | IDSTUD) + (1 | ItemID)
## Data: df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##        NA        NA        NA        NA      5093 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance  Std.Dev.
##  IDSTUD (Intercept) 1.395e-01 0.373520
##  ItemID (Intercept) 1.717e-08 0.000131
## Number of obs: 5104, groups:  IDSTUD, 176; ItemID, 29
## 
## Dispersion parameter for nbinom2 family (): 2.04e+07 
## 
## Conditional model:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     0.328883   0.053652   6.130 8.79e-10 ***
## Type           -0.026435   0.037725  -0.701    0.483    
## Applying        0.018699   0.033523   0.558    0.577    
## Reasoning      -0.026547   0.065537  -0.405    0.685    
## Grade           0.009421   0.014291   0.659    0.510    
## diff_hard       0.084973   0.084210   1.009    0.313    
## Type:Applying   0.016302   0.056678   0.288    0.774    
## Type:Reasoning  0.069377   0.076713   0.904    0.366    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
performance::check_overdispersion(m6d)
## # Overdispersion test
## 
##  dispersion ratio =   0.489
##           p-value = < 0.001
## Underdispersion detected.
performance::check_convergence(m6d)
## [1] FALSE
# # Rate ratios (exp of log-coefs)
# exp(coef(summary(m6d))$cond[, "Estimate"])
# exp(confint(m6d, parm = "cond", method = "Wald"))

# Marginal means (helpful for interactions)
emmeans(m6d, ~ Type | Applying + Reasoning, type = "response")
## Applying = 0, Reasoning = 0:
##  Type response     SE  df asymp.LCL asymp.UCL
##     0     1.45 0.0516 Inf      1.35      1.55
##     1     1.41 0.0593 Inf      1.30      1.53
## 
## Applying = 1, Reasoning = 0:
##  Type response     SE  df asymp.LCL asymp.UCL
##     0     1.48 0.0564 Inf      1.37      1.59
##     1     1.46 0.0588 Inf      1.35      1.58
## 
## Applying = 0, Reasoning = 1:
##  Type response     SE  df asymp.LCL asymp.UCL
##     0     1.41 0.0965 Inf      1.23      1.61
##     1     1.47 0.0576 Inf      1.36      1.59
## 
## Applying = 1, Reasoning = 1:
##  Type response     SE  df asymp.LCL asymp.UCL
##     0     1.44 0.1070 Inf      1.24      1.66
##     1     1.52 0.0880 Inf      1.36      1.71
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the log scale

Trial with S

#Check the "Booklet with F"
booklet_9_S <- booklet_9 %>%
  select(IDSTUD, ends_with("_S"))

#pivot longer the booklet_9_F to have "IDSTUD", "ItemID", "F", the IDSTUD should be maintained
booklet_9_S_long <- booklet_9_S %>%
  group_by(IDSTUD) %>%
  pivot_longer(-IDSTUD, names_to = "ItemID", values_to = "S") 

#get rid of the _F from the ItemID
booklet_9_S_long <- booklet_9_S_long %>%
  mutate(ItemID = str_remove(ItemID, "_S$"))
nrow(booklet_9_S_long) # 176 students with the 30 items 
## [1] 5280
#Merge the "Booklet_9_S_long" with "item_info_cleansing" with "ItemID"
item_info_S <- booklet_9_S_long |> 
  full_join(item_info_cleansing, by = c("ItemID" = "ItemID"))

colnames(item_info_S)
## [1] "IDSTUD"    "ItemID"    "S"         "Item Type" "Applying"  "Reasoning"
#Merge the Final grade with item_info_F with "IDSTUD" and "ItemID"
item_S_grade <- item_info_S |> 
  full_join(Final_grade, by = c("IDSTUD" = "IDSTUD", "ItemID" = "ItemID"))

colnames(item_S_grade)
## [1] "IDSTUD"    "ItemID"    "S"         "Item Type" "Applying"  "Reasoning"
## [7] "Grade"
#Merge the item difficulty
head(item_difficulty)
## # A tibble: 6 × 2
##   ItemID  Difficulty
##   <chr>        <dbl>
## 1 SE72000       0.24
## 2 SE72038       0.46
## 3 SE72120       0.46
## 4 SE82233       0.46
## 5 SE72280       0.49
## 6 SE72901       0.49
Final_S <- item_S_grade |> 
  left_join(item_difficulty, by = c("ItemID" = "ItemID"))
head(Final_S)
## # A tibble: 6 × 8
## # Groups:   IDSTUD [1]
##   IDSTUD    ItemID  S         `Item Type` Applying Reasoning Grade Difficulty
##   <dbl+lbl> <chr>   <dbl+lbl>       <dbl>    <dbl>     <dbl> <dbl>      <dbl>
## 1 50010526  SE82020 24.0                0        0         0     2       0.89
## 2 50010526  SE82064 16.4                1        1         0     2       0.81
## 3 50010526  SE82001 64.5                1        0         0     2       0.74
## 4 50010526  SE82050 21.2                0        0         0     2       0.77
## 5 50010526  SE82040 83.1                1        1         0     2       0.59
## 6 50010526  SE82060 82.0                1        0         1     2       0.73
#delete the missing
Final_S <- Final_S |> 
  filter(!is.na(S) & !is.na(Grade) & !is.na(Difficulty))
#number of IDSTUD
n_distinct(Final_S$IDSTUD) # 176 students
## [1] 176
#number of the itemID
n_distinct(Final_S$ItemID) # 29 items -> 2906 is missing
## [1] 29
#save the data
write_csv(Final_S, "Final_S.csv")

##Run the analysis

# ----- Load & prep (Time) -----
dfS <- read.csv("Final_S.csv", stringsAsFactors = FALSE)

dfS<-dfS |> 
  rename("Time" = S) |> 
  rename("Type" = `Item.Type`)

  dif <- pmin(pmax(dfS$Difficulty, 1e-6), 1 - 1e-6)
  hard <- 1 - dif                         # complement (simple, stable)
  # Or, if you prefer: hard <- -qlogis(dif)  # logit scale (often nice)

  dfS$diff_hard   <- as.numeric(hard)
  
# ----- Model set (Gamma log) -----
# S1d: Type + difficulty
s1d_S <- glmmTMB(
  Time ~ Type + diff_hard + (1|IDSTUD) + (1|ItemID),
  data = dfS, family = Gamma(link = "log"))

# S2d: + Grade
s2d_S <- glmmTMB(
  Time ~ Type + Grade + diff_hard + (1|IDSTUD) + (1|ItemID),
  data = dfS, family = Gamma(link = "log"))

# S3d: Domain dummies + difficulty
s3d_S <- glmmTMB(
  Time ~ Applying + Reasoning + diff_hard + (1|IDSTUD) + (1|ItemID),
  data = dfS, family = Gamma(link = "log"))

# S4d: Domain dummies + Grade + difficulty
s4d_S <- glmmTMB(
  Time ~ Applying + Reasoning + Grade + diff_hard+ (1|IDSTUD) + (1|ItemID),
  data = dfS, family = Gamma(link = "log"))

# S5d: Type × Domain (dummies) + difficulty
s5d_S <- glmmTMB(
  Time ~ Type*Applying + Type*Reasoning + diff_hard +
         (1|IDSTUD) + (1|ItemID),
  data = dfS, family = Gamma(link = "log"))

# S6d: Full (Type × Domain dummies) + Grade + difficulty
s6d_S <- glmmTMB(
  Time ~ Type*Applying + Type*Reasoning + Grade + diff_hard +
         (1|IDSTUD) + (1|ItemID),
  data = dfS, family = Gamma(link = "log"))

# ----- Model comparison & key outputs -----
AIC(s1d_S, s2d_S, s3d_S, s4d_S, s5d_S, s6d_S)
##       df      AIC
## s1d_S  6 42578.92
## s2d_S  7 42560.02
## s3d_S  7 42583.35
## s4d_S  8 42564.40
## s5d_S 10 42574.85
## s6d_S 11 42555.97
summary(s6d_S)
##  Family: Gamma  ( log )
## Formula:          
## Time ~ Type * Applying + Type * Reasoning + Grade + diff_hard +  
##     (1 | IDSTUD) + (1 | ItemID)
## Data: dfS
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##   42556.0   42627.9  -21267.0   42534.0      5093 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  IDSTUD (Intercept) 0.14311  0.3783  
##  ItemID (Intercept) 0.08609  0.2934  
## Number of obs: 5104, groups:  IDSTUD, 176; ItemID, 29
## 
## Dispersion estimate for Gamma family (sigma^2): 0.247 
## 
## Conditional model:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     2.774622   0.176898  15.685  < 2e-16 ***
## Type            0.437307   0.183302   2.386   0.0170 *  
## Applying        0.218589   0.165196   1.323   0.1858    
## Reasoning       0.641947   0.319687   2.008   0.0446 *  
## Grade          -0.039092   0.008566  -4.563 5.03e-06 ***
## diff_hard       0.872051   0.392509   2.222   0.0263 *  
## Type:Applying   0.179184   0.277826   0.645   0.5190    
## Type:Reasoning -0.016905   0.374256  -0.045   0.9640    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
performance::check_convergence(s6d_S)
## [1] TRUE
# Time ratios (= multiplicative effect on expected time)
# exp(coef(summary(s6d_S))$cond[, "Estimate"])
# exp(confint(s6d_S, parm = "cond", method = "Wald"))

# Marginal means
emmeans(s6d_S, ~ Type | Applying + Reasoning, type = "response")
## Applying = 0, Reasoning = 0:
##  Type response    SE  df asymp.LCL asymp.UCL
##     0     20.5  2.22 Inf      16.5      25.3
##     1     31.7  4.82 Inf      23.5      42.7
## 
## Applying = 1, Reasoning = 0:
##  Type response    SE  df asymp.LCL asymp.UCL
##     0     25.5  3.29 Inf      19.8      32.8
##     1     47.2  6.75 Inf      35.6      62.4
## 
## Applying = 0, Reasoning = 1:
##  Type response    SE  df asymp.LCL asymp.UCL
##     0     38.9 11.80 Inf      21.4      70.6
##     1     59.2  8.02 Inf      45.4      77.2
## 
## Applying = 1, Reasoning = 1:
##  Type response    SE  df asymp.LCL asymp.UCL
##     0     48.4 16.40 Inf      24.9      93.9
##     1     88.1 21.80 Inf      54.2     143.2
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the log scale

Descriptive Statistics

#Number of the Item Type, Number of Knowing, Applying, Knowing,> 30 items 
des<-item_info_cleansing |> 
  summarise(Total_items = n(),
            MC_items = sum(`Item Type` == 0),
            CR_items = sum(`Item Type` == 1),
            Knowing_items = sum(Applying == 0 & Reasoning == 0),
            Applying_items = sum(Applying == 1),
            Reasoning_items = sum(Reasoning == 1)) |> 
  gt()

des
Total_items MC_items CR_items Knowing_items Applying_items Reasoning_items
30 15 15 12 11 7
# Item Format, Cognitive Domain, and their difficulty
item_summmary<- item_info_cleansing |> 
  left_join(item_difficulty, by = c("ItemID" = "ItemID")) |> 
  group_by(`Item Type`, Applying, Reasoning) |> 
  summarise(Average_Difficulty = mean(Difficulty, na.rm = TRUE),
            Count = n()) |> 
  ungroup() |> 
  mutate(`Item Type` = case_when
         (`Item Type` == 0 ~ "MC",
          `Item Type` == 1 ~ "CR",
          TRUE ~ NA_character_)) |>
  mutate("Cognitive Domain" = case_when(
    Applying == 1 ~ "Applying",
    Reasoning == 1 ~ "Reasoning",
    Applying == 0 & Reasoning == 0 ~ "Knowing",
    TRUE ~ NA_character_)) |> 
    select(-Applying, -Reasoning)
## `summarise()` has grouped output by 'Item Type', 'Applying'. You can override
## using the `.groups` argument.
item_summmary
## # A tibble: 6 × 4
##   `Item Type` Average_Difficulty Count `Cognitive Domain`
##   <chr>                    <dbl> <int> <chr>             
## 1 MC                       0.646     8 Knowing           
## 2 MC                       0.49      1 Reasoning         
## 3 MC                       0.753     6 Applying          
## 4 CR                       0.718     4 Knowing           
## 5 CR                       0.682     6 Reasoning         
## 6 CR                       0.542     5 Applying
#Join the average_F and average_S with the item_summmary
average_F <- Final_F |> 
  group_by(`Item Type`, Applying, Reasoning) |> 
  summarise(Average_F = mean(F, na.rm = TRUE)) |> 
  mutate(`Item Type` = case_when
         (`Item Type` == 0 ~ "MC",
          `Item Type` == 1 ~ "CR",
          TRUE ~ NA_character_)) |>
  ungroup() |> 
  mutate("Cognitive Domain" = case_when(
    Applying == 1 ~ "Applying",
    Reasoning == 1 ~ "Reasoning",
    Applying == 0 & Reasoning == 0 ~ "Knowing",
    TRUE ~ NA_character_)) |> 
  select(-Applying, -Reasoning)
## `summarise()` has grouped output by 'Item Type', 'Applying'. You can override
## using the `.groups` argument.
average_S <- Final_S |>
  group_by(`Item Type`, Applying, Reasoning) |> 
  summarise(Average_S = mean(S, na.rm = TRUE)) |> 
  mutate(`Item Type` = case_when
         (`Item Type` == 0 ~ "MC",
          `Item Type` == 1 ~ "CR",
          TRUE ~ NA_character_)) |>
  ungroup() |>
  mutate("Cognitive Domain" = case_when(
    Applying == 1 ~ "Applying",
    Reasoning == 1 ~ "Reasoning",
    Applying == 0 & Reasoning == 0 ~ "Knowing",
    TRUE ~ NA_character_)) |> 
  select(-Applying, -Reasoning)
## `summarise()` has grouped output by 'Item Type', 'Applying'. You can override
## using the `.groups` argument.
item_summmary <- item_summmary |>
  left_join(average_F, by = c("Item Type" = "Item Type", "Cognitive Domain" = "Cognitive Domain")) |> 
  left_join(average_S, by = c("Item Type" = "Item Type", "Cognitive Domain" = "Cognitive Domain")) |> 
  mutate(Average_Difficulty = round(Average_Difficulty, 2)) |> 
  mutate(Average_F = round(Average_F, 2)) |>
  mutate(Average_S = round(Average_S, 2)) |>
  gt()

item_summmary
Item Type Average_Difficulty Count Cognitive Domain Average_F Average_S
MC 0.65 8 Knowing 1.57 22.76
MC 0.49 1 Reasoning 1.54 49.62
MC 0.75 6 Applying 1.58 27.21
CR 0.72 4 Knowing 1.52 34.61
CR 0.68 6 Reasoning 1.59 64.12
CR 0.54 5 Applying 1.59 59.95