library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(readr)
library(stringr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(haven)
library(naniar)
library(tibble)
## Warning: package 'tibble' was built under R version 4.3.3
library(gt)
## Warning: package 'gt' was built under R version 4.3.3
BSPKORM8 : Process Data
load("~/Library/CloudStorage/GoogleDrive-ejmyoung@stanford.edu/My Drive/0.Research/Process Data/TIMSS data/TIMSS2023_IDB_R_G8/2_Data Files/R Data/bspkorm8.rdata")
data<-BSPKORM8
data_booklet<-data |>
select(IDSTUD, ITPARTPT1, ITPARTPT2, FLAGPT1, FLAGPT2)
#check the values
# table(data_booklet$ITPARTPT1)
# table(data_booklet$ITPARTPT2)
# table(data_booklet$FLAGPT1)
# table(data_booklet$FLAGPT2)
#data understanding
## FLAGPT : 0: No issues; 1: Deleted due to technical issue; 2: Deleted due to outlier
## ITPARTPT :1: Yes; 2: No
## Delete the ITPARTP 2 and FLAGPT 1/2 is appropriate.
data<-data |>
filter(ITPARTPT1 == 1 & ITPARTPT2 ==1
& FLAGPT1 == 0 & FLAGPT2 == 0)
table(data$ITPARTPT1)
##
## 1
## 4231
table(data$ITPARTPT2)
##
## 1
## 4231
table(data$FLAGPT1)
##
## 0
## 4231
table(data$FLAGPT2)
##
## 0
## 4231
data_science<-data |>
select(!starts_with("M")) |>
select(!SENWGT) |>
select(!SCOPE)
ncol(data_science)
## [1] 744
colnames(data_science)
## [1] "CTY" "IDCNTRY" "IDPOP" "IDGRADER" "IDGRADE" "ITASSESS"
## [7] "IDBOOK" "IDSCHOOL" "IDCLASS" "IDSTUD" "ITSEX" "BSDAGE"
## [13] "ITADMINI" "ILRELIAB" "ITLANG_SA" "LCID_SA" "ITPARTPT1" "ITPARTPT2"
## [19] "FLAGPT1" "FLAGPT2" "SE72002_F" "SE72002_R" "SE72002_S" "SE72403_F"
## [25] "SE72403_R" "SE72403_S" "SE72021_F" "SE72021_R" "SE72021_S" "SE72082_F"
## [31] "SE72082_R" "SE72082_S" "SE72066_F" "SE72066_R" "SE72066_S" "SE72063_F"
## [37] "SE72063_R" "SE72063_S" "SE72102_F" "SE72102_R" "SE72102_S" "SE72141_F"
## [43] "SE72141_R" "SE72141_S" "SE72921_F" "SE72921_R" "SE72921_S" "SE72234_F"
## [49] "SE72234_R" "SE72234_S" "SE72251_F" "SE72251_R" "SE72251_S" "SE72284_F"
## [55] "SE72284_R" "SE72284_S" "SE72345_F" "SE72345_R" "SE72345_S" "SE72349_F"
## [61] "SE72349_R" "SE72349_S" "SE72363_F" "SE72363_R" "SE72363_S" "SE62099_F"
## [67] "SE62099_R" "SE62099_S" "SE62095_F" "SE62095_R" "SE62095_S" "SE62106_F"
## [73] "SE62106_R" "SE62106_S" "SE62064_F" "SE62064_R" "SE62064_S" "SE62132_F"
## [79] "SE62132_R" "SE62132_S" "SE62163_F" "SE62163_R" "SE62163_S" "SE62153_F"
## [85] "SE62153_R" "SE62153_S" "SE62018_F" "SE62018_R" "SE62018_S" "SE62143_F"
## [91] "SE62143_R" "SE62143_S" "SE62276_F" "SE62276_R" "SE62276_S" "SE62050_F"
## [97] "SE62050_R" "SE62050_S" "SE62205_F" "SE62205_R" "SE62205_S" "SE62190_F"
## [103] "SE62190_R" "SE62190_S" "SE62024_F" "SE62024_R" "SE62024_S" "SE82902_F"
## [109] "SE82902_R" "SE82902_S" "SE82025_F" "SE82025_R" "SE82025_S" "SE82273_F"
## [115] "SE82273_R" "SE82273_S" "SE82131_F" "SE82131_R" "SE82131_S" "SE82171_F"
## [121] "SE82171_R" "SE82171_S" "SE82250_F" "SE82250_R" "SE82250_S" "SE82275_F"
## [127] "SE82275_R" "SE82275_S" "SE82343_F" "SE82343_R" "SE82343_S" "SE82347_F"
## [133] "SE82347_R" "SE82347_S" "S82T01_F" "S82T01_R" "S82T01_S" "S82T02_F"
## [139] "S82T02_R" "S82T02_S" "S82T03_F" "S82T03_R" "S82T03_S" "S82T04_F"
## [145] "S82T04_R" "S82T04_S" "S82T05_F" "S82T05_R" "S82T05_S" "S82T06_F"
## [151] "S82T06_R" "S82T06_S" "S72S01_F" "S72S01_R" "S72S01_S" "S72S02_F"
## [157] "S72S02_R" "S72S02_S" "S72S03_F" "S72S03_R" "S72S03_S" "S72S04_F"
## [163] "S72S04_R" "S72S04_S" "S72S05_F" "S72S05_R" "S72S05_S" "S72S06_F"
## [169] "S72S06_R" "S72S06_S" "S72S07_F" "S72S07_R" "S72S07_S" "S72S08_F"
## [175] "S72S08_R" "S72S08_S" "S72S09_F" "S72S09_R" "S72S09_S" "S72S10_F"
## [181] "S72S10_R" "S72S10_S" "S72S11_F" "S72S11_R" "S72S11_S" "S72S12_F"
## [187] "S72S12_R" "S72S12_S" "S72S13_F" "S72S13_R" "S72S13_S" "S72S14_F"
## [193] "S72S14_R" "S72S14_S" "SE82092_F" "SE82092_R" "SE82092_S" "SE82068_F"
## [199] "SE82068_R" "SE82068_S" "SE82008_F" "SE82008_R" "SE82008_S" "SE82021_F"
## [205] "SE82021_R" "SE82021_S" "SE82071_F" "SE82071_R" "SE82071_S" "SE82175_F"
## [211] "SE82175_R" "SE82175_S" "SE82104_F" "SE82104_R" "SE82104_S" "SE82172_F"
## [217] "SE82172_R" "SE82172_S" "SE82280_F" "SE82280_R" "SE82280_S" "SE82205_F"
## [223] "SE82205_R" "SE82205_S" "SE82262_F" "SE82262_R" "SE82262_S" "SE82341_F"
## [229] "SE82341_R" "SE82341_S" "SE82323_F" "SE82323_R" "SE82323_S" "SE82099_F"
## [235] "SE82099_R" "SE82099_S" "SE82325_F" "SE82325_R" "SE82325_S" "SE82020_F"
## [241] "SE82020_R" "SE82020_S" "SE82064_F" "SE82064_R" "SE82064_S" "SE82001_F"
## [247] "SE82001_R" "SE82001_S" "SE82050_F" "SE82050_R" "SE82050_S" "SE82040_F"
## [253] "SE82040_R" "SE82040_S" "SE82060_F" "SE82060_R" "SE82060_S" "SE82143_F"
## [259] "SE82143_R" "SE82143_S" "SE82147_F" "SE82147_R" "SE82147_S" "SE82224_F"
## [265] "SE82224_R" "SE82224_S" "SE82203_F" "SE82203_R" "SE82203_S" "SE82223_F"
## [271] "SE82223_R" "SE82223_S" "SE82264_F" "SE82264_R" "SE82264_S" "SE82340_F"
## [277] "SE82340_R" "SE82340_S" "SE82327_F" "SE82327_R" "SE82327_S" "SE82233_F"
## [283] "SE82233_R" "SE82233_S" "SE72078_F" "SE72078_R" "SE72078_S" "SE72460_F"
## [289] "SE72460_R" "SE72460_S" "SE72000_F" "SE72000_R" "SE72000_S" "SE72906_F"
## [295] "SE72906_R" "SE72906_S" "SE72901_F" "SE72901_R" "SE72901_S" "SE72038_F"
## [301] "SE72038_R" "SE72038_S" "SE72120_F" "SE72120_R" "SE72120_S" "SE72143_F"
## [307] "SE72143_R" "SE72143_S" "SE72523_F" "SE72523_R" "SE72523_S" "SE72168_F"
## [313] "SE72168_R" "SE72168_S" "SE72205_F" "SE72205_R" "SE72205_S" "SE72293_F"
## [319] "SE72293_R" "SE72293_S" "SE72280_F" "SE72280_R" "SE72280_S" "SE72370_F"
## [325] "SE72370_R" "SE72370_S" "SE72329_F" "SE72329_R" "SE72329_S" "SE82042_F"
## [331] "SE82042_R" "SE82042_S" "SE82043_F" "SE82043_R" "SE82043_S" "SE82002_F"
## [337] "SE82002_R" "SE82002_S" "SE82073_F" "SE82073_R" "SE82073_S" "SE82070_F"
## [343] "SE82070_R" "SE82070_S" "SE82144_F" "SE82144_R" "SE82144_S" "SE82108_F"
## [349] "SE82108_R" "SE82108_S" "SE82137_F" "SE82137_R" "SE82137_S" "SE82200_F"
## [355] "SE82200_R" "SE82200_S" "SE82244_F" "SE82244_R" "SE82244_S" "SE82271_F"
## [361] "SE82271_R" "SE82271_S" "SE82346_F" "SE82346_R" "SE82346_S" "SE82335_F"
## [367] "SE82335_R" "SE82335_S" "SE62091_F" "SE62091_R" "SE62091_S" "SE62100_F"
## [373] "SE62100_R" "SE62100_S" "SE62097_F" "SE62097_R" "SE62097_S" "SE62101_F"
## [379] "SE62101_R" "SE62101_S" "SE62266_F" "SE62266_R" "SE62266_S" "SE62128_F"
## [385] "SE62128_R" "SE62128_S" "SE62047_F" "SE62047_R" "SE62047_S" "SE62042_F"
## [391] "SE62042_R" "SE62042_S" "SE62250_F" "SE62250_R" "SE62250_S" "SE62246_F"
## [397] "SE62246_R" "SE62246_S" "SE62056_F" "SE62056_R" "SE62056_S" "SE62235_F"
## [403] "SE62235_R" "SE62235_S" "SE62180_F" "SE62180_R" "SE62180_S" "SE62022_F"
## [409] "SE62022_R" "SE62022_S" "SE62243_F" "SE62243_R" "SE62243_S" "SE82007_F"
## [415] "SE82007_R" "SE82007_S" "SE82045_F" "SE82045_R" "SE82045_S" "SE82075_F"
## [421] "SE82075_R" "SE82075_S" "SE82027_F" "SE82027_R" "SE82027_S" "SE82080_F"
## [427] "SE82080_R" "SE82080_S" "SE82005_F" "SE82005_R" "SE82005_S" "SE82170_F"
## [433] "SE82170_R" "SE82170_S" "SE82138_F" "SE82138_R" "SE82138_S" "SE82281_F"
## [439] "SE82281_R" "SE82281_S" "SE82225_F" "SE82225_R" "SE82225_S" "SE82263_F"
## [445] "SE82263_R" "SE82263_S" "SE82307_F" "SE82307_R" "SE82307_S" "SE82333_F"
## [451] "SE82333_R" "SE82333_S" "SE82304_F" "SE82304_R" "SE82304_S" "SE82721_F"
## [457] "SE82721_R" "SE82721_S" "S82L01_F" "S82L01_R" "S82L01_S" "S82L02_F"
## [463] "S82L02_R" "S82L02_S" "S82L03_F" "S82L03_R" "S82L03_S" "S82L05_F"
## [469] "S82L05_R" "S82L05_S" "S82L06_F" "S82L06_R" "S82L06_S" "S82L08_F"
## [475] "S82L08_R" "S82L08_S" "S82L09_F" "S82L09_R" "S82L09_S" "S82L10_F"
## [481] "S82L10_R" "S82L10_S" "SE82077_F" "SE82077_R" "SE82077_S" "SE82103_F"
## [487] "SE82103_R" "SE82103_S" "SE82146_F" "SE82146_R" "SE82146_S" "SE82246_F"
## [493] "SE82246_R" "SE82246_S" "SE82331_F" "SE82331_R" "SE82331_S" "SE62279_F"
## [499] "SE62279_R" "SE62279_S" "SE62112_F" "SE62112_R" "SE62112_S" "SE62119_F"
## [505] "SE62119_R" "SE62119_S" "SE62093_F" "SE62093_R" "SE62093_S" "SE62089_F"
## [511] "SE62089_R" "SE62089_S" "SE62006_F" "SE62006_R" "SE62006_S" "SE62067_F"
## [517] "SE62067_R" "SE62067_S" "SE62247_F" "SE62247_R" "SE62247_S" "SE62177_F"
## [523] "SE62177_R" "SE62177_S" "SE62186_F" "SE62186_R" "SE62186_S" "SE62211_F"
## [529] "SE62211_R" "SE62211_S" "SE62036_F" "SE62036_R" "SE62036_S" "SE62033_F"
## [535] "SE62033_R" "SE62033_S" "SE62037_F" "SE62037_R" "SE62037_S" "SE62242_F"
## [541] "SE62242_R" "SE62242_S" "SE72033_F" "SE72033_R" "SE72033_S" "SE72440_F"
## [547] "SE72440_R" "SE72440_S" "SE72032_F" "SE72032_R" "SE72032_S" "SE72031_F"
## [553] "SE72031_R" "SE72031_S" "SE72086_F" "SE72086_R" "SE72086_S" "SE72005_F"
## [559] "SE72005_R" "SE72005_S" "SE72048_F" "SE72048_R" "SE72048_S" "SE72123_F"
## [565] "SE72123_R" "SE72123_S" "SE72116_F" "SE72116_R" "SE72116_S" "SE72920_F"
## [571] "SE72920_R" "SE72920_S" "SE72294_F" "SE72294_R" "SE72294_S" "SE72231_F"
## [577] "SE72231_R" "SE72231_S" "SE72261_F" "SE72261_R" "SE72261_S" "SE72220_F"
## [583] "SE72220_R" "SE72220_S" "SE72348_F" "SE72348_R" "SE72348_S" "SE72720_F"
## [589] "SE72720_R" "SE72720_S" "SE72011_F" "SE72011_R" "SE72011_S" "SE72905_F"
## [595] "SE72905_R" "SE72905_S" "SE72049_F" "SE72049_R" "SE72049_S" "SE72016_F"
## [601] "SE72016_R" "SE72016_S" "SE72451_F" "SE72451_R" "SE72451_S" "SE72074_F"
## [607] "SE72074_R" "SE72074_S" "SE72091_F" "SE72091_R" "SE72091_S" "SE72109_F"
## [613] "SE72109_R" "SE72109_S" "SE72140_F" "SE72140_R" "SE72140_S" "SE72132_F"
## [619] "SE72132_R" "SE72132_S" "SE72209_F" "SE72209_R" "SE72209_S" "SE72210_F"
## [625] "SE72210_R" "SE72210_S" "SE72249_F" "SE72249_R" "SE72249_S" "SE72323_F"
## [631] "SE72323_R" "SE72323_S" "SE72368_F" "SE72368_R" "SE72368_S" "SE72303_F"
## [637] "SE72303_R" "SE72303_S" "JKZONE" "JKREP" "TOTWGT" "HOUWGT"
## [643] "WGTFAC1" "WGTADJ1" "WGTFAC2" "WGTADJ2" "WGTFAC3" "WGTADJ3"
## [649] "BSMMAT01" "BSMMAT02" "BSMMAT03" "BSMMAT04" "BSMMAT05" "BSMNUM01"
## [655] "BSMNUM02" "BSMNUM03" "BSMNUM04" "BSMNUM05" "BSMALG01" "BSMALG02"
## [661] "BSMALG03" "BSMALG04" "BSMALG05" "BSMGEO01" "BSMGEO02" "BSMGEO03"
## [667] "BSMGEO04" "BSMGEO05" "BSMDAT01" "BSMDAT02" "BSMDAT03" "BSMDAT04"
## [673] "BSMDAT05" "BSMKNO01" "BSMKNO02" "BSMKNO03" "BSMKNO04" "BSMKNO05"
## [679] "BSMAPP01" "BSMAPP02" "BSMAPP03" "BSMAPP04" "BSMAPP05" "BSMREA01"
## [685] "BSMREA02" "BSMREA03" "BSMREA04" "BSMREA05" "BSSSCI01" "BSSSCI02"
## [691] "BSSSCI03" "BSSSCI04" "BSSSCI05" "BSSBIO01" "BSSBIO02" "BSSBIO03"
## [697] "BSSBIO04" "BSSBIO05" "BSSCHE01" "BSSCHE02" "BSSCHE03" "BSSCHE04"
## [703] "BSSCHE05" "BSSPHY01" "BSSPHY02" "BSSPHY03" "BSSPHY04" "BSSPHY05"
## [709] "BSSEAR01" "BSSEAR02" "BSSEAR03" "BSSEAR04" "BSSEAR05" "BSSKNO01"
## [715] "BSSKNO02" "BSSKNO03" "BSSKNO04" "BSSKNO05" "BSSAPP01" "BSSAPP02"
## [721] "BSSAPP03" "BSSAPP04" "BSSAPP05" "BSSREA01" "BSSREA02" "BSSREA03"
## [727] "BSSREA04" "BSSREA05" "BSSENV01" "BSSENV02" "BSSENV03" "BSSENV04"
## [733] "BSSENV05" "BSMIBM01" "BSMIBM02" "BSMIBM03" "BSMIBM04" "BSMIBM05"
## [739] "BSSIBM01" "BSSIBM02" "BSSIBM03" "BSSIBM04" "BSSIBM05" "VERSION"
data_science |>
select(starts_with("SE")) |>
colnames()
## [1] "SE72002_F" "SE72002_R" "SE72002_S" "SE72403_F" "SE72403_R" "SE72403_S"
## [7] "SE72021_F" "SE72021_R" "SE72021_S" "SE72082_F" "SE72082_R" "SE72082_S"
## [13] "SE72066_F" "SE72066_R" "SE72066_S" "SE72063_F" "SE72063_R" "SE72063_S"
## [19] "SE72102_F" "SE72102_R" "SE72102_S" "SE72141_F" "SE72141_R" "SE72141_S"
## [25] "SE72921_F" "SE72921_R" "SE72921_S" "SE72234_F" "SE72234_R" "SE72234_S"
## [31] "SE72251_F" "SE72251_R" "SE72251_S" "SE72284_F" "SE72284_R" "SE72284_S"
## [37] "SE72345_F" "SE72345_R" "SE72345_S" "SE72349_F" "SE72349_R" "SE72349_S"
## [43] "SE72363_F" "SE72363_R" "SE72363_S" "SE62099_F" "SE62099_R" "SE62099_S"
## [49] "SE62095_F" "SE62095_R" "SE62095_S" "SE62106_F" "SE62106_R" "SE62106_S"
## [55] "SE62064_F" "SE62064_R" "SE62064_S" "SE62132_F" "SE62132_R" "SE62132_S"
## [61] "SE62163_F" "SE62163_R" "SE62163_S" "SE62153_F" "SE62153_R" "SE62153_S"
## [67] "SE62018_F" "SE62018_R" "SE62018_S" "SE62143_F" "SE62143_R" "SE62143_S"
## [73] "SE62276_F" "SE62276_R" "SE62276_S" "SE62050_F" "SE62050_R" "SE62050_S"
## [79] "SE62205_F" "SE62205_R" "SE62205_S" "SE62190_F" "SE62190_R" "SE62190_S"
## [85] "SE62024_F" "SE62024_R" "SE62024_S" "SE82902_F" "SE82902_R" "SE82902_S"
## [91] "SE82025_F" "SE82025_R" "SE82025_S" "SE82273_F" "SE82273_R" "SE82273_S"
## [97] "SE82131_F" "SE82131_R" "SE82131_S" "SE82171_F" "SE82171_R" "SE82171_S"
## [103] "SE82250_F" "SE82250_R" "SE82250_S" "SE82275_F" "SE82275_R" "SE82275_S"
## [109] "SE82343_F" "SE82343_R" "SE82343_S" "SE82347_F" "SE82347_R" "SE82347_S"
## [115] "SE82092_F" "SE82092_R" "SE82092_S" "SE82068_F" "SE82068_R" "SE82068_S"
## [121] "SE82008_F" "SE82008_R" "SE82008_S" "SE82021_F" "SE82021_R" "SE82021_S"
## [127] "SE82071_F" "SE82071_R" "SE82071_S" "SE82175_F" "SE82175_R" "SE82175_S"
## [133] "SE82104_F" "SE82104_R" "SE82104_S" "SE82172_F" "SE82172_R" "SE82172_S"
## [139] "SE82280_F" "SE82280_R" "SE82280_S" "SE82205_F" "SE82205_R" "SE82205_S"
## [145] "SE82262_F" "SE82262_R" "SE82262_S" "SE82341_F" "SE82341_R" "SE82341_S"
## [151] "SE82323_F" "SE82323_R" "SE82323_S" "SE82099_F" "SE82099_R" "SE82099_S"
## [157] "SE82325_F" "SE82325_R" "SE82325_S" "SE82020_F" "SE82020_R" "SE82020_S"
## [163] "SE82064_F" "SE82064_R" "SE82064_S" "SE82001_F" "SE82001_R" "SE82001_S"
## [169] "SE82050_F" "SE82050_R" "SE82050_S" "SE82040_F" "SE82040_R" "SE82040_S"
## [175] "SE82060_F" "SE82060_R" "SE82060_S" "SE82143_F" "SE82143_R" "SE82143_S"
## [181] "SE82147_F" "SE82147_R" "SE82147_S" "SE82224_F" "SE82224_R" "SE82224_S"
## [187] "SE82203_F" "SE82203_R" "SE82203_S" "SE82223_F" "SE82223_R" "SE82223_S"
## [193] "SE82264_F" "SE82264_R" "SE82264_S" "SE82340_F" "SE82340_R" "SE82340_S"
## [199] "SE82327_F" "SE82327_R" "SE82327_S" "SE82233_F" "SE82233_R" "SE82233_S"
## [205] "SE72078_F" "SE72078_R" "SE72078_S" "SE72460_F" "SE72460_R" "SE72460_S"
## [211] "SE72000_F" "SE72000_R" "SE72000_S" "SE72906_F" "SE72906_R" "SE72906_S"
## [217] "SE72901_F" "SE72901_R" "SE72901_S" "SE72038_F" "SE72038_R" "SE72038_S"
## [223] "SE72120_F" "SE72120_R" "SE72120_S" "SE72143_F" "SE72143_R" "SE72143_S"
## [229] "SE72523_F" "SE72523_R" "SE72523_S" "SE72168_F" "SE72168_R" "SE72168_S"
## [235] "SE72205_F" "SE72205_R" "SE72205_S" "SE72293_F" "SE72293_R" "SE72293_S"
## [241] "SE72280_F" "SE72280_R" "SE72280_S" "SE72370_F" "SE72370_R" "SE72370_S"
## [247] "SE72329_F" "SE72329_R" "SE72329_S" "SE82042_F" "SE82042_R" "SE82042_S"
## [253] "SE82043_F" "SE82043_R" "SE82043_S" "SE82002_F" "SE82002_R" "SE82002_S"
## [259] "SE82073_F" "SE82073_R" "SE82073_S" "SE82070_F" "SE82070_R" "SE82070_S"
## [265] "SE82144_F" "SE82144_R" "SE82144_S" "SE82108_F" "SE82108_R" "SE82108_S"
## [271] "SE82137_F" "SE82137_R" "SE82137_S" "SE82200_F" "SE82200_R" "SE82200_S"
## [277] "SE82244_F" "SE82244_R" "SE82244_S" "SE82271_F" "SE82271_R" "SE82271_S"
## [283] "SE82346_F" "SE82346_R" "SE82346_S" "SE82335_F" "SE82335_R" "SE82335_S"
## [289] "SE62091_F" "SE62091_R" "SE62091_S" "SE62100_F" "SE62100_R" "SE62100_S"
## [295] "SE62097_F" "SE62097_R" "SE62097_S" "SE62101_F" "SE62101_R" "SE62101_S"
## [301] "SE62266_F" "SE62266_R" "SE62266_S" "SE62128_F" "SE62128_R" "SE62128_S"
## [307] "SE62047_F" "SE62047_R" "SE62047_S" "SE62042_F" "SE62042_R" "SE62042_S"
## [313] "SE62250_F" "SE62250_R" "SE62250_S" "SE62246_F" "SE62246_R" "SE62246_S"
## [319] "SE62056_F" "SE62056_R" "SE62056_S" "SE62235_F" "SE62235_R" "SE62235_S"
## [325] "SE62180_F" "SE62180_R" "SE62180_S" "SE62022_F" "SE62022_R" "SE62022_S"
## [331] "SE62243_F" "SE62243_R" "SE62243_S" "SE82007_F" "SE82007_R" "SE82007_S"
## [337] "SE82045_F" "SE82045_R" "SE82045_S" "SE82075_F" "SE82075_R" "SE82075_S"
## [343] "SE82027_F" "SE82027_R" "SE82027_S" "SE82080_F" "SE82080_R" "SE82080_S"
## [349] "SE82005_F" "SE82005_R" "SE82005_S" "SE82170_F" "SE82170_R" "SE82170_S"
## [355] "SE82138_F" "SE82138_R" "SE82138_S" "SE82281_F" "SE82281_R" "SE82281_S"
## [361] "SE82225_F" "SE82225_R" "SE82225_S" "SE82263_F" "SE82263_R" "SE82263_S"
## [367] "SE82307_F" "SE82307_R" "SE82307_S" "SE82333_F" "SE82333_R" "SE82333_S"
## [373] "SE82304_F" "SE82304_R" "SE82304_S" "SE82721_F" "SE82721_R" "SE82721_S"
## [379] "SE82077_F" "SE82077_R" "SE82077_S" "SE82103_F" "SE82103_R" "SE82103_S"
## [385] "SE82146_F" "SE82146_R" "SE82146_S" "SE82246_F" "SE82246_R" "SE82246_S"
## [391] "SE82331_F" "SE82331_R" "SE82331_S" "SE62279_F" "SE62279_R" "SE62279_S"
## [397] "SE62112_F" "SE62112_R" "SE62112_S" "SE62119_F" "SE62119_R" "SE62119_S"
## [403] "SE62093_F" "SE62093_R" "SE62093_S" "SE62089_F" "SE62089_R" "SE62089_S"
## [409] "SE62006_F" "SE62006_R" "SE62006_S" "SE62067_F" "SE62067_R" "SE62067_S"
## [415] "SE62247_F" "SE62247_R" "SE62247_S" "SE62177_F" "SE62177_R" "SE62177_S"
## [421] "SE62186_F" "SE62186_R" "SE62186_S" "SE62211_F" "SE62211_R" "SE62211_S"
## [427] "SE62036_F" "SE62036_R" "SE62036_S" "SE62033_F" "SE62033_R" "SE62033_S"
## [433] "SE62037_F" "SE62037_R" "SE62037_S" "SE62242_F" "SE62242_R" "SE62242_S"
## [439] "SE72033_F" "SE72033_R" "SE72033_S" "SE72440_F" "SE72440_R" "SE72440_S"
## [445] "SE72032_F" "SE72032_R" "SE72032_S" "SE72031_F" "SE72031_R" "SE72031_S"
## [451] "SE72086_F" "SE72086_R" "SE72086_S" "SE72005_F" "SE72005_R" "SE72005_S"
## [457] "SE72048_F" "SE72048_R" "SE72048_S" "SE72123_F" "SE72123_R" "SE72123_S"
## [463] "SE72116_F" "SE72116_R" "SE72116_S" "SE72920_F" "SE72920_R" "SE72920_S"
## [469] "SE72294_F" "SE72294_R" "SE72294_S" "SE72231_F" "SE72231_R" "SE72231_S"
## [475] "SE72261_F" "SE72261_R" "SE72261_S" "SE72220_F" "SE72220_R" "SE72220_S"
## [481] "SE72348_F" "SE72348_R" "SE72348_S" "SE72720_F" "SE72720_R" "SE72720_S"
## [487] "SE72011_F" "SE72011_R" "SE72011_S" "SE72905_F" "SE72905_R" "SE72905_S"
## [493] "SE72049_F" "SE72049_R" "SE72049_S" "SE72016_F" "SE72016_R" "SE72016_S"
## [499] "SE72451_F" "SE72451_R" "SE72451_S" "SE72074_F" "SE72074_R" "SE72074_S"
## [505] "SE72091_F" "SE72091_R" "SE72091_S" "SE72109_F" "SE72109_R" "SE72109_S"
## [511] "SE72140_F" "SE72140_R" "SE72140_S" "SE72132_F" "SE72132_R" "SE72132_S"
## [517] "SE72209_F" "SE72209_R" "SE72209_S" "SE72210_F" "SE72210_R" "SE72210_S"
## [523] "SE72249_F" "SE72249_R" "SE72249_S" "SE72323_F" "SE72323_R" "SE72323_S"
## [529] "SE72368_F" "SE72368_R" "SE72368_S" "SE72303_F" "SE72303_R" "SE72303_S"
#check the booklet
count(data_science,IDBOOK) # booklet 9 n = 176
## # A tibble: 14 × 2
## IDBOOK n
## <dbl+lbl> <int>
## 1 1 [Booklet 01] 426
## 2 2 [Booklet 02] 430
## 3 3 [Booklet 03] 421
## 4 4 [Booklet 04] 420
## 5 5 [Booklet 05] 423
## 6 6 [Booklet 06] 429
## 7 7 [Booklet 07] 417
## 8 8 [Booklet 08] 182
## 9 9 [Booklet 09] 176
## 10 10 [Booklet 10] 181
## 11 11 [Booklet 11] 183
## 12 12 [Booklet 12] 180
## 13 13 [Booklet 13] 186
## 14 14 [Booklet 14] 177
#filter the booklet 9
booklet_9<-data_science |>
filter(IDBOOK ==9)
nrow(booklet_9)
## [1] 176
#check the missing
vis_miss(booklet_9)
#delete when the variables are all NA
booklet_9 <- booklet_9 |>
select(where(~!all(is.na(.))))
#check the missing again
vis_miss(booklet_9)
#save the booklet_9 in csv file
write_csv(booklet_9, "booklet_9.csv")
items n = 30 “SE72000” “SE72038” “SE72078” “SE72120” “SE72143” “SE72168” “SE72205” “SE72280” “SE72293” “SE72329” “SE72370” “SE72460” “SE72523” “SE72901” “SE72906” “SE82001” “SE82020” “SE82040” “SE82050” “SE82060” “SE82064” “SE82143” “SE82147” “SE82203” “SE82223” “SE82224” “SE82233” “SE82264” “SE82327” “SE82340”
load("~/Library/CloudStorage/GoogleDrive-ejmyoung@stanford.edu/My Drive/0.Research/Process Data/TIMSS data/TIMSS2023_IDB_R_G8/2_Data Files/R Data/bsakorm8.rdata")
score <-BSAKORM8
colnames(score)
## [1] "CTY" "IDCNTRY" "IDPOP" "IDGRADER" "IDGRADE" "ITASSESS"
## [7] "IDBOOK" "IDSCHOOL" "IDCLASS" "IDSTUD" "ITSEX" "BSDAGE"
## [13] "ITADMINI" "ILRELIAB" "ITLANG_SA" "LCID_SA" "ME72002" "ME72188"
## [19] "ME72035" "ME72055A" "ME72055B" "ME72055C" "ME72055D" "ME72055E"
## [25] "ME72055F" "ME72055" "ME72222" "ME72090" "ME72233" "ME72106A"
## [31] "ME72106B" "ME72106C" "ME72128A" "ME72128B" "ME72119" "ME72153A"
## [37] "ME72153B" "ME72172" "ME72001" "ME72019" "ME72189" "ME72024"
## [43] "ME72043" "ME72221" "ME72220" "ME72225A" "ME72225B" "ME72225"
## [49] "ME72110A" "ME72110B" "ME72150" "ME72139" "ME72229" "ME72171"
## [55] "ME72211A" "ME72211B" "ME82214A" "ME82214B" "MQ82N01A" "MQ82N01B"
## [61] "MQ82N02" "MQ82N03" "MQ82N04" "MQ82N07" "MQ82N08A" "MQ82N08B"
## [67] "ME82711" "ME82111" "ME82320" "ME82707" "ME82802" "MQ72D01"
## [73] "MQ72D02AA" "MQ72D02AB" "MQ72D02AC" "MQ72D02A" "MQ72D02B" "MQ72D03"
## [79] "MQ72D04A" "MQ72D04B" "MQ72D04" "MQ72D05" "MQ72D06A" "MQ72D06B"
## [85] "MQ72D07A" "MQ72D07B" "MQ72D08A" "MQ72D08B" "MQ82C01" "MQ82C02"
## [91] "MQ82C03" "MQ82C04" "MQ82C05" "ME82808" "ME82116" "ME82322"
## [97] "ME82199A" "ME82199B" "ME82723" "ME82106" "ME82807" "ME82309"
## [103] "ME82724" "ME82403" "ME82205" "ME82512" "ME82617" "ME82201"
## [109] "ME82727" "ME82310" "ME82112" "ME82203" "ME82404" "ME82206"
## [115] "ME82791" "ME82103" "ME62271" "ME62152" "ME62215A" "ME62215B"
## [121] "ME62215" "ME62143" "ME62230" "ME62095" "ME62076" "ME62030"
## [127] "ME62171" "ME62301" "ME62194" "ME62344" "ME62320" "ME62296"
## [133] "ME82104" "ME82109" "ME82702A" "ME82702B" "ME82702C" "ME82702"
## [139] "ME82623A" "ME82623B" "ME82623C" "ME82623D" "ME82623E" "ME82623F"
## [145] "ME82623" "ME82715" "ME82790" "ME82101" "ME82601" "ME82502A"
## [151] "ME82502B" "ME82502" "ME82402" "ME82410" "ME82725" "ME82315"
## [157] "ME62001" "ME62214" "ME62146" "ME62154" "ME62067" "ME62341"
## [163] "ME62242" "ME62250A" "ME62250B" "ME62170A" "ME62170B" "ME62170"
## [169] "ME62192" "ME62072" "ME62048A" "ME62048B" "ME62048C" "ME62048"
## [175] "ME62120" "ME82613" "ME82708A" "ME82708B" "ME82216" "ME82105"
## [181] "ME82405" "ME82792" "ME82612" "ME82313" "ME82503" "ME82407"
## [187] "ME82312A" "ME82312B" "ME82420" "ME82726" "ME82510" "ME82804"
## [193] "ME82207" "ME82219" "ME82415" "ME82314A" "ME82314B" "ME82314C"
## [199] "ME82314D" "ME82314E" "ME82314" "ME82608" "ME82119" "ME82611"
## [205] "ME82411" "ME82610" "ME82801" "ME82511" "ME82603" "ME72178A"
## [211] "ME72178B" "ME72178C" "ME72178D" "ME72178E" "ME72178" "ME72234"
## [217] "ME72020A" "ME72020B" "ME72020C" "ME72020D" "ME72020" "ME72027"
## [223] "ME72052A" "ME72052B" "ME72052" "ME72067" "ME72083A" "ME72083B"
## [229] "ME72108A" "ME72108B" "ME72181" "ME72126" "ME72164A" "ME72164B"
## [235] "ME72164C" "ME72164D" "ME72164E" "ME72164" "ME72185A" "ME72185B"
## [241] "ME72005" "ME72021" "ME72026" "ME72041A" "ME72041B" "ME72223"
## [247] "ME72094" "ME72059" "ME72080" "ME72081A" "ME72081B" "ME72081C"
## [253] "ME72081D" "ME72081" "ME72140A" "ME72140B" "ME72140C" "ME72140D"
## [259] "ME72140E" "ME72140F" "ME72140" "ME72120" "ME72131" "ME72147"
## [265] "ME72154" "ME72192" "ME72161" "ME72187" "ME72022" "ME72038"
## [271] "ME72045" "ME72049" "ME72069" "ME72074" "ME72013" "ME72095A"
## [277] "ME72095B" "ME72095" "ME72109" "ME72125" "ME72196" "ME72237"
## [283] "ME72232A" "ME72232B" "ME72232C" "ME72232D" "ME72232" "ME72206"
## [289] "MP72178" "MP72234" "MP72020" "MP72027" "MP72052A" "MP72052B"
## [295] "MP72052" "MP72067" "MP72083A" "MP72083B" "MP72108A" "MP72108B"
## [301] "MP72181" "MP72126" "MP72164A" "MP72164B" "MP72164C" "MP72164D"
## [307] "MP72164E" "MP72164" "MP72185A" "MP72185B" "MP72002" "MP72188"
## [313] "MP72035" "MP72055" "MP72222" "MP72090" "MP72233" "MP72106A"
## [319] "MP72106B" "MP72106C" "MP72128A" "MP72128B" "MP72119" "MP72153A"
## [325] "MP72153B" "MP72172" "MP62329" "MP62151" "MP62346" "MP62212"
## [331] "MP62056" "MP62317" "MP62350" "MP62078" "MP62284" "MP62245"
## [337] "MP62287" "MP62345A" "MP62345BA" "MP62345BB" "MP62345BC" "MP62345BD"
## [343] "MP62345B" "MP62115" "MP72187" "MP72022" "MP72038" "MP72045"
## [349] "MP72049" "MP72069" "MP72074" "MP72013" "MP72095A" "MP72095B"
## [355] "MP72095" "MP72109" "MP72125" "MP72196" "MP72237" "MP72232A"
## [361] "MP72232B" "MP72232C" "MP72232D" "MP72232" "MP72206" "MP62271"
## [367] "MP62152" "MP62215" "MP62143" "MP62230" "MP62095" "MP62076"
## [373] "MP62030" "MP62171" "MP62301" "MP62194" "MP62344" "MP62320"
## [379] "MP62296" "MP72001" "MP72019" "MP72189" "MP72024" "MP72043"
## [385] "MP72221" "MP72220" "MP72225A" "MP72225B" "MP72225" "MP72110A"
## [391] "MP72110B" "MP72150" "MP72139" "MP72229" "MP72171" "MP72211A"
## [397] "MP72211B" "MP62001" "MP62214" "MP62146" "MP62154" "MP62067"
## [403] "MP62341" "MP62242" "MP62250A" "MP62250B" "MP62170" "MP62192"
## [409] "MP62072" "MP62048A" "MP62048B" "MP62048C" "MP62048" "MP62120"
## [415] "MP72005" "MP72021" "MP72026" "MP72041A" "MP72041B" "MP72223"
## [421] "MP72094" "MP72059" "MP72080" "MP72081" "MP72140A" "MP72140B"
## [427] "MP72140C" "MP72140D" "MP72140E" "MP72140F" "MP72140" "MP72120"
## [433] "MP72131" "MP72147" "MP72154" "MP72192" "MP72161" "SE72002"
## [439] "SE72403A" "SE72403B" "SE72403C" "SE72403D" "SE72403" "SE72021"
## [445] "SE72082" "SE72066" "SE72063" "SE72102" "SE72141A" "SE72141B"
## [451] "SE72921" "SE72234" "SE72251" "SE72284" "SE72345A" "SE72345B"
## [457] "SE72345C" "SE72345D" "SE72345E" "SE72345F" "SE72345G" "SE72345"
## [463] "SE72349" "SE72363" "SE62099" "SE62095" "SE62106" "SE62064"
## [469] "SE62132" "SE62163" "SE62153" "SE62018A" "SE62018B" "SE62018C"
## [475] "SE62018D" "SE62018E" "SE62018" "SE62143" "SE62276" "SE62050"
## [481] "SE62205" "SE62190" "SE62024A" "SE62024B" "SE82902" "SE82025A"
## [487] "SE82025B" "SE82025C" "SE82025D" "SE82025" "SE82273A" "SE82273B"
## [493] "SE82131" "SE82171" "SE82250" "SE82275A" "SE82275B" "SE82275C"
## [499] "SE82275" "SE82343A" "SE82343B" "SE82343C" "SE82343" "SE82347"
## [505] "SQ82T01A" "SQ82T01B" "SQ82T01C" "SQ82T01D" "SQ82T01E" "SQ82T01"
## [511] "SQ82T02AA" "SQ82T02AB" "SQ82T02A" "SQ82T02B" "SQ82T03A" "SQ82T03B"
## [517] "SQ82T04" "SQ72S01" "SQ72S02" "SQ72S03" "SQ72S04" "SQ72S05"
## [523] "SQ72S06" "SQ72S07A" "SQ72S07B" "SQ72S07C" "SQ72S08" "SQ72S09A"
## [529] "SQ72S09B" "SQ72S09C" "SQ72S09" "SQ72S10" "SQ72S11" "SQ72S12"
## [535] "SQ72S13A" "SQ72S13B" "SE82092" "SE82068A" "SE82068B" "SE82008A"
## [541] "SE82008B" "SE82008C" "SE82008D" "SE82008E" "SE82008F" "SE82008G"
## [547] "SE82008H" "SE82008" "SE82021A" "SE82021B" "SE82021C" "SE82021D"
## [553] "SE82021" "SE82071" "SE82175A" "SE82175B" "SE82175C" "SE82175D"
## [559] "SE82175E" "SE82175" "SE82104A" "SE82104B" "SE82104C" "SE82104D"
## [565] "SE82104" "SE82172A" "SE82172B" "SE82172C" "SE82172D" "SE82172"
## [571] "SE82280" "SE82205A" "SE82205B" "SE82205C" "SE82205D" "SE82205"
## [577] "SE82262" "SE82341A" "SE82341B" "SE82341C" "SE82341D" "SE82341E"
## [583] "SE82341" "SE82323A" "SE82323B" "SE82323C" "SE82323" "SE82099"
## [589] "SE82325" "SE82020" "SE82064A" "SE82064B" "SE82064C" "SE82064D"
## [595] "SE82064" "SE82001A" "SE82001B" "SE82001C" "SE82001D" "SE82001E"
## [601] "SE82001" "SE82050" "SE82040A" "SE82040B" "SE82040C" "SE82040D"
## [607] "SE82040E" "SE82040" "SE82060A" "SE82060B" "SE82060" "SE82143"
## [613] "SE82147" "SE82224" "SE82203" "SE82223" "SE82264" "SE82340A"
## [619] "SE82340B" "SE82340C" "SE82340D" "SE82340E" "SE82340" "SE82327A"
## [625] "SE82327B" "SE82327C" "SE82327D" "SE82327" "SE82233" "SE72078"
## [631] "SE72460" "SE72000A" "SE72000B" "SE72000C" "SE72000D" "SE72000E"
## [637] "SE72000" "SE72906A" "SE72906B" "SE72906C" "SE72906D" "SE72906E"
## [643] "SE72906" "SE72901" "SE72038" "SE72120" "SE72143A" "SE72143B"
## [649] "SE72143C" "SE72143D" "SE72143" "SE72523" "SE72168" "SE72205"
## [655] "SE72293" "SE72280A" "SE72280B" "SE72370" "SE72329" "SE82042"
## [661] "SE82043" "SE82002A" "SE82002B" "SE82002C" "SE82002D" "SE82002E"
## [667] "SE82002" "SE82073" "SE82070" "SE82144" "SE82108" "SE82137"
## [673] "SE82200AA" "SE82200AB" "SE82200AC" "SE82200A" "SE82200BA" "SE82200BB"
## [679] "SE82200BC" "SE82200B" "SE82244A" "SE82244B" "SE82244C" "SE82244"
## [685] "SE82271A" "SE82271B" "SE82271C" "SE82271D" "SE82271" "SE82346A"
## [691] "SE82346B" "SE82335" "SE62091A" "SE62091B" "SE62100" "SE62097"
## [697] "SE62101A" "SE62101B" "SE62101C" "SE62101D" "SE62101" "SE62266"
## [703] "SE62128" "SE62047A" "SE62047B" "SE62047C" "SE62047" "SE62042A"
## [709] "SE62042B" "SE62042C" "SE62042D" "SE62042" "SE62250" "SE62246"
## [715] "SE62056" "SE62235" "SE62180" "SE62022A" "SE62022B" "SE62022C"
## [721] "SE62022D" "SE62022" "SE62243A" "SE62243B" "SE62243C" "SE62243D"
## [727] "SE62243" "SE82007" "SE82045" "SE82075" "SE82027" "SE82080"
## [733] "SE82005A" "SE82005B" "SE82005C" "SE82005D" "SE82005E" "SE82005"
## [739] "SE82170" "SE82138" "SE82281" "SE82225" "SE82263A" "SE82263B"
## [745] "SE82263" "SE82307" "SE82333" "SE82304" "SE82721" "SQ82L01A"
## [751] "SQ82L01B" "SQ82L01C" "SQ82L01D" "SQ82L01E" "SQ82L01F" "SQ82L01"
## [757] "SQ82L02A" "SQ82L02B" "SQ82L04A" "SQ82L04B" "SQ82L05" "SQ82L07A"
## [763] "SQ82L07B" "SQ82L08" "SQ82L09A" "SQ82L09B" "SE82077A" "SE82077B"
## [769] "SE82077C" "SE82077" "SE82103" "SE82146" "SE82246A" "SE82246B"
## [775] "SE82246C" "SE82246D" "SE82246" "SE82331" "SE62279" "SE62112"
## [781] "SE62119" "SE62093" "SE62089" "SE62006A" "SE62006B" "SE62006C"
## [787] "SE62006" "SE62067" "SE62247" "SE62177" "SE62186" "SE62211A"
## [793] "SE62211B" "SE62036" "SE62033" "SE62037" "SE62242A" "SE62242B"
## [799] "SE62242C" "SE62242D" "SE62242E" "SE62242" "SE72033A" "SE72033B"
## [805] "SE72033C" "SE72033D" "SE72033E" "SE72033" "SE72440" "SE72032"
## [811] "SE72031" "SE72086" "SE72005" "SE72048" "SE72123" "SE72116"
## [817] "SE72920" "SE72294" "SE72231" "SE72261A" "SE72261B" "SE72261C"
## [823] "SE72261D" "SE72261E" "SE72261" "SE72220" "SE72348" "SE72720"
## [829] "SE72011" "SE72905A" "SE72905B" "SE72905C" "SE72905D" "SE72905"
## [835] "SE72049" "SE72016A" "SE72016B" "SE72016" "SE72451" "SE72074"
## [841] "SE72091" "SE72109" "SE72140" "SE72132" "SE72209" "SE72210"
## [847] "SE72249" "SE72323" "SE72368" "SE72303" "SP72002" "SP72403"
## [853] "SP72021" "SP72082" "SP72066" "SP72063" "SP72102" "SP72141A"
## [859] "SP72141B" "SP72921" "SP72234" "SP72251" "SP72284" "SP72345A"
## [865] "SP72345B" "SP72345C" "SP72345D" "SP72345E" "SP72345F" "SP72345G"
## [871] "SP72345" "SP72349" "SP72363" "SP72070" "SP72400" "SP72024"
## [877] "SP72462" "SP72443" "SP72903" "SP72145" "SP72100" "SP72133"
## [883] "SP72137" "SP72298" "SP72215" "SP72260" "SP72265A" "SP72265B"
## [889] "SP72265C" "SP72265D" "SP72265E" "SP72265" "SP72347" "SP72351"
## [895] "SP72367" "SP62099" "SP62095" "SP62106" "SP62064" "SP62132"
## [901] "SP62163" "SP62153" "SP62018A" "SP62018B" "SP62018C" "SP62018D"
## [907] "SP62018E" "SP62018" "SP62143" "SP62276" "SP62050" "SP62205"
## [913] "SP62190" "SP62024A" "SP62024B" "SP72033" "SP72440" "SP72032"
## [919] "SP72031" "SP72086" "SP72005" "SP72048" "SP72123" "SP72116"
## [925] "SP72920" "SP72294" "SP72231" "SP72261A" "SP72261B" "SP72261C"
## [931] "SP72261D" "SP72261E" "SP72261" "SP72220" "SP72348" "SP72720"
## [937] "SP62279" "SP62112" "SP62119" "SP62093" "SP62089" "SP62006"
## [943] "SP62067" "SP62247" "SP62177" "SP62186" "SP62211A" "SP62211B"
## [949] "SP62036" "SP62033" "SP62037" "SP62242A" "SP62242B" "SP62242C"
## [955] "SP62242D" "SP62242E" "SP62242" "SP72078" "SP72460" "SP72000"
## [961] "SP72906A" "SP72906B" "SP72906C" "SP72906D" "SP72906E" "SP72906"
## [967] "SP72901" "SP72038" "SP72120" "SP72143" "SP72523" "SP72168"
## [973] "SP72205" "SP72293" "SP72280A" "SP72280B" "SP72370" "SP72329"
## [979] "SP62091A" "SP62091B" "SP62100" "SP62097" "SP62101" "SP62266"
## [985] "SP62128" "SP62047A" "SP62047B" "SP62047C" "SP62047" "SP62042"
## [991] "SP62250" "SP62246" "SP62056" "SP62235" "SP62180" "SP62022A"
## [997] "SP62022B" "SP62022C" "SP62022D" "SP62022" "SP62243" "SP72011"
## [1003] "SP72905" "SP72049" "SP72016A" "SP72016B" "SP72016" "SP72451"
## [1009] "SP72074" "SP72091" "SP72109" "SP72140" "SP72132" "SP72209"
## [1015] "SP72210" "SP72249" "SP72323" "SP72368" "SP72303" "BNRGCAL1"
## [1021] "BNRGCAL2" "JKZONE" "JKREP" "TOTWGT" "HOUWGT" "SENWGT"
## [1027] "WGTFAC1" "WGTADJ1" "WGTFAC2" "WGTADJ2" "WGTFAC3" "WGTADJ3"
## [1033] "BSMMAT01" "BSMMAT02" "BSMMAT03" "BSMMAT04" "BSMMAT05" "BSMNUM01"
## [1039] "BSMNUM02" "BSMNUM03" "BSMNUM04" "BSMNUM05" "BSMALG01" "BSMALG02"
## [1045] "BSMALG03" "BSMALG04" "BSMALG05" "BSMGEO01" "BSMGEO02" "BSMGEO03"
## [1051] "BSMGEO04" "BSMGEO05" "BSMDAT01" "BSMDAT02" "BSMDAT03" "BSMDAT04"
## [1057] "BSMDAT05" "BSMKNO01" "BSMKNO02" "BSMKNO03" "BSMKNO04" "BSMKNO05"
## [1063] "BSMAPP01" "BSMAPP02" "BSMAPP03" "BSMAPP04" "BSMAPP05" "BSMREA01"
## [1069] "BSMREA02" "BSMREA03" "BSMREA04" "BSMREA05" "BSSSCI01" "BSSSCI02"
## [1075] "BSSSCI03" "BSSSCI04" "BSSSCI05" "BSSBIO01" "BSSBIO02" "BSSBIO03"
## [1081] "BSSBIO04" "BSSBIO05" "BSSCHE01" "BSSCHE02" "BSSCHE03" "BSSCHE04"
## [1087] "BSSCHE05" "BSSPHY01" "BSSPHY02" "BSSPHY03" "BSSPHY04" "BSSPHY05"
## [1093] "BSSEAR01" "BSSEAR02" "BSSEAR03" "BSSEAR04" "BSSEAR05" "BSSKNO01"
## [1099] "BSSKNO02" "BSSKNO03" "BSSKNO04" "BSSKNO05" "BSSAPP01" "BSSAPP02"
## [1105] "BSSAPP03" "BSSAPP04" "BSSAPP05" "BSSREA01" "BSSREA02" "BSSREA03"
## [1111] "BSSREA04" "BSSREA05" "BSSENV01" "BSSENV02" "BSSENV03" "BSSENV04"
## [1117] "BSSENV05" "BSMIBM01" "BSMIBM02" "BSMIBM03" "BSMIBM04" "BSMIBM05"
## [1123] "BSSIBM01" "BSSIBM02" "BSSIBM03" "BSSIBM04" "BSSIBM05" "ME82NUM"
## [1129] "ME82ALG" "ME82GEO" "ME82DAT" "ME82KNO" "ME82APP" "ME82REA"
## [1135] "SE82BIO" "SE82CHE" "SE82PHY" "SE82EAR" "SE82KNO" "SE82APP"
## [1141] "SE82REA" "SE82ENV" "SE82NEN" "VERSION" "SCOPE"
score_9<-score |>
filter(IDBOOK ==9)
#only select the items taht are in score_9
score_9_clear<-score_9 |>
select(IDCNTRY, IDPOP, IDGRADER, IDGRADE, ITASSESS, IDBOOK, IDSCHOOL, IDCLASS, IDSTUD, ITSEX, BSDAGE, ITADMINI, ILRELIAB, ITLANG_SA,LCID_SA, "SE72000", "SE72000A", "SE72000B", "SE72000C", "SE72000D", "SE72000E",
"SE72038", "SE72078", "SE72120", "SE72143", "SE72143A", "SE72143B",
"SE72143C", "SE72143D", "SE72168", "SE72205", "SE72280A", "SE72280B",
"SE72293", "SE72370", "SE72460", "SE72523", "SE72901", "SE82001",
"SE82001A", "SE82001B", "SE82001C", "SE82001D", "SE82001E", "SE82020",
"SE82040", "SE82040A", "SE82040B", "SE82040C", "SE82040D", "SE82040E",
"SE82050", "SE82060", "SE82060A", "SE82060B", "SE82064", "SE82064A",
"SE82064B", "SE82064C", "SE82064D", "SE82143", "SE82147", "SE82203",
"SE82223", "SE82224", "SE82233", "SE82264", "SE82327", "SE82327A",
"SE82327B", "SE82327C", "SE82327D", "SE82340", "SE82340A", "SE82340B",
"SE82340C", "SE82340D", "SE82340E", "SE72329", "SE72906A", "SE72906B",
"SE72906C", "SE72906D", "SE72906E", "SE72906")
nrow(score_9_clear)
## [1] 177
vis_miss(score_9_clear)
#pick only variables that we needed
score_cleansing<- score_9_clear |>
select(IDSTUD, starts_with("SE"))
#make the long form of score_9_clear
score_cleansing <- score_cleansing %>%
pivot_longer(-IDSTUD, names_to = "ItemID", values_to = "score")
## Warning: `SE72000` and `SE72000B` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 3 and 5
## Warning: `SE72000` and `SE72000C` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 5
## Warning: `SE72000` and `SE72000D` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 5 and 6
## Warning: `SE72000` and `SE72000E` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 4 and 5
## Warning: `SE72000` and `SE72038` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE72078` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 10
## Warning: `SE72000` and `SE72120` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE72143` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 10
## Warning: `SE72000` and `SE72143A` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1, 5, and 6
## Warning: `SE72000` and `SE72143B` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 3, 5, and 6
## Warning: `SE72000` and `SE72143C` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 4, 5, and 6
## Warning: `SE72000` and `SE72143D` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2, 5, and 6
## Warning: `SE72000` and `SE72168` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 4 and 6
## Warning: `SE72000` and `SE72205` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 3 and 6
## Warning: `SE72000` and `SE72280A` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 10
## Warning: `SE72000` and `SE72280B` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE72293` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 10
## Warning: `SE72000` and `SE72370` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 3 and 6
## Warning: `SE72000` and `SE72460` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE72901` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE82001` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82001A` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 4 and 5
## Warning: `SE72000` and `SE82001B` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 3 and 5
## Warning: `SE72000` and `SE82001D` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 5
## Warning: `SE72000` and `SE82001E` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 5 and 6
## Warning: `SE72000` and `SE82020` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE82040` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82040A` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82040B` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE82040C` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE82040D` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82040E` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82050` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 3 and 6
## Warning: `SE72000` and `SE82060` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1, 2, and 6
## Warning: `SE72000` and `SE82060A` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82060B` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82064` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82064A` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 4 and 6
## Warning: `SE72000` and `SE82064B` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82064C` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE82064D` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 3 and 6
## Warning: `SE72000` and `SE82143` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 4 and 6
## Warning: `SE72000` and `SE82147` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82203` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 3 and 6
## Warning: `SE72000` and `SE82223` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82224` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 3 and 6
## Warning: `SE72000` and `SE82233` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE82264` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82327` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82327A` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82327B` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82327C` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 3 and 6
## Warning: `SE72000` and `SE82327D` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82340` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82340A` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE82340B` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82340C` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE82340D` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE82340E` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE72329` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 4 and 6
## Warning: `SE72000` and `SE72906A` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE72906B` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE72906C` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE72906D` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 1 and 6
## Warning: `SE72000` and `SE72906E` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 2 and 6
## Warning: `SE72000` and `SE72906` have conflicting value labels.
## ℹ Labels for these values will be taken from `SE72000`.
## ✖ Values: 10
score_cleansing <- score_cleansing |>
mutate(ItemID = case_when
(ItemID == "SE72280A" ~ "SE72280",
TRUE ~ ItemID)) |>
filter(!str_ends(ItemID, "[A-E]$"))
In the case of Korean Studetns, “SE72906” variables are all missing in the score one. So it was eventually deleted and only 29 items are included from booklet 9.
item_info <- read_csv("/Users/eunjungmyoung/Library/CloudStorage/GoogleDrive-ejmyoung@stanford.edu/My Drive/0.Research/Process Data/TIMSS data/TIMSS_Process_Data/item_information.csv")
## Rows: 70 Columns: 20
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (17): Item ID, Screen ID, Subject, Block, Block Sequence, Secure Status,...
## dbl (3): Grade, Cycle, Maximum Points
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
item_info <- item_info |>
rename("ItemID" = `Item ID`)
unique(item_info$ItemID)
## [1] "SE72000" "SE72000A" "SE72000B" "SE72000C" "SE72000D" "SE72000E"
## [7] "SE72038" "SE72078" "SE72120" "SE72143" "SE72143A" "SE72143B"
## [13] "SE72143C" "SE72143D" "SE72168" "SE72205" "SE72280A" "SE72280B"
## [19] "SE72293" "SE72329" "SE72370" "SE72460" "SE72523" "SE72901"
## [25] "SE72906" "SE72906A" "SE72906B" "SE72906C" "SE72906D" "SE72906E"
## [31] "SE82001" "SE82001A" "SE82001B" "SE82001C" "SE82001D" "SE82001E"
## [37] "SE82020" "SE82040" "SE82040A" "SE82040B" "SE82040C" "SE82040D"
## [43] "SE82040E" "SE82050" "SE82060" "SE82060A" "SE82060B" "SE82064"
## [49] "SE82064A" "SE82064B" "SE82064C" "SE82064D" "SE82143" "SE82147"
## [55] "SE82203" "SE82223" "SE82224" "SE82233" "SE82264" "SE82327"
## [61] "SE82327A" "SE82327B" "SE82327C" "SE82327D" "SE82340" "SE82340A"
## [67] "SE82340B" "SE82340C" "SE82340D" "SE82340E"
head(item_info)
## # A tibble: 6 × 20
## ItemID `Screen ID` Subject Grade Cycle Block `Block Sequence` `Secure Status`
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 SE72000 SE72000 S 8 7 B_SE2 03 Secured
## 2 SE7200… SE72000 S 8 7 B_SE2 03A Secured
## 3 SE7200… SE72000 S 8 7 B_SE2 03B Secured
## 4 SE7200… SE72000 S 8 7 B_SE2 03C Secured
## 5 SE7200… SE72000 S 8 7 B_SE2 03D Secured
## 6 SE7200… SE72000 S 8 7 B_SE2 03E Secured
## # ℹ 12 more variables: `Content Domain` <chr>, `Topic Area` <chr>, Topic <chr>,
## # `Cognitive Domain` <chr>, `Cognitive Area` <chr>,
## # `Environmental Knowledge` <chr>, `Maximum Points` <dbl>, `Item Type` <chr>,
## # `Response Options` <chr>, Key <chr>, `Scaling Status` <chr>, Label <chr>
#change the value "SE72280A" into "SE72280"
item_info<-item_info |>
mutate(ItemID = case_when
(ItemID == "SE72280A" ~ "SE72280",
TRUE ~ ItemID))
unique(item_info$ItemID)
## [1] "SE72000" "SE72000A" "SE72000B" "SE72000C" "SE72000D" "SE72000E"
## [7] "SE72038" "SE72078" "SE72120" "SE72143" "SE72143A" "SE72143B"
## [13] "SE72143C" "SE72143D" "SE72168" "SE72205" "SE72280" "SE72280B"
## [19] "SE72293" "SE72329" "SE72370" "SE72460" "SE72523" "SE72901"
## [25] "SE72906" "SE72906A" "SE72906B" "SE72906C" "SE72906D" "SE72906E"
## [31] "SE82001" "SE82001A" "SE82001B" "SE82001C" "SE82001D" "SE82001E"
## [37] "SE82020" "SE82040" "SE82040A" "SE82040B" "SE82040C" "SE82040D"
## [43] "SE82040E" "SE82050" "SE82060" "SE82060A" "SE82060B" "SE82064"
## [49] "SE82064A" "SE82064B" "SE82064C" "SE82064D" "SE82143" "SE82147"
## [55] "SE82203" "SE82223" "SE82224" "SE82233" "SE82264" "SE82327"
## [61] "SE82327A" "SE82327B" "SE82327C" "SE82327D" "SE82340" "SE82340A"
## [67] "SE82340B" "SE82340C" "SE82340D" "SE82340E"
#Filter the value that ends with A, B, C, D,E
item_info_clear <- item_info |>
filter(!str_ends(ItemID, "[A-E]$"))
unique(item_info_clear$ItemID)
## [1] "SE72000" "SE72038" "SE72078" "SE72120" "SE72143" "SE72168" "SE72205"
## [8] "SE72280" "SE72293" "SE72329" "SE72370" "SE72460" "SE72523" "SE72901"
## [15] "SE72906" "SE82001" "SE82020" "SE82040" "SE82050" "SE82060" "SE82064"
## [22] "SE82143" "SE82147" "SE82203" "SE82223" "SE82224" "SE82233" "SE82264"
## [29] "SE82327" "SE82340"
colnames(item_info_clear)
## [1] "ItemID" "Screen ID"
## [3] "Subject" "Grade"
## [5] "Cycle" "Block"
## [7] "Block Sequence" "Secure Status"
## [9] "Content Domain" "Topic Area"
## [11] "Topic" "Cognitive Domain"
## [13] "Cognitive Area" "Environmental Knowledge"
## [15] "Maximum Points" "Item Type"
## [17] "Response Options" "Key"
## [19] "Scaling Status" "Label"
#What I need : ItemID, Cognitive Domain, "Item Type"
item_info_cleansing <- item_info_clear |>
select(ItemID, `Cognitive Domain`, `Item Type`)
#Recode the Item Type CR = 1, MC = 0
item_info_cleansing <- item_info_cleansing |>
mutate(`Item Type` = case_when
(`Item Type` == "CR" ~ 1,
`Item Type` == "MC" ~ 0,
TRUE ~ NA_real_))
#Create new variables of Applying and Reasoning
item_info_cleansing <- item_info_cleansing |>
mutate(Applying = case_when(
`Cognitive Domain` == "Applying" ~ 1,TRUE ~ 0),
Reasoning = case_when(
`Cognitive Domain` == "Reasoning" ~ 1,TRUE ~ 0)) |>
select(-`Cognitive Domain`)
rubric <-read_csv("/Users/eunjungmyoung/Library/CloudStorage/GoogleDrive-ejmyoung@stanford.edu/My Drive/0.Research/Process Data/TIMSS data/TIMSS_Process_Data/rubric.csv")
## Rows: 70 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Variable, Value Scheme Detailed, Correct Answer, Missing Scheme Det...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
rubric<-rubric |>
rename("ItemID" = Variable,
"Correct Answer" = `Correct Answer`) |>
select(ItemID, `Correct Answer`)
#delete the variables with A, B, C, D, E
rubric <- rubric |>
mutate(ItemID = case_when
(ItemID == "SE72280A" ~ "SE72280",
TRUE ~ ItemID)) |>
filter(!str_ends(ItemID, "[A-E]$"))
unique(rubric$ItemID)
## [1] "SE72000" "SE72038" "SE72078" "SE72120" "SE72143" "SE72168" "SE72205"
## [8] "SE72280" "SE72293" "SE72370" "SE72460" "SE72523" "SE72901" "SE82001"
## [15] "SE82020" "SE82040" "SE82050" "SE82060" "SE82064" "SE82143" "SE82147"
## [22] "SE82203" "SE82223" "SE82224" "SE82233" "SE82264" "SE82327" "SE82340"
## [29] "SE72329" "SE72906"
#merge the score_cleansing with the rubric with "Item ID")
recode_response <- score_cleansing |>
full_join(rubric, by = c("ItemID" = "ItemID"))
#change the score and `Correct Answer` to numeric
recode_response <- recode_response |>
mutate(score = as.numeric(score),
`Correct Answer` = as.numeric(`Correct Answer`))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Correct Answer = as.numeric(`Correct Answer`)`.
## Caused by warning:
## ! NAs introduced by coercion
#mutate the new variable : "Recoded Score" as 2 when the "score" aznd the `Correct Answer` value is identical, otherwise 0
recode_response <- recode_response |>
mutate(`Recoded Score` = ifelse(score == `Correct Answer`, 2, 0))
colnames(recode_response)
## [1] "IDSTUD" "ItemID" "score" "Correct Answer"
## [5] "Recoded Score"
#manually code the partially correct response
#SE72078 , #SE72523 #SE82060
#SE72078 : score 10, 11 ~ 2, if else ~ 0,
#SE72523 : score 10 ~ 1, score 20 ~ 2, ifelse ~ 0,
#SE82060 : score 0 ~ 0, score 1 ~ 1, score 2 ~2
recode_response<- recode_response |>
mutate(`Recoded Score` = case_when
(ItemID == "SE72078" & score %in% c(10,11) ~ 2,
ItemID == "SE72078" & !(score %in% c(10,11)) ~ 0,
ItemID == "SE72523" & score == 10 ~ 1,
ItemID == "SE72523" & score == 20 ~ 2,
ItemID == "SE72523" & !(score %in% c(10,20)) ~ 0,
ItemID == "SE82060" & score == 0 ~ 0,
ItemID == "SE82060" & score == 1 ~ 1,
ItemID == "SE82060" & score == 2 ~ 2,
TRUE ~ `Recoded Score`)) |>
select(-`Correct Answer`)
head(recode_response)
## # A tibble: 6 × 4
## IDSTUD ItemID score `Recoded Score`
## <dbl+lbl> <chr> <dbl> <dbl>
## 1 50010526 SE72000 79 0
## 2 50010526 SE72038 1 2
## 3 50010526 SE72078 10 2
## 4 50010526 SE72120 4 0
## 5 50010526 SE72143 10 2
## 6 50010526 SE72168 4 2
nrow(recode_response) #177 students with 64 items
## [1] 5310
table(recode_response$`Recoded Score`)
##
## 0 1 2
## 1697 80 3356
Final_grade<-recode_response |>
select(IDSTUD, ItemID, `Recoded Score`) |>
rename("Grade" = `Recoded Score`)
#Classical Test Theory (CTT) approach; Treat difficulty as the mean proportion score (average score ÷ maximum possible score). Example: Suppose 100 students answer an item scored 0–2: 40 students scored 0 / 30 students scored 1 / 30 students scored 2 , Average score = (40×0 + 30×1 + 30×2) / 100 = 90 / 100 = 0.90 // Maximum possible score = 2 // Difficulty index = 0.90 / 2 = 0.45 -> Interpretation: On average, students scored 45% of the maximum possible score. The closer to 1.0, the “easier” the item.
item_difficulty <- Final_grade |>
group_by(ItemID) |>
summarise(Average_Score = mean(Grade, na.rm = TRUE),
Max_Possible_Score = 2,
Difficulty = Average_Score / Max_Possible_Score) |>
arrange(Difficulty) |>
select(ItemID, Difficulty) |>
#round digits for two
mutate(Difficulty = round(Difficulty, 2))
#Check the "Booklet with F"
booklet_9_F <- booklet_9 %>%
select(IDSTUD, ends_with("_F"))
#pivot longer the booklet_9_F to have "IDSTUD", "ItemID", "F", the IDSTUD should be maintained
booklet_9_F_long <- booklet_9_F %>%
group_by(IDSTUD) %>%
pivot_longer(-IDSTUD, names_to = "ItemID", values_to = "F")
#get rid of the _F from the ItemID
booklet_9_F_long <- booklet_9_F_long %>%
mutate(ItemID = str_remove(ItemID, "_F$"))
nrow(booklet_9_F_long) # 176 students with the 30 items
## [1] 5280
unique(booklet_9_F_long$ItemID)
## [1] "SE82020" "SE82064" "SE82001" "SE82050" "SE82040" "SE82060" "SE82143"
## [8] "SE82147" "SE82224" "SE82203" "SE82223" "SE82264" "SE82340" "SE82327"
## [15] "SE82233" "SE72078" "SE72460" "SE72000" "SE72906" "SE72901" "SE72038"
## [22] "SE72120" "SE72143" "SE72523" "SE72168" "SE72205" "SE72293" "SE72280"
## [29] "SE72370" "SE72329"
#Merge the "Booklet_9_F_long" with "item_info_cleansing" with "ItemID"
item_info_F <- booklet_9_F_long |>
full_join(item_info_cleansing, by = c("ItemID" = "ItemID"))
colnames(item_info_F)
## [1] "IDSTUD" "ItemID" "F" "Item Type" "Applying" "Reasoning"
#Merge the Final grade with item_info_F with "IDSTUD" and "ItemID"
item_F_grade <- item_info_F |>
full_join(Final_grade, by = c("IDSTUD" = "IDSTUD", "ItemID" = "ItemID"))
colnames(item_F_grade)
## [1] "IDSTUD" "ItemID" "F" "Item Type" "Applying" "Reasoning"
## [7] "Grade"
#Merge the item difficulty
head(item_difficulty)
## # A tibble: 6 × 2
## ItemID Difficulty
## <chr> <dbl>
## 1 SE72000 0.24
## 2 SE72038 0.46
## 3 SE72120 0.46
## 4 SE82233 0.46
## 5 SE72280 0.49
## 6 SE72901 0.49
Final_F <- item_F_grade |>
left_join(item_difficulty, by = c("ItemID" = "ItemID"))
head(Final_F)
## # A tibble: 6 × 8
## # Groups: IDSTUD [1]
## IDSTUD ItemID F `Item Type` Applying Reasoning Grade Difficulty
## <dbl+lbl> <chr> <dbl+lbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 50010526 SE82020 1 0 0 0 2 0.89
## 2 50010526 SE82064 1 1 1 0 2 0.81
## 3 50010526 SE82001 1 1 0 0 2 0.74
## 4 50010526 SE82050 1 0 0 0 2 0.77
## 5 50010526 SE82040 1 1 1 0 2 0.59
## 6 50010526 SE82060 1 1 0 1 2 0.73
#delete the missing
Final_F <- Final_F |>
filter(!is.na(F) & !is.na(Grade) & !is.na(Difficulty))
#number of IDSTUD
n_distinct(Final_F$IDSTUD) # 176 students
## [1] 176
#number of the itemID
n_distinct(Final_F$ItemID) # 29 items -> 2906 is missing
## [1] 29
#save the data
write_csv(Final_F, "Final_F.csv")
# install.packages(c("glmmTMB","performance","emmeans","broom.mixed"))
library(glmmTMB)
## Warning: package 'glmmTMB' was built under R version 4.3.3
library(performance)
## Warning: package 'performance' was built under R version 4.3.3
library(emmeans)
## Warning: package 'emmeans' was built under R version 4.3.3
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(broom.mixed)
## Warning: package 'broom.mixed' was built under R version 4.3.3
## Registered S3 method overwritten by 'future':
## method from
## all.equal.connection parallelly
library(clubSandwich)
## Warning: package 'clubSandwich' was built under R version 4.3.3
## Registered S3 method overwritten by 'clubSandwich':
## method from
## bread.mlm sandwich
library(DHARMa)
## Warning: package 'DHARMa' was built under R version 4.3.3
## This is DHARMa 0.4.7. For overview type '?DHARMa'. For recent changes, type news(package = 'DHARMa')
# install.packages("glmmTMB")
# install.packages("performance")
# install.packages("emmeans")
# install.packages("broom.mixed")
# install.packages("clubSandwich")
# install.packages("DHARMa")
df<-Final_F |>
rename("Type" = `Item Type`) |>
rename("Frequency" = F)
# Expected columns:
# IDSTUD, ItemID, F, Type, Applying, Reasoning, Grade, Difficulty
# - F: count outcome (1..9)
# - Type: 0=MC, 1=CR
# - Applying, Reasoning: 0/1 dummies (Knowing is reference when both 0)
# - Grade: 0/1/2 (control)
# - Difficulty: CTT p-correct (closer to 1 = easier)
# Factors for random effects
df$IDSTUD <- factor(df$IDSTUD)
df$ItemID <- factor(df$ItemID)
# Make a "hardness" score so larger = harder (and scale it)
df$diff_hard <- 1 - pmin(pmax(df$Difficulty, 1e-6), 1-1e-6)
# Ensure predictors are numeric 0/1; Grade as numeric (or factor if you prefer nonlinearity)
df$Type <- as.numeric(df$Type)
df$Applying <- as.numeric(df$Applying)
df$Reasoning <- as.numeric(df$Reasoning)
df$Grade <- as.numeric(df$Grade)
df$Frequency <- as.integer(df$Frequency)
df$diff_hard <- as.numeric(df$diff_hard)
# Model 1d: Type only (+ difficulty)
m1d <- glmmTMB(
Frequency ~ Type + diff_hard + (1|IDSTUD) + (1|ItemID),
data = df, family = nbinom2())
summary(m1d)
## Family: nbinom2 ( log )
## Formula: Frequency ~ Type + diff_hard + (1 | IDSTUD) + (1 | ItemID)
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 13051.4 13090.7 -6519.7 13039.4 5098
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## IDSTUD (Intercept) 1.405e-01 3.748e-01
## ItemID (Intercept) 2.225e-09 4.717e-05
## Number of obs: 5104, groups: IDSTUD, 176; ItemID, 29
##
## Dispersion parameter for nbinom2 family (): 2.93e+08
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.355266 0.039686 8.952 <2e-16 ***
## Type -0.003896 0.022506 -0.173 0.863
## diff_hard 0.060518 0.070374 0.860 0.390
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Model 2d: + Grade (control)
m2d <- glmmTMB(
Frequency ~ Type + Grade + diff_hard + (1|IDSTUD) + (1|ItemID),
data = df, family = nbinom2())
## Warning in finalizeTMB(TMBStruc, obj, fit, h, data.tmb.old): Model convergence
## problem; false convergence (8). See vignette('troubleshooting'),
## help('diagnose')
summary(m2d)
## Family: nbinom2 ( log )
## Formula:
## Frequency ~ Type + Grade + diff_hard + (1 | IDSTUD) + (1 | ItemID)
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 13053.0 13098.8 -6519.5 13039.0 5097
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## IDSTUD (Intercept) 1.395e-01 3.735e-01
## ItemID (Intercept) 1.118e-09 3.344e-05
## Number of obs: 5104, groups: IDSTUD, 176; ItemID, 29
##
## Dispersion parameter for nbinom2 family (): 5.14e+07
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.336386 0.048767 6.898 5.28e-12 ***
## Type -0.003948 0.022506 -0.175 0.861
## Grade 0.009491 0.014284 0.664 0.506
## diff_hard 0.079178 0.075749 1.045 0.296
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Model 3d: Domain only (+ difficulty)
m3d <- glmmTMB(
Frequency ~ Applying + Reasoning + diff_hard + (1|IDSTUD) + (1|ItemID),data = df, family = nbinom2())
## Warning in finalizeTMB(TMBStruc, obj, fit, h, data.tmb.old): Model convergence
## problem; non-positive-definite Hessian matrix. See vignette('troubleshooting')
# Model 4d: Domain + Grade (+ difficulty)
m4d <- glmmTMB(
Frequency ~ Applying + Reasoning + Grade + diff_hard + (1|IDSTUD) + (1|ItemID), data = df, family = nbinom2())
# Model 5d: Type × Domain (+ difficulty)
m5d <- glmmTMB(
Frequency ~ Type * Applying + Type * Reasoning + diff_hard +
(1|IDSTUD) + (1|ItemID), data = df, family = nbinom2())
## Warning in finalizeTMB(TMBStruc, obj, fit, h, data.tmb.old): Model convergence
## problem; non-positive-definite Hessian matrix. See vignette('troubleshooting')
## Warning in finalizeTMB(TMBStruc, obj, fit, h, data.tmb.old): Model convergence
## problem; false convergence (8). See vignette('troubleshooting'),
## help('diagnose')
# Model 6d: Full (Type × Domain) + Grade (+ difficulty)
# You can also add interactions with difficulty if desired (see commented lines).
m6d <- glmmTMB(
Frequency ~ Type * Applying + Type * Reasoning + Grade + diff_hard +
# Type:diff_hard + Applying:diff_hard + Reasoning:diff_hard +
(1|IDSTUD) + (1|ItemID),
data = df, family = nbinom2())
## Warning in finalizeTMB(TMBStruc, obj, fit, h, data.tmb.old): Model convergence
## problem; non-positive-definite Hessian matrix. See vignette('troubleshooting')
# ---- 3) Model checks & summaries ----
# Compare fit
AIC(m1d, m2d, m3d, m4d, m5d, m6d)
## df AIC
## m1d 6 13051.43
## m2d 7 13052.99
## m3d 7 NA
## m4d 8 13054.12
## m5d 10 NA
## m6d 11 NA
summary(m6d)
## Family: nbinom2 ( log )
## Formula:
## Frequency ~ Type * Applying + Type * Reasoning + Grade + diff_hard +
## (1 | IDSTUD) + (1 | ItemID)
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## NA NA NA NA 5093
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## IDSTUD (Intercept) 1.395e-01 0.373520
## ItemID (Intercept) 1.717e-08 0.000131
## Number of obs: 5104, groups: IDSTUD, 176; ItemID, 29
##
## Dispersion parameter for nbinom2 family (): 2.04e+07
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.328883 0.053652 6.130 8.79e-10 ***
## Type -0.026435 0.037725 -0.701 0.483
## Applying 0.018699 0.033523 0.558 0.577
## Reasoning -0.026547 0.065537 -0.405 0.685
## Grade 0.009421 0.014291 0.659 0.510
## diff_hard 0.084973 0.084210 1.009 0.313
## Type:Applying 0.016302 0.056678 0.288 0.774
## Type:Reasoning 0.069377 0.076713 0.904 0.366
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
performance::check_overdispersion(m6d)
## # Overdispersion test
##
## dispersion ratio = 0.489
## p-value = < 0.001
## Underdispersion detected.
performance::check_convergence(m6d)
## [1] FALSE
# # Rate ratios (exp of log-coefs)
# exp(coef(summary(m6d))$cond[, "Estimate"])
# exp(confint(m6d, parm = "cond", method = "Wald"))
# Marginal means (helpful for interactions)
emmeans(m6d, ~ Type | Applying + Reasoning, type = "response")
## Applying = 0, Reasoning = 0:
## Type response SE df asymp.LCL asymp.UCL
## 0 1.45 0.0516 Inf 1.35 1.55
## 1 1.41 0.0593 Inf 1.30 1.53
##
## Applying = 1, Reasoning = 0:
## Type response SE df asymp.LCL asymp.UCL
## 0 1.48 0.0564 Inf 1.37 1.59
## 1 1.46 0.0588 Inf 1.35 1.58
##
## Applying = 0, Reasoning = 1:
## Type response SE df asymp.LCL asymp.UCL
## 0 1.41 0.0965 Inf 1.23 1.61
## 1 1.47 0.0576 Inf 1.36 1.59
##
## Applying = 1, Reasoning = 1:
## Type response SE df asymp.LCL asymp.UCL
## 0 1.44 0.1070 Inf 1.24 1.66
## 1 1.52 0.0880 Inf 1.36 1.71
##
## Confidence level used: 0.95
## Intervals are back-transformed from the log scale
#Check the "Booklet with F"
booklet_9_S <- booklet_9 %>%
select(IDSTUD, ends_with("_S"))
#pivot longer the booklet_9_F to have "IDSTUD", "ItemID", "F", the IDSTUD should be maintained
booklet_9_S_long <- booklet_9_S %>%
group_by(IDSTUD) %>%
pivot_longer(-IDSTUD, names_to = "ItemID", values_to = "S")
#get rid of the _F from the ItemID
booklet_9_S_long <- booklet_9_S_long %>%
mutate(ItemID = str_remove(ItemID, "_S$"))
nrow(booklet_9_S_long) # 176 students with the 30 items
## [1] 5280
#Merge the "Booklet_9_S_long" with "item_info_cleansing" with "ItemID"
item_info_S <- booklet_9_S_long |>
full_join(item_info_cleansing, by = c("ItemID" = "ItemID"))
colnames(item_info_S)
## [1] "IDSTUD" "ItemID" "S" "Item Type" "Applying" "Reasoning"
#Merge the Final grade with item_info_F with "IDSTUD" and "ItemID"
item_S_grade <- item_info_S |>
full_join(Final_grade, by = c("IDSTUD" = "IDSTUD", "ItemID" = "ItemID"))
colnames(item_S_grade)
## [1] "IDSTUD" "ItemID" "S" "Item Type" "Applying" "Reasoning"
## [7] "Grade"
#Merge the item difficulty
head(item_difficulty)
## # A tibble: 6 × 2
## ItemID Difficulty
## <chr> <dbl>
## 1 SE72000 0.24
## 2 SE72038 0.46
## 3 SE72120 0.46
## 4 SE82233 0.46
## 5 SE72280 0.49
## 6 SE72901 0.49
Final_S <- item_S_grade |>
left_join(item_difficulty, by = c("ItemID" = "ItemID"))
head(Final_S)
## # A tibble: 6 × 8
## # Groups: IDSTUD [1]
## IDSTUD ItemID S `Item Type` Applying Reasoning Grade Difficulty
## <dbl+lbl> <chr> <dbl+lbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 50010526 SE82020 24.0 0 0 0 2 0.89
## 2 50010526 SE82064 16.4 1 1 0 2 0.81
## 3 50010526 SE82001 64.5 1 0 0 2 0.74
## 4 50010526 SE82050 21.2 0 0 0 2 0.77
## 5 50010526 SE82040 83.1 1 1 0 2 0.59
## 6 50010526 SE82060 82.0 1 0 1 2 0.73
#delete the missing
Final_S <- Final_S |>
filter(!is.na(S) & !is.na(Grade) & !is.na(Difficulty))
#number of IDSTUD
n_distinct(Final_S$IDSTUD) # 176 students
## [1] 176
#number of the itemID
n_distinct(Final_S$ItemID) # 29 items -> 2906 is missing
## [1] 29
#save the data
write_csv(Final_S, "Final_S.csv")
##Run the analysis
# ----- Load & prep (Time) -----
dfS <- read.csv("Final_S.csv", stringsAsFactors = FALSE)
dfS<-dfS |>
rename("Time" = S) |>
rename("Type" = `Item.Type`)
dif <- pmin(pmax(dfS$Difficulty, 1e-6), 1 - 1e-6)
hard <- 1 - dif # complement (simple, stable)
# Or, if you prefer: hard <- -qlogis(dif) # logit scale (often nice)
dfS$diff_hard <- as.numeric(hard)
# ----- Model set (Gamma log) -----
# S1d: Type + difficulty
s1d_S <- glmmTMB(
Time ~ Type + diff_hard + (1|IDSTUD) + (1|ItemID),
data = dfS, family = Gamma(link = "log"))
# S2d: + Grade
s2d_S <- glmmTMB(
Time ~ Type + Grade + diff_hard + (1|IDSTUD) + (1|ItemID),
data = dfS, family = Gamma(link = "log"))
# S3d: Domain dummies + difficulty
s3d_S <- glmmTMB(
Time ~ Applying + Reasoning + diff_hard + (1|IDSTUD) + (1|ItemID),
data = dfS, family = Gamma(link = "log"))
# S4d: Domain dummies + Grade + difficulty
s4d_S <- glmmTMB(
Time ~ Applying + Reasoning + Grade + diff_hard+ (1|IDSTUD) + (1|ItemID),
data = dfS, family = Gamma(link = "log"))
# S5d: Type × Domain (dummies) + difficulty
s5d_S <- glmmTMB(
Time ~ Type*Applying + Type*Reasoning + diff_hard +
(1|IDSTUD) + (1|ItemID),
data = dfS, family = Gamma(link = "log"))
# S6d: Full (Type × Domain dummies) + Grade + difficulty
s6d_S <- glmmTMB(
Time ~ Type*Applying + Type*Reasoning + Grade + diff_hard +
(1|IDSTUD) + (1|ItemID),
data = dfS, family = Gamma(link = "log"))
# ----- Model comparison & key outputs -----
AIC(s1d_S, s2d_S, s3d_S, s4d_S, s5d_S, s6d_S)
## df AIC
## s1d_S 6 42578.92
## s2d_S 7 42560.02
## s3d_S 7 42583.35
## s4d_S 8 42564.40
## s5d_S 10 42574.85
## s6d_S 11 42555.97
summary(s6d_S)
## Family: Gamma ( log )
## Formula:
## Time ~ Type * Applying + Type * Reasoning + Grade + diff_hard +
## (1 | IDSTUD) + (1 | ItemID)
## Data: dfS
##
## AIC BIC logLik -2*log(L) df.resid
## 42556.0 42627.9 -21267.0 42534.0 5093
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## IDSTUD (Intercept) 0.14311 0.3783
## ItemID (Intercept) 0.08609 0.2934
## Number of obs: 5104, groups: IDSTUD, 176; ItemID, 29
##
## Dispersion estimate for Gamma family (sigma^2): 0.247
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.774622 0.176898 15.685 < 2e-16 ***
## Type 0.437307 0.183302 2.386 0.0170 *
## Applying 0.218589 0.165196 1.323 0.1858
## Reasoning 0.641947 0.319687 2.008 0.0446 *
## Grade -0.039092 0.008566 -4.563 5.03e-06 ***
## diff_hard 0.872051 0.392509 2.222 0.0263 *
## Type:Applying 0.179184 0.277826 0.645 0.5190
## Type:Reasoning -0.016905 0.374256 -0.045 0.9640
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
performance::check_convergence(s6d_S)
## [1] TRUE
# Time ratios (= multiplicative effect on expected time)
# exp(coef(summary(s6d_S))$cond[, "Estimate"])
# exp(confint(s6d_S, parm = "cond", method = "Wald"))
# Marginal means
emmeans(s6d_S, ~ Type | Applying + Reasoning, type = "response")
## Applying = 0, Reasoning = 0:
## Type response SE df asymp.LCL asymp.UCL
## 0 20.5 2.22 Inf 16.5 25.3
## 1 31.7 4.82 Inf 23.5 42.7
##
## Applying = 1, Reasoning = 0:
## Type response SE df asymp.LCL asymp.UCL
## 0 25.5 3.29 Inf 19.8 32.8
## 1 47.2 6.75 Inf 35.6 62.4
##
## Applying = 0, Reasoning = 1:
## Type response SE df asymp.LCL asymp.UCL
## 0 38.9 11.80 Inf 21.4 70.6
## 1 59.2 8.02 Inf 45.4 77.2
##
## Applying = 1, Reasoning = 1:
## Type response SE df asymp.LCL asymp.UCL
## 0 48.4 16.40 Inf 24.9 93.9
## 1 88.1 21.80 Inf 54.2 143.2
##
## Confidence level used: 0.95
## Intervals are back-transformed from the log scale
#Number of the Item Type, Number of Knowing, Applying, Knowing,> 30 items
des<-item_info_cleansing |>
summarise(Total_items = n(),
MC_items = sum(`Item Type` == 0),
CR_items = sum(`Item Type` == 1),
Knowing_items = sum(Applying == 0 & Reasoning == 0),
Applying_items = sum(Applying == 1),
Reasoning_items = sum(Reasoning == 1)) |>
gt()
des
| Total_items | MC_items | CR_items | Knowing_items | Applying_items | Reasoning_items |
|---|---|---|---|---|---|
| 30 | 15 | 15 | 12 | 11 | 7 |
# Item Format, Cognitive Domain, and their difficulty
item_summmary<- item_info_cleansing |>
left_join(item_difficulty, by = c("ItemID" = "ItemID")) |>
group_by(`Item Type`, Applying, Reasoning) |>
summarise(Average_Difficulty = mean(Difficulty, na.rm = TRUE),
Count = n()) |>
ungroup() |>
mutate(`Item Type` = case_when
(`Item Type` == 0 ~ "MC",
`Item Type` == 1 ~ "CR",
TRUE ~ NA_character_)) |>
mutate("Cognitive Domain" = case_when(
Applying == 1 ~ "Applying",
Reasoning == 1 ~ "Reasoning",
Applying == 0 & Reasoning == 0 ~ "Knowing",
TRUE ~ NA_character_)) |>
select(-Applying, -Reasoning)
## `summarise()` has grouped output by 'Item Type', 'Applying'. You can override
## using the `.groups` argument.
item_summmary
## # A tibble: 6 × 4
## `Item Type` Average_Difficulty Count `Cognitive Domain`
## <chr> <dbl> <int> <chr>
## 1 MC 0.646 8 Knowing
## 2 MC 0.49 1 Reasoning
## 3 MC 0.753 6 Applying
## 4 CR 0.718 4 Knowing
## 5 CR 0.682 6 Reasoning
## 6 CR 0.542 5 Applying
#Join the average_F and average_S with the item_summmary
average_F <- Final_F |>
group_by(`Item Type`, Applying, Reasoning) |>
summarise(Average_F = mean(F, na.rm = TRUE)) |>
mutate(`Item Type` = case_when
(`Item Type` == 0 ~ "MC",
`Item Type` == 1 ~ "CR",
TRUE ~ NA_character_)) |>
ungroup() |>
mutate("Cognitive Domain" = case_when(
Applying == 1 ~ "Applying",
Reasoning == 1 ~ "Reasoning",
Applying == 0 & Reasoning == 0 ~ "Knowing",
TRUE ~ NA_character_)) |>
select(-Applying, -Reasoning)
## `summarise()` has grouped output by 'Item Type', 'Applying'. You can override
## using the `.groups` argument.
average_S <- Final_S |>
group_by(`Item Type`, Applying, Reasoning) |>
summarise(Average_S = mean(S, na.rm = TRUE)) |>
mutate(`Item Type` = case_when
(`Item Type` == 0 ~ "MC",
`Item Type` == 1 ~ "CR",
TRUE ~ NA_character_)) |>
ungroup() |>
mutate("Cognitive Domain" = case_when(
Applying == 1 ~ "Applying",
Reasoning == 1 ~ "Reasoning",
Applying == 0 & Reasoning == 0 ~ "Knowing",
TRUE ~ NA_character_)) |>
select(-Applying, -Reasoning)
## `summarise()` has grouped output by 'Item Type', 'Applying'. You can override
## using the `.groups` argument.
item_summmary <- item_summmary |>
left_join(average_F, by = c("Item Type" = "Item Type", "Cognitive Domain" = "Cognitive Domain")) |>
left_join(average_S, by = c("Item Type" = "Item Type", "Cognitive Domain" = "Cognitive Domain")) |>
mutate(Average_Difficulty = round(Average_Difficulty, 2)) |>
mutate(Average_F = round(Average_F, 2)) |>
mutate(Average_S = round(Average_S, 2)) |>
gt()
item_summmary
| Item Type | Average_Difficulty | Count | Cognitive Domain | Average_F | Average_S |
|---|---|---|---|---|---|
| MC | 0.65 | 8 | Knowing | 1.57 | 22.76 |
| MC | 0.49 | 1 | Reasoning | 1.54 | 49.62 |
| MC | 0.75 | 6 | Applying | 1.58 | 27.21 |
| CR | 0.72 | 4 | Knowing | 1.52 | 34.61 |
| CR | 0.68 | 6 | Reasoning | 1.59 | 64.12 |
| CR | 0.54 | 5 | Applying | 1.59 | 59.95 |