PIRLS process data descriptives

```{r, echo = FALSE, warning=FALSE, message=FALSE}
knitr::opts_chunk$set(echo = FALSE, warning=FALSE, message=FALSE)
library(tidyverse, quietly=T)
theme_set(theme_minimal())
```

# Chunk defaults for the report: echo the code, hide warnings and messages.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE
)

# Packages used throughout the report.
library(tidyverse, quietly = TRUE)
library(EdSurvey)
library(broom)
library(knitr)

# Every ggplot2 figure uses the minimal theme.
theme_set(theme_minimal())

# PIRLS 2021 data for country code "nor", read through EdSurvey.
nor <- readPIRLS("./dataset/PIRLS/2021", countries = "nor")
Found cached data for country code "nor".
# Read the SPSS file that the rest of the script analyses as `proc`.
proc <- haven::read_sav("dataset/PIRLS/2021/ASPNORR5.sav")

# Gender is copied from `nor` by row position, so both sources must list the
# same students in the same order. This used to be checked by hand with
# table(nor$idstud == proc$IDSTUD); fail loudly instead if it ever stops
# holding (e.g. after a data refresh).
stopifnot(
  nrow(proc) == length(nor$idstud),
  all(nor$idstud == proc$IDSTUD)
)
proc$gender <- nor$itsex

# Inspect the booklets ("hefter"). Columns of interest contain "RE51"; the
# booklet code is taken to be the capital letter that follows it and is itself
# followed by a digit 0-2 (e.g. "RE51C01_S" -> "C").
items <- grep("RE51", colnames(proc), value = TRUE)
hefter <- stringr::str_extract(items, "RE51[A-Z](?=[0-2])") %>%
  stringr::str_remove("RE51") %>%
  unique()
# str_extract() yields NA for any "RE51" column that does not follow the
# pattern above; drop it so no NA booklet is ever iterated over.
hefter <- hefter[!is.na(hefter)]

# Columns of a single booklet, split by name suffix (_S, _R, _F).
hefte <- "Z"
itemsS <- grep(paste0("RE51", hefte), colnames(proc), value = TRUE) %>%
  grep(pattern = "_S", value = TRUE)
itemsR <- grep(paste0("RE51", hefte), colnames(proc), value = TRUE) %>%
  grep(pattern = "_R", value = TRUE)
itemsF <- grep(paste0("RE51", hefte), colnames(proc), value = TRUE) %>%
  grep(pattern = "_F", value = TRUE)

# #which students were exposed to hefte? if sum to NA then not given
# tmp <- select(proc, all_of(itemsS))
# idx <- !apply(is.na(tmp), 1, all)
# tmp <- tmp[idx, ]
# naniar::vis_miss(tmp)
# 
# total <- rowSums(tmp, na.rm=T)
# tmp <- select(proc, all_of(itemsR))
# tmp <- tmp[idx, ]
# naniar::vis_miss(tmp)
# first <- rowSums(tmp, na.rm=T)
# 
# tmp <- select(proc, all_of(itemsF))
# tmp <- tmp[idx, ]
# naniar::vis_miss(tmp)
# freq <- rowSums(tmp, na.rm=T)

#aggr.df <- data.frame(total=total, first=first, freq=freq)

Hefter

# Names of the columns in `data` that belong to booklet `booklet` (prefix
# "RE51" + booklet code) and whose name also matches `suffix`.
booklet_item_cols <- function(data, booklet, suffix) {
  booklet_cols <- grep(paste0("RE51", booklet), colnames(data), value = TRUE)
  grep(pattern = suffix, booklet_cols, value = TRUE)
}

# Restrict `data` to `item_cols` plus gender, drop rows where every item is
# missing, draw the missingness plot of the remaining rows under `title`, and
# return only the rows that are complete on all items and on gender.
complete_booklet_rows <- function(data, item_cols, title) {
  subset_df <- select(data, all_of(c(item_cols, "gender")))
  has_any_item <- rowSums(!is.na(select(subset_df, -"gender"))) > 0
  subset_df <- subset_df[has_any_item, ]
  # Explicit print(): ggplot objects are not auto-printed inside functions
  # or loops.
  print(naniar::vis_miss(select(subset_df, -"gender")) + ggtitle(title))
  subset_df[complete.cases(subset_df), ]
}

# Print the per-row mean/median summary, the histogram of row means, and the
# gender comparison (t-test table + boxplot) for one set of items.
# `scores` holds the item columns plus `gender`; `tt` is the section label.
describe_item_scores <- function(scores, tt) {
  cat("\n\n", tt, "\n")

  # Freeze the item matrix BEFORE adding summary columns, so that the median
  # is taken over the items only and not over items + the new `mean` column.
  item_mat <- as.matrix(select(scores, -"gender"))
  scores$mean <- apply(item_mat, 1, mean)
  scores$median <- apply(item_mat, 1, median)

  cat("\n Mean of mean: ", round(mean(scores$mean),1), " \n Mean of median: ", round(mean(scores$median),1), "\n")
  # Must be printed explicitly, otherwise the histogram is silently dropped.
  print(ggplot(scores, aes(mean)) + geom_histogram() + ggtitle(tt))

  cat("\n\n  By gender \n\n")
  t_out <- t.test(mean ~ gender, data = scores)
  gender_tab <- c(round(t_out$estimate, 2), pval = round(t_out$p.value, 2))
  print(kable(gender_tab))
  print(
    ggplot(scores, aes(gender, mean, color = gender)) +
      geom_boxplot() +
      ggtitle(tt)
  )

  invisible(scores)
}

# Complete-case data per booklet, collected for later use.
mylist <- list()

for (hefte in hefter) {
  cat("\n\n\\pagebreak\n")
  cat("\n\n## Hefte ",hefte , "\n\n")

  itemsS <- booklet_item_cols(proc, hefte, "_S")
  itemsR <- booklet_item_cols(proc, hefte, "_R")
  itemsF <- booklet_item_cols(proc, hefte, "_F")

  print(itemsS)

  cat("\n\n### Missingness ",hefte , "\n\n")

  tmpS <- complete_booklet_rows(proc, itemsS, "_S items")
  tmpR <- complete_booklet_rows(proc, itemsR, "_R items")
  tmpF <- complete_booklet_rows(proc, itemsF, "_F items")

  # Entries are appended in loop order as before, but are now also named by
  # booklet so they can be told apart. The inner key "Z" is kept unchanged
  # for compatibility with any existing code that reads `$Z`.
  mylist[[hefte]] <- list(Z = list(tmpS, tmpR, tmpF))

  cat("\n\n### Aggregating over items in  ",hefte , "\n\n")

  cat("\n _S har ", nrow(tmpS), "komplette observasjoner.\n",
      "\n _R har ", nrow(tmpR), "komplette observasjoner.\n",
      "\n _F har ", nrow(tmpF), "komplette observasjoner.\n")

  describe_item_scores(tmpS, "_S")
  describe_item_scores(tmpR, "_R")
  describe_item_scores(tmpF, "_F")
}

Hefte C

[1] “RE51C01_S” “RE51C02_S” “RE51C03_S” “RE51C04_S” “RE51C05_S” “RE51C06_S” [7] “RE51C07_S” “RE51C08_S” “RE51C09_S” “RE51C10_S” “RE51C11_S” “RE51C12_S” [13] “RE51C13_S” “RE51C14_S” “RE51C15_S”

Missingness C

Aggregating over items in C

_S har 449 komplette observasjoner.

_R har 449 komplette observasjoner.

_F har 458 komplette observasjoner.

_S

Mean of mean: 91.1
Mean of median: 71.4

By gender

x
mean in group GIRL 92.62
mean in group BOY 89.52
pval 0.33

_R

Mean of mean: 69.2
Mean of median: 53.2

By gender

x
mean in group GIRL 71.57
mean in group BOY 66.78
pval 0.09

_F

Mean of mean: 2.4
Mean of median: 2.3

By gender

x
mean in group GIRL 2.44
mean in group BOY 2.41
pval 0.84

Hefte D

[1] “RE51D01_S” “RE51D02_S” “RE51D03_S” “RE51D04_S” “RE51D05_S” “RE51D06_S” [7] “RE51D07_S” “RE51D08_S” “RE51D09_S” “RE51D10_S” “RE51D11_S” “RE51D12_S” [13] “RE51D13_S” “RE51D14_S” “RE51D15_S” “RE51D16_S”

Missingness D

Aggregating over items in D

_S har 391 komplette observasjoner.

_R har 391 komplette observasjoner.

_F har 402 komplette observasjoner.

_S

Mean of mean: 79
Mean of median: 62.5

By gender

x
mean in group GIRL 77.57
mean in group BOY 80.21
pval 0.37

_R

Mean of mean: 62.3
Mean of median: 47.8

By gender

x
mean in group GIRL 60.49
mean in group BOY 63.86
pval 0.17

_F

Mean of mean: 2.1
Mean of median: 1.9

By gender

x
mean in group GIRL 2.18
mean in group BOY 2.03
pval 0.27

Hefte N

[1] “RE51N01_S” “RE51N02_S” “RE51N03_S” “RE51N04_S” “RE51N05_S” “RE51N06_S” [7] “RE51N07_S” “RE51N08_S” “RE51N09_S” “RE51N10_S” “RE51N11_S” “RE51N12_S” [13] “RE51N13_S” “RE51N14_S” “RE51N15_S”

Missingness N

Aggregating over items in N

_S har 451 komplette observasjoner.

_R har 451 komplette observasjoner.

_F har 470 komplette observasjoner.

_S

Mean of mean: 90.4
Mean of median: 71.7

By gender

x
mean in group GIRL 91.07
mean in group BOY 89.74
pval 0.67

_R

Mean of mean: 74.3
Mean of median: 58.8

By gender

x
mean in group GIRL 75.33
mean in group BOY 73.31
pval 0.47

_F

Mean of mean: 1.9
Mean of median: 1.7

By gender

x
mean in group GIRL 1.89
mean in group BOY 1.90
pval 0.95

Hefte R

[1] “RE51R01_S” “RE51R02_S” “RE51R03_S” “RE51R04_S” “RE51R05_S” “RE51R06_S” [7] “RE51R07_S” “RE51R08_S” “RE51R09_S” “RE51R10_S” “RE51R11_S” “RE51R13_S” [13] “RE51R14_S” “RE51R15_S” “RE51R16_S” “RE51R17_S”

Missingness R

Aggregating over items in R

_S har 388 komplette observasjoner.

_R har 388 komplette observasjoner.

_F har 390 komplette observasjoner.

_S

Mean of mean: 63.7
Mean of median: 49.5

By gender

x
mean in group GIRL 63.34
mean in group BOY 64.11
pval 0.75

_R

Mean of mean: 50.4
Mean of median: 38.4

By gender

x
mean in group GIRL 52.15
mean in group BOY 48.70
pval 0.06

_F

Mean of mean: 2.4
Mean of median: 2.2

By gender

x
mean in group GIRL 2.22
mean in group BOY 2.58
pval 0.02

Hefte T

[1] “RE51T01_S” “RE51T02_S” “RE51T03_S” “RE51T04_S” “RE51T05_S” “RE51T06_S” [7] “RE51T07_S” “RE51T08_S” “RE51T09_S” “RE51T10_S” “RE51T11_S” “RE51T12_S” [13] “RE51T13_S” “RE51T14_S” “RE51T15_S” “RE51T16_S”

Missingness T

Aggregating over items in T

_S har 393 komplette observasjoner.

_R har 393 komplette observasjoner.

_F har 394 komplette observasjoner.

_S

Mean of mean: 70.4
Mean of median: 55

By gender

x
mean in group GIRL 71.17
mean in group BOY 69.59
pval 0.57

_R

Mean of mean: 55.8
Mean of median: 43.7

By gender

x
mean in group GIRL 57.31
mean in group BOY 54.16
pval 0.15

_F

Mean of mean: 2.2
Mean of median: 2.1

By gender

x
mean in group GIRL 2.24
mean in group BOY 2.22
pval 0.87

Hefte Z

[1] “RE51Z01_S” “RE51Z02_S” “RE51Z03_S” “RE51Z04_S” “RE51Z05_S” “RE51Z06_S” [7] “RE51Z07_S” “RE51Z08_S” “RE51Z09_S” “RE51Z10_S” “RE51Z11_S” “RE51Z12_S” [13] “RE51Z13_S” “RE51Z14_S” “RE51Z15_S”

Missingness Z

Aggregating over items in Z

_S har 432 komplette observasjoner.

_R har 432 komplette observasjoner.

_F har 443 komplette observasjoner.

_S

Mean of mean: 85
Mean of median: 68.6

By gender

x
mean in group GIRL 85.65
mean in group BOY 84.37
pval 0.67

_R

Mean of mean: 64.8
Mean of median: 51.9

By gender

x
mean in group GIRL 65.79
mean in group BOY 63.89
pval 0.45

_F

Mean of mean: 2.5
Mean of median: 2.3

By gender

x
mean in group GIRL 2.48
mean in group BOY 2.43
pval 0.74