1 Description

The Progress in International Reading Literacy Study (PIRLS) is an large scale international assessment of reading literacy of young students. For this problem, the data set is the PIRLS 2006 data from only the United States. The question of interest is to predict “enjoy reading” categorical responses from the other co-variates.

Source: PIRLS 2006

Column 1: Reading score
Column 2: School ID
Column 3: Gender ID, 1 = Female, 0 = Male
Column 4: US born, 1 = Yes, 0 = No
Column 5: Watch TV, 1 = no time, 2 = up to 1 hour, 3 = 1-3 hours, 4 = 3-5 hours, 5 = 5 hours or more
Column 6: Economically disadvantaged, > 50%, 26 - 50%, 11 - 25%, < 10%
Column 7: The response to a statement “I enjoy reading”, 1 = disagree a lot, 2 = disagree a little, 3 = agree a little, 4 = agree a lot

2 load or install packages to be used

pacman::p_load(magrittr, vcrpart, ggplot2, dplyr, tidyr, ltm, eRm, lme4)

3 Input Data

dta2 <- read.table("C:/Users/HANK/Desktop/HOMEWORK/pirls_us.txt", h=T)

names(dta2) <- c("School", "Gender", "US", "TVtime", "Economic", "Reading", "Enjoy")

dta2 <- dta2 %>% 
  mutate(School = factor(School),
         Gender = factor(Gender, 
                         levels=0:1, 
                         labels=c("Male","Female")),
         US = factor(US),
         TVtime = factor(TVtime,
                         levels=1:5,
                         labels=c("no time",
                                  "up to 1 hour", 
                                  "1-3 hours",
                                  "3-5 hours",
                                  "5 hours or more")), 
         Economic = factor(Economic,
                           levels = c(">50%", 
                                      "26-50", 
                                      "11-25", 
                                      "<10%")),
         Enjoy = factor(Enjoy, levels = 1:4))

head(dta2)
##   School Gender US    TVtime Economic  Reading Enjoy
## 1      1   Male  1 1-3 hours     <10% 547.0412     1
## 2      1   Male  1 3-5 hours     <10% 545.9297     2
## 3      1   Male  1 3-5 hours     <10% 587.8460     2
## 4      1 Female  1 1-3 hours     <10% 450.8517     2
## 5      1   Male  1   no time     <10% 525.2201     3
## 6      1   Male  1   no time     <10% 555.4918     3
with(dta2, ftable(US, Gender, Enjoy))
##           Enjoy    1    2    3    4
## US Gender                          
## 0  Male           41   19   58   84
##    Female          9   13   31  103
## 1  Male          332  230  589  943
##    Female        155  186  487 1334
with(dta2, ftable(US, Economic, Enjoy))
##             Enjoy   1   2   3   4
## US Economic                      
## 0  >50%            30  13  50 112
##    26-50           10   8  22  30
##    11-25            5   4   3  15
##    <10%             5   7  14  30
## 1  >50%           214 188 439 891
##    26-50          114  95 246 541
##    11-25           60  36 125 293
##    <10%            99  97 266 552
with(dta2, ftable(US, Economic, TVtime, Enjoy))
##                             Enjoy   1   2   3   4
## US Economic TVtime                               
## 0  >50%     no time                18   7  17  34
##             up to 1 hour            1   0  10  11
##             1-3 hours               5   2  14  28
##             3-5 hours               5   4   8  32
##             5 hours or more         1   0   1   7
##    26-50    no time                 8   3   6   8
##             up to 1 hour            0   2   2   7
##             1-3 hours               2   1  10  10
##             3-5 hours               0   2   4   3
##             5 hours or more         0   0   0   2
##    11-25    no time                 4   1   0   4
##             up to 1 hour            1   0   1   2
##             1-3 hours               0   2   2   4
##             3-5 hours               0   1   0   4
##             5 hours or more         0   0   0   1
##    <10%     no time                 2   2   3   5
##             up to 1 hour            0   1   4   3
##             1-3 hours               2   3   3  12
##             3-5 hours               1   1   4   9
##             5 hours or more         0   0   0   1
## 1  >50%     no time               119  73 140 268
##             up to 1 hour           26  39  85 126
##             1-3 hours              31  45 135 239
##             3-5 hours              30  28  65 224
##             5 hours or more         8   3  14  34
##    26-50    no time                54  29  71 125
##             up to 1 hour           19  24  39  83
##             1-3 hours              24  23  83 162
##             3-5 hours              15  18  51 145
##             5 hours or more         2   1   2  26
##    11-25    no time                24  10  27  55
##             up to 1 hour           11   7  27  41
##             1-3 hours              13  10  47 130
##             3-5 hours               8   8  21  59
##             5 hours or more         4   1   3   8
##    <10%     no time                44  23  61  73
##             up to 1 hour           15  21  59  81
##             1-3 hours              20  30  80 221
##             3-5 hours              14  20  61 147
##             5 hours or more         6   3   5  30
barplot(with(dta2, prop.table(table(Enjoy, US), m=2)), beside=T, legend=T, args.legend=list(x="topleft", cex=.5))

4 GLMM

m0 <- ordinal::clmm(Enjoy ~ Gender + US + Reading + (1 | School) + (1 | TVtime) + (1 | Economic), data = dta2)
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.
## Warning: (2) Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables? 
## In addition: Absolute and relative convergence criteria were met
summary(m0)
## Cumulative Link Mixed Model fitted with the Laplace approximation
## 
## formula: Enjoy ~ Gender + US + Reading + (1 | School) + (1 | TVtime) +  
##     (1 | Economic)
## data:    dta2
## 
##  link  threshold nobs logLik   AIC      niter     max.grad cond.H 
##  logit flexible  4614 -5095.06 10208.13 599(1850) 2.82e-01 4.9e+06
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept) 0.166974 0.40863 
##  TVtime   (Intercept) 0.093257 0.30538 
##  Economic (Intercept) 0.003375 0.05809 
## Number of groups:  School 180,  TVtime 5,  Economic 4 
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## GenderFemale  0.6984740  0.0593244  11.774   <2e-16 ***
## US1          -0.1629514  0.1117947  -1.458    0.145    
## Reading       0.0058490  0.0004666  12.535   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Threshold coefficients:
##     Estimate Std. Error z value
## 1|2   1.0564     0.3011   3.509
## 2|3   1.8456     0.3015   6.122
## 3|4   3.1448     0.3041  10.343

5 end