Init

library(pacman)
p_load(kirkegaard, sf, rms, caret, doFuture)
options(digits = 3)

Parallel

# Register futures as the backend for foreach's %dopar%.
doFuture::registerDoFuture()
# `multiprocess` is a deprecated strategy (defunct in recent releases of the
# future package). `multisession` is the portable replacement: it runs the
# workers as background R sessions on every platform.
plan(multisession, workers = 4)
# Lift the cap on the total size of globals exported to workers; the
# datasets used in this analysis are large.
options(future.globals.maxSize = Inf)

Ad hoc functions

# Copy the column names of `x` to the clipboard as a one-column tibble.
# `print = FALSE` is spelled out because `F` is an ordinary variable that
# can be reassigned.
names2clip = function(x) {
  write_clipboard(tibble(colnames(x)), print = FALSE)
}

# Replace every element of `x` that lies strictly below `lower` or strictly
# above `upper` with NA and return the result. The bounds themselves are
# kept; with the default bounds nothing is replaced.
na_outside = function(x, lower = -Inf, upper = Inf) {
  is_out_of_range = x < lower | x > upper
  replace(x, is_out_of_range, NA)
}

# Quick check: values below 2 or above 7 become NA
na_outside(1:10, lower = 2, upper = 7)
##  [1] NA  2  3  4  5  6  7 NA NA NA

Data

Microdata

#read some large datasets
# read_csv2() expects ';' as the delimiter, ',' as the decimal mark and '.'
# as the grouping mark; the NU_NOTA_* score columns end up as col_number().
# NOTE(review): presumably the raw file writes scores with '.' as the
# decimal mark, so the decimal point is dropped on import -- confirm.
enem = read_csv2(file = "data/Microdados_enem_2016/DADOS/microdados_enem_2016.csv")
## Using ',' as decimal and '.' as grouping mark. Use read_delim() for more control.
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   NO_MUNICIPIO_RESIDENCIA = col_character(),
##   SG_UF_RESIDENCIA = col_character(),
##   TP_SEXO = col_character(),
##   NO_MUNICIPIO_NASCIMENTO = col_character(),
##   SG_UF_NASCIMENTO = col_character(),
##   NO_MUNICIPIO_ESC = col_character(),
##   SG_UF_ESC = col_character(),
##   NO_ENTIDADE_CERTIFICACAO = col_character(),
##   SG_UF_ENTIDADE_CERTIFICACAO = col_character(),
##   NO_MUNICIPIO_PROVA = col_character(),
##   SG_UF_PROVA = col_character(),
##   NU_NOTA_CN = col_number(),
##   NU_NOTA_CH = col_number(),
##   NU_NOTA_LC = col_number(),
##   NU_NOTA_MT = col_number(),
##   TX_RESPOSTAS_CN = col_character(),
##   TX_RESPOSTAS_CH = col_character(),
##   TX_RESPOSTAS_LC = col_character(),
##   TX_RESPOSTAS_MT = col_character(),
##   TX_GABARITO_CN = col_character()
##   # ... with 39 more columns
## )
## See spec(...) for full column specifications.

Spatial data

# Alternative shapefiles, kept for reference:
# munis = read_sf("data/spatial/bra_adm2/BRA_adm2.shp")
# munis = read_sf("data/spatial/br_localidades_2010/BR_Localidades_2010.shp")
munis_raw = read_sf("data/spatial/br_municipios/BRMUE250GC_SIR.shp")
# Numeric copy of the municipality code, used as the merge key
munis_raw$id = as.numeric(munis_raw$CD_GEOCMU)

Recode

Enem

#fix scores
# The four NU_NOTA_* columns were imported as col_number() with '.' treated
# as a grouping mark, so they arrive on mixed scales (presumably values
# written with a decimal point lost it -- confirm against the raw file).
# Values below `cutoff` are multiplied by 10 to put them on the same scale
# as the rest; anything still below `min_valid` afterwards is set to NA.
#
# The same cutoff (2000) is now used for every subject. Science previously
# used 2500. The comparison is also `>=`, so a value exactly equal to the
# cutoff is kept instead of silently becoming NA.
rescale_enem_score = function(x, cutoff = 2000, min_valid = 2000) {
  rescaled = case_when(
    x < cutoff ~ x * 10,
    x >= cutoff ~ x
  )
  na_outside(rescaled, lower = min_valid)
}

# Column order matters: later code selects science:math as a range.
enem$science = rescale_enem_score(enem$NU_NOTA_CN)

enem$humanities = rescale_enem_score(enem$NU_NOTA_CH)

enem$language = rescale_enem_score(enem$NU_NOTA_LC)

enem$math = rescale_enem_score(enem$NU_NOTA_MT)

# g: each of the four subject scores is passed through standardize(), then
# averaged per row, ignoring subjects that are missing for that row
enem$g = rowMeans(map_df(select(enem, science:math), standardize), na.rm = TRUE)

# Correlations among the four subject scores and the composite
wtd.cors(select(enem, science:g))
##            science humanities language  math     g
## science      1.000      0.614    0.553 0.601 0.836
## humanities   0.614      1.000    0.695 0.539 0.861
## language     0.553      0.695    1.000 0.485 0.825
## math         0.601      0.539    0.485 1.000 0.792
## g            0.836      0.861    0.825 0.792 1.000
#outcomes
# Each questionnaire item is converted to the integer index of its factor
# level (levels are the sorted unique answers). For the two parental
# education items, answer "H" is recoded to NA before ranking.
# Column order matters: later code selects dad_edu:internet as a range.
enem$dad_edu = as.numeric(factor(plyr::mapvalues(enem$Q001, from = "H", to = NA)))
enem$mom_edu = as.numeric(factor(plyr::mapvalues(enem$Q002, from = "H", to = NA)))
enem$fam_income = as.numeric(factor(enem$Q006))
enem$bathrooms = as.numeric(factor(enem$Q008))
enem$bedrooms = as.numeric(factor(enem$Q009))
enem$cars = as.numeric(factor(enem$Q010))
enem$motorcycles = as.numeric(factor(enem$Q011))
enem$refrigerators = as.numeric(factor(enem$Q012))
enem$washmachines = as.numeric(factor(enem$Q014))
enem$internet = as.numeric(factor(enem$Q025))

#S factor
# The factor model is fitted on a random subsample of 10,000 cases. Fix the
# RNG seed so the subsample, and therefore the loadings printed below, can
# be reproduced from run to run.
set.seed(1)
enem_S = enem %>%
  select(dad_edu:internet) %>%
  sample_n(10e3) %>%
  # fill in missing indicator values before factoring
  miss_impute() %>%
  # fa() with its defaults
  fa()
enem_S
## Factor Analysis using method =  minres
## Call: fa(r = .)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                MR1    h2   u2 com
## dad_edu       0.57 0.320 0.68   1
## mom_edu       0.52 0.268 0.73   1
## fam_income    0.79 0.631 0.37   1
## bathrooms     0.70 0.492 0.51   1
## bedrooms      0.53 0.276 0.72   1
## cars          0.69 0.482 0.52   1
## motorcycles   0.07 0.005 0.99   1
## refrigerators 0.35 0.122 0.88   1
## washmachines  0.48 0.234 0.77   1
## internet      0.47 0.223 0.78   1
## 
##                 MR1
## SS loadings    3.05
## Proportion Var 0.31
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  45  and the objective function was  2.54 with Chi Square of  25388
## The degrees of freedom for the model are 35  and the objective function was  0.4 
## 
## The root mean square of the residuals (RMSR) is  0.07 
## The df corrected root mean square of the residuals is  0.08 
## 
## The harmonic number of observations is  10000 with the empirical chi square  4017  with prob <  0 
## The total number of observations was  10000  with Likelihood Chi Square =  4041  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.797
## RMSEA index =  0.107  and the 90 % confidence intervals are  0.104 0.11
## BIC =  3719
## Fit based upon off diagonal values = 0.95
## Measures of factor score adequacy             
##                                                    MR1
## Correlation of (regression) scores with factors   0.92
## Multiple R square of scores with factors          0.85
## Minimum correlation of possible factor scores     0.69
#score cases
# Apply the fitted factor solution to every case in the full dataset;
# missing indicator values are mean-imputed by factor.scores().
enem_S_scores = psych::factor.scores(
  select(enem, dad_edu:internet),
  f = enem_S,
  impute = "mean"
)
# Keep the first (only) column of scores as the S variable
enem$S = enem_S_scores[["scores"]][, 1]

By municipality

# Aggregate test takers to one row per municipality of birth:
#   n              number of test takers
#   white..other   share of test takers in each TP_COR_RACA category
#                  (codes 0 and 6 are lumped together as "other")
#   heterogeneity  1 - sum of squared shares, i.e. the probability that two
#                  randomly drawn test takers belong to different categories
#   g              mean of the individual g composite
#
# The sum of squared shares on its own is a concentration (homogeneity)
# index: it equals 1 when everyone is in one category. It is subtracted
# from 1 so that larger values really mean more heterogeneity.
enem_munis = enem %>% 
  group_by(CO_MUNICIPIO_NASCIMENTO) %>% 
  dplyr::summarise(
    n = n(),
    white = (TP_COR_RACA == 1) %>% wtd_mean(),
    black = (TP_COR_RACA == 2) %>% wtd_mean(),
    brown = (TP_COR_RACA == 3) %>% wtd_mean(),
    yellow = (TP_COR_RACA == 4) %>% wtd_mean(),
    amerindian = (TP_COR_RACA == 5) %>% wtd_mean(),
    other = (TP_COR_RACA %in% c(0, 6)) %>% wtd_mean(),
    heterogeneity = 1 - (white^2 + black^2 + brown^2 + yellow^2 + amerindian^2 + other^2),
    # computed last so the per-person `g` column is still visible here
    g = g %>% wtd_mean()
  )
# Municipality code under the name `id`, the key used to merge with the map
enem_munis$id = enem_munis$CO_MUNICIPIO_NASCIMENTO

Maps

#merge
# Attach the municipality aggregates to the map polygons. The key is named
# explicitly so the join cannot silently pick up extra key columns if the
# two tables ever come to share another column name.
munis = left_join(munis_raw, enem_munis, by = "id")
## Joining, by = "id"
#clean map
# Municipality outlines only, thin borders
ggplot(munis) +
  geom_sf(lwd = 0.1) +
  theme_classic()

#white
# Polygons filled by the share of white test takers
ggplot(munis) +
  geom_sf(mapping = aes(fill = white), lwd = 0.1) +
  theme_classic()

#g
# Polygons filled by mean g
ggplot(munis) +
  geom_sf(mapping = aes(fill = g), lwd = 0.1) +
  theme_classic()

#values
# Correlation matrix of all municipality-level variables (n through g)
wtd.cors(select(enem_munis, n:g))
##                      n   white   black     brown   yellow amerindian
## n              1.00000 -0.0284  0.0339  0.020433  0.00355  -0.007932
## white         -0.02845  1.0000 -0.5886 -0.927916 -0.35211  -0.129410
## black          0.03386 -0.5886  1.0000  0.301952  0.13341  -0.049630
## brown          0.02043 -0.9279  0.3020  1.000000  0.27814  -0.000698
## yellow         0.00355 -0.3521  0.1334  0.278143  1.00000  -0.027472
## amerindian    -0.00793 -0.1294 -0.0496 -0.000698 -0.02747   1.000000
## other          0.01743 -0.0490 -0.0199 -0.022466  0.00944  -0.012594
## heterogeneity -0.05521  0.5552 -0.5116 -0.404787 -0.37520  -0.069742
## g              0.09526  0.6609 -0.2766 -0.654253 -0.22712  -0.135940
##                  other heterogeneity       g
## n              0.01743       -0.0552  0.0953
## white         -0.04895        0.5552  0.6609
## black         -0.01991       -0.5116 -0.2766
## brown         -0.02247       -0.4048 -0.6543
## yellow         0.00944       -0.3752 -0.2271
## amerindian    -0.01259       -0.0697 -0.1359
## other          1.00000       -0.1563 -0.0180
## heterogeneity -0.15634        1.0000  0.1835
## g             -0.01795        0.1835  1.0000
#regression
# Municipality mean g regressed on the white share alone
ols(formula = g ~ white, data = enem_munis)
## Frequencies of Missing Values Due to Each Variable
##     g white 
##     3     0 
## 
## Linear Regression Model
##  
##  ols(formula = g ~ white, data = enem_munis)
##  
##  
##                  Model Likelihood     Discrimination    
##                     Ratio Test           Indexes        
##  Obs    5567    LR chi2    3196.61    R2       0.437    
##  sigma0.1920    d.f.             1    R2 adj   0.437    
##  d.f.   5565    Pr(> chi2)  0.0000    g        0.192    
##  
##  Residuals
##  
##       Min       1Q   Median       3Q      Max 
##  -1.03659 -0.12091 -0.01316  0.11954  0.90343 
##  
##  
##            Coef    S.E.   t      Pr(>|t|)
##  Intercept -0.4424 0.0048 -92.93 <0.0001 
##  white      0.6488 0.0099  65.70 <0.0001 
## 
# Municipality mean g regressed on the heterogeneity index alone
ols(formula = g ~ heterogeneity, data = enem_munis)
## Frequencies of Missing Values Due to Each Variable
##             g heterogeneity 
##             3             0 
## 
## Linear Regression Model
##  
##  ols(formula = g ~ heterogeneity, data = enem_munis)
##  
##  
##                 Model Likelihood     Discrimination    
##                    Ratio Test           Indexes        
##  Obs    5567    LR chi2    190.78    R2       0.034    
##  sigma0.2516    d.f.            1    R2 adj   0.034    
##  d.f.   5565    Pr(> chi2) 0.0000    g        0.049    
##  
##  Residuals
##  
##       Min       1Q   Median       3Q      Max 
##  -1.23662 -0.18412 -0.01453  0.17815  0.96067 
##  
##  
##                Coef    S.E.   t      Pr(>|t|)
##  Intercept     -0.3492 0.0127 -27.59 <0.0001 
##  heterogeneity  0.3479 0.0250  13.93 <0.0001 
## 
# All non-white shares entered together; the white share is the omitted
# reference category
ols(
  formula = g ~ black + brown + yellow + amerindian + other,
  data = enem_munis
)
## Frequencies of Missing Values Due to Each Variable
##          g      black      brown     yellow amerindian      other 
##          3          0          0          0          0          0 
## 
## Linear Regression Model
##  
##  ols(formula = g ~ black + brown + yellow + amerindian + other, 
##      data = enem_munis)
##  
##  
##                  Model Likelihood     Discrimination    
##                     Ratio Test           Indexes        
##  Obs    5567    LR chi2    3409.06    R2       0.458    
##  sigma0.1885    d.f.             5    R2 adj   0.457    
##  d.f.   5561    Pr(> chi2)  0.0000    g        0.197    
##  
##  Residuals
##  
##        Min        1Q    Median        3Q       Max 
##  -1.047145 -0.116910 -0.008911  0.116005  0.957132 
##  
##  
##             Coef    S.E.   t      Pr(>|t|)
##  Intercept   0.2043 0.0066  30.99 <0.0001 
##  black      -0.2630 0.0297  -8.86 <0.0001 
##  brown      -0.7520 0.0131 -57.33 <0.0001 
##  yellow     -0.5342 0.1167  -4.58 <0.0001 
##  amerindian -0.9307 0.0645 -14.42 <0.0001 
##  other      -0.4549 0.1294  -3.51 0.0004  
## 
# Same share model with the heterogeneity index added as a predictor
ols(
  formula = g ~ black + brown + yellow + amerindian + other + heterogeneity,
  data = enem_munis
)
## Frequencies of Missing Values Due to Each Variable
##             g         black         brown        yellow    amerindian 
##             3             0             0             0             0 
##         other heterogeneity 
##             0             0 
## 
## Linear Regression Model
##  
##  ols(formula = g ~ black + brown + yellow + amerindian + other + 
##      heterogeneity, data = enem_munis)
##  
##  
##                  Model Likelihood     Discrimination    
##                     Ratio Test           Indexes        
##  Obs    5567    LR chi2    3813.74    R2       0.496    
##  sigma0.1818    d.f.             6    R2 adj   0.495    
##  d.f.   5560    Pr(> chi2)  0.0000    g        0.206    
##  
##  Residuals
##  
##        Min        1Q    Median        3Q       Max 
##  -1.001985 -0.112930 -0.004762  0.113402  0.903757 
##  
##  
##                Coef    S.E.   t      Pr(>|t|)
##  Intercept      0.5364 0.0174  30.79 <0.0001 
##  black         -0.5753 0.0325 -17.73 <0.0001 
##  brown         -0.8187 0.0131 -62.68 <0.0001 
##  yellow        -1.2995 0.1186 -10.96 <0.0001 
##  amerindian    -1.0997 0.0628 -17.52 <0.0001 
##  other         -1.0241 0.1279  -8.01 <0.0001 
##  heterogeneity -0.4907 0.0240 -20.48 <0.0001 
## 

Item analysis

Recode

#alternative forms
# A test form is identified by its four answer keys joined with "_"
enem$forms = str_c(
  enem$TX_GABARITO_CN,
  enem$TX_GABARITO_CH,
  enem$TX_GABARITO_LC,
  enem$TX_GABARITO_MT,
  sep = "_"
)
# Frequency table of the forms
enem_forms_table = table2(enem$forms)

#subset to largest same item dataset
# NOTE(review): row 2 of the table is used, not row 1 -- presumably row 1
# is the missing-key group; confirm against enem_forms_table.
enem_sub1 = filter(enem, forms == enem_forms_table$Group[2])

#split up
# Each TX_RESPOSTAS_* string is split into one column per character and
# named <subject>_<position>; the four subject matrices are then bound
# side by side into one tibble.
enem_sub1_items = as_tibble(cbind(
  set_colnames(
    str_split(enem_sub1$TX_RESPOSTAS_CN, "", simplify = TRUE),
    str_c("science_", 1:45)
  ),
  set_colnames(
    str_split(enem_sub1$TX_RESPOSTAS_CH, "", simplify = TRUE),
    str_c("humanities_", 1:45)
  ),
  set_colnames(
    str_split(enem_sub1$TX_RESPOSTAS_LC, "", simplify = TRUE),
    str_c("language_", 1:50)
  ),
  set_colnames(
    str_split(enem_sub1$TX_RESPOSTAS_MT, "", simplify = TRUE),
    str_c("math_", 1:45)
  )
))

#binary versions
# Answer key: every row of the subset shares the same form, so the first
# row's form string is taken, the "_" separators are removed, and the rest
# is split into one character per item.
enem_sub1_items_key = as.vector(
  str_split(
    str_replace_all(enem_sub1$forms[1], "_", ""),
    "",
    simplify = TRUE
  )
)
# Number of items in the key
length(enem_sub1_items_key)
## [1] 185
# Rows x columns of the response table; the column count can be compared
# with the key length printed above.
dim(enem_sub1_items)
## [1] 551438    185
#score
# For every raw response column, add a logical column "<name>b" that is
# TRUE where the response equals the key character at that position.
raw_item_names = colnames(enem_sub1_items)[seq_along(enem_sub1_items_key)]
for (item_idx in seq_along(raw_item_names)) {
  item_name = raw_item_names[item_idx]
  scored_name = str_c(item_name, "b")
  enem_sub1_items[[scored_name]] =
    enem_sub1_items[[item_name]] == enem_sub1_items_key[item_idx]
}

#analyze
# Correlations among the scored items for the first 1000 rows; only the
# first six rows of the correlation matrix are printed.
select(enem_sub1_items, science_1b:math_45b) %>%
  head(n = 1000) %>%
  wtd.cors() %>%
  head(n = 6)
##            science_1b science_2b science_3b science_4b science_5b
## science_1b    1.00000   0.109389     0.1953  -0.007468     0.1314
## science_2b    0.10939   1.000000     0.0486   0.000471     0.0494
## science_3b    0.19533   0.048559     1.0000  -0.026072     0.1060
## science_4b   -0.00747   0.000471    -0.0261   1.000000     0.0601
## science_5b    0.13136   0.049443     0.1060   0.060126     1.0000
## science_6b    0.09261   0.048029     0.0841  -0.028572     0.0780
##            science_6b science_7b science_8b science_9b science_10b
## science_1b     0.0926   0.105594    0.12136    0.04666      0.0638
## science_2b     0.0480   0.003348    0.07170    0.03109      0.0734
## science_3b     0.0841   0.033362    0.06079    0.00171      0.0752
## science_4b    -0.0286   0.000552    0.02271   -0.03926     -0.0371
## science_5b     0.0780   0.104462    0.09606    0.04401      0.0414
## science_6b     1.0000   0.066168   -0.00495    0.00208      0.0909
##            science_11b science_12b science_13b science_14b science_15b
## science_1b      0.1433      0.1852      0.0462      0.0301     0.15301
## science_2b      0.1160      0.0541      0.0533      0.0686     0.02859
## science_3b      0.1499      0.1389      0.0621     -0.0229     0.04104
## science_4b     -0.0299     -0.0319      0.0300     -0.0189    -0.00866
## science_5b      0.1409      0.1255      0.0343      0.0425     0.07380
## science_6b      0.0688      0.0960      0.0277      0.0177     0.08224
##            science_16b science_17b science_18b science_19b science_20b
## science_1b     0.06422      0.1130    -0.00354      0.0234      0.1899
## science_2b     0.01620      0.0821    -0.00442      0.0691      0.0476
## science_3b    -0.01577      0.0348     0.06453      0.0467      0.2071
## science_4b     0.00172     -0.0409     0.03373      0.0278     -0.0734
## science_5b     0.03147      0.0453     0.01070      0.0649      0.1411
## science_6b     0.08571      0.1067     0.03824      0.0608      0.0907
##            science_21b science_22b science_23b science_24b science_25b
## science_1b      0.0936      0.2229     0.08114    0.056410     0.06354
## science_2b      0.0570      0.1102     0.06089    0.034534     0.03860
## science_3b      0.0353      0.0949     0.08352    0.000378     0.08324
## science_4b      0.0113     -0.0697     0.00576   -0.074757     0.00589
## science_5b      0.0469      0.0927     0.10896    0.060205     0.01729
## science_6b      0.0297      0.0751     0.10738    0.033991    -0.00543
##            science_26b science_27b science_28b science_29b science_30b
## science_1b      0.1652      0.1688      0.0842      0.1187      0.1266
## science_2b      0.0771      0.0119      0.0847      0.0379      0.0409
## science_3b      0.1787      0.1147      0.0950      0.0690      0.0712
## science_4b     -0.0556     -0.0226     -0.0340     -0.0199     -0.0461
## science_5b      0.0324      0.1007     -0.0398      0.1447      0.0820
## science_6b      0.0475      0.0683     -0.0178      0.0491      0.1202
##            science_31b science_32b science_33b science_34b science_35b
## science_1b   -0.019708      0.0583    -0.04366      0.1678      0.1626
## science_2b    0.007918      0.0732     0.00937      0.1580      0.0398
## science_3b   -0.000762      0.0466    -0.01966      0.0875      0.0707
## science_4b    0.044995     -0.0113    -0.00117     -0.0386     -0.0856
## science_5b    0.052541      0.0300     0.05912      0.0364      0.0990
## science_6b    0.025695      0.1066    -0.00909      0.0244      0.1655
##            science_36b science_37b science_38b science_39b science_40b
## science_1b      0.0617      0.0445      0.1164     0.05752      0.0850
## science_2b      0.0271      0.0728      0.0510    -0.00334      0.0581
## science_3b      0.0582      0.0778      0.1548     0.05667      0.0537
## science_4b     -0.0701     -0.0116     -0.0222    -0.02877      0.0493
## science_5b      0.0958      0.0902      0.1316     0.11149      0.1125
## science_6b      0.0604      0.0113      0.0722    -0.00268      0.0729
##            science_41b science_42b science_43b science_44b science_45b
## science_1b     0.03918     0.15713     0.16514      0.2387      0.2607
## science_2b     0.00679     0.06653     0.06437      0.0773      0.0785
## science_3b     0.07403     0.08076     0.13147      0.1769      0.2192
## science_4b    -0.02920    -0.00808     0.00259     -0.0862     -0.0766
## science_5b     0.04262     0.06227     0.12063      0.1204      0.1264
## science_6b     0.00787     0.07899     0.13682      0.1368      0.1187
##            humanities_1b humanities_2b humanities_3b humanities_4b
## science_1b      -0.02222       0.09011        0.1773      -0.02371
## science_2b      -0.03369       0.07441        0.0979      -0.00535
## science_3b      -0.00054       0.01795        0.0854      -0.03978
## science_4b      -0.03277      -0.00784       -0.0440       0.05458
## science_5b      -0.05732       0.02938        0.0848      -0.05165
## science_6b      -0.01178       0.03327        0.0549       0.00125
##            humanities_5b humanities_6b humanities_7b humanities_8b
## science_1b        0.0929       0.06419       0.19896      -0.01575
## science_2b        0.0288      -0.04748       0.00579       0.01926
## science_3b        0.1172       0.02595       0.12372       0.02221
## science_4b       -0.0438       0.02266      -0.10150       0.00992
## science_5b        0.0582      -0.00495       0.04725       0.02076
## science_6b        0.0629      -0.00330       0.06810       0.02540
##            humanities_9b humanities_10b humanities_11b humanities_12b
## science_1b       0.10797        0.11252        0.13585         0.0852
## science_2b       0.03828        0.00281        0.06612         0.0354
## science_3b       0.02917        0.05652        0.11599         0.1062
## science_4b       0.00516       -0.06662        0.00835        -0.0630
## science_5b       0.09870        0.07169        0.09533         0.0960
## science_6b       0.05237        0.07437        0.06274         0.0406
##            humanities_13b humanities_14b humanities_15b humanities_16b
## science_1b         0.0892         0.1222         0.2071         0.2547
## science_2b         0.0602         0.0162         0.0665         0.1019
## science_3b         0.0663         0.1677         0.1100         0.2152
## science_4b        -0.0433        -0.0707        -0.0474        -0.0611
## science_5b         0.0237         0.0741         0.0743         0.1287
## science_6b         0.0318         0.1475         0.0836         0.0589
##            humanities_17b humanities_18b humanities_19b humanities_20b
## science_1b        0.00249         0.1202        0.02954        0.08120
## science_2b       -0.03498         0.0631        0.05080       -0.00625
## science_3b        0.02167         0.0813       -0.04862       -0.00690
## science_4b       -0.05337        -0.0708        0.00113       -0.03120
## science_5b        0.08869         0.0483        0.04173        0.05491
## science_6b        0.07724         0.0765        0.05100        0.02312
##            humanities_21b humanities_22b humanities_23b humanities_24b
## science_1b         0.1383        0.17288        0.09468         0.1995
## science_2b         0.0523        0.02711        0.00289         0.0305
## science_3b         0.1270        0.03587       -0.02197         0.1387
## science_4b        -0.0641        0.00164        0.01913        -0.1252
## science_5b         0.0448        0.02476        0.09158         0.0844
## science_6b         0.1159        0.07944        0.02612         0.1010
##            humanities_25b humanities_26b humanities_27b humanities_28b
## science_1b        -0.0221         0.1707         0.1713         0.1013
## science_2b        -0.0444         0.0147         0.0792         0.0141
## science_3b        -0.0206         0.1245         0.1945         0.0387
## science_4b         0.0188        -0.0770        -0.0964        -0.0345
## science_5b         0.0125         0.1008         0.0559         0.0371
## science_6b        -0.0209         0.0248         0.0886         0.0232
##            humanities_29b humanities_30b humanities_31b humanities_32b
## science_1b         0.0789         0.1840         0.2025       0.163466
## science_2b         0.0492         0.0762         0.0665       0.000337
## science_3b         0.0174         0.1751         0.1190       0.142799
## science_4b        -0.0125        -0.0895        -0.0372      -0.046773
## science_5b         0.0318         0.1514         0.0321       0.091794
## science_6b         0.0125         0.0624         0.0612       0.117168
##            humanities_33b humanities_34b humanities_35b humanities_36b
## science_1b        0.12476         0.1560         0.1894         0.0775
## science_2b        0.03278         0.0162         0.0409         0.0511
## science_3b        0.10132         0.1295         0.0797         0.1846
## science_4b       -0.00415         0.0095        -0.0657        -0.0254
## science_5b       -0.01112         0.0881         0.1079         0.0416
## science_6b        0.05745         0.0697         0.1087         0.0711
##            humanities_37b humanities_38b humanities_39b humanities_40b
## science_1b       0.054379         0.1123         0.1435        0.06487
## science_2b       0.024335         0.0586         0.0345       -0.00295
## science_3b       0.016080         0.0849         0.2015        0.05333
## science_4b       0.000484        -0.0878        -0.0436       -0.00461
## science_5b       0.021755         0.0424         0.0651        0.06273
## science_6b       0.044041         0.0459         0.1127       -0.00122
##            humanities_41b humanities_42b humanities_43b humanities_44b
## science_1b         0.1221         0.0403         0.1300         0.0556
## science_2b         0.0456        -0.0292         0.0216        -0.0552
## science_3b         0.1170         0.0573         0.0775        -0.0365
## science_4b        -0.0467        -0.0340        -0.0597        -0.0608
## science_5b         0.0302        -0.0141         0.0461         0.0170
## science_6b         0.0165         0.0475         0.0506         0.0832
##            humanities_45b language_1b language_2b language_3b language_4b
## science_1b         0.1863      0.1696      0.1483      0.1558      0.1317
## science_2b         0.0255      0.1149      0.0121      0.0999      0.0827
## science_3b         0.1595      0.1429      0.0946      0.1374      0.1075
## science_4b        -0.0745     -0.0617     -0.0609     -0.0358     -0.0672
## science_5b         0.0715      0.0933      0.1027      0.1336      0.0812
## science_6b         0.0783      0.1332      0.0641      0.0750      0.0873
##            language_5b language_6b language_7b language_8b language_9b
## science_1b      0.1200     0.04928    -0.00383    0.019015     0.00302
## science_2b      0.1000    -0.07052    -0.01729   -0.000714    -0.06106
## science_3b      0.0789     0.02437    -0.02759   -0.031873    -0.00257
## science_4b     -0.0798     0.03358    -0.05397   -0.003879     0.00969
## science_5b      0.0741    -0.04179    -0.01952   -0.062779     0.01577
## science_6b      0.0507    -0.00538    -0.03836   -0.034658     0.01890
##            language_10b language_11b language_12b language_13b
## science_1b      0.03709       0.2107      0.13656      0.07147
## science_2b     -0.02595       0.0539      0.00399     -0.00381
## science_3b      0.04117       0.0812      0.08920      0.06672
## science_4b     -0.01511      -0.0184     -0.03298     -0.02887
## science_5b      0.00614       0.1122      0.00852      0.04565
## science_6b     -0.02008       0.0975      0.01963      0.03059
##            language_14b language_15b language_16b language_17b
## science_1b       0.1028      0.08227      0.02592      0.04061
## science_2b       0.0235      0.00594     -0.00367     -0.06179
## science_3b       0.0747      0.05149      0.02622      0.07744
## science_4b      -0.0767      0.02091     -0.05004     -0.04812
## science_5b       0.0694     -0.01487      0.02388     -0.01648
## science_6b       0.0747      0.01555     -0.01115      0.00275
##            language_18b language_19b language_20b language_21b
## science_1b      0.02962       0.1006       0.0773      0.02376
## science_2b     -0.01884       0.0443       0.0270     -0.00132
## science_3b      0.00867      -0.0383       0.0625      0.00867
## science_4b      0.05873      -0.0290      -0.0268     -0.00215
## science_5b     -0.02042      -0.0436       0.0159      0.06594
## science_6b      0.05474       0.0509       0.0435      0.04781
##            language_22b language_23b language_24b language_25b
## science_1b      0.07082      0.03536       0.1324       0.1068
## science_2b     -0.00329      0.01554       0.0839       0.0196
## science_3b      0.04674      0.04004       0.0988       0.1031
## science_4b     -0.05826     -0.03765      -0.0773       0.0144
## science_5b      0.03339      0.03119       0.1019       0.0447
## science_6b      0.02378      0.00707       0.0757       0.0447
##            language_26b language_27b language_28b language_29b
## science_1b       0.1108       0.1799       0.1588       0.1221
## science_2b       0.0196       0.0439      -0.0194       0.0991
## science_3b       0.0716       0.0441       0.0766       0.1022
## science_4b      -0.0472      -0.0334      -0.0615      -0.0464
## science_5b       0.0449       0.0249       0.0104       0.0833
## science_6b       0.0665       0.0470       0.0741       0.0338
##            language_30b language_31b language_32b language_33b
## science_1b     -0.01122      0.00816       0.2467      0.13195
## science_2b     -0.03106      0.01133       0.0613      0.00413
## science_3b      0.01245      0.00627       0.0932      0.03724
## science_4b      0.01003     -0.03007      -0.0556     -0.02569
## science_5b     -0.00419     -0.00785       0.0549      0.08374
## science_6b      0.06211     -0.03477       0.1286      0.07379
##            language_34b language_35b language_36b language_37b
## science_1b      0.01360       0.1134       0.1281       0.0843
## science_2b      0.00805       0.0189       0.0335       0.0288
## science_3b      0.00782       0.0591       0.0638       0.0435
## science_4b     -0.03655       0.0290      -0.0457      -0.0313
## science_5b     -0.01105       0.0518       0.0341       0.0486
## science_6b      0.03208       0.0224       0.0545       0.1025
##            language_38b language_39b language_40b language_41b
## science_1b       0.1243       0.1916       0.1650     -0.04102
## science_2b       0.0282       0.0462       0.0555     -0.00821
## science_3b       0.0780       0.0870       0.1019     -0.01847
## science_4b      -0.0503      -0.0278       0.0244      0.07438
## science_5b       0.0556       0.0586       0.0345      0.02563
## science_6b       0.0716       0.1221       0.0951      0.00249
##            language_42b language_43b language_44b language_45b
## science_1b       0.1288       0.1180      0.00866       0.2121
## science_2b       0.0337       0.0491      0.03445       0.0545
## science_3b       0.1552       0.0954      0.06103       0.0664
## science_4b      -0.0425      -0.0468      0.01701       0.0119
## science_5b       0.0343       0.0736      0.03494       0.1778
## science_6b       0.0492       0.0616      0.03223       0.0707
##            language_46b language_47b language_48b language_49b
## science_1b       0.0815       0.0541      0.13696      0.05100
## science_2b       0.0418       0.0650      0.00655      0.01483
## science_3b       0.0690       0.0400      0.07287     -0.01448
## science_4b      -0.0235      -0.0131     -0.03179      0.02827
## science_5b      -0.0307       0.0585      0.09106     -0.01782
## science_6b       0.0399       0.0120      0.05084      0.00811
##            language_50b  math_1b  math_2b math_3b math_4b math_5b math_6b
## science_1b     0.001302  0.06075  0.11130  0.1771  0.0969  0.1312  0.1849
## science_2b    -0.019245  0.03453  0.07672  0.0174  0.0408  0.1009  0.0938
## science_3b     0.009013  0.09817  0.10355  0.0734  0.0752  0.1278  0.1049
## science_4b    -0.000301 -0.00723 -0.00232 -0.0462 -0.0429 -0.0490 -0.0994
## science_5b    -0.022734  0.05038  0.10346  0.1237  0.1026  0.1499  0.1048
## science_6b    -0.062073  0.11083 -0.01061  0.1096  0.1211  0.0584  0.0727
##             math_7b math_8b math_9b math_10b math_11b math_12b math_13b
## science_1b  0.05253  0.0966 0.06754 -0.02302  0.10234  0.03456   0.1436
## science_2b  0.00728  0.0209 0.04281  0.09852 -0.00556  0.00991   0.0605
## science_3b  0.03148  0.1005 0.00105 -0.02197  0.10583  0.04120   0.0430
## science_4b -0.00902 -0.0765 0.00531  0.00395 -0.02594 -0.05272  -0.0238
## science_5b  0.04585  0.0589 0.06724  0.09655  0.12553 -0.00734   0.0593
## science_6b  0.08724  0.0704 0.00121  0.01592  0.11234  0.03027   0.0560
##            math_14b math_15b math_16b math_17b math_18b math_19b math_20b
## science_1b   0.1464  0.09750   0.0638   0.0681  0.05129  -0.0284  0.11799
## science_2b   0.0532  0.02277   0.0181   0.0814  0.10849   0.0315 -0.00210
## science_3b   0.1267  0.08732   0.0348   0.0538 -0.02686  -0.0359  0.05618
## science_4b  -0.0696  0.00486  -0.0180   0.0177 -0.01238  -0.0373 -0.00246
## science_5b   0.0989  0.14544   0.0615   0.0433  0.05292   0.0389  0.11058
## science_6b   0.1012  0.04480   0.0279   0.1176  0.00994  -0.0711  0.12113
##            math_21b math_22b math_23b math_24b math_25b math_26b math_27b
## science_1b   0.1036  0.05126   0.1350 0.005697   0.1302  0.07118 -0.02302
## science_2b   0.0384 -0.01313   0.0557 0.005423   0.0139  0.06656 -0.00825
## science_3b   0.0556 -0.01039   0.0855 0.052146   0.0624 -0.00252 -0.02954
## science_4b   0.0224 -0.03727  -0.0414 0.000076  -0.0210 -0.06822  0.02565
## science_5b   0.0868  0.01634   0.0767 0.003780   0.1022  0.07130 -0.02745
## science_6b   0.0582 -0.00639   0.1031 0.011919   0.0587  0.07103 -0.05308
##            math_28b math_29b math_30b math_31b math_32b math_33b math_34b
## science_1b  0.13588   0.1490   0.1298   0.0624   0.1977  0.11401   0.0644
## science_2b  0.05982   0.0314   0.0646   0.1498   0.0654 -0.00177   0.0253
## science_3b  0.10169   0.1247   0.0618  -0.0147   0.0844  0.01774   0.1119
## science_4b  0.00703  -0.0675  -0.0226   0.0850  -0.0711 -0.02273  -0.0422
## science_5b  0.06180   0.0654   0.0570   0.0788   0.0902  0.09284   0.0664
## science_6b  0.05627   0.0999   0.0460   0.0314   0.0600 -0.01117   0.0870
##            math_35b math_36b math_37b math_38b math_39b math_40b math_41b
## science_1b   0.1009   0.0811   0.0831   0.1281   0.0312  0.04479  0.06699
## science_2b   0.0441  -0.0372   0.0514   0.0714   0.0657  0.01490 -0.00222
## science_3b   0.0597   0.0813  -0.0105   0.0913  -0.0495 -0.00419  0.04667
## science_4b  -0.0239  -0.0432  -0.0150  -0.0687  -0.0360  0.00624  0.03128
## science_5b   0.0123   0.0438   0.0860   0.0913   0.0317  0.09116  0.12029
## science_6b   0.0380   0.0414   0.0287   0.0845   0.0223  0.05346  0.04820
##            math_42b math_43b math_44b math_45b
## science_1b   0.1833   0.1237  0.11778  0.20983
## science_2b   0.0426   0.0493  0.07555  0.00669
## science_3b   0.1095   0.1006  0.00934  0.09043
## science_4b  -0.0618   0.0099 -0.02334 -0.07628
## science_5b   0.1219   0.0890  0.13661  0.07434
## science_6b   0.0629   0.0648  0.03018  0.09306
#simple score: unweighted row sum over the item columns
#(science_1b through math_45b)
enem_sub1$g_sum = rowSums(select(enem_sub1_items, science_1b:math_45b))

#IRT: fit the model with irt.fa() on the first 10,000 rows only
#(head(), so this is not a random subsample); every item column is
#coerced to numeric before fitting
enem_sub1_fa = irt.fa(
  enem_sub1_items %>%
    select(science_1b:math_45b) %>%
    head(10000) %>%
    map_df(as.numeric)
)
## Warning in cor.smooth(mat): Matrix was not positive definite, smoothing was
## done
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs
## = np.obs, : The estimated weights for the factor scores are probably
## incorrect. Try a different factor extraction method.

#show the factor analysis component stored in the IRT fit
enem_sub1_fa[["fa"]]
## Factor Analysis using method =  minres
## Call: fa(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, 
##     fm = fm)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                  MR1      h2   u2 com
## science_1b      0.49 2.4e-01 0.76   1
## science_2b      0.16 2.5e-02 0.98   1
## science_3b      0.39 1.5e-01 0.85   1
## science_4b     -0.07 4.9e-03 1.00   1
## science_5b      0.27 7.4e-02 0.93   1
## science_6b      0.27 7.5e-02 0.93   1
## science_7b      0.23 5.4e-02 0.95   1
## science_8b      0.24 6.0e-02 0.94   1
## science_9b      0.13 1.6e-02 0.98   1
## science_10b     0.22 4.9e-02 0.95   1
## science_11b     0.37 1.3e-01 0.87   1
## science_12b     0.47 2.2e-01 0.78   1
## science_13b     0.16 2.7e-02 0.97   1
## science_14b     0.10 1.1e-02 0.99   1
## science_15b     0.32 1.0e-01 0.90   1
## science_16b     0.20 3.9e-02 0.96   1
## science_17b     0.37 1.4e-01 0.86   1
## science_18b     0.01 9.5e-05 1.00   1
## science_19b     0.16 2.7e-02 0.97   1
## science_20b     0.48 2.3e-01 0.77   1
## science_21b     0.18 3.3e-02 0.97   1
## science_22b     0.50 2.5e-01 0.75   1
## science_23b     0.34 1.1e-01 0.89   1
## science_24b     0.15 2.4e-02 0.98   1
## science_25b     0.11 1.2e-02 0.99   1
## science_26b     0.39 1.5e-01 0.85   1
## science_27b     0.26 6.9e-02 0.93   1
## science_28b     0.21 4.3e-02 0.96   1
## science_29b     0.37 1.4e-01 0.86   1
## science_30b     0.29 8.3e-02 0.92   1
## science_31b     0.12 1.3e-02 0.99   1
## science_32b     0.14 2.0e-02 0.98   1
## science_33b     0.08 6.7e-03 0.99   1
## science_34b     0.37 1.3e-01 0.87   1
## science_35b     0.51 2.6e-01 0.74   1
## science_36b     0.21 4.4e-02 0.96   1
## science_37b     0.15 2.2e-02 0.98   1
## science_38b     0.31 9.5e-02 0.90   1
## science_39b     0.18 3.4e-02 0.97   1
## science_40b     0.23 5.3e-02 0.95   1
## science_41b     0.06 4.1e-03 1.00   1
## science_42b     0.46 2.1e-01 0.79   1
## science_43b     0.30 8.9e-02 0.91   1
## science_44b     0.60 3.6e-01 0.64   1
## science_45b     0.68 4.6e-01 0.54   1
## humanities_1b   0.01 5.4e-05 1.00   1
## humanities_2b   0.14 2.0e-02 0.98   1
## humanities_3b   0.44 1.9e-01 0.81   1
## humanities_4b   0.11 1.1e-02 0.99   1
## humanities_5b   0.23 5.3e-02 0.95   1
## humanities_6b   0.08 6.7e-03 0.99   1
## humanities_7b   0.54 2.9e-01 0.71   1
## humanities_8b   0.20 3.9e-02 0.96   1
## humanities_9b   0.31 9.4e-02 0.91   1
## humanities_10b  0.26 6.7e-02 0.93   1
## humanities_11b  0.33 1.1e-01 0.89   1
## humanities_12b  0.33 1.1e-01 0.89   1
## humanities_13b  0.33 1.1e-01 0.89   1
## humanities_14b  0.35 1.2e-01 0.88   1
## humanities_15b  0.57 3.3e-01 0.67   1
## humanities_16b  0.67 4.5e-01 0.55   1
## humanities_17b  0.07 4.6e-03 1.00   1
## humanities_18b  0.33 1.1e-01 0.89   1
## humanities_19b  0.08 6.9e-03 0.99   1
## humanities_20b  0.25 6.2e-02 0.94   1
## humanities_21b  0.41 1.7e-01 0.83   1
## humanities_22b  0.37 1.4e-01 0.86   1
## humanities_23b  0.17 2.9e-02 0.97   1
## humanities_24b  0.57 3.2e-01 0.68   1
## humanities_25b -0.09 8.1e-03 0.99   1
## humanities_26b  0.39 1.6e-01 0.84   1
## humanities_27b  0.61 3.8e-01 0.62   1
## humanities_28b  0.25 6.3e-02 0.94   1
## humanities_29b  0.26 6.7e-02 0.93   1
## humanities_30b  0.57 3.2e-01 0.68   1
## humanities_31b  0.58 3.4e-01 0.66   1
## humanities_32b  0.39 1.5e-01 0.85   1
## humanities_33b  0.47 2.2e-01 0.78   1
## humanities_34b  0.48 2.3e-01 0.77   1
## humanities_35b  0.53 2.8e-01 0.72   1
## humanities_36b  0.23 5.1e-02 0.95   1
## humanities_37b  0.17 2.9e-02 0.97   1
## humanities_38b  0.49 2.4e-01 0.76   1
## humanities_39b  0.47 2.2e-01 0.78   1
## humanities_40b  0.24 5.6e-02 0.94   1
## humanities_41b  0.45 2.0e-01 0.80   1
## humanities_42b  0.19 3.6e-02 0.96   1
## humanities_43b  0.35 1.3e-01 0.87   1
## humanities_44b  0.13 1.8e-02 0.98   1
## humanities_45b  0.53 2.8e-01 0.72   1
## language_1b     0.62 3.9e-01 0.61   1
## language_2b     0.55 3.0e-01 0.70   1
## language_3b     0.60 3.6e-01 0.64   1
## language_4b     0.51 2.6e-01 0.74   1
## language_5b     0.45 2.0e-01 0.80   1
## language_6b    -0.09 8.5e-03 0.99   1
## language_7b    -0.06 3.2e-03 1.00   1
## language_8b    -0.13 1.6e-02 0.98   1
## language_9b    -0.13 1.7e-02 0.98   1
## language_10b   -0.01 4.2e-05 1.00   1
## language_11b    0.48 2.3e-01 0.77   1
## language_12b    0.32 1.0e-01 0.90   1
## language_13b    0.23 5.3e-02 0.95   1
## language_14b    0.35 1.2e-01 0.88   1
## language_15b    0.30 8.8e-02 0.91   1
## language_16b    0.19 3.7e-02 0.96   1
## language_17b    0.36 1.3e-01 0.87   1
## language_18b    0.11 1.2e-02 0.99   1
## language_19b    0.40 1.6e-01 0.84   1
## language_20b    0.42 1.8e-01 0.82   1
## language_21b    0.14 1.9e-02 0.98   1
## language_22b    0.19 3.6e-02 0.96   1
## language_23b   -0.02 4.2e-04 1.00   1
## language_24b    0.50 2.5e-01 0.75   1
## language_25b    0.29 8.5e-02 0.92   1
## language_26b    0.34 1.2e-01 0.88   1
## language_27b    0.32 1.0e-01 0.90   1
## language_28b    0.39 1.5e-01 0.85   1
## language_29b    0.40 1.6e-01 0.84   1
## language_30b    0.15 2.2e-02 0.98   1
## language_31b    0.13 1.8e-02 0.98   1
## language_32b    0.57 3.2e-01 0.68   1
## language_33b    0.32 1.0e-01 0.90   1
## language_34b    0.18 3.1e-02 0.97   1
## language_35b    0.23 5.4e-02 0.95   1
## language_36b    0.42 1.8e-01 0.82   1
## language_37b    0.24 5.9e-02 0.94   1
## language_38b    0.29 8.7e-02 0.91   1
## language_39b    0.51 2.6e-01 0.74   1
## language_40b    0.48 2.3e-01 0.77   1
## language_41b   -0.14 2.1e-02 0.98   1
## language_42b    0.40 1.6e-01 0.84   1
## language_43b    0.35 1.2e-01 0.88   1
## language_44b    0.13 1.7e-02 0.98   1
## language_45b    0.46 2.1e-01 0.79   1
## language_46b    0.24 5.8e-02 0.94   1
## language_47b    0.20 4.0e-02 0.96   1
## language_48b    0.33 1.1e-01 0.89   1
## language_49b    0.20 4.0e-02 0.96   1
## language_50b    0.11 1.2e-02 0.99   1
## math_1b         0.32 1.0e-01 0.90   1
## math_2b         0.28 7.7e-02 0.92   1
## math_3b         0.35 1.2e-01 0.88   1
## math_4b         0.39 1.6e-01 0.84   1
## math_5b         0.38 1.5e-01 0.85   1
## math_6b         0.35 1.2e-01 0.88   1
## math_7b         0.13 1.8e-02 0.98   1
## math_8b         0.35 1.2e-01 0.88   1
## math_9b         0.17 2.9e-02 0.97   1
## math_10b        0.11 1.1e-02 0.99   1
## math_11b        0.29 8.6e-02 0.91   1
## math_12b        0.07 5.4e-03 0.99   1
## math_13b        0.39 1.6e-01 0.84   1
## math_14b        0.49 2.4e-01 0.76   1
## math_15b        0.42 1.7e-01 0.83   1
## math_16b        0.18 3.2e-02 0.97   1
## math_17b        0.16 2.5e-02 0.97   1
## math_18b        0.22 5.1e-02 0.95   1
## math_19b        0.00 7.3e-06 1.00   1
## math_20b        0.26 7.0e-02 0.93   1
## math_21b        0.28 8.1e-02 0.92   1
## math_22b        0.21 4.6e-02 0.95   1
## math_23b        0.43 1.8e-01 0.82   1
## math_24b        0.13 1.7e-02 0.98   1
## math_25b        0.32 1.0e-01 0.90   1
## math_26b        0.26 7.0e-02 0.93   1
## math_27b       -0.03 8.9e-04 1.00   1
## math_28b        0.33 1.1e-01 0.89   1
## math_29b        0.49 2.4e-01 0.76   1
## math_30b        0.29 8.4e-02 0.92   1
## math_31b        0.14 1.9e-02 0.98   1
## math_32b        0.47 2.2e-01 0.78   1
## math_33b        0.25 6.2e-02 0.94   1
## math_34b        0.28 7.9e-02 0.92   1
## math_35b        0.20 3.8e-02 0.96   1
## math_36b        0.11 1.2e-02 0.99   1
## math_37b        0.26 6.8e-02 0.93   1
## math_38b        0.46 2.1e-01 0.79   1
## math_39b        0.10 1.1e-02 0.99   1
## math_40b        0.12 1.5e-02 0.98   1
## math_41b        0.23 5.3e-02 0.95   1
## math_42b        0.39 1.5e-01 0.85   1
## math_43b        0.36 1.3e-01 0.87   1
## math_44b        0.31 9.6e-02 0.90   1
## math_45b        0.40 1.6e-01 0.84   1
## 
##                  MR1
## SS loadings    20.45
## Proportion Var  0.11
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  17020  and the objective function was  63.5 with Chi Square of  630873
## The degrees of freedom for the model are 16835  and the objective function was  43.8 
## 
## The root mean square of the residuals (RMSR) is  0.04 
## The df corrected root mean square of the residuals is  0.04 
## 
## The harmonic number of observations is  10000 with the empirical chi square  592645  with prob <  0 
## The total number of observations was  10000  with Likelihood Chi Square =  435118  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.311
## RMSEA index =  0.05  and the 90 % confidence intervals are  0.05 NA
## BIC =  280062
## Fit based upon off diagonal values = 0.87
#distribution of the item loadings on the first (here the only) factor
enem_sub1_fa$fa$loadings[, 1] %>% GG_denhist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#score: use the fit above to score all rows (no head() here), then
#store the standardized theta1 column as g_irt
enem_sub1_fa_score = scoreIrt(
  enem_sub1_fa,
  enem_sub1_items %>%
    select(science_1b:math_45b) %>%
    map_df(as.numeric)
)
enem_sub1$g_irt = standardize(enem_sub1_fa_score$theta1)

They appear to have divided ~10% of their data by 10 for some reason. Otherwise, we replicate their scores.

Simple results

#cors: correlation matrix of the columns dad_edu through S together
#with the three ability scores (g, g_sum, g_irt)
wtd.cors(select(enem_sub1, dad_edu:S, g, g_sum, g_irt))
##               dad_edu  mom_edu fam_income bathrooms bedrooms   cars
## dad_edu        1.0000  0.59739     0.4641    0.3639    0.222 0.3454
## mom_edu        0.5974  1.00000     0.4216    0.3310    0.219 0.3180
## fam_income     0.4641  0.42160     1.0000    0.5902    0.385 0.5989
## bathrooms      0.3639  0.33103     0.5902    1.0000    0.482 0.5118
## bedrooms       0.2221  0.21882     0.3851    0.4819    1.000 0.3841
## cars           0.3454  0.31803     0.5989    0.5118    0.384 1.0000
## motorcycles   -0.0406 -0.00231     0.0148    0.0308    0.125 0.0448
## refrigerators  0.1352  0.11959     0.2369    0.2920    0.225 0.2342
## washmachines   0.2803  0.24047     0.3942    0.3164    0.259 0.3841
## internet       0.2900  0.26995     0.3734    0.2832    0.248 0.3423
## S              0.6270  0.57151     0.8695    0.7700    0.568 0.7610
## g              0.3552  0.33190     0.4770    0.3407    0.188 0.3261
## g_sum          0.3494  0.32615     0.4784    0.3444    0.186 0.3213
## g_irt          0.3688  0.34249     0.4923    0.3538    0.194 0.3347
##               motorcycles refrigerators washmachines internet      S
## dad_edu          -0.04061        0.1352       0.2803   0.2900 0.6270
## mom_edu          -0.00231        0.1196       0.2405   0.2699 0.5715
## fam_income        0.01477        0.2369       0.3942   0.3734 0.8695
## bathrooms         0.03076        0.2920       0.3164   0.2832 0.7700
## bedrooms          0.12460        0.2253       0.2594   0.2476 0.5680
## cars              0.04477        0.2342       0.3841   0.3423 0.7610
## motorcycles       1.00000        0.0509       0.0279   0.0337 0.0631
## refrigerators     0.05094        1.0000       0.1822   0.1323 0.3812
## washmachines      0.02793        0.1822       1.0000   0.3531 0.5348
## internet          0.03371        0.1323       0.3531   1.0000 0.5177
## S                 0.06312        0.3812       0.5348   0.5177 1.0000
## g                -0.03941        0.1137       0.2292   0.2857 0.4837
## g_sum            -0.04193        0.1134       0.2220   0.2719 0.4803
## g_irt            -0.04505        0.1175       0.2387   0.2909 0.4994
##                     g   g_sum  g_irt
## dad_edu        0.3552  0.3494  0.369
## mom_edu        0.3319  0.3262  0.342
## fam_income     0.4770  0.4784  0.492
## bathrooms      0.3407  0.3444  0.354
## bedrooms       0.1881  0.1856  0.194
## cars           0.3261  0.3213  0.335
## motorcycles   -0.0394 -0.0419 -0.045
## refrigerators  0.1137  0.1134  0.117
## washmachines   0.2292  0.2220  0.239
## internet       0.2857  0.2719  0.291
## S              0.4837  0.4803  0.499
## g              1.0000  0.9617  0.969
## g_sum          0.9617  1.0000  0.956
## g_irt          0.9687  0.9565  1.000
#examine: scatterplots of the IRT score against the other two scores
enem_sub1 %>% GG_scatter("g", "g_irt")

enem_sub1 %>% GG_scatter("g_sum", "g_irt")

#Jensen: run fa_Jensens_method() with g_irt as the criterion and
#relabel both axes of the resulting plot
fa_Jensens_method(enem_S, enem_sub1, "g_irt") +
  scale_x_continuous(name = "S factor loading") +
  scale_y_continuous(name = "Correlation with g factor score")
## Using latent correlations for the criterion-indicators relationships.

Machine learning

(Not included in the RPubs version because the model takes hours to fit.)

#enet: glmnet model fitted through caret, predicting S from the item
#columns; tuneLength sets the granularity of caret's automatic tuning grid
enem_sub1_glmnet_S = caret::train(
  x = enem_sub1_items %>% select(science_1b:math_45b),
  y = enem_sub1$S,
  method = "glmnet",
  tuneLength = 5
)

Write data

#save both objects as CSV; missing values are written as empty strings
write_csv(enem_sub1, "data/enem_sub1.csv", na = "")
write_csv(enem_sub1_items, "data/enem_sub1_items.csv", na = "")

Versions

#record the R version, platform and package versions used for this run
utils::sessionInfo()
## R version 3.5.3 (2019-03-11)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.1
## 
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] doFuture_0.8.0     iterators_1.0.10   foreach_1.4.4     
##  [4] future_1.13.0      globals_0.12.4     caret_6.0-84      
##  [7] rms_5.1-3.1        SparseM_1.77       sf_0.7-5          
## [10] kirkegaard_2018.05 metafor_2.1-0      Matrix_1.2-17     
## [13] psych_1.8.12       magrittr_1.5       assertthat_0.2.1  
## [16] weights_1.0        mice_3.5.0         gdata_2.18.0      
## [19] Hmisc_4.2-0        Formula_1.2-3      survival_2.44-1.1 
## [22] lattice_0.20-38    forcats_0.4.0      stringr_1.4.0     
## [25] dplyr_0.8.1        purrr_0.3.2        readr_1.3.1       
## [28] tidyr_0.8.3        tibble_2.1.3       ggplot2_3.2.0     
## [31] tidyverse_1.2.1    pacman_0.5.1      
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1        backports_1.1.4     plyr_1.8.4         
##   [4] lazyeval_0.2.2      sp_1.3-1            splines_3.5.3      
##   [7] polycor_0.7-9       listenv_0.7.0       TH.data_1.0-10     
##  [10] digest_0.6.19       htmltools_0.3.6     checkmate_1.9.3    
##  [13] cluster_2.0.9       openxlsx_4.1.0.1    recipes_0.1.5      
##  [16] modelr_0.1.4        gower_0.2.1         sandwich_2.5-1     
##  [19] colorspace_1.4-1    rvest_0.3.4         haven_2.1.0        
##  [22] pan_1.6             xfun_0.7            crayon_1.3.4       
##  [25] jsonlite_1.6        lme4_1.1-21         zoo_1.8-6          
##  [28] glue_1.3.1          gtable_0.3.0        ipred_0.9-9        
##  [31] MatrixModels_0.4-1  car_3.0-3           DEoptimR_1.0-8     
##  [34] jomo_2.6-8          abind_1.4-5         VIM_4.8.0          
##  [37] scales_1.0.0        mvtnorm_1.0-10      DBI_1.0.0          
##  [40] Rcpp_1.0.1          laeken_0.5.0        htmlTable_1.13.1   
##  [43] units_0.6-3         foreign_0.8-70      stats4_3.5.3       
##  [46] lava_1.6.5          prodlim_2018.04.18  vcd_1.4-4          
##  [49] htmlwidgets_1.3     httr_1.4.0          RColorBrewer_1.1-2 
##  [52] acepack_1.4.1       pkgconfig_2.0.2     nnet_7.3-12        
##  [55] labeling_0.3        tidyselect_0.2.5    rlang_0.3.4        
##  [58] reshape2_1.4.3      multilevel_2.6      munsell_0.5.0      
##  [61] cellranger_1.1.0    tools_3.5.3         cli_1.1.0          
##  [64] generics_0.0.2      ranger_0.11.2       broom_0.5.2        
##  [67] evaluate_0.14       yaml_2.2.0          ModelMetrics_1.2.2 
##  [70] knitr_1.23          zip_2.0.2           robustbase_0.93-5  
##  [73] mitml_0.3-7         nlme_3.1-140        quantreg_5.38      
##  [76] xml2_1.2.0          psychometric_2.2    compiler_3.5.3     
##  [79] rstudioapi_0.10     curl_3.3            e1071_1.7-2        
##  [82] stringi_1.4.3       classInt_0.3-3      nloptr_1.2.1       
##  [85] pillar_1.4.1        lmtest_0.9-37       data.table_1.12.2  
##  [88] R6_2.4.0            latticeExtra_0.6-28 KernSmooth_2.23-15 
##  [91] gridExtra_2.3       rio_0.5.16          codetools_0.2-16   
##  [94] polspline_1.1.14    boot_1.3-22         MASS_7.3-51.4      
##  [97] gtools_3.8.1        withr_2.1.2         mnormt_1.5-5       
## [100] multcomp_1.4-10     hms_0.4.2           grid_3.5.3         
## [103] rpart_4.1-15        timeDate_3043.102   class_7.3-15       
## [106] minqa_1.2.4         rmarkdown_1.13      carData_3.0-2      
## [109] lubridate_1.7.4     base64enc_0.1-3