library(pacman)
p_load(kirkegaard, sf, rms, caret, doFuture)
# Print numbers with 3 significant digits
options(digits = 3)
# Let foreach's %dopar% run through the future framework
doFuture::registerDoFuture()
# Run futures on 4 background R sessions.
# `multiprocess` is deprecated (and defunct in recent releases of the future
# package); `multisession` is the portable replacement.
plan(multisession, workers = 4)
# Remove the size limit on globals exported to the workers
options(future.globals.maxSize = Inf)
# Put the column names of `x` on the clipboard as a one-column tibble.
# `print = FALSE` is passed on to write_clipboard(); FALSE is spelled out
# because `F` is an ordinary variable that can be reassigned.
names2clip = function(x) {
  tibble(colnames(x)) %>% write_clipboard(print = FALSE)
}
# Replace every value of `x` that lies outside [lower, upper] with NA.
# Both bounds are inclusive; with the defaults (-Inf, Inf) nothing is replaced.
# Values that are already NA stay NA.
na_outside = function(x, lower = -Inf, upper = Inf) {
  out_of_range = x < lower | x > upper
  replace(x, out_of_range, NA)
}
# quick check: keep 2..7, everything else becomes NA
na_outside(1:10, 2, 7)
## [1] NA 2 3 4 5 6 7 NA NA NA
#read some large datasets
# read_csv2() assumes "," as decimal mark and "." as grouping mark. The parse
# messages recorded for that call show the NU_NOTA_* score columns being
# guessed as col_number(), i.e. the "." inside the scores was dropped as a
# grouping mark, which puts scores with and without a decimal part on
# different scales. Reading with ";" as delimiter and readr's default locale
# ("." as decimal mark) avoids that.
# NOTE(review): assumes the file uses "." as decimal mark - confirm against the raw file.
enem = read_delim("data/Microdados_enem_2016/DADOS/microdados_enem_2016.csv", delim = ";")
## Using ',' as decimal and '.' as grouping mark. Use read_delim() for more control.
## Parsed with column specification:
## cols(
## .default = col_double(),
## NO_MUNICIPIO_RESIDENCIA = col_character(),
## SG_UF_RESIDENCIA = col_character(),
## TP_SEXO = col_character(),
## NO_MUNICIPIO_NASCIMENTO = col_character(),
## SG_UF_NASCIMENTO = col_character(),
## NO_MUNICIPIO_ESC = col_character(),
## SG_UF_ESC = col_character(),
## NO_ENTIDADE_CERTIFICACAO = col_character(),
## SG_UF_ENTIDADE_CERTIFICACAO = col_character(),
## NO_MUNICIPIO_PROVA = col_character(),
## SG_UF_PROVA = col_character(),
## NU_NOTA_CN = col_number(),
## NU_NOTA_CH = col_number(),
## NU_NOTA_LC = col_number(),
## NU_NOTA_MT = col_number(),
## TX_RESPOSTAS_CN = col_character(),
## TX_RESPOSTAS_CH = col_character(),
## TX_RESPOSTAS_LC = col_character(),
## TX_RESPOSTAS_MT = col_character(),
## TX_GABARITO_CN = col_character()
## # ... with 39 more columns
## )
## See spec(...) for full column specifications.
# Alternative shapefiles tried earlier:
# munis = read_sf("data/spatial/bra_adm2/BRA_adm2.shp")
# munis = read_sf("data/spatial/br_localidades_2010/BR_Localidades_2010.shp")
# Shapefile actually used for the maps
munis_raw = read_sf("data/spatial/br_municipios/BRMUE250GC_SIR.shp")
# numeric copy of the CD_GEOCMU code, used later as the join key `id`
munis_raw[["id"]] = as.numeric(munis_raw[["CD_GEOCMU"]])
#fix scores
# Put one raw score column on a single scale.
#   raw       numeric vector of raw scores
#   cutoff    values below this are treated as being on the 10x smaller scale
#             and are multiplied by 10; values at or above it are kept as is
#   min_valid after rescaling, values below this are set to NA
# Returns a numeric vector of the same length as `raw`; NA stays NA.
# The previous version used `<` / `>` only, so a value exactly equal to the
# cutoff matched neither branch and was silently turned into NA.
rescale_score = function(raw, cutoff = 2000, min_valid = 2000) {
  rescaled = ifelse(raw < cutoff, raw * 10, raw)
  na_outside(rescaled, min_valid)
}
# NOTE(review): science uses a cutoff of 2500 while the other three subjects
# use 2000. Kept as in the original - confirm whether this is intentional.
enem$science = rescale_score(enem$NU_NOTA_CN, cutoff = 2500)
enem$humanities = rescale_score(enem$NU_NOTA_CH)
enem$language = rescale_score(enem$NU_NOTA_LC)
enem$math = rescale_score(enem$NU_NOTA_MT)
# g: per-person mean of the standardized subject scores, skipping missing
# subjects (TRUE spelled out; `T` is a reassignable variable).
# Note that rowMeans() returns NaN for a row where all four scores are missing.
enem$g = enem %>% select(science:math) %>% map_df(standardize) %>% rowMeans(na.rm = TRUE)
# correlations among the four subject scores and g
enem %>% select(science:g) %>% wtd.cors()
## science humanities language math g
## science 1.000 0.614 0.553 0.601 0.836
## humanities 0.614 1.000 0.695 0.539 0.861
## language 0.553 0.695 1.000 0.485 0.825
## math 0.601 0.539 0.485 1.000 0.792
## g 0.836 0.861 0.825 0.792 1.000
#outcomes
# Questionnaire answers are converted to numeric codes through the sorted
# levels of a factor, so the codes follow the sort order of the answers
# actually present in the data.
level_codes = function(answers) as.numeric(factor(answers))

# Parental education: answer "H" is recoded to NA before coding
enem$dad_edu = level_codes(plyr::mapvalues(enem$Q001, c("H"), c(NA)))
enem$mom_edu = level_codes(plyr::mapvalues(enem$Q002, c("H"), c(NA)))

# Remaining indicators, new column name -> questionnaire item.
# The order matters: later code selects the column range dad_edu:internet.
outcome_items = c(
  fam_income = "Q006",
  bathrooms = "Q008",
  bedrooms = "Q009",
  cars = "Q010",
  motorcycles = "Q011",
  refrigerators = "Q012",
  washmachines = "Q014",
  internet = "Q025"
)
for (outcome in names(outcome_items)) {
  enem[[outcome]] = level_codes(enem[[outcome_items[[outcome]]]])
}
#S factor
# The factor model is fitted on a random subsample of 10,000 rows. Fix the RNG
# seed first so the subsample, and therefore the loadings and the scores
# derived from them, are the same on every run.
set.seed(1)
enem_S = enem %>% select(dad_edu:internet) %>% sample_n(10e3) %>% miss_impute() %>% fa()
# print the factor solution
enem_S
## Factor Analysis using method = minres
## Call: fa(r = .)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## dad_edu 0.57 0.320 0.68 1
## mom_edu 0.52 0.268 0.73 1
## fam_income 0.79 0.631 0.37 1
## bathrooms 0.70 0.492 0.51 1
## bedrooms 0.53 0.276 0.72 1
## cars 0.69 0.482 0.52 1
## motorcycles 0.07 0.005 0.99 1
## refrigerators 0.35 0.122 0.88 1
## washmachines 0.48 0.234 0.77 1
## internet 0.47 0.223 0.78 1
##
## MR1
## SS loadings 3.05
## Proportion Var 0.31
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 45 and the objective function was 2.54 with Chi Square of 25388
## The degrees of freedom for the model are 35 and the objective function was 0.4
##
## The root mean square of the residuals (RMSR) is 0.07
## The df corrected root mean square of the residuals is 0.08
##
## The harmonic number of observations is 10000 with the empirical chi square 4017 with prob < 0
## The total number of observations was 10000 with Likelihood Chi Square = 4041 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.797
## RMSEA index = 0.107 and the 90 % confidence intervals are 0.104 0.11
## BIC = 3719
## Fit based upon off diagonal values = 0.95
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.92
## Multiple R square of scores with factors 0.85
## Minimum correlation of possible factor scores 0.69
#score cases
# Score every row of the full data with the solution fitted on the subsample;
# impute = "mean" is passed to psych::factor.scores for missing indicators.
S_indicators = enem %>% select(dad_edu:internet)
enem_S_scores = psych::factor.scores(S_indicators, f = enem_S, impute = "mean")
# first (only) column of the score matrix
enem$S = enem_S_scores[["scores"]][, 1]
# One row per CO_MUNICIPIO_NASCIMENTO code: group size, share of each
# TP_COR_RACA category, and mean g.
enem_munis = dplyr::summarise(
  group_by(enem, CO_MUNICIPIO_NASCIMENTO),
  n = n(),
  white = wtd_mean(TP_COR_RACA == 1),
  black = wtd_mean(TP_COR_RACA == 2),
  brown = wtd_mean(TP_COR_RACA == 3),
  yellow = wtd_mean(TP_COR_RACA == 4),
  amerindian = wtd_mean(TP_COR_RACA == 5),
  other = wtd_mean(TP_COR_RACA %in% c(0, 6)),
  # NOTE(review): this is the sum of squared shares, which is largest when one
  # category dominates; a heterogeneity index is usually 1 minus this - confirm
  # the intended direction before interpreting signs.
  heterogeneity = white^2 + black^2 + brown^2 + yellow^2 + amerindian^2 + other^2,
  g = wtd_mean(g)
)
# copy of the grouping code under the name `id`
enem_munis[["id"]] = enem_munis[["CO_MUNICIPIO_NASCIMENTO"]]
#merge
# Attach the aggregated values to the map polygons. The key is named
# explicitly so the join cannot silently pick up any other column the two
# tables happen to share.
munis = left_join(munis_raw, enem_munis, by = "id")
## Joining, by = "id"
#clean map
# outlines only
ggplot(munis) +
  geom_sf(lwd = 0.1) +
  theme_classic()
#white
# polygons filled by the `white` share
ggplot(munis) +
  geom_sf(mapping = aes(fill = white), lwd = 0.1) +
  theme_classic()
#g
# polygons filled by mean g
ggplot(munis) +
  geom_sf(mapping = aes(fill = g), lwd = 0.1) +
  theme_classic()
#values
# correlation matrix of the aggregated columns n through g
wtd.cors(select(enem_munis, n:g))
## n white black brown yellow amerindian
## n 1.00000 -0.0284 0.0339 0.020433 0.00355 -0.007932
## white -0.02845 1.0000 -0.5886 -0.927916 -0.35211 -0.129410
## black 0.03386 -0.5886 1.0000 0.301952 0.13341 -0.049630
## brown 0.02043 -0.9279 0.3020 1.000000 0.27814 -0.000698
## yellow 0.00355 -0.3521 0.1334 0.278143 1.00000 -0.027472
## amerindian -0.00793 -0.1294 -0.0496 -0.000698 -0.02747 1.000000
## other 0.01743 -0.0490 -0.0199 -0.022466 0.00944 -0.012594
## heterogeneity -0.05521 0.5552 -0.5116 -0.404787 -0.37520 -0.069742
## g 0.09526 0.6609 -0.2766 -0.654253 -0.22712 -0.135940
## other heterogeneity g
## n 0.01743 -0.0552 0.0953
## white -0.04895 0.5552 0.6609
## black -0.01991 -0.5116 -0.2766
## brown -0.02247 -0.4048 -0.6543
## yellow 0.00944 -0.3752 -0.2271
## amerindian -0.01259 -0.0697 -0.1359
## other 1.00000 -0.1563 -0.0180
## heterogeneity -0.15634 1.0000 0.1835
## g -0.01795 0.1835 1.0000
#regression
# mean g regressed on the white share
ols(formula = g ~ white, data = enem_munis)
## Frequencies of Missing Values Due to Each Variable
## g white
## 3 0
##
## Linear Regression Model
##
## ols(formula = g ~ white, data = enem_munis)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 5567 LR chi2 3196.61 R2 0.437
## sigma0.1920 d.f. 1 R2 adj 0.437
## d.f. 5565 Pr(> chi2) 0.0000 g 0.192
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.03659 -0.12091 -0.01316 0.11954 0.90343
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.4424 0.0048 -92.93 <0.0001
## white 0.6488 0.0099 65.70 <0.0001
##
# mean g regressed on the `heterogeneity` column alone
ols(formula = g ~ heterogeneity, data = enem_munis)
## Frequencies of Missing Values Due to Each Variable
## g heterogeneity
## 3 0
##
## Linear Regression Model
##
## ols(formula = g ~ heterogeneity, data = enem_munis)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 5567 LR chi2 190.78 R2 0.034
## sigma0.2516 d.f. 1 R2 adj 0.034
## d.f. 5565 Pr(> chi2) 0.0000 g 0.049
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.23662 -0.18412 -0.01453 0.17815 0.96067
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.3492 0.0127 -27.59 <0.0001
## heterogeneity 0.3479 0.0250 13.93 <0.0001
##
# mean g regressed on all group shares except white (white is the omitted category)
ols(
  formula = g ~ black + brown + yellow + amerindian + other,
  data = enem_munis
)
## Frequencies of Missing Values Due to Each Variable
## g black brown yellow amerindian other
## 3 0 0 0 0 0
##
## Linear Regression Model
##
## ols(formula = g ~ black + brown + yellow + amerindian + other,
## data = enem_munis)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 5567 LR chi2 3409.06 R2 0.458
## sigma0.1885 d.f. 5 R2 adj 0.457
## d.f. 5561 Pr(> chi2) 0.0000 g 0.197
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.047145 -0.116910 -0.008911 0.116005 0.957132
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.2043 0.0066 30.99 <0.0001
## black -0.2630 0.0297 -8.86 <0.0001
## brown -0.7520 0.0131 -57.33 <0.0001
## yellow -0.5342 0.1167 -4.58 <0.0001
## amerindian -0.9307 0.0645 -14.42 <0.0001
## other -0.4549 0.1294 -3.51 0.0004
##
# same group-share model with the `heterogeneity` column added
ols(
  formula = g ~ black + brown + yellow + amerindian + other + heterogeneity,
  data = enem_munis
)
## Frequencies of Missing Values Due to Each Variable
## g black brown yellow amerindian
## 3 0 0 0 0
## other heterogeneity
## 0 0
##
## Linear Regression Model
##
## ols(formula = g ~ black + brown + yellow + amerindian + other +
## heterogeneity, data = enem_munis)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 5567 LR chi2 3813.74 R2 0.496
## sigma0.1818 d.f. 6 R2 adj 0.495
## d.f. 5560 Pr(> chi2) 0.0000 g 0.206
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.001985 -0.112930 -0.004762 0.113402 0.903757
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.5364 0.0174 30.79 <0.0001
## black -0.5753 0.0325 -17.73 <0.0001
## brown -0.8187 0.0131 -62.68 <0.0001
## yellow -1.2995 0.1186 -10.96 <0.0001
## amerindian -1.0997 0.0628 -17.52 <0.0001
## other -1.0241 0.1279 -8.01 <0.0001
## heterogeneity -0.4907 0.0240 -20.48 <0.0001
##
#alternative forms
# A form is identified by the four TX_GABARITO_* strings joined with "_"
enem$forms = str_c(
  enem$TX_GABARITO_CN,
  enem$TX_GABARITO_CH,
  enem$TX_GABARITO_LC,
  enem$TX_GABARITO_MT,
  sep = "_"
)
# frequency table of the forms
enem_forms_table = enem$forms %>% table2()
#subset to largest same item dataset
# NOTE(review): the form in row 2 of the table is used, not row 1 - presumably
# row 1 is not a usable form (e.g. missing); confirm.
largest_form = enem_forms_table$Group[2]
enem_sub1 = filter(enem, forms == largest_form)
#split up
# Split one response string per person into one single-character column per
# item, named <prefix>_1 ... <prefix>_<n_items>.
#   responses character vector of response strings
#   prefix    column name prefix
#   n_items   number of characters (items) expected per string
# Returns a character matrix; set_colnames() fails if the number of columns
# produced by the split differs from n_items.
# Column names are built with paste0() instead of relying on an overloaded
# `+` for strings, and TRUE is spelled out instead of `T`.
split_responses = function(responses, prefix, n_items) {
  responses %>%
    str_split("", simplify = TRUE) %>%
    set_colnames(paste0(prefix, "_", seq_len(n_items)))
}
enem_sub1_items = cbind(
  split_responses(enem_sub1$TX_RESPOSTAS_CN, "science", 45),
  split_responses(enem_sub1$TX_RESPOSTAS_CH, "humanities", 45),
  split_responses(enem_sub1$TX_RESPOSTAS_LC, "language", 50),
  split_responses(enem_sub1$TX_RESPOSTAS_MT, "math", 45)
) %>% as_tibble()
#binary versions
# Answer key: the form id of the first row with the "_" separators removed,
# split into single characters (TRUE spelled out instead of `T`).
enem_sub1_items_key = enem_sub1$forms[1] %>% str_replace_all("_", "") %>% str_split("", simplify = TRUE) %>% as.vector()
# the key should have one entry per item column
length(enem_sub1_items_key)
## [1] 185
dim(enem_sub1_items)
## [1] 551438 185
#score
# For each item, add a logical column <item>b that is TRUE where the given
# answer equals the key entry at the same position.
item_cols = colnames(enem_sub1_items)
for (pos in seq_along(enem_sub1_items_key)) {
  item_name = item_cols[pos]
  is_correct = enem_sub1_items[[item_name]] == enem_sub1_items_key[pos]
  enem_sub1_items[[paste0(item_name, "b")]] = is_correct
}
#analyze
# correlations among the scored items for the first 1000 rows; show only the
# first rows of the matrix
scored_head = head(select(enem_sub1_items, science_1b:math_45b), 1000)
head(wtd.cors(scored_head))
## science_1b science_2b science_3b science_4b science_5b
## science_1b 1.00000 0.109389 0.1953 -0.007468 0.1314
## science_2b 0.10939 1.000000 0.0486 0.000471 0.0494
## science_3b 0.19533 0.048559 1.0000 -0.026072 0.1060
## science_4b -0.00747 0.000471 -0.0261 1.000000 0.0601
## science_5b 0.13136 0.049443 0.1060 0.060126 1.0000
## science_6b 0.09261 0.048029 0.0841 -0.028572 0.0780
## science_6b science_7b science_8b science_9b science_10b
## science_1b 0.0926 0.105594 0.12136 0.04666 0.0638
## science_2b 0.0480 0.003348 0.07170 0.03109 0.0734
## science_3b 0.0841 0.033362 0.06079 0.00171 0.0752
## science_4b -0.0286 0.000552 0.02271 -0.03926 -0.0371
## science_5b 0.0780 0.104462 0.09606 0.04401 0.0414
## science_6b 1.0000 0.066168 -0.00495 0.00208 0.0909
## science_11b science_12b science_13b science_14b science_15b
## science_1b 0.1433 0.1852 0.0462 0.0301 0.15301
## science_2b 0.1160 0.0541 0.0533 0.0686 0.02859
## science_3b 0.1499 0.1389 0.0621 -0.0229 0.04104
## science_4b -0.0299 -0.0319 0.0300 -0.0189 -0.00866
## science_5b 0.1409 0.1255 0.0343 0.0425 0.07380
## science_6b 0.0688 0.0960 0.0277 0.0177 0.08224
## science_16b science_17b science_18b science_19b science_20b
## science_1b 0.06422 0.1130 -0.00354 0.0234 0.1899
## science_2b 0.01620 0.0821 -0.00442 0.0691 0.0476
## science_3b -0.01577 0.0348 0.06453 0.0467 0.2071
## science_4b 0.00172 -0.0409 0.03373 0.0278 -0.0734
## science_5b 0.03147 0.0453 0.01070 0.0649 0.1411
## science_6b 0.08571 0.1067 0.03824 0.0608 0.0907
## science_21b science_22b science_23b science_24b science_25b
## science_1b 0.0936 0.2229 0.08114 0.056410 0.06354
## science_2b 0.0570 0.1102 0.06089 0.034534 0.03860
## science_3b 0.0353 0.0949 0.08352 0.000378 0.08324
## science_4b 0.0113 -0.0697 0.00576 -0.074757 0.00589
## science_5b 0.0469 0.0927 0.10896 0.060205 0.01729
## science_6b 0.0297 0.0751 0.10738 0.033991 -0.00543
## science_26b science_27b science_28b science_29b science_30b
## science_1b 0.1652 0.1688 0.0842 0.1187 0.1266
## science_2b 0.0771 0.0119 0.0847 0.0379 0.0409
## science_3b 0.1787 0.1147 0.0950 0.0690 0.0712
## science_4b -0.0556 -0.0226 -0.0340 -0.0199 -0.0461
## science_5b 0.0324 0.1007 -0.0398 0.1447 0.0820
## science_6b 0.0475 0.0683 -0.0178 0.0491 0.1202
## science_31b science_32b science_33b science_34b science_35b
## science_1b -0.019708 0.0583 -0.04366 0.1678 0.1626
## science_2b 0.007918 0.0732 0.00937 0.1580 0.0398
## science_3b -0.000762 0.0466 -0.01966 0.0875 0.0707
## science_4b 0.044995 -0.0113 -0.00117 -0.0386 -0.0856
## science_5b 0.052541 0.0300 0.05912 0.0364 0.0990
## science_6b 0.025695 0.1066 -0.00909 0.0244 0.1655
## science_36b science_37b science_38b science_39b science_40b
## science_1b 0.0617 0.0445 0.1164 0.05752 0.0850
## science_2b 0.0271 0.0728 0.0510 -0.00334 0.0581
## science_3b 0.0582 0.0778 0.1548 0.05667 0.0537
## science_4b -0.0701 -0.0116 -0.0222 -0.02877 0.0493
## science_5b 0.0958 0.0902 0.1316 0.11149 0.1125
## science_6b 0.0604 0.0113 0.0722 -0.00268 0.0729
## science_41b science_42b science_43b science_44b science_45b
## science_1b 0.03918 0.15713 0.16514 0.2387 0.2607
## science_2b 0.00679 0.06653 0.06437 0.0773 0.0785
## science_3b 0.07403 0.08076 0.13147 0.1769 0.2192
## science_4b -0.02920 -0.00808 0.00259 -0.0862 -0.0766
## science_5b 0.04262 0.06227 0.12063 0.1204 0.1264
## science_6b 0.00787 0.07899 0.13682 0.1368 0.1187
## humanities_1b humanities_2b humanities_3b humanities_4b
## science_1b -0.02222 0.09011 0.1773 -0.02371
## science_2b -0.03369 0.07441 0.0979 -0.00535
## science_3b -0.00054 0.01795 0.0854 -0.03978
## science_4b -0.03277 -0.00784 -0.0440 0.05458
## science_5b -0.05732 0.02938 0.0848 -0.05165
## science_6b -0.01178 0.03327 0.0549 0.00125
## humanities_5b humanities_6b humanities_7b humanities_8b
## science_1b 0.0929 0.06419 0.19896 -0.01575
## science_2b 0.0288 -0.04748 0.00579 0.01926
## science_3b 0.1172 0.02595 0.12372 0.02221
## science_4b -0.0438 0.02266 -0.10150 0.00992
## science_5b 0.0582 -0.00495 0.04725 0.02076
## science_6b 0.0629 -0.00330 0.06810 0.02540
## humanities_9b humanities_10b humanities_11b humanities_12b
## science_1b 0.10797 0.11252 0.13585 0.0852
## science_2b 0.03828 0.00281 0.06612 0.0354
## science_3b 0.02917 0.05652 0.11599 0.1062
## science_4b 0.00516 -0.06662 0.00835 -0.0630
## science_5b 0.09870 0.07169 0.09533 0.0960
## science_6b 0.05237 0.07437 0.06274 0.0406
## humanities_13b humanities_14b humanities_15b humanities_16b
## science_1b 0.0892 0.1222 0.2071 0.2547
## science_2b 0.0602 0.0162 0.0665 0.1019
## science_3b 0.0663 0.1677 0.1100 0.2152
## science_4b -0.0433 -0.0707 -0.0474 -0.0611
## science_5b 0.0237 0.0741 0.0743 0.1287
## science_6b 0.0318 0.1475 0.0836 0.0589
## humanities_17b humanities_18b humanities_19b humanities_20b
## science_1b 0.00249 0.1202 0.02954 0.08120
## science_2b -0.03498 0.0631 0.05080 -0.00625
## science_3b 0.02167 0.0813 -0.04862 -0.00690
## science_4b -0.05337 -0.0708 0.00113 -0.03120
## science_5b 0.08869 0.0483 0.04173 0.05491
## science_6b 0.07724 0.0765 0.05100 0.02312
## humanities_21b humanities_22b humanities_23b humanities_24b
## science_1b 0.1383 0.17288 0.09468 0.1995
## science_2b 0.0523 0.02711 0.00289 0.0305
## science_3b 0.1270 0.03587 -0.02197 0.1387
## science_4b -0.0641 0.00164 0.01913 -0.1252
## science_5b 0.0448 0.02476 0.09158 0.0844
## science_6b 0.1159 0.07944 0.02612 0.1010
## humanities_25b humanities_26b humanities_27b humanities_28b
## science_1b -0.0221 0.1707 0.1713 0.1013
## science_2b -0.0444 0.0147 0.0792 0.0141
## science_3b -0.0206 0.1245 0.1945 0.0387
## science_4b 0.0188 -0.0770 -0.0964 -0.0345
## science_5b 0.0125 0.1008 0.0559 0.0371
## science_6b -0.0209 0.0248 0.0886 0.0232
## humanities_29b humanities_30b humanities_31b humanities_32b
## science_1b 0.0789 0.1840 0.2025 0.163466
## science_2b 0.0492 0.0762 0.0665 0.000337
## science_3b 0.0174 0.1751 0.1190 0.142799
## science_4b -0.0125 -0.0895 -0.0372 -0.046773
## science_5b 0.0318 0.1514 0.0321 0.091794
## science_6b 0.0125 0.0624 0.0612 0.117168
## humanities_33b humanities_34b humanities_35b humanities_36b
## science_1b 0.12476 0.1560 0.1894 0.0775
## science_2b 0.03278 0.0162 0.0409 0.0511
## science_3b 0.10132 0.1295 0.0797 0.1846
## science_4b -0.00415 0.0095 -0.0657 -0.0254
## science_5b -0.01112 0.0881 0.1079 0.0416
## science_6b 0.05745 0.0697 0.1087 0.0711
## humanities_37b humanities_38b humanities_39b humanities_40b
## science_1b 0.054379 0.1123 0.1435 0.06487
## science_2b 0.024335 0.0586 0.0345 -0.00295
## science_3b 0.016080 0.0849 0.2015 0.05333
## science_4b 0.000484 -0.0878 -0.0436 -0.00461
## science_5b 0.021755 0.0424 0.0651 0.06273
## science_6b 0.044041 0.0459 0.1127 -0.00122
## humanities_41b humanities_42b humanities_43b humanities_44b
## science_1b 0.1221 0.0403 0.1300 0.0556
## science_2b 0.0456 -0.0292 0.0216 -0.0552
## science_3b 0.1170 0.0573 0.0775 -0.0365
## science_4b -0.0467 -0.0340 -0.0597 -0.0608
## science_5b 0.0302 -0.0141 0.0461 0.0170
## science_6b 0.0165 0.0475 0.0506 0.0832
## humanities_45b language_1b language_2b language_3b language_4b
## science_1b 0.1863 0.1696 0.1483 0.1558 0.1317
## science_2b 0.0255 0.1149 0.0121 0.0999 0.0827
## science_3b 0.1595 0.1429 0.0946 0.1374 0.1075
## science_4b -0.0745 -0.0617 -0.0609 -0.0358 -0.0672
## science_5b 0.0715 0.0933 0.1027 0.1336 0.0812
## science_6b 0.0783 0.1332 0.0641 0.0750 0.0873
## language_5b language_6b language_7b language_8b language_9b
## science_1b 0.1200 0.04928 -0.00383 0.019015 0.00302
## science_2b 0.1000 -0.07052 -0.01729 -0.000714 -0.06106
## science_3b 0.0789 0.02437 -0.02759 -0.031873 -0.00257
## science_4b -0.0798 0.03358 -0.05397 -0.003879 0.00969
## science_5b 0.0741 -0.04179 -0.01952 -0.062779 0.01577
## science_6b 0.0507 -0.00538 -0.03836 -0.034658 0.01890
## language_10b language_11b language_12b language_13b
## science_1b 0.03709 0.2107 0.13656 0.07147
## science_2b -0.02595 0.0539 0.00399 -0.00381
## science_3b 0.04117 0.0812 0.08920 0.06672
## science_4b -0.01511 -0.0184 -0.03298 -0.02887
## science_5b 0.00614 0.1122 0.00852 0.04565
## science_6b -0.02008 0.0975 0.01963 0.03059
## language_14b language_15b language_16b language_17b
## science_1b 0.1028 0.08227 0.02592 0.04061
## science_2b 0.0235 0.00594 -0.00367 -0.06179
## science_3b 0.0747 0.05149 0.02622 0.07744
## science_4b -0.0767 0.02091 -0.05004 -0.04812
## science_5b 0.0694 -0.01487 0.02388 -0.01648
## science_6b 0.0747 0.01555 -0.01115 0.00275
## language_18b language_19b language_20b language_21b
## science_1b 0.02962 0.1006 0.0773 0.02376
## science_2b -0.01884 0.0443 0.0270 -0.00132
## science_3b 0.00867 -0.0383 0.0625 0.00867
## science_4b 0.05873 -0.0290 -0.0268 -0.00215
## science_5b -0.02042 -0.0436 0.0159 0.06594
## science_6b 0.05474 0.0509 0.0435 0.04781
## language_22b language_23b language_24b language_25b
## science_1b 0.07082 0.03536 0.1324 0.1068
## science_2b -0.00329 0.01554 0.0839 0.0196
## science_3b 0.04674 0.04004 0.0988 0.1031
## science_4b -0.05826 -0.03765 -0.0773 0.0144
## science_5b 0.03339 0.03119 0.1019 0.0447
## science_6b 0.02378 0.00707 0.0757 0.0447
## language_26b language_27b language_28b language_29b
## science_1b 0.1108 0.1799 0.1588 0.1221
## science_2b 0.0196 0.0439 -0.0194 0.0991
## science_3b 0.0716 0.0441 0.0766 0.1022
## science_4b -0.0472 -0.0334 -0.0615 -0.0464
## science_5b 0.0449 0.0249 0.0104 0.0833
## science_6b 0.0665 0.0470 0.0741 0.0338
## language_30b language_31b language_32b language_33b
## science_1b -0.01122 0.00816 0.2467 0.13195
## science_2b -0.03106 0.01133 0.0613 0.00413
## science_3b 0.01245 0.00627 0.0932 0.03724
## science_4b 0.01003 -0.03007 -0.0556 -0.02569
## science_5b -0.00419 -0.00785 0.0549 0.08374
## science_6b 0.06211 -0.03477 0.1286 0.07379
## language_34b language_35b language_36b language_37b
## science_1b 0.01360 0.1134 0.1281 0.0843
## science_2b 0.00805 0.0189 0.0335 0.0288
## science_3b 0.00782 0.0591 0.0638 0.0435
## science_4b -0.03655 0.0290 -0.0457 -0.0313
## science_5b -0.01105 0.0518 0.0341 0.0486
## science_6b 0.03208 0.0224 0.0545 0.1025
## language_38b language_39b language_40b language_41b
## science_1b 0.1243 0.1916 0.1650 -0.04102
## science_2b 0.0282 0.0462 0.0555 -0.00821
## science_3b 0.0780 0.0870 0.1019 -0.01847
## science_4b -0.0503 -0.0278 0.0244 0.07438
## science_5b 0.0556 0.0586 0.0345 0.02563
## science_6b 0.0716 0.1221 0.0951 0.00249
## language_42b language_43b language_44b language_45b
## science_1b 0.1288 0.1180 0.00866 0.2121
## science_2b 0.0337 0.0491 0.03445 0.0545
## science_3b 0.1552 0.0954 0.06103 0.0664
## science_4b -0.0425 -0.0468 0.01701 0.0119
## science_5b 0.0343 0.0736 0.03494 0.1778
## science_6b 0.0492 0.0616 0.03223 0.0707
## language_46b language_47b language_48b language_49b
## science_1b 0.0815 0.0541 0.13696 0.05100
## science_2b 0.0418 0.0650 0.00655 0.01483
## science_3b 0.0690 0.0400 0.07287 -0.01448
## science_4b -0.0235 -0.0131 -0.03179 0.02827
## science_5b -0.0307 0.0585 0.09106 -0.01782
## science_6b 0.0399 0.0120 0.05084 0.00811
## language_50b math_1b math_2b math_3b math_4b math_5b math_6b
## science_1b 0.001302 0.06075 0.11130 0.1771 0.0969 0.1312 0.1849
## science_2b -0.019245 0.03453 0.07672 0.0174 0.0408 0.1009 0.0938
## science_3b 0.009013 0.09817 0.10355 0.0734 0.0752 0.1278 0.1049
## science_4b -0.000301 -0.00723 -0.00232 -0.0462 -0.0429 -0.0490 -0.0994
## science_5b -0.022734 0.05038 0.10346 0.1237 0.1026 0.1499 0.1048
## science_6b -0.062073 0.11083 -0.01061 0.1096 0.1211 0.0584 0.0727
## math_7b math_8b math_9b math_10b math_11b math_12b math_13b
## science_1b 0.05253 0.0966 0.06754 -0.02302 0.10234 0.03456 0.1436
## science_2b 0.00728 0.0209 0.04281 0.09852 -0.00556 0.00991 0.0605
## science_3b 0.03148 0.1005 0.00105 -0.02197 0.10583 0.04120 0.0430
## science_4b -0.00902 -0.0765 0.00531 0.00395 -0.02594 -0.05272 -0.0238
## science_5b 0.04585 0.0589 0.06724 0.09655 0.12553 -0.00734 0.0593
## science_6b 0.08724 0.0704 0.00121 0.01592 0.11234 0.03027 0.0560
## math_14b math_15b math_16b math_17b math_18b math_19b math_20b
## science_1b 0.1464 0.09750 0.0638 0.0681 0.05129 -0.0284 0.11799
## science_2b 0.0532 0.02277 0.0181 0.0814 0.10849 0.0315 -0.00210
## science_3b 0.1267 0.08732 0.0348 0.0538 -0.02686 -0.0359 0.05618
## science_4b -0.0696 0.00486 -0.0180 0.0177 -0.01238 -0.0373 -0.00246
## science_5b 0.0989 0.14544 0.0615 0.0433 0.05292 0.0389 0.11058
## science_6b 0.1012 0.04480 0.0279 0.1176 0.00994 -0.0711 0.12113
## math_21b math_22b math_23b math_24b math_25b math_26b math_27b
## science_1b 0.1036 0.05126 0.1350 0.005697 0.1302 0.07118 -0.02302
## science_2b 0.0384 -0.01313 0.0557 0.005423 0.0139 0.06656 -0.00825
## science_3b 0.0556 -0.01039 0.0855 0.052146 0.0624 -0.00252 -0.02954
## science_4b 0.0224 -0.03727 -0.0414 0.000076 -0.0210 -0.06822 0.02565
## science_5b 0.0868 0.01634 0.0767 0.003780 0.1022 0.07130 -0.02745
## science_6b 0.0582 -0.00639 0.1031 0.011919 0.0587 0.07103 -0.05308
## math_28b math_29b math_30b math_31b math_32b math_33b math_34b
## science_1b 0.13588 0.1490 0.1298 0.0624 0.1977 0.11401 0.0644
## science_2b 0.05982 0.0314 0.0646 0.1498 0.0654 -0.00177 0.0253
## science_3b 0.10169 0.1247 0.0618 -0.0147 0.0844 0.01774 0.1119
## science_4b 0.00703 -0.0675 -0.0226 0.0850 -0.0711 -0.02273 -0.0422
## science_5b 0.06180 0.0654 0.0570 0.0788 0.0902 0.09284 0.0664
## science_6b 0.05627 0.0999 0.0460 0.0314 0.0600 -0.01117 0.0870
## math_35b math_36b math_37b math_38b math_39b math_40b math_41b
## science_1b 0.1009 0.0811 0.0831 0.1281 0.0312 0.04479 0.06699
## science_2b 0.0441 -0.0372 0.0514 0.0714 0.0657 0.01490 -0.00222
## science_3b 0.0597 0.0813 -0.0105 0.0913 -0.0495 -0.00419 0.04667
## science_4b -0.0239 -0.0432 -0.0150 -0.0687 -0.0360 0.00624 0.03128
## science_5b 0.0123 0.0438 0.0860 0.0913 0.0317 0.09116 0.12029
## science_6b 0.0380 0.0414 0.0287 0.0845 0.0223 0.05346 0.04820
## math_42b math_43b math_44b math_45b
## science_1b 0.1833 0.1237 0.11778 0.20983
## science_2b 0.0426 0.0493 0.07555 0.00669
## science_3b 0.1095 0.1006 0.00934 0.09043
## science_4b -0.0618 0.0099 -0.02334 -0.07628
## science_5b 0.1219 0.0890 0.13661 0.07434
## science_6b 0.0629 0.0648 0.03018 0.09306
#simple score
# number of correct answers per person across all scored items
enem_sub1$g_sum = rowSums(select(enem_sub1_items, science_1b:math_45b))
#IRT
# Fit irt.fa() on the first 10,000 rows of the scored items; the logical item
# columns are converted to numeric first.
enem_sub1_fa = irt.fa(
  enem_sub1_items %>%
    select(science_1b:math_45b) %>%
    head(10000) %>%
    map_df(as.numeric)
)
## Warning in cor.smooth(mat): Matrix was not positive definite, smoothing was
## done
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs
## = np.obs, : The estimated weights for the factor scores are probably
## incorrect. Try a different factor extraction method.
# print the factor-analysis part of the result
enem_sub1_fa[["fa"]]
## Factor Analysis using method = minres
## Call: fa(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate,
## fm = fm)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## science_1b 0.49 2.4e-01 0.76 1
## science_2b 0.16 2.5e-02 0.98 1
## science_3b 0.39 1.5e-01 0.85 1
## science_4b -0.07 4.9e-03 1.00 1
## science_5b 0.27 7.4e-02 0.93 1
## science_6b 0.27 7.5e-02 0.93 1
## science_7b 0.23 5.4e-02 0.95 1
## science_8b 0.24 6.0e-02 0.94 1
## science_9b 0.13 1.6e-02 0.98 1
## science_10b 0.22 4.9e-02 0.95 1
## science_11b 0.37 1.3e-01 0.87 1
## science_12b 0.47 2.2e-01 0.78 1
## science_13b 0.16 2.7e-02 0.97 1
## science_14b 0.10 1.1e-02 0.99 1
## science_15b 0.32 1.0e-01 0.90 1
## science_16b 0.20 3.9e-02 0.96 1
## science_17b 0.37 1.4e-01 0.86 1
## science_18b 0.01 9.5e-05 1.00 1
## science_19b 0.16 2.7e-02 0.97 1
## science_20b 0.48 2.3e-01 0.77 1
## science_21b 0.18 3.3e-02 0.97 1
## science_22b 0.50 2.5e-01 0.75 1
## science_23b 0.34 1.1e-01 0.89 1
## science_24b 0.15 2.4e-02 0.98 1
## science_25b 0.11 1.2e-02 0.99 1
## science_26b 0.39 1.5e-01 0.85 1
## science_27b 0.26 6.9e-02 0.93 1
## science_28b 0.21 4.3e-02 0.96 1
## science_29b 0.37 1.4e-01 0.86 1
## science_30b 0.29 8.3e-02 0.92 1
## science_31b 0.12 1.3e-02 0.99 1
## science_32b 0.14 2.0e-02 0.98 1
## science_33b 0.08 6.7e-03 0.99 1
## science_34b 0.37 1.3e-01 0.87 1
## science_35b 0.51 2.6e-01 0.74 1
## science_36b 0.21 4.4e-02 0.96 1
## science_37b 0.15 2.2e-02 0.98 1
## science_38b 0.31 9.5e-02 0.90 1
## science_39b 0.18 3.4e-02 0.97 1
## science_40b 0.23 5.3e-02 0.95 1
## science_41b 0.06 4.1e-03 1.00 1
## science_42b 0.46 2.1e-01 0.79 1
## science_43b 0.30 8.9e-02 0.91 1
## science_44b 0.60 3.6e-01 0.64 1
## science_45b 0.68 4.6e-01 0.54 1
## humanities_1b 0.01 5.4e-05 1.00 1
## humanities_2b 0.14 2.0e-02 0.98 1
## humanities_3b 0.44 1.9e-01 0.81 1
## humanities_4b 0.11 1.1e-02 0.99 1
## humanities_5b 0.23 5.3e-02 0.95 1
## humanities_6b 0.08 6.7e-03 0.99 1
## humanities_7b 0.54 2.9e-01 0.71 1
## humanities_8b 0.20 3.9e-02 0.96 1
## humanities_9b 0.31 9.4e-02 0.91 1
## humanities_10b 0.26 6.7e-02 0.93 1
## humanities_11b 0.33 1.1e-01 0.89 1
## humanities_12b 0.33 1.1e-01 0.89 1
## humanities_13b 0.33 1.1e-01 0.89 1
## humanities_14b 0.35 1.2e-01 0.88 1
## humanities_15b 0.57 3.3e-01 0.67 1
## humanities_16b 0.67 4.5e-01 0.55 1
## humanities_17b 0.07 4.6e-03 1.00 1
## humanities_18b 0.33 1.1e-01 0.89 1
## humanities_19b 0.08 6.9e-03 0.99 1
## humanities_20b 0.25 6.2e-02 0.94 1
## humanities_21b 0.41 1.7e-01 0.83 1
## humanities_22b 0.37 1.4e-01 0.86 1
## humanities_23b 0.17 2.9e-02 0.97 1
## humanities_24b 0.57 3.2e-01 0.68 1
## humanities_25b -0.09 8.1e-03 0.99 1
## humanities_26b 0.39 1.6e-01 0.84 1
## humanities_27b 0.61 3.8e-01 0.62 1
## humanities_28b 0.25 6.3e-02 0.94 1
## humanities_29b 0.26 6.7e-02 0.93 1
## humanities_30b 0.57 3.2e-01 0.68 1
## humanities_31b 0.58 3.4e-01 0.66 1
## humanities_32b 0.39 1.5e-01 0.85 1
## humanities_33b 0.47 2.2e-01 0.78 1
## humanities_34b 0.48 2.3e-01 0.77 1
## humanities_35b 0.53 2.8e-01 0.72 1
## humanities_36b 0.23 5.1e-02 0.95 1
## humanities_37b 0.17 2.9e-02 0.97 1
## humanities_38b 0.49 2.4e-01 0.76 1
## humanities_39b 0.47 2.2e-01 0.78 1
## humanities_40b 0.24 5.6e-02 0.94 1
## humanities_41b 0.45 2.0e-01 0.80 1
## humanities_42b 0.19 3.6e-02 0.96 1
## humanities_43b 0.35 1.3e-01 0.87 1
## humanities_44b 0.13 1.8e-02 0.98 1
## humanities_45b 0.53 2.8e-01 0.72 1
## language_1b 0.62 3.9e-01 0.61 1
## language_2b 0.55 3.0e-01 0.70 1
## language_3b 0.60 3.6e-01 0.64 1
## language_4b 0.51 2.6e-01 0.74 1
## language_5b 0.45 2.0e-01 0.80 1
## language_6b -0.09 8.5e-03 0.99 1
## language_7b -0.06 3.2e-03 1.00 1
## language_8b -0.13 1.6e-02 0.98 1
## language_9b -0.13 1.7e-02 0.98 1
## language_10b -0.01 4.2e-05 1.00 1
## language_11b 0.48 2.3e-01 0.77 1
## language_12b 0.32 1.0e-01 0.90 1
## language_13b 0.23 5.3e-02 0.95 1
## language_14b 0.35 1.2e-01 0.88 1
## language_15b 0.30 8.8e-02 0.91 1
## language_16b 0.19 3.7e-02 0.96 1
## language_17b 0.36 1.3e-01 0.87 1
## language_18b 0.11 1.2e-02 0.99 1
## language_19b 0.40 1.6e-01 0.84 1
## language_20b 0.42 1.8e-01 0.82 1
## language_21b 0.14 1.9e-02 0.98 1
## language_22b 0.19 3.6e-02 0.96 1
## language_23b -0.02 4.2e-04 1.00 1
## language_24b 0.50 2.5e-01 0.75 1
## language_25b 0.29 8.5e-02 0.92 1
## language_26b 0.34 1.2e-01 0.88 1
## language_27b 0.32 1.0e-01 0.90 1
## language_28b 0.39 1.5e-01 0.85 1
## language_29b 0.40 1.6e-01 0.84 1
## language_30b 0.15 2.2e-02 0.98 1
## language_31b 0.13 1.8e-02 0.98 1
## language_32b 0.57 3.2e-01 0.68 1
## language_33b 0.32 1.0e-01 0.90 1
## language_34b 0.18 3.1e-02 0.97 1
## language_35b 0.23 5.4e-02 0.95 1
## language_36b 0.42 1.8e-01 0.82 1
## language_37b 0.24 5.9e-02 0.94 1
## language_38b 0.29 8.7e-02 0.91 1
## language_39b 0.51 2.6e-01 0.74 1
## language_40b 0.48 2.3e-01 0.77 1
## language_41b -0.14 2.1e-02 0.98 1
## language_42b 0.40 1.6e-01 0.84 1
## language_43b 0.35 1.2e-01 0.88 1
## language_44b 0.13 1.7e-02 0.98 1
## language_45b 0.46 2.1e-01 0.79 1
## language_46b 0.24 5.8e-02 0.94 1
## language_47b 0.20 4.0e-02 0.96 1
## language_48b 0.33 1.1e-01 0.89 1
## language_49b 0.20 4.0e-02 0.96 1
## language_50b 0.11 1.2e-02 0.99 1
## math_1b 0.32 1.0e-01 0.90 1
## math_2b 0.28 7.7e-02 0.92 1
## math_3b 0.35 1.2e-01 0.88 1
## math_4b 0.39 1.6e-01 0.84 1
## math_5b 0.38 1.5e-01 0.85 1
## math_6b 0.35 1.2e-01 0.88 1
## math_7b 0.13 1.8e-02 0.98 1
## math_8b 0.35 1.2e-01 0.88 1
## math_9b 0.17 2.9e-02 0.97 1
## math_10b 0.11 1.1e-02 0.99 1
## math_11b 0.29 8.6e-02 0.91 1
## math_12b 0.07 5.4e-03 0.99 1
## math_13b 0.39 1.6e-01 0.84 1
## math_14b 0.49 2.4e-01 0.76 1
## math_15b 0.42 1.7e-01 0.83 1
## math_16b 0.18 3.2e-02 0.97 1
## math_17b 0.16 2.5e-02 0.97 1
## math_18b 0.22 5.1e-02 0.95 1
## math_19b 0.00 7.3e-06 1.00 1
## math_20b 0.26 7.0e-02 0.93 1
## math_21b 0.28 8.1e-02 0.92 1
## math_22b 0.21 4.6e-02 0.95 1
## math_23b 0.43 1.8e-01 0.82 1
## math_24b 0.13 1.7e-02 0.98 1
## math_25b 0.32 1.0e-01 0.90 1
## math_26b 0.26 7.0e-02 0.93 1
## math_27b -0.03 8.9e-04 1.00 1
## math_28b 0.33 1.1e-01 0.89 1
## math_29b 0.49 2.4e-01 0.76 1
## math_30b 0.29 8.4e-02 0.92 1
## math_31b 0.14 1.9e-02 0.98 1
## math_32b 0.47 2.2e-01 0.78 1
## math_33b 0.25 6.2e-02 0.94 1
## math_34b 0.28 7.9e-02 0.92 1
## math_35b 0.20 3.8e-02 0.96 1
## math_36b 0.11 1.2e-02 0.99 1
## math_37b 0.26 6.8e-02 0.93 1
## math_38b 0.46 2.1e-01 0.79 1
## math_39b 0.10 1.1e-02 0.99 1
## math_40b 0.12 1.5e-02 0.98 1
## math_41b 0.23 5.3e-02 0.95 1
## math_42b 0.39 1.5e-01 0.85 1
## math_43b 0.36 1.3e-01 0.87 1
## math_44b 0.31 9.6e-02 0.90 1
## math_45b 0.40 1.6e-01 0.84 1
##
## MR1
## SS loadings 20.45
## Proportion Var 0.11
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 17020 and the objective function was 63.5 with Chi Square of 630873
## The degrees of freedom for the model are 16835 and the objective function was 43.8
##
## The root mean square of the residuals (RMSR) is 0.04
## The df corrected root mean square of the residuals is 0.04
##
## The harmonic number of observations is 10000 with the empirical chi square 592645 with prob < 0
## The total number of observations was 10000 with Likelihood Chi Square = 435118 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.311
## RMSEA index = 0.05 and the 90 % confidence intervals are 0.05 NA
## BIC = 280062
## Fit based upon off diagonal values = 0.87
enem_sub1_fa$fa$loadings[, 1] %>% GG_denhist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Score respondents with the fitted IRT model. The item columns
# (science_1b through math_45b) are coerced to numeric before scoring.
enem_sub1_fa_score <- scoreIrt(
  stats = enem_sub1_fa,
  items = map_df(select(enem_sub1_items, science_1b:math_45b), as.numeric)
)
# Store the theta1 estimates, passed through standardize(), as g_irt
enem_sub1$g_irt <- standardize(enem_sub1_fa_score$theta1)
They appear to have divided ~10% of their data by 10 for some reason. But otherwise, we replicate their scores.
# Correlation matrix (no weights supplied) for the columns dad_edu through S
# together with the three ability estimates g, g_sum and g_irt
wtd.cors(select(enem_sub1, dad_edu:S, g, g_sum, g_irt))
## dad_edu mom_edu fam_income bathrooms bedrooms cars
## dad_edu 1.0000 0.59739 0.4641 0.3639 0.222 0.3454
## mom_edu 0.5974 1.00000 0.4216 0.3310 0.219 0.3180
## fam_income 0.4641 0.42160 1.0000 0.5902 0.385 0.5989
## bathrooms 0.3639 0.33103 0.5902 1.0000 0.482 0.5118
## bedrooms 0.2221 0.21882 0.3851 0.4819 1.000 0.3841
## cars 0.3454 0.31803 0.5989 0.5118 0.384 1.0000
## motorcycles -0.0406 -0.00231 0.0148 0.0308 0.125 0.0448
## refrigerators 0.1352 0.11959 0.2369 0.2920 0.225 0.2342
## washmachines 0.2803 0.24047 0.3942 0.3164 0.259 0.3841
## internet 0.2900 0.26995 0.3734 0.2832 0.248 0.3423
## S 0.6270 0.57151 0.8695 0.7700 0.568 0.7610
## g 0.3552 0.33190 0.4770 0.3407 0.188 0.3261
## g_sum 0.3494 0.32615 0.4784 0.3444 0.186 0.3213
## g_irt 0.3688 0.34249 0.4923 0.3538 0.194 0.3347
## motorcycles refrigerators washmachines internet S
## dad_edu -0.04061 0.1352 0.2803 0.2900 0.6270
## mom_edu -0.00231 0.1196 0.2405 0.2699 0.5715
## fam_income 0.01477 0.2369 0.3942 0.3734 0.8695
## bathrooms 0.03076 0.2920 0.3164 0.2832 0.7700
## bedrooms 0.12460 0.2253 0.2594 0.2476 0.5680
## cars 0.04477 0.2342 0.3841 0.3423 0.7610
## motorcycles 1.00000 0.0509 0.0279 0.0337 0.0631
## refrigerators 0.05094 1.0000 0.1822 0.1323 0.3812
## washmachines 0.02793 0.1822 1.0000 0.3531 0.5348
## internet 0.03371 0.1323 0.3531 1.0000 0.5177
## S 0.06312 0.3812 0.5348 0.5177 1.0000
## g -0.03941 0.1137 0.2292 0.2857 0.4837
## g_sum -0.04193 0.1134 0.2220 0.2719 0.4803
## g_irt -0.04505 0.1175 0.2387 0.2909 0.4994
## g g_sum g_irt
## dad_edu 0.3552 0.3494 0.369
## mom_edu 0.3319 0.3262 0.342
## fam_income 0.4770 0.4784 0.492
## bathrooms 0.3407 0.3444 0.354
## bedrooms 0.1881 0.1856 0.194
## cars 0.3261 0.3213 0.335
## motorcycles -0.0394 -0.0419 -0.045
## refrigerators 0.1137 0.1134 0.117
## washmachines 0.2292 0.2220 0.239
## internet 0.2857 0.2719 0.291
## S 0.4837 0.4803 0.499
## g 1.0000 0.9617 0.969
## g_sum 0.9617 1.0000 0.956
## g_irt 0.9687 0.9565 1.000
# Scatterplots of g_irt against each of the other two estimates (g, g_sum)
enem_sub1 %>% GG_scatter("g", "g_irt")
enem_sub1 %>% GG_scatter("g_sum", "g_irt")
# Jensen's method plot for g_irt, with both axis titles set explicitly
fa_Jensens_method(enem_S, enem_sub1, "g_irt") +
  scale_x_continuous(name = "S factor loading") +
  scale_y_continuous(name = "Correlation with g factor score")
## Using latent correlations for the criterion-indicators relationships.
(Not included in the RPubs output because it takes hours to fit.)
# Elastic net: model S from the item columns (science_1b through math_45b)
# using caret's "glmnet" method with tuneLength = 5
enem_sub1_glmnet_S <- caret::train(
  x = select(enem_sub1_items, science_1b:math_45b),
  y = enem_sub1[["S"]],
  method = "glmnet",
  tuneLength = 5
)
# Export both working datasets to CSV, writing missing values as empty strings
write_csv(enem_sub1, "data/enem_sub1.csv", na = "")
write_csv(enem_sub1_items, "data/enem_sub1_items.csv", na = "")
# Record the R version and package versions used for this run
sessionInfo()
## R version 3.5.3 (2019-03-11)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] doFuture_0.8.0 iterators_1.0.10 foreach_1.4.4
## [4] future_1.13.0 globals_0.12.4 caret_6.0-84
## [7] rms_5.1-3.1 SparseM_1.77 sf_0.7-5
## [10] kirkegaard_2018.05 metafor_2.1-0 Matrix_1.2-17
## [13] psych_1.8.12 magrittr_1.5 assertthat_0.2.1
## [16] weights_1.0 mice_3.5.0 gdata_2.18.0
## [19] Hmisc_4.2-0 Formula_1.2-3 survival_2.44-1.1
## [22] lattice_0.20-38 forcats_0.4.0 stringr_1.4.0
## [25] dplyr_0.8.1 purrr_0.3.2 readr_1.3.1
## [28] tidyr_0.8.3 tibble_2.1.3 ggplot2_3.2.0
## [31] tidyverse_1.2.1 pacman_0.5.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.1.4 plyr_1.8.4
## [4] lazyeval_0.2.2 sp_1.3-1 splines_3.5.3
## [7] polycor_0.7-9 listenv_0.7.0 TH.data_1.0-10
## [10] digest_0.6.19 htmltools_0.3.6 checkmate_1.9.3
## [13] cluster_2.0.9 openxlsx_4.1.0.1 recipes_0.1.5
## [16] modelr_0.1.4 gower_0.2.1 sandwich_2.5-1
## [19] colorspace_1.4-1 rvest_0.3.4 haven_2.1.0
## [22] pan_1.6 xfun_0.7 crayon_1.3.4
## [25] jsonlite_1.6 lme4_1.1-21 zoo_1.8-6
## [28] glue_1.3.1 gtable_0.3.0 ipred_0.9-9
## [31] MatrixModels_0.4-1 car_3.0-3 DEoptimR_1.0-8
## [34] jomo_2.6-8 abind_1.4-5 VIM_4.8.0
## [37] scales_1.0.0 mvtnorm_1.0-10 DBI_1.0.0
## [40] Rcpp_1.0.1 laeken_0.5.0 htmlTable_1.13.1
## [43] units_0.6-3 foreign_0.8-70 stats4_3.5.3
## [46] lava_1.6.5 prodlim_2018.04.18 vcd_1.4-4
## [49] htmlwidgets_1.3 httr_1.4.0 RColorBrewer_1.1-2
## [52] acepack_1.4.1 pkgconfig_2.0.2 nnet_7.3-12
## [55] labeling_0.3 tidyselect_0.2.5 rlang_0.3.4
## [58] reshape2_1.4.3 multilevel_2.6 munsell_0.5.0
## [61] cellranger_1.1.0 tools_3.5.3 cli_1.1.0
## [64] generics_0.0.2 ranger_0.11.2 broom_0.5.2
## [67] evaluate_0.14 yaml_2.2.0 ModelMetrics_1.2.2
## [70] knitr_1.23 zip_2.0.2 robustbase_0.93-5
## [73] mitml_0.3-7 nlme_3.1-140 quantreg_5.38
## [76] xml2_1.2.0 psychometric_2.2 compiler_3.5.3
## [79] rstudioapi_0.10 curl_3.3 e1071_1.7-2
## [82] stringi_1.4.3 classInt_0.3-3 nloptr_1.2.1
## [85] pillar_1.4.1 lmtest_0.9-37 data.table_1.12.2
## [88] R6_2.4.0 latticeExtra_0.6-28 KernSmooth_2.23-15
## [91] gridExtra_2.3 rio_0.5.16 codetools_0.2-16
## [94] polspline_1.1.14 boot_1.3-22 MASS_7.3-51.4
## [97] gtools_3.8.1 withr_2.1.2 mnormt_1.5-5
## [100] multcomp_1.4-10 hms_0.4.2 grid_3.5.3
## [103] rpart_4.1-15 timeDate_3043.102 class_7.3-15
## [106] minqa_1.2.4 rmarkdown_1.13 carData_3.0-2
## [109] lubridate_1.7.4 base64enc_0.1-3