# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# BiocManager::install("limma")
library(limma)
library(MKmisc)
##
## Attaching package: 'MKmisc'
## The following objects are masked from 'package:psych':
##
## AUC, corPlot
library(ResourceSelection)
df1 <- dd[c("gpa", "age", "classnum", "v17_2", "v17_a", "v17_b", "v17_2etc", "v18_1", "v18_6", "v21_1", "v21_2", "v21_3", "v21_4", "v21_5", "v21_6", "v21_7", "v21_8etcvar", "momedu", "v29", "ISCOmom", "ISCOmom_2", "dadedu", "v32", "ISCOdad", "ISCOdad_2", "v35_1", "v36", "v37_1", "v38", "v39_1", "v40", "v41", "v42", "codnat_samooopred", "codnat_samooopredbinom", "codenat_parents", "codenat_parentsbinom", "v43_1", "codlanhome", "v47", "v48", "v48_4_Years", "v48_4_Month", "v49_2", "v50_10", "gender", "v54", "iseimom", "iseidad", "codenat_language")]
# df1 <- na.omit(df1)
head(df1)
| gpa | age | classnum | v17_2 | v17_a | v17_b | v17_2etc | v18_1 | v18_6 | v21_1 | v21_2 | v21_3 | v21_4 | v21_5 | v21_6 | v21_7 | v21_8etcvar | momedu | v29 | ISCOmom | ISCOmom_2 | dadedu | v32 | ISCOdad | ISCOdad_2 | v35_1 | v36 | v37_1 | v38 | v39_1 | v40 | v41 | v42 | codnat_samooopred | codnat_samooopredbinom | codenat_parents | codenat_parentsbinom | v43_1 | codlanhome | v47 | v48 | v48_4_Years | v48_4_Month | v49_2 | v50_10 | gender | v54 | iseimom | iseidad | codenat_language |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 3.43 | 16 | 7 | 7 | 3 | 1 | 2 | 3 | 1 | 5.23e+03 | 2 | 1 | 5.41e+03 | Армения | армянский | Армения | армянский | Армения | 13 | армянский | армений | 5166 | 0 | 5166 | 0 | 1 | 5.17e+03 | 1 | 4 | 13 | 156 | 6 | 3 | 1 | 16 | 31 | 40 | 5.17e+03 | ||||||||||||
| 3.29 | 16 | 7 | 7 | персональным фитнес тренером | 3 | 2 | 5 | 5 | 1 | 2.34e+03 | 4 | 1 | 2.14e+03 | Белоруссия | белорусский | Россия | русский | Россия матушка | русский | русск | 1110 | 1 | 1111 | 0 | 2 | 1 | 2 | 6 | 4 | 1 | 16 | 47 | 65 | 1.11e+03 | |||||||||||||||
| 3.43 | 15 | 9 | 3 | 3 | 2 | 4 | 7 | 4 | 2 | 2.6e+03 | 2 | 3 | 7.1e+03 | Россия | русский | Россия | русский | Россия | пару месяцев | русский | русский | 1110 | 1 | 1110 | 1 | 2 | 2.12e+03 | 1 | 2 | 5 | 1 | 1 | 15 | 66 | 34 | 1.11e+03 | |||||||||||||
| 4.14 | 15 | 9 | 3 | 3 | 3 | 1 | 6 | 2 | 1 | 4.3e+03 | 5 | 1 | 4.3e+03 | Россия | русский | Россия | русский | Россия | 12 | русский | русский | 1110 | 1 | 1110 | 1 | 1 | 3 | 4 | 12 | 144 | 6 | 2 | 1 | 15 | 43 | 43 | 1.11e+03 | ||||||||||||
| 3.71 | 14 | 9 | 7 | 7 | тренер (спорт) | 3 | 1 | 2 | 3 | 4 | 1 | 1.44e+03 | 2.3e+03 | 6 | 1 | 1e+03 | Грузия (Абхазия) | грузинский | Россия | грузинский | Россия | грузинский | Грузия | 5177 | 0 | 5177 | 0 | 2 | 5.18e+03 | 1 | 2 | 6 | 1 | 1 | 14 | 59 | 62 | 5.18e+03 | |||||||||||
| 3.71 | 16 | 9 | 2 | 2 | 5 | 1 | 3 | 1 | 1 | 5.12e+03 | 6 | 2 | Кыргызстан | узбекский | Узбекистан | узбекский | Кыргызстан | 14 | узбекский | узбекский | 6212 | 0 | 6212 | 0 | 1 | 6.21e+03 | 3 | 4 | 13 | 156 | 5 | 1 | 2 | 16 | 27 | 6.21e+03 |
df_ea <- dd[c("v21_1", "v21_2", "v21_3", "v21_4", "v21_5", "v21_6", "v21_7" )]
df_oa <- dd[c("v17_a", "v17_b", "v17_c", "v17_d")]
df_edupar <- dd[c("momedu","dadedu")]
df_iseipar <- dd [c("iseimom", "iseidad")]
df_age <- dd [c("age", "classnum", "v48_4_Month")]
# df1[is.na(df1)] <- 0
#Если надо будет заменить все на нолики
sapply(df1,function(x) sum(is.na(x)))
## gpa age classnum
## 15 11 2
## v17_2 v17_a v17_b
## 0 23 312
## v17_2etc v18_1 v18_6
## 0 1 2
## v21_1 v21_2 v21_3
## 351 308 306
## v21_4 v21_5 v21_6
## 350 321 312
## v21_7 v21_8etcvar momedu
## 193 0 6
## v29 ISCOmom ISCOmom_2
## 11 52 328
## dadedu v32 ISCOdad
## 7 43 91
## ISCOdad_2 v35_1 v36
## 333 0 0
## v37_1 v38 v39_1
## 0 0 0
## v40 v41 v42
## 0 0 0
## codnat_samooopred codnat_samooopredbinom codenat_parents
## 55 55 10
## codenat_parentsbinom v43_1 codlanhome
## 10 9 234
## v47 v48 v48_4_Years
## 15 14 0
## v48_4_Month v49_2 v50_10
## 216 2 6
## gender v54 iseimom
## 7 0 52
## iseidad codenat_language
## 95 6
1 - мальчик, 2 - девочка
table(df1$gender) # 170 182
##
## 1 2
## 170 182
df1$gender <- as.factor(df1$gender)
class(df1$gender)
## [1] "factor"
df1$gender1 [df1$gender == 1] <- "boy"
df1$gender1 [df1$gender == 2] <- "girl"
df1$gender1 <- as.factor(df1$gender1)
table(df1$gender1)
##
## boy girl
## 170 182
round(prop.table(table(df1$gender1))*100, digits = 2)
##
## boy girl
## 48.3 51.7
в кодировке 0 - это мигрант, 1 - это не мигрант. здесь я перекодирую и делаю так, чтобы 0 стал не мигрантом , а 1 - мигрантом.
df1$mig0 [df1$codenat_parentsbinom == 0] <- 1
df1$mig0 [df1$codenat_parentsbinom == 1] <- 0
# df1$mig0 <- as.factor(df1$mig0)
table(df1$mig0)
##
## 0 1
## 206 143
class(df1$mig0)
## [1] "numeric"
df1$mig [df1$mig0 == 1] <- 'Мигрант'
df1$mig [df1$mig0 == 0] <- 'Не мигрант'
df1$mig <- as.factor(df1$mig)
table(df1$mig)
##
## Мигрант Не мигрант
## 143 206
round(prop.table(table(df1$mig))*100, digits = 2)
##
## Мигрант Не мигрант
## 40.97 59.03
Два родителя не в россии - мигрант Хотя бы один в россии - не мигрант
здесь меня интересуют НЕ мигранты
Здесь я обращаю внимание на то, на каком языке говорят дома. 0 - это когда на русском.1 - это когда не на русском
table(df1$codenat_language)
##
## 1110 1111 1112 1113 2116 3210 4013 4210 4215 5010 5166 5177 5212 5255 6212 6213
## 231 2 6 1 2 2 1 1 1 1 23 9 1 23 14 16
## 6214 6216 7015 9015
## 11 1 6 1
df1$eth0 [df1$codenat_language == 1110] <- 0
df1$eth0 [df1$codenat_language > 1110] <- 1
# df1$eth0 <- as.factor(df1$eth0)
table(df1$eth0)
##
## 0 1
## 231 122
class(df1$eth0)
## [1] "numeric"
df1$eth [df1$codenat_language == 1110] <- "Russian"
df1$eth [df1$codenat_language > 1110] <- "Not Russian"
df1$eth <- as.factor(df1$eth)
table(df1$eth)
##
## Not Russian Russian
## 122 231
round(prop.table(table(df1$eth))*100, digits = 2)
##
## Not Russian Russian
## 34.56 65.44
table(df1$mig, df1$eth)
##
## Not Russian Russian
## Мигрант 116 25
## Не мигрант 5 199
table(df1$mig0, df1$eth0)
##
## 0 1
## 0 199 5
## 1 25 116
df1$mig0 = as.numeric(df1$mig0)
class(df1$mig0)
## [1] "numeric"
df1$eth0 = as.numeric(df1$eth0)
class(df1$eth0)
## [1] "numeric"
df1$mig0 [is.na(df1$mig0 )] <- 0
df1$eth0 [is.na(df1$eth0 )] <- 0
df1$migeth <- df1$mig0 + df1$eth0
table(df1$migeth)
##
## 0 1 2
## 210 33 116
df1$migeth0 [df1$migeth > 0] = 1
df1$migeth0 [df1$migeth == 0] = 0
table(df1$migeth0)
##
## 0 1
## 210 149
df1$migeth0 = as.factor(df1$migeth0)
round(prop.table(table(df1$migeth0))*100, digits = 2)
##
## 0 1
## 58.5 41.5
Здесь я достаю всех тех, кто либо говорит дома не на русском, либо чьи родители родились не в России.
1 =это мигрант иговорит дома на нерусском 0 - это не мигранты, которые говорят дома на русском
df_ea[is.na(df_ea)] <- 0
df_eamax <- as.matrix(df_ea)
df1$ea <- rowMaxs(df_eamax)
table(df1$ea) # тут я сравнила значения в строке и попросила выбрать максимальное. Это - то образовательное намерение, которое меня интересует.
##
## 0 1 2 3 4 5 6 7
## 18 6 44 50 6 31 38 166
0 Рабочий рынок труда: колледж или работа 1 Профессионал с высшим образованием
df1$eawu_min [df1$v21_3 == 3] <- "uni_min"
df1$eawu_min [df1$v21_6 == 6] <- "uni_min"
df1$eawu_min [df1$v21_7 == 7] <- "uni_min"
df1$eawu_min [df1$v21_1 == 1] <- "work_min"
df1$eawu_min [df1$v21_4 == 4] <- "work_min"
df1$eawu_min [df1$v21_2 == 2] <- "work_min"
df1$eawu_min [df1$v21_5 == 5] <- "work_min"
df1$eawu_min0 [df1$v21_3 == 3] <- 1
df1$eawu_min0 [df1$v21_6 == 6] <- 1
df1$eawu_min0 [df1$v21_7 == 7] <- 1
df1$eawu_min0 [df1$v21_1 == 1] <- 0
df1$eawu_min0 [df1$v21_4 == 4] <- 0
df1$eawu_min0 [df1$v21_2 == 2] <- 0
df1$eawu_min0 [df1$v21_5 == 5] <- 0
table(df1$eawu_min)
##
## uni_min work_min
## 243 98
table(df1$eawu_min0)
##
## 0 1
## 98 243
round(prop.table(table(df1$eawu_min0))*100, digits = 2)
##
## 0 1
## 28.74 71.26
df1$eawu_max [df1$ea == 1] <- 'reswork'
df1$eawu_max [df1$ea == 4] <- 'reswork'
df1$eawu_max [df1$ea == 2] <- 'reswork'
df1$eawu_max [df1$ea == 5] <- 'reswork'
df1$eawu_max [df1$ea == 7] <- 'resuni'
df1$eawu_max [df1$ea == 3] <- 'resuni'
df1$eawu_max [df1$ea == 6] <- 'resuni'
df1$eawu_max0 [df1$ea == 1] <- 0
df1$eawu_max0 [df1$ea == 4] <- 0
df1$eawu_max0 [df1$ea == 2] <- 0
df1$eawu_max0 [df1$ea == 5] <- 0
df1$eawu_max0 [df1$ea == 7] <- 1
df1$eawu_max0 [df1$ea == 3] <- 1
df1$eawu_max0 [df1$ea == 6] <- 1
table(df1$eawu_max)
##
## resuni reswork
## 254 87
table(df1$eawu_max0)
##
## 0 1
## 87 254
round(prop.table(table(df1$eawu_max0))*100, digits = 2)
##
## 0 1
## 25.51 74.49
0 - невысшее 1 - высшее (даже если незаконченное)
table(df1$momedu)
##
## 1 2 3 4 5 6
## 63 70 16 151 45 8
df1$momedubi [df1$momedu == 1] <- 0
df1$momedubi [df1$momedu == 2] <- 0
df1$momedubi [df1$momedu == 3] <- 1
df1$momedubi [df1$momedu == 4] <- 1
df1$momedubi <- as.factor(df1$momedubi)
table(df1$momedubi)
##
## 0 1
## 133 167
round(prop.table(table(df1$momedubi))*100, digits = 2)
##
## 0 1
## 44.33 55.67
0 - нет высшего 1 - высшее
table(df1$dadedu)
##
## 1 2 3 4 5 6 7
## 49 68 21 94 12 69 39
df1$dadedubi [df1$dadedu == 1] <- 0
df1$dadedubi [df1$dadedu == 2] <- 0
df1$dadedubi [df1$dadedu == 3] <- 1
df1$dadedubi [df1$dadedu == 4] <- 1
df1$dadedubi <- as.factor(df1$dadedubi)
table(df1$dadedubi)
##
## 0 1
## 117 115
round(prop.table(table(df1$dadedubi))*100, digits = 2)
##
## 0 1
## 50.43 49.57
0 - В школу 1 - не в школу
df1$agesch [df1$v48_4_Month <= 72 ] <- "beforeschol"
df1$agesch [df1$v48_4_Month > 72 ] <- "inschool"
df1$agesch0 [df1$v48_4_Month <= 72 ] <- 0
df1$agesch0 [df1$v48_4_Month > 72 ] <- 1
df1$agesch0 <- as.factor(df1$agesch0)
table(df1$agesch)
##
## beforeschol inschool
## 52 91
table(df1$agesch0)
##
## 0 1
## 52 91
round(prop.table(table(df1$agesch0))*100, digits = 2)
##
## 0 1
## 36.36 63.64
# df1$gpa[is.na(df1$gpa)] <- 0
df1$classnum <- as.factor(df1$classnum)
# gender
table(df1$gender)
##
## 1 2
## 170 182
class(df1$gender)
## [1] "factor"
# 1 - boy, 2 - girl
table(df1$gender1)
##
## boy girl
## 170 182
prop.table(table(df1$gender1))*100
##
## boy girl
## 48.29545 51.70455
# migration and ethnicity
table(df1$migeth0)
##
## 0 1
## 210 149
class(df1$migeth0)
## [1] "factor"
# 0 - НЕмигрант и этническое большинство, 1 - мигрант и\или этническое меньшинство
prop.table(table(df1$migeth0))*100
##
## 0 1
## 58.49582 41.50418
#edu aspirations
xtabs(~ eawu_max0, data = df1)
## eawu_max0
## 0 1
## 87 254
# 0 - на работу, 1 - в университет
table(df1$eawu_max)
##
## resuni reswork
## 254 87
prop.table(table(df1$eawu_max))*100
##
## resuni reswork
## 74.4868 25.5132
#education of parents
table(df1$momedubi)
##
## 0 1
## 133 167
class(df1$momedubi)
## [1] "factor"
table(df1$dadedubi)
##
## 0 1
## 117 115
class(df1$dadedubi)
## [1] "factor"
# 0 - нет высшего, 1 - есть высшее
prop.table(table(df1$momedubi))*100
##
## 0 1
## 44.33333 55.66667
prop.table(table(df1$dadedubi))*100
##
## 0 1
## 50.43103 49.56897
# возраст прибытия
table(df1$agesch0)
##
## 0 1
## 52 91
class(df1$agesch0)
## [1] "factor"
# 0 - до школы, 1 - в школу
table(df1$agesch)
##
## beforeschol inschool
## 52 91
prop.table(table(df1$agesch))*100
##
## beforeschol inschool
## 36.36364 63.63636
#Успеваемость
describe(df1$gpa)
| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 344 | 3.78 | 0.574 | 3.71 | 3.75 | 0.635 | 2.14 | 5 | 2.86 | 0.304 | -0.375 | 0.0309 |
class(df1$gpa)
## [1] "numeric"
# chisq.test(df1$eawu_max0, df1$agesch)
df1$eawu_max0 <- as.factor(df1$eawu_max0)
df1$eawu_min0 <- as.factor(df1$eawu_min0)
data_mig0 <- df1 [c("migeth0")]
data_mig0 <- na.omit(data_mig0)
# sapply(data,function(x) sum(is.na(x)))
data_mig <- df1[c("eawu_max0", "migeth0")]
data_mig <- na.omit(data_mig)
class(data_mig$eawu_max0)
## [1] "factor"
class(data_mig$migeth0)
## [1] "factor"
chisq.test(data_mig$eawu_max0, data_mig$migeth0)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: data_mig$eawu_max0 and data_mig$migeth0
## X-squared = 1.5741, df = 1, p-value = 0.2096
p-value > 0.05, so there is NO association between v.
ea_mig <- glm(eawu_max0 ~ migeth0, data = data_mig, family = "binomial")
summary(ea_mig)
##
## Call:
## glm(formula = eawu_max0 ~ migeth0, family = "binomial", data = data_mig)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7486 -1.5893 0.6991 0.8154 0.8154
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.9305 0.1578 5.897 3.71e-09 ***
## migeth01 0.3540 0.2571 1.377 0.168
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 387.31 on 340 degrees of freedom
## Residual deviance: 385.38 on 339 degrees of freedom
## AIC: 389.38
##
## Number of Fisher Scoring iterations: 4
round(exp(coef(ea_mig)), digits = 2)
## (Intercept) migeth01
## 2.54 1.42
Наличие миграционного опыта не влияет на аспирации (А ЭТНИЧНОСТЬ???). По одс ратио выходит так, что русские меньше хотят в ВУЗ. То есть 0,7 к 1 шансов что русский пойдет в ВУЗ. ИЛИ? Нерусские хотят в ВУЗ больше русских, и 1,3 шансов к 1 что если мигрант, то он пойдет в ВУЗ.
hoslem.test(data_mig$eawu_max0, fitted(ea_mig), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: data_mig$eawu_max0, fitted(ea_mig)
## X-squared = 341, df = 8, p-value < 2.2e-16
roc_ea_mig1 <- predict(ea_mig, newdata = data_mig, type = "response")
roc_ea_mig2 <- roc(data_mig$eawu_max0 ~ roc_ea_mig1, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
Хорошо (плохо) - эта модель не лучше, чем ее отсутствие
data_gen <- df1 [c("eawu_max0", "gender")]
data_gen <- na.omit(data_gen)
chisq.test(data_gen$eawu_max0, data_gen$gender)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: data_gen$eawu_max0 and data_gen$gender
## X-squared = 4.7245, df = 1, p-value = 0.02974
chisq.test(data_gen$eawu_max0, data_gen$gender)$std
## data_gen$gender
## data_gen$eawu_max0 1 2
## 0 2.299248 -2.299248
## 1 -2.299248 2.299248
p-value is less than 0.05 = there is a association between variables
ea_gen <- glm(eawu_max0 ~ gender, data = data_gen, family = "binomial")
summary(ea_gen)
##
## Call:
## glm(formula = eawu_max0 ~ gender, family = "binomial", data = data_gen)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7909 -1.5293 0.6702 0.8624 0.8624
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.7975 0.1703 4.682 2.84e-06 ***
## gender2 0.5816 0.2545 2.285 0.0223 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 379.49 on 334 degrees of freedom
## Residual deviance: 374.19 on 333 degrees of freedom
## AIC: 378.19
##
## Number of Fisher Scoring iterations: 4
round(exp(coef(ea_gen)), digits = 2)
## (Intercept) gender2
## 2.22 1.79
У девочек выше аспирации
pR2(ea_gen)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML
## -187.09366335 -189.74314189 5.29895709 0.01396350 0.01569334
## r2CU
## 0.02315101
hoslem.test(data_gen$eawu_max0, fitted(ea_gen), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: data_gen$eawu_max0, fitted(ea_gen)
## X-squared = 335, df = 8, p-value < 2.2e-16
Model has no predictive power ?!
roc_ea_gen1 <- predict(ea_gen, newdata = data_gen, type = "response")
roc_ea_gen2 <- roc(data_gen$eawu_max0 ~ roc_ea_gen1, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
data_gpa <- df1[c("eawu_max0", "gpa")]
data_gpa <- na.omit(data_gpa)
ea_gpa <- glm(eawu_max0 ~ gpa, data = data_gpa, family = "binomial")
summary(ea_gpa)
##
## Call:
## glm(formula = eawu_max0 ~ gpa, family = "binomial", data = data_gpa)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5933 -0.9121 0.5339 0.8230 1.6327
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.1387 1.0650 -4.825 1.40e-06 ***
## gpa 1.6932 0.2968 5.705 1.16e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 372.63 on 326 degrees of freedom
## Residual deviance: 330.71 on 325 degrees of freedom
## AIC: 334.71
##
## Number of Fisher Scoring iterations: 5
round(exp(coef(ea_gpa)), digits = 2)
## (Intercept) gpa
## 0.01 5.44
Человек пойдет в Вуз если у него выская успеваемость
pR2(ea_gpa)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML r2CU
## -165.3535912 -186.3144340 41.9216857 0.1125025 0.1203233 0.1769375
hoslem.test(data_gpa$eawu_max0, fitted(ea_gpa), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: data_gpa$eawu_max0, fitted(ea_gpa)
## X-squared = 327, df = 8, p-value < 2.2e-16
roc_ea_gpa1 <- predict(ea_gpa, newdata = data_gpa, type = "response")
roc_ea_gpa2 <- roc(data_gpa$eawu_max0 ~ roc_ea_gpa1, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
data_age <- df1 [c("eawu_max0", "agesch0")]
data_age <- na.omit(data_age)
здесь тоже может быть мало наблюдений, потому что не все переезжали. Их тут 140 человек
ea_age <- glm(eawu_max0 ~ agesch0, data = data_age, family = "binomial")
summary(ea_age)
##
## Call:
## glm(formula = eawu_max0 ~ agesch0, family = "binomial", data = data_age)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7378 0.7066 0.7066 0.8106 0.8106
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.9445 0.3150 2.999 0.00271 **
## agesch01 0.3158 0.4084 0.773 0.43934
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 150.72 on 135 degrees of freedom
## Residual deviance: 150.13 on 134 degrees of freedom
## AIC: 154.13
##
## Number of Fisher Scoring iterations: 4
Возраст незначим
pR2(ea_age)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML
## -75.062787247 -75.359229144 0.592883794 0.003933717 0.004349951
## r2CU
## 0.006493868
hoslem.test(data_age$eawu_max0, fitted(ea_age), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: data_age$eawu_max0, fitted(ea_age)
## X-squared = 136, df = 8, p-value < 2.2e-16
roc_ea_age1 <- predict(ea_age, newdata = data_age, type = "response")
roc_ea_age2 <- roc(data_age$eawu_max0 ~ roc_ea_age1, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
data_medu <- df1[c("eawu_max0", "momedubi")]
data_medu <- na.omit(data_medu)
ea_medu <- glm(eawu_max0 ~ momedubi, data = data_medu, family = "binomial")
summary(ea_medu)
##
## Call:
## glm(formula = eawu_max0 ~ momedubi, family = "binomial", data = data_medu)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7149 0.7227 0.7227 0.7409 0.7409
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.15268 0.20943 5.504 3.71e-08 ***
## momedubi1 0.05668 0.28098 0.202 0.84
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 311.39 on 285 degrees of freedom
## Residual deviance: 311.34 on 284 degrees of freedom
## AIC: 315.34
##
## Number of Fisher Scoring iterations: 4
round(exp(coef(ea_medu)), digits = 2)
## (Intercept) momedubi1
## 3.17 1.06
Образование матери незначимо
pR2(ea_medu)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML
## -1.556722e+02 -1.556925e+02 4.065339e-02 1.305566e-04 1.421346e-04
## r2CU
## 2.142627e-04
hoslem.test(data_medu$eawu_max0, fitted(ea_medu), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: data_medu$eawu_max0, fitted(ea_medu)
## X-squared = 286, df = 8, p-value < 2.2e-16
p-value = 1
roc_ea_medu1 <- predict(ea_medu, newdata = data_medu, type = "response")
roc_ea_medu2 <- roc(data_medu$eawu_max0 ~ roc_ea_medu1, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
data_dedu <- df1[c("eawu_max0", "dadedubi")]
data_dedu <- na.omit(data_dedu)
ea_dedu <- glm(eawu_max0 ~ dadedubi, data = data_dedu, family = "binomial")
summary(ea_dedu)
##
## Call:
## glm(formula = eawu_max0 ~ dadedubi, family = "binomial", data = data_dedu)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7890 0.6715 0.6715 0.7625 0.7625
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.0866 0.2185 4.972 6.62e-07 ***
## dadedubi1 0.2882 0.3236 0.891 0.373
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 235.82 on 219 degrees of freedom
## Residual deviance: 235.02 on 218 degrees of freedom
## AIC: 239.02
##
## Number of Fisher Scoring iterations: 4
round(exp(coef(ea_dedu)), digits = 2)
## (Intercept) dadedubi1
## 2.96 1.33
Образование отца незначимо
hoslem.test(data_dedu$eawu_max0, fitted(ea_dedu), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: data_dedu$eawu_max0, fitted(ea_dedu)
## X-squared = 220, df = 8, p-value < 2.2e-16
p-value = 1
roc_ea_dedu1 <- predict(ea_dedu, newdata = data_dedu, type = "response")
roc_ea_dedu2 <- roc(data_dedu$eawu_max0 ~ roc_ea_dedu1, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
data_imom <- df1[c("eawu_max0", "iseimom")]
data_imom <- na.omit(data_imom)
ea_imom <- glm(eawu_max0 ~ iseimom, data = data_imom, family = "binomial")
summary(ea_imom)
##
## Call:
## glm(formula = eawu_max0 ~ iseimom, family = "binomial", data = data_imom)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8683 0.6195 0.6961 0.7615 0.8653
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.652930 0.419456 1.557 0.120
## iseimom 0.013643 0.009624 1.418 0.156
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 316.11 on 294 degrees of freedom
## Residual deviance: 314.06 on 293 degrees of freedom
## AIC: 318.06
##
## Number of Fisher Scoring iterations: 4
round(exp(coef(ea_imom)), digits = 2)
## (Intercept) iseimom
## 1.92 1.01
ISEI мамы незначимо
hoslem.test(data_imom$eawu_max0, fitted(ea_imom), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: data_imom$eawu_max0, fitted(ea_imom)
## X-squared = NaN, df = 8, p-value = NA
p-value = NA
roc_ea_imom1 <- predict(ea_imom, newdata = data_imom, type = "response")
roc_ea_imom2 <- roc(data_imom$eawu_max0 ~ roc_ea_imom1, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
data_idad <- df1[c("eawu_max0", "iseidad")]
data_idad <- na.omit(data_idad)
ea_idad <- glm(eawu_max0 ~ iseidad, data = data_idad, family = "binomial")
summary(ea_idad)
##
## Call:
## glm(formula = eawu_max0 ~ iseidad, family = "binomial", data = data_idad)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9674 0.5279 0.6501 0.8252 1.0056
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.18627 0.51177 -0.364 0.71588
## iseidad 0.03024 0.01160 2.606 0.00916 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 277.82 on 249 degrees of freedom
## Residual deviance: 270.65 on 248 degrees of freedom
## AIC: 274.65
##
## Number of Fisher Scoring iterations: 4
round(exp(coef(ea_idad)), digits = 2)
## (Intercept) iseidad
## 0.83 1.03
ISEI Отца значимо Чем выше статус работы отца, тем больше шансов пойти в университет
hoslem.test(data_idad$eawu_max0, fitted(ea_idad), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: data_idad$eawu_max0, fitted(ea_idad)
## X-squared = 250, df = 8, p-value < 2.2e-16
НО ЭТОЙ МОДЕЛИ НЕЛЬЗЯ ДОВЕРЯТЬ! p-value = 0.04116
roc_ea_idad1 <- predict(ea_idad, newdata = data_idad, type = "response")
roc_ea_idad2 <- roc(data_idad$eawu_max0 ~ roc_ea_idad1, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
data_classnum <-df1[c("eawu_max0", "classnum")]
data_classnum <- na.omit(data_classnum)
ea_class <- glm(eawu_max0 ~ classnum, data = data_classnum, family = "binomial")
summary(ea_class)
##
## Call:
## glm(formula = eawu_max0 ~ classnum, family = "binomial", data = data_classnum)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0074 -0.4437 0.5799 0.9508 0.9508
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.5596 0.1618 3.458 0.000544 ***
## classnum10 1.1381 0.3423 3.324 0.000886 ***
## classnum11 1.3122 0.3498 3.751 0.000176 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 381.81 on 338 degrees of freedom
## Residual deviance: 359.38 on 336 degrees of freedom
## AIC: 365.38
##
## Number of Fisher Scoring iterations: 4
round(exp(coef(ea_class)), digits = 2)
## (Intercept) classnum10 classnum11
## 1.75 3.12 3.71
Чем выше класс, тем больше шанс, что чел пойдет в ВУЗ
hoslem.test(data_classnum$eawu_max0, fitted(ea_class), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: data_classnum$eawu_max0, fitted(ea_class)
## X-squared = 339, df = 8, p-value < 2.2e-16
p-value = 1
library(pROC)
roc_ea_class1 <- predict(ea_class, newdata = data_classnum, type = "response")
roc_ea_class2 <- roc(data_classnum$eawu_max0 ~ roc_ea_class1, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
export_summs(ea_gen, ea_gpa, ea_age, ea_mig, ea_medu, ea_dedu, ea_imom, ea_idad, ea_class, scale = TRUE)
| Model 1 | Model 2 | Model 3 | Model 4 | Model 5 | Model 6 | Model 7 | Model 8 | Model 9 | |
|---|---|---|---|---|---|---|---|---|---|
| (Intercept) | 0.80 *** | 1.27 *** | 0.94 ** | 0.93 *** | 1.15 *** | 1.09 *** | 1.24 *** | 1.17 *** | 0.56 *** |
| (0.17) | (0.15) | (0.31) | (0.16) | (0.21) | (0.22) | (0.14) | (0.15) | (0.16) | |
| gender | 0.58 * | ||||||||
| (0.25) | |||||||||
| gpa | 0.96 *** | ||||||||
| (0.17) | |||||||||
| agesch0 | 0.32 | ||||||||
| (0.41) | |||||||||
| migeth0 | 0.35 | ||||||||
| (0.26) | |||||||||
| momedubi | 0.06 | ||||||||
| (0.28) | |||||||||
| dadedubi | 0.29 | ||||||||
| (0.32) | |||||||||
| iseimom | 0.20 | ||||||||
| (0.14) | |||||||||
| iseidad | 0.41 ** | ||||||||
| (0.16) | |||||||||
| classnum10 | 1.14 *** | ||||||||
| (0.34) | |||||||||
| classnum11 | 1.31 *** | ||||||||
| (0.35) | |||||||||
| N | 335 | 327 | 136 | 341 | 286 | 220 | 295 | 250 | 339 |
| AIC | 378.19 | 334.71 | 154.13 | 389.38 | 315.34 | 239.02 | 318.06 | 274.65 | 365.38 |
| BIC | 385.82 | 342.29 | 159.95 | 397.05 | 322.66 | 245.81 | 325.44 | 281.70 | 376.86 |
| Pseudo R2 | 0.02 | 0.18 | 0.01 | 0.01 | 0.00 | 0.01 | 0.01 | 0.04 | 0.09 |
| All continuous predictors are mean-centered and scaled by 1 standard deviation. *** p < 0.001; ** p < 0.01; * p < 0.05. | |||||||||
# install.packages("gtsummary")
# library(gtsummary)
# tbl_regression(ea_age, exponentiate = TRUE)
data_migcntrl <- df1 [c("eawu_max0","gender", "classnum", "migeth0" )]
data_migcntrl <- na.omit(data_migcntrl)
migcntrl <- glm(eawu_max0 ~ gender + classnum + migeth0, data = data_migcntrl, family = "binomial")
summary(migcntrl)
##
## Call:
## glm(formula = eawu_max0 ~ gender + classnum + migeth0, family = "binomial",
## data = data_migcntrl)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3393 0.3661 0.5744 0.7527 1.1643
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.0310 0.2423 0.128 0.8982
## gender2 0.6020 0.2699 2.231 0.0257 *
## classnum10 1.0853 0.3477 3.121 0.0018 **
## classnum11 1.4481 0.3673 3.942 8.07e-05 ***
## migeth01 0.5880 0.2785 2.112 0.0347 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 373.96 on 332 degrees of freedom
## Residual deviance: 342.55 on 328 degrees of freedom
## AIC: 352.55
##
## Number of Fisher Scoring iterations: 4
round(exp(coef(migcntrl)), digits = 2)
## (Intercept) gender2 classnum10 classnum11 migeth01
## 1.03 1.83 2.96 4.25 1.80
При добавлении контрольных переменных миграционный опыт начинает проявлять значимость
vif(migcntrl)
## GVIF Df GVIF^(1/(2*Df))
## gender 1.023031 1 1.011450
## classnum 1.016856 2 1.004188
## migeth0 1.020015 1 1.009958
No correlation
pR2(migcntrl)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML
## -171.27590496 -186.98244925 31.41308859 0.08400010 0.09002086
## r2CU
## 0.13342287
hoslem.test(data_migcntrl$eawu_max0, fitted(migcntrl), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: data_migcntrl$eawu_max0, fitted(migcntrl)
## X-squared = 333, df = 8, p-value < 2.2e-16
roc_migcntrl1 <- predict(migcntrl, newdata = data_migcntrl, type = "response")
roc_migcntrl2 <- roc(data_migcntrl$eawu_max0 ~ roc_migcntrl1, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
data_migmomedu <- df1 [c("eawu_max0","gender", "classnum", "migeth0", "momedubi")]
data_migmomedu <- na.omit(data_migmomedu)
ea_migmomedu <- glm(eawu_max0 ~ gender + classnum + migeth0 * momedubi, data = data_migmomedu, family = "binomial")
summary(ea_migmomedu)
##
## Call:
## glm(formula = eawu_max0 ~ gender + classnum + migeth0 * momedubi,
## family = "binomial", data = data_migmomedu)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4973 0.3355 0.5316 0.7326 1.2623
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1974 0.3708 -0.532 0.594386
## gender2 0.6502 0.3139 2.071 0.038357 *
## classnum10 1.2659 0.4067 3.112 0.001857 **
## classnum11 1.4900 0.4075 3.657 0.000256 ***
## migeth01 1.1303 0.4858 2.327 0.019972 *
## momedubi1 0.4549 0.3842 1.184 0.236411
## migeth01:momedubi1 -0.9924 0.6384 -1.554 0.120065
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 300.50 on 278 degrees of freedom
## Residual deviance: 270.62 on 272 degrees of freedom
## AIC: 284.62
##
## Number of Fisher Scoring iterations: 4
round(exp(coef(ea_migmomedu)), digits = 2)
## (Intercept) gender2 classnum10 classnum11
## 0.82 1.92 3.55 4.44
## migeth01 momedubi1 migeth01:momedubi1
## 3.10 1.58 0.37
Мигранты более вероятно пойдут в универ И! Мигранты с высшим образованием матери менее вероятно пойдут в ВУЗ? 0.3 шанса, что они пойдут в ВУЗ
vif(ea_migmomedu)
## GVIF Df GVIF^(1/(2*Df))
## gender 1.082054 1 1.040218
## classnum 1.018427 2 1.004575
## migeth0 2.376841 1 1.541701
## momedubi 1.593986 1 1.262531
## migeth0:momedubi 2.937527 1 1.713922
pR2(ea_migmomedu)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML
## -135.30894627 -150.25239375 29.88689496 0.09945564 0.10158348
## r2CU
## 0.15405190
hoslem.test(data_migmomedu$eawu_max0, fitted(ea_migmomedu), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: data_migmomedu$eawu_max0, fitted(ea_migmomedu)
## X-squared = 279, df = 8, p-value < 2.2e-16
p-value < 2.2e-16
roc_ea_migmomedu1 <- predict(ea_migmomedu, newdata = data_migmomedu, type = "response")
roc_ea_migmomedu2 <- roc(data_migmomedu$eawu_max0 ~ roc_ea_migmomedu1, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
data_migdadedu <- df1 [c("eawu_max0", "gender", "classnum", "migeth0", "dadedubi")]
data_migdadedu <- na.omit(data_migdadedu)
ea_migdadedu <- glm(eawu_max0 ~ gender + classnum + migeth0 * dadedubi, data = df1, family = "binomial")
summary(ea_migdadedu)
##
## Call:
## glm(formula = eawu_max0 ~ gender + classnum + migeth0 * dadedubi,
## family = "binomial", data = df1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4765 0.3170 0.4871 0.6829 1.2487
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1660 0.3932 -0.422 0.672882
## gender2 0.6112 0.3614 1.691 0.090856 .
## classnum10 0.9480 0.4485 2.114 0.034558 *
## classnum11 1.6286 0.4917 3.312 0.000926 ***
## migeth01 0.9449 0.4906 1.926 0.054089 .
## dadedubi1 0.6788 0.4464 1.520 0.128398
## migeth01:dadedubi1 -0.7318 0.7317 -1.000 0.317254
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 225.81 on 214 degrees of freedom
## Residual deviance: 203.83 on 208 degrees of freedom
## (144 observations deleted due to missingness)
## AIC: 217.83
##
## Number of Fisher Scoring iterations: 5
round(exp(coef(ea_migdadedu)), digits = 2)
## (Intercept) gender2 classnum10 classnum11
## 0.85 1.84 2.58 5.10
## migeth01 dadedubi1 migeth01:dadedubi1
## 2.57 1.97 0.48
pR2(ea_migdadedu)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML
## -101.91633129 -112.90429023 21.97591787 0.09732100 0.09716329
## r2CU
## 0.14944575
hoslem.test(data_migdadedu$eawu_max0, fitted(ea_migdadedu), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: data_migdadedu$eawu_max0, fitted(ea_migdadedu)
## X-squared = 215, df = 8, p-value < 2.2e-16
# plot(ea_migdadedu)
roc_ea_migdadedu <- predict(ea_migdadedu, newdata = data_migdadedu, type = "response")
roc_ea_migdadedu <- roc(data_migdadedu$eawu_max0 ~ roc_ea_migdadedu, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
Sensitivity - True Positive Rate Specificity - False Positive Rate
data_migimom <- df1 [c("eawu_max0","gender", "classnum", "migeth0", "iseimom")]
data_migimom <- na.omit(data_migimom)
ea_migimom <- glm(eawu_max0 ~ gender + classnum + migeth0 * iseimom, data = data_migimom, family = "binomial")
summary(ea_migimom)
##
## Call:
## glm(formula = eawu_max0 ~ gender + classnum + migeth0 * iseimom,
## family = "binomial", data = data_migimom)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5011 0.3261 0.5661 0.7008 1.3485
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.74279 0.63411 -1.171 0.241446
## gender2 0.66814 0.30489 2.191 0.028421 *
## classnum10 0.82094 0.37308 2.200 0.027775 *
## classnum11 1.41063 0.41028 3.438 0.000586 ***
## migeth01 1.28369 0.92973 1.381 0.167367
## iseimom 0.02182 0.01319 1.655 0.097993 .
## migeth01:iseimom -0.01480 0.02172 -0.682 0.495421
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 302.59 on 287 degrees of freedom
## Residual deviance: 277.31 on 281 degrees of freedom
## AIC: 291.31
##
## Number of Fisher Scoring iterations: 4
round(exp(coef(ea_migimom)), digits = 2)
## (Intercept) gender2 classnum10 classnum11
## 0.48 1.95 2.27 4.10
## migeth01 iseimom migeth01:iseimom
## 3.61 1.02 0.99
hoslem.test(data_migimom$eawu_max0, fitted(ea_migimom), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: data_migimom$eawu_max0, fitted(ea_migimom)
## X-squared = 288, df = 8, p-value < 2.2e-16
roc_ea_migimom <- predict(ea_migimom, newdata = data_migimom, type = "response")
roc_ea_migimom <- roc(data_migimom$eawu_max0 ~ roc_ea_migimom, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
data_migidad <- df1 [c("eawu_max0","gender", "classnum", "migeth0", "iseidad")]
data_migidad <- na.omit(data_migidad)
ea_migidad <- glm(eawu_max0 ~ gender + classnum + migeth0 * iseidad, data = data_migidad, family = "binomial")
summary(ea_migidad)
##
## Call:
## glm(formula = eawu_max0 ~ gender + classnum + migeth0 * iseidad,
## family = "binomial", data = data_migidad)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3953 0.2453 0.4770 0.7673 1.6016
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.82707 0.80855 -2.260 0.023842 *
## gender2 0.76593 0.33899 2.259 0.023857 *
## classnum10 1.24859 0.43472 2.872 0.004076 **
## classnum11 1.72775 0.49057 3.522 0.000428 ***
## migeth01 1.25112 1.11177 1.125 0.260442
## iseidad 0.04346 0.01779 2.443 0.014580 *
## migeth01:iseidad -0.01867 0.02533 -0.737 0.461112
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 268.71 on 245 degrees of freedom
## Residual deviance: 233.22 on 239 degrees of freedom
## AIC: 247.22
##
## Number of Fisher Scoring iterations: 5
round(exp(coef(ea_migidad)), digits = 2)
## (Intercept) gender2 classnum10 classnum11
## 0.16 2.15 3.49 5.63
## migeth01 iseidad migeth01:iseidad
## 3.49 1.04 0.98
pR2(ea_migidad)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML r2CU
## -116.6090884 -134.3547742 35.4913716 0.1320808 0.1343494 0.2021626
hoslem.test(data_migidad$eawu_max0, fitted(ea_migidad), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: data_migidad$eawu_max0, fitted(ea_migidad)
## X-squared = 246, df = 8, p-value < 2.2e-16
roc_ea_migidad <- predict(ea_migidad, newdata = data_migidad, type = "response")
roc_ea_migidad <- roc(data_migidad$eawu_max0 ~ roc_ea_migidad, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
export_summs(migcntrl, ea_migmomedu, ea_migdadedu, ea_migimom, ea_migidad, ea_class, scale = TRUE)
| Model 1 | Model 2 | Model 3 | Model 4 | Model 5 | Model 6 | |
|---|---|---|---|---|---|---|
| (Intercept) | 0.03 | -0.20 | -0.17 | 0.19 | 0.12 | 0.56 *** |
| (0.24) | (0.37) | (0.39) | (0.28) | (0.30) | (0.16) | |
| gender | 0.60 * | 0.65 * | 0.61 | 0.67 * | 0.77 * | |
| (0.27) | (0.31) | (0.36) | (0.30) | (0.34) | ||
| classnum10 | 1.09 ** | 1.27 ** | 0.95 * | 0.82 * | 1.25 ** | 1.14 *** |
| (0.35) | (0.41) | (0.45) | (0.37) | (0.43) | (0.34) | |
| classnum11 | 1.45 *** | 1.49 *** | 1.63 *** | 1.41 *** | 1.73 *** | 1.31 *** |
| (0.37) | (0.41) | (0.49) | (0.41) | (0.49) | (0.35) | |
| migeth0 | 0.59 * | 1.13 * | 0.94 | 0.65 * | 0.41 | |
| (0.28) | (0.49) | (0.49) | (0.32) | (0.34) | ||
| momedubi | 0.45 | |||||
| (0.38) | ||||||
| migeth0:momedubi | -0.99 | |||||
| (0.64) | ||||||
| dadedubi | 0.68 | |||||
| (0.45) | ||||||
| migeth0:dadedubi | -0.73 | |||||
| (0.73) | ||||||
| iseimom | 0.32 | |||||
| (0.19) | ||||||
| migeth0:iseimom | -0.22 | |||||
| (0.32) | ||||||
| iseidad | 0.59 * | |||||
| (0.24) | ||||||
| migeth0:iseidad | -0.25 | |||||
| (0.34) | ||||||
| N | 333 | 279 | 215 | 288 | 246 | 339 |
| AIC | 352.55 | 284.62 | 217.83 | 291.31 | 247.22 | 365.38 |
| BIC | 371.59 | 310.04 | 241.43 | 316.95 | 271.76 | 376.86 |
| Pseudo R2 | 0.13 | 0.15 | 0.15 | 0.13 | 0.20 | 0.09 |
| All continuous predictors are mean-centered and scaled by 1 standard deviation. *** p < 0.001; ** p < 0.01; * p < 0.05. | ||||||