# if (!requireNamespace("BiocManager", quietly = TRUE))
  # install.packages("BiocManager")
 # BiocManager::install("limma")
library(limma)
library(MKmisc)
## 
## Attaching package: 'MKmisc'
## The following objects are masked from 'package:psych':
## 
##     AUC, corPlot
library(ResourceSelection)
df1 <- dd[c("gpa", "age", "classnum", "v17_2", "v17_a", "v17_b", "v17_2etc", "v18_1", "v18_6", "v21_1", "v21_2", "v21_3", "v21_4", "v21_5", "v21_6", "v21_7", "v21_8etcvar", "momedu", "v29",  "ISCOmom", "ISCOmom_2", "dadedu", "v32", "ISCOdad", "ISCOdad_2", "v35_1", "v36", "v37_1", "v38", "v39_1", "v40", "v41", "v42", "codnat_samooopred", "codnat_samooopredbinom", "codenat_parents", "codenat_parentsbinom", "v43_1", "codlanhome", "v47", "v48", "v48_4_Years", "v48_4_Month", "v49_2", "v50_10", "gender", "v54", "iseimom", "iseidad", "codenat_language")]
# df1 <- na.omit(df1)
head(df1)
gpaageclassnumv17_2v17_av17_bv17_2etcv18_1v18_6v21_1v21_2v21_3v21_4v21_5v21_6v21_7v21_8etcvarmomeduv29ISCOmomISCOmom_2dadeduv32ISCOdadISCOdad_2v35_1v36v37_1v38v39_1v40v41v42codnat_samooopredcodnat_samooopredbinomcodenat_parentscodenat_parentsbinomv43_1codlanhomev47v48v48_4_Yearsv48_4_Monthv49_2v50_10genderv54iseimomiseidadcodenat_language
3.431677312315.23e+03      215.41e+03АрменияармянскийАрменияармянскийАрмения13армянскийармений516605166015.17e+0314131566311631405.17e+03
3.291677персональным фитнес тренером325512.34e+03      412.14e+03БелоруссиябелорусскийРоссиярусскийРоссия матушкарусскийрусск11101111102       126411647651.11e+03
3.4315933247422.6e+03       237.1e+03 РоссиярусскийРоссиярусскийРоссияпару месяцеврусскийрусский111011110122.12e+03125111566341.11e+03
4.1415933316214.3e+03       514.3e+03 РоссиярусскийРоссиярусскийРоссия12русскийрусский11101111011       34121446211543431.11e+03
3.7114977тренер (спорт)3123411.44e+032.3e+03611e+03       Грузия (Абхазия)грузинскийРоссиягрузинскийРоссиягрузинскийГрузия517705177025.18e+03126111459625.18e+03
3.7116922513115.12e+03      62       КыргызстанузбекскийУзбекистанузбекскийКыргызстан14узбекскийузбекский621206212016.21e+03341315651216276.21e+03
df_ea <- dd[c("v21_1", "v21_2", "v21_3", "v21_4", "v21_5", "v21_6", "v21_7" )]
df_oa <- dd[c("v17_a", "v17_b", "v17_c", "v17_d")]
df_edupar <- dd[c("momedu","dadedu")]
df_iseipar <- dd [c("iseimom", "iseidad")]
df_age <- dd [c("age", "classnum", "v48_4_Month")]

# df1[is.na(df1)] <- 0
#Если надо будет заменить все на нолики

ПЕРЕМЕННЫЕ

sapply(df1,function(x) sum(is.na(x)))
##                    gpa                    age               classnum 
##                     15                     11                      2 
##                  v17_2                  v17_a                  v17_b 
##                      0                     23                    312 
##               v17_2etc                  v18_1                  v18_6 
##                      0                      1                      2 
##                  v21_1                  v21_2                  v21_3 
##                    351                    308                    306 
##                  v21_4                  v21_5                  v21_6 
##                    350                    321                    312 
##                  v21_7            v21_8etcvar                 momedu 
##                    193                      0                      6 
##                    v29                ISCOmom              ISCOmom_2 
##                     11                     52                    328 
##                 dadedu                    v32                ISCOdad 
##                      7                     43                     91 
##              ISCOdad_2                  v35_1                    v36 
##                    333                      0                      0 
##                  v37_1                    v38                  v39_1 
##                      0                      0                      0 
##                    v40                    v41                    v42 
##                      0                      0                      0 
##      codnat_samooopred codnat_samooopredbinom        codenat_parents 
##                     55                     55                     10 
##   codenat_parentsbinom                  v43_1             codlanhome 
##                     10                      9                    234 
##                    v47                    v48            v48_4_Years 
##                     15                     14                      0 
##            v48_4_Month                  v49_2                 v50_10 
##                    216                      2                      6 
##                 gender                    v54                iseimom 
##                      7                      0                     52 
##                iseidad       codenat_language 
##                     95                      6

Gender

1 - мальчик, 2 - девочка

table(df1$gender) # 170 182
## 
##   1   2 
## 170 182
df1$gender <- as.factor(df1$gender)

class(df1$gender)
## [1] "factor"
df1$gender1 [df1$gender == 1] <- "boy"
df1$gender1 [df1$gender == 2] <- "girl"
df1$gender1 <- as.factor(df1$gender1)
table(df1$gender1)
## 
##  boy girl 
##  170  182
round(prop.table(table(df1$gender1))*100, digits = 2)
## 
##  boy girl 
## 48.3 51.7

Migration

в кодировке 0 - это мигрант, 1 - это не мигрант. здесь я перекодирую и делаю так, чтобы 0 стал не мигрантом , а 1 - мигрантом.

df1$mig0 [df1$codenat_parentsbinom == 0] <- 1
df1$mig0 [df1$codenat_parentsbinom == 1] <- 0
# df1$mig0 <- as.factor(df1$mig0)
table(df1$mig0)
## 
##   0   1 
## 206 143
class(df1$mig0)
## [1] "numeric"
df1$mig [df1$mig0 == 1] <- 'Мигрант'
df1$mig [df1$mig0 == 0] <- 'Не мигрант'
df1$mig <- as.factor(df1$mig)
table(df1$mig)
## 
##    Мигрант Не мигрант 
##        143        206
round(prop.table(table(df1$mig))*100, digits = 2)
## 
##    Мигрант Не мигрант 
##      40.97      59.03

Два родителя не в россии - мигрант Хотя бы один в россии - не мигрант

здесь меня интересуют НЕ мигранты

Ethnicity

Здесь я обращаю внимание на то, на каком языке говорят дома. 0 - это когда на русском.1 - это когда не на русском

table(df1$codenat_language)
## 
## 1110 1111 1112 1113 2116 3210 4013 4210 4215 5010 5166 5177 5212 5255 6212 6213 
##  231    2    6    1    2    2    1    1    1    1   23    9    1   23   14   16 
## 6214 6216 7015 9015 
##   11    1    6    1
df1$eth0 [df1$codenat_language == 1110] <- 0
df1$eth0 [df1$codenat_language > 1110] <- 1
# df1$eth0 <- as.factor(df1$eth0)
table(df1$eth0)
## 
##   0   1 
## 231 122
class(df1$eth0)
## [1] "numeric"
df1$eth [df1$codenat_language == 1110] <- "Russian"
df1$eth [df1$codenat_language > 1110] <- "Not Russian"
df1$eth <- as.factor(df1$eth)
table(df1$eth)
## 
## Not Russian     Russian 
##         122         231
round(prop.table(table(df1$eth))*100, digits = 2)
## 
## Not Russian     Russian 
##       34.56       65.44

MIG and ETH

table(df1$mig, df1$eth)
##             
##              Not Russian Russian
##   Мигрант            116      25
##   Не мигрант           5     199
table(df1$mig0, df1$eth0)
##    
##       0   1
##   0 199   5
##   1  25 116
df1$mig0 = as.numeric(df1$mig0)
class(df1$mig0)
## [1] "numeric"
df1$eth0 = as.numeric(df1$eth0)
class(df1$eth0)
## [1] "numeric"
df1$mig0 [is.na(df1$mig0 )] <- 0
df1$eth0 [is.na(df1$eth0 )] <- 0

df1$migeth <- df1$mig0 + df1$eth0

table(df1$migeth)
## 
##   0   1   2 
## 210  33 116
df1$migeth0 [df1$migeth > 0] = 1
df1$migeth0 [df1$migeth == 0] = 0

table(df1$migeth0)
## 
##   0   1 
## 210 149
df1$migeth0 = as.factor(df1$migeth0)
round(prop.table(table(df1$migeth0))*100, digits = 2)
## 
##    0    1 
## 58.5 41.5

Здесь я достаю всех тех, кто либо говорит дома не на русском, либо чьи родители родились не в России.

1 =это мигрант иговорит дома на нерусском 0 - это не мигранты, которые говорят дома на русском

Educaitonal aspirations

df_ea[is.na(df_ea)] <- 0
df_eamax <- as.matrix(df_ea)

df1$ea <- rowMaxs(df_eamax)

table(df1$ea) # тут я сравнила значения в строке и попросила выбрать максимальное. Это - то образовательное намерение, которое меня интересует. 
## 
##   0   1   2   3   4   5   6   7 
##  18   6  44  50   6  31  38 166

0 Рабочий рынок труда: колледж или работа 1 Профессионал с высшим образованием

df1$eawu_min [df1$v21_3 == 3] <- "uni_min"
df1$eawu_min [df1$v21_6 == 6] <- "uni_min"
df1$eawu_min [df1$v21_7 == 7] <- "uni_min"
df1$eawu_min [df1$v21_1 == 1] <- "work_min"
df1$eawu_min [df1$v21_4 == 4] <- "work_min"
df1$eawu_min [df1$v21_2 == 2] <- "work_min"
df1$eawu_min [df1$v21_5 == 5] <- "work_min"

df1$eawu_min0 [df1$v21_3 == 3] <- 1
df1$eawu_min0 [df1$v21_6 == 6] <- 1
df1$eawu_min0 [df1$v21_7 == 7] <- 1
df1$eawu_min0 [df1$v21_1 == 1] <- 0
df1$eawu_min0 [df1$v21_4 == 4] <- 0
df1$eawu_min0 [df1$v21_2 == 2] <- 0
df1$eawu_min0 [df1$v21_5 == 5] <- 0

table(df1$eawu_min)
## 
##  uni_min work_min 
##      243       98
table(df1$eawu_min0)
## 
##   0   1 
##  98 243
round(prop.table(table(df1$eawu_min0))*100, digits = 2)
## 
##     0     1 
## 28.74 71.26
df1$eawu_max [df1$ea == 1] <- 'reswork'
df1$eawu_max [df1$ea == 4] <- 'reswork'
df1$eawu_max [df1$ea == 2] <- 'reswork'
df1$eawu_max [df1$ea == 5] <- 'reswork'
df1$eawu_max [df1$ea == 7] <- 'resuni'
df1$eawu_max [df1$ea == 3] <- 'resuni'
df1$eawu_max [df1$ea == 6] <- 'resuni'

df1$eawu_max0 [df1$ea == 1] <- 0
df1$eawu_max0 [df1$ea == 4] <- 0
df1$eawu_max0 [df1$ea == 2] <- 0
df1$eawu_max0 [df1$ea == 5] <- 0
df1$eawu_max0 [df1$ea == 7] <- 1
df1$eawu_max0 [df1$ea == 3] <- 1
df1$eawu_max0 [df1$ea == 6] <- 1

table(df1$eawu_max)
## 
##  resuni reswork 
##     254      87
table(df1$eawu_max0)
## 
##   0   1 
##  87 254
round(prop.table(table(df1$eawu_max0))*100, digits = 2)
## 
##     0     1 
## 25.51 74.49

Mom’s education

0 - невысшее 1 - высшее (даже если незаконченное)

table(df1$momedu)
## 
##   1   2   3   4   5   6 
##  63  70  16 151  45   8
df1$momedubi [df1$momedu == 1] <- 0
df1$momedubi [df1$momedu == 2] <- 0
df1$momedubi [df1$momedu == 3] <- 1
df1$momedubi [df1$momedu == 4] <- 1
df1$momedubi <- as.factor(df1$momedubi)

table(df1$momedubi)
## 
##   0   1 
## 133 167
round(prop.table(table(df1$momedubi))*100, digits = 2)
## 
##     0     1 
## 44.33 55.67

Dad’s education

0 - нет высшего 1 - высшее

table(df1$dadedu)
## 
##  1  2  3  4  5  6  7 
## 49 68 21 94 12 69 39
df1$dadedubi [df1$dadedu == 1] <- 0
df1$dadedubi [df1$dadedu == 2] <- 0
df1$dadedubi [df1$dadedu == 3] <- 1
df1$dadedubi [df1$dadedu == 4] <- 1
df1$dadedubi <- as.factor(df1$dadedubi)



table(df1$dadedubi)
## 
##   0   1 
## 117 115
round(prop.table(table(df1$dadedubi))*100, digits = 2)
## 
##     0     1 
## 50.43 49.57

Age of arrival

0 - В школу 1 - не в школу

df1$agesch [df1$v48_4_Month <= 72 ] <- "beforeschol"
df1$agesch [df1$v48_4_Month > 72 ] <- "inschool"

df1$agesch0 [df1$v48_4_Month <= 72 ] <- 0
df1$agesch0 [df1$v48_4_Month > 72 ] <- 1
df1$agesch0 <- as.factor(df1$agesch0)

table(df1$agesch)
## 
## beforeschol    inschool 
##          52          91
table(df1$agesch0)
## 
##  0  1 
## 52 91
round(prop.table(table(df1$agesch0))*100, digits = 2)
## 
##     0     1 
## 36.36 63.64

Educational attainment

# df1$gpa[is.na(df1$gpa)] <- 0

Class

df1$classnum <- as.factor(df1$classnum)

Еще раз все мои бинарные переменные:

# gender
table(df1$gender)
## 
##   1   2 
## 170 182
class(df1$gender)
## [1] "factor"
# 1 - boy, 2 - girl
table(df1$gender1)
## 
##  boy girl 
##  170  182
prop.table(table(df1$gender1))*100
## 
##      boy     girl 
## 48.29545 51.70455
# migration and ethnicity
table(df1$migeth0)
## 
##   0   1 
## 210 149
class(df1$migeth0)
## [1] "factor"
# 0 - НЕмигрант и этническое большинство, 1 - мигрант и\или этническое меньшинство
prop.table(table(df1$migeth0))*100
## 
##        0        1 
## 58.49582 41.50418
#edu aspirations
xtabs(~ eawu_max0, data = df1)
## eawu_max0
##   0   1 
##  87 254
# 0 - на работу, 1 - в университет
table(df1$eawu_max)
## 
##  resuni reswork 
##     254      87
prop.table(table(df1$eawu_max))*100
## 
##  resuni reswork 
## 74.4868 25.5132
#education of parents
table(df1$momedubi)
## 
##   0   1 
## 133 167
class(df1$momedubi)
## [1] "factor"
table(df1$dadedubi)
## 
##   0   1 
## 117 115
class(df1$dadedubi)
## [1] "factor"
# 0 - нет высшего, 1 - есть высшее 
prop.table(table(df1$momedubi))*100
## 
##        0        1 
## 44.33333 55.66667
prop.table(table(df1$dadedubi))*100
## 
##        0        1 
## 50.43103 49.56897
# возраст прибытия
table(df1$agesch0)
## 
##  0  1 
## 52 91
class(df1$agesch0)
## [1] "factor"
# 0 - до школы, 1 - в школу
table(df1$agesch)
## 
## beforeschol    inschool 
##          52          91
prop.table(table(df1$agesch))*100
## 
## beforeschol    inschool 
##    36.36364    63.63636
#Успеваемость
describe(df1$gpa)
varsnmeansdmediantrimmedmadminmaxrangeskewkurtosisse
13443.780.5743.713.750.6352.1452.860.304-0.3750.0309
class(df1$gpa)
## [1] "numeric"

Тесты?

# chisq.test(df1$eawu_max0, df1$agesch)

BINARY LOGISTIC REGRESSION. MAX EA

df1$eawu_max0 <- as.factor(df1$eawu_max0)
df1$eawu_min0 <- as.factor(df1$eawu_min0)

data_mig0 <- df1 [c("migeth0")]
data_mig0 <- na.omit(data_mig0)

# sapply(data,function(x) sum(is.na(x)))

Models

EA and migration+ethnicity

data_mig <- df1[c("eawu_max0", "migeth0")]
data_mig <- na.omit(data_mig)
class(data_mig$eawu_max0)
## [1] "factor"
class(data_mig$migeth0)
## [1] "factor"
chisq.test(data_mig$eawu_max0, data_mig$migeth0)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data_mig$eawu_max0 and data_mig$migeth0
## X-squared = 1.5741, df = 1, p-value = 0.2096

p-value > 0.05, so there is NO association between v.

ea_mig <- glm(eawu_max0 ~ migeth0, data = data_mig, family = "binomial")
summary(ea_mig)
## 
## Call:
## glm(formula = eawu_max0 ~ migeth0, family = "binomial", data = data_mig)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7486  -1.5893   0.6991   0.8154   0.8154  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.9305     0.1578   5.897 3.71e-09 ***
## migeth01      0.3540     0.2571   1.377    0.168    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 387.31  on 340  degrees of freedom
## Residual deviance: 385.38  on 339  degrees of freedom
## AIC: 389.38
## 
## Number of Fisher Scoring iterations: 4
round(exp(coef(ea_mig)), digits = 2)
## (Intercept)    migeth01 
##        2.54        1.42

Наличие миграционного опыта не влияет на аспирации (А ЭТНИЧНОСТЬ???). По одс ратио выходит так, что русские меньше хотят в ВУЗ. То есть 0,7 к 1 шансов что русский пойдет в ВУЗ. ИЛИ? Нерусские хотят в ВУЗ больше русских, и 1,3 шансов к 1 что если мигрант, то он пойдет в ВУЗ.

hoslem.test(data_mig$eawu_max0, fitted(ea_mig), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_mig$eawu_max0, fitted(ea_mig)
## X-squared = 341, df = 8, p-value < 2.2e-16
roc_ea_mig1 <- predict(ea_mig, newdata = data_mig, type = "response")
roc_ea_mig2 <- roc(data_mig$eawu_max0 ~ roc_ea_mig1, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

Хорошо (плохо) - эта модель не лучше, чем ее отсутствие

EA and gender

data_gen <- df1 [c("eawu_max0", "gender")]
data_gen <- na.omit(data_gen)
chisq.test(data_gen$eawu_max0, data_gen$gender)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data_gen$eawu_max0 and data_gen$gender
## X-squared = 4.7245, df = 1, p-value = 0.02974
chisq.test(data_gen$eawu_max0, data_gen$gender)$std
##                   data_gen$gender
## data_gen$eawu_max0         1         2
##                  0  2.299248 -2.299248
##                  1 -2.299248  2.299248

p-value is less than 0.05 = there is a association between variables

ea_gen <- glm(eawu_max0 ~ gender, data = data_gen, family = "binomial")
summary(ea_gen)
## 
## Call:
## glm(formula = eawu_max0 ~ gender, family = "binomial", data = data_gen)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7909  -1.5293   0.6702   0.8624   0.8624  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.7975     0.1703   4.682 2.84e-06 ***
## gender2       0.5816     0.2545   2.285   0.0223 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 379.49  on 334  degrees of freedom
## Residual deviance: 374.19  on 333  degrees of freedom
## AIC: 378.19
## 
## Number of Fisher Scoring iterations: 4
round(exp(coef(ea_gen)), digits = 2)
## (Intercept)     gender2 
##        2.22        1.79

У девочек выше аспирации

pR2(ea_gen)
## fitting null model for pseudo-r2
##           llh       llhNull            G2      McFadden          r2ML 
## -187.09366335 -189.74314189    5.29895709    0.01396350    0.01569334 
##          r2CU 
##    0.02315101
hoslem.test(data_gen$eawu_max0, fitted(ea_gen), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_gen$eawu_max0, fitted(ea_gen)
## X-squared = 335, df = 8, p-value < 2.2e-16

Model has no predictive power ?!

roc_ea_gen1 <- predict(ea_gen, newdata = data_gen, type = "response")
roc_ea_gen2 <- roc(data_gen$eawu_max0 ~ roc_ea_gen1, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

EA and GPA

data_gpa <- df1[c("eawu_max0", "gpa")]
data_gpa <- na.omit(data_gpa)
ea_gpa <- glm(eawu_max0 ~ gpa, data = data_gpa, family = "binomial")
summary(ea_gpa)
## 
## Call:
## glm(formula = eawu_max0 ~ gpa, family = "binomial", data = data_gpa)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5933  -0.9121   0.5339   0.8230   1.6327  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -5.1387     1.0650  -4.825 1.40e-06 ***
## gpa           1.6932     0.2968   5.705 1.16e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 372.63  on 326  degrees of freedom
## Residual deviance: 330.71  on 325  degrees of freedom
## AIC: 334.71
## 
## Number of Fisher Scoring iterations: 5
round(exp(coef(ea_gpa)), digits = 2)
## (Intercept)         gpa 
##        0.01        5.44

Человек пойдет в Вуз если у него выская успеваемость

pR2(ea_gpa)
## fitting null model for pseudo-r2
##          llh      llhNull           G2     McFadden         r2ML         r2CU 
## -165.3535912 -186.3144340   41.9216857    0.1125025    0.1203233    0.1769375
hoslem.test(data_gpa$eawu_max0, fitted(ea_gpa), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_gpa$eawu_max0, fitted(ea_gpa)
## X-squared = 327, df = 8, p-value < 2.2e-16
roc_ea_gpa1 <- predict(ea_gpa, newdata = data_gpa, type = "response")
roc_ea_gpa2 <- roc(data_gpa$eawu_max0 ~ roc_ea_gpa1, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

EA and age

data_age <- df1 [c("eawu_max0", "agesch0")]
data_age <- na.omit(data_age)

здесь тоже может быть мало наблюдений, потому что не все переезжали. Их тут 140 человек

ea_age <- glm(eawu_max0 ~ agesch0, data = data_age, family = "binomial")
summary(ea_age)
## 
## Call:
## glm(formula = eawu_max0 ~ agesch0, family = "binomial", data = data_age)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7378   0.7066   0.7066   0.8106   0.8106  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   0.9445     0.3150   2.999  0.00271 **
## agesch01      0.3158     0.4084   0.773  0.43934   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 150.72  on 135  degrees of freedom
## Residual deviance: 150.13  on 134  degrees of freedom
## AIC: 154.13
## 
## Number of Fisher Scoring iterations: 4

Возраст незначим

pR2(ea_age)
## fitting null model for pseudo-r2
##           llh       llhNull            G2      McFadden          r2ML 
## -75.062787247 -75.359229144   0.592883794   0.003933717   0.004349951 
##          r2CU 
##   0.006493868
hoslem.test(data_age$eawu_max0, fitted(ea_age), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_age$eawu_max0, fitted(ea_age)
## X-squared = 136, df = 8, p-value < 2.2e-16
roc_ea_age1 <- predict(ea_age, newdata = data_age, type = "response")
roc_ea_age2 <- roc(data_age$eawu_max0 ~ roc_ea_age1, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

EA + momedubi

data_medu <- df1[c("eawu_max0", "momedubi")]
data_medu <- na.omit(data_medu)
ea_medu <- glm(eawu_max0 ~ momedubi, data = data_medu, family = "binomial")
summary(ea_medu)
## 
## Call:
## glm(formula = eawu_max0 ~ momedubi, family = "binomial", data = data_medu)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7149   0.7227   0.7227   0.7409   0.7409  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  1.15268    0.20943   5.504 3.71e-08 ***
## momedubi1    0.05668    0.28098   0.202     0.84    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 311.39  on 285  degrees of freedom
## Residual deviance: 311.34  on 284  degrees of freedom
## AIC: 315.34
## 
## Number of Fisher Scoring iterations: 4
round(exp(coef(ea_medu)), digits = 2)
## (Intercept)   momedubi1 
##        3.17        1.06

Образование матери незначимо

pR2(ea_medu)
## fitting null model for pseudo-r2
##           llh       llhNull            G2      McFadden          r2ML 
## -1.556722e+02 -1.556925e+02  4.065339e-02  1.305566e-04  1.421346e-04 
##          r2CU 
##  2.142627e-04
hoslem.test(data_medu$eawu_max0, fitted(ea_medu), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_medu$eawu_max0, fitted(ea_medu)
## X-squared = 286, df = 8, p-value < 2.2e-16

p-value = 1

roc_ea_medu1 <- predict(ea_medu, newdata = data_medu, type = "response")
roc_ea_medu2 <- roc(data_medu$eawu_max0 ~ roc_ea_medu1, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

EA + dadedubi

data_dedu <- df1[c("eawu_max0", "dadedubi")]
data_dedu <- na.omit(data_dedu)
ea_dedu <- glm(eawu_max0 ~ dadedubi, data = data_dedu, family = "binomial")
summary(ea_dedu)
## 
## Call:
## glm(formula = eawu_max0 ~ dadedubi, family = "binomial", data = data_dedu)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7890   0.6715   0.6715   0.7625   0.7625  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   1.0866     0.2185   4.972 6.62e-07 ***
## dadedubi1     0.2882     0.3236   0.891    0.373    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 235.82  on 219  degrees of freedom
## Residual deviance: 235.02  on 218  degrees of freedom
## AIC: 239.02
## 
## Number of Fisher Scoring iterations: 4
round(exp(coef(ea_dedu)), digits = 2)
## (Intercept)   dadedubi1 
##        2.96        1.33

Образование отца незначимо

hoslem.test(data_dedu$eawu_max0, fitted(ea_dedu), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_dedu$eawu_max0, fitted(ea_dedu)
## X-squared = 220, df = 8, p-value < 2.2e-16

p-value = 1

roc_ea_dedu1 <- predict(ea_dedu, newdata = data_dedu, type = "response")
roc_ea_dedu2 <- roc(data_dedu$eawu_max0 ~ roc_ea_dedu1, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

EA + imom

data_imom <- df1[c("eawu_max0", "iseimom")]
data_imom <- na.omit(data_imom)
ea_imom <- glm(eawu_max0 ~ iseimom, data = data_imom, family = "binomial")
summary(ea_imom)
## 
## Call:
## glm(formula = eawu_max0 ~ iseimom, family = "binomial", data = data_imom)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8683   0.6195   0.6961   0.7615   0.8653  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.652930   0.419456   1.557    0.120
## iseimom     0.013643   0.009624   1.418    0.156
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 316.11  on 294  degrees of freedom
## Residual deviance: 314.06  on 293  degrees of freedom
## AIC: 318.06
## 
## Number of Fisher Scoring iterations: 4
round(exp(coef(ea_imom)), digits = 2)
## (Intercept)     iseimom 
##        1.92        1.01

ISEI мамы незначимо

hoslem.test(data_imom$eawu_max0, fitted(ea_imom), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_imom$eawu_max0, fitted(ea_imom)
## X-squared = NaN, df = 8, p-value = NA

p-value = NA

roc_ea_imom1 <- predict(ea_imom, newdata = data_imom, type = "response")
roc_ea_imom2 <- roc(data_imom$eawu_max0 ~ roc_ea_imom1, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

EA + idad

data_idad <- df1[c("eawu_max0", "iseidad")]
data_idad <- na.omit(data_idad)
ea_idad <- glm(eawu_max0 ~ iseidad, data = data_idad, family = "binomial")
summary(ea_idad)
## 
## Call:
## glm(formula = eawu_max0 ~ iseidad, family = "binomial", data = data_idad)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9674   0.5279   0.6501   0.8252   1.0056  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -0.18627    0.51177  -0.364  0.71588   
## iseidad      0.03024    0.01160   2.606  0.00916 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 277.82  on 249  degrees of freedom
## Residual deviance: 270.65  on 248  degrees of freedom
## AIC: 274.65
## 
## Number of Fisher Scoring iterations: 4
round(exp(coef(ea_idad)), digits = 2)
## (Intercept)     iseidad 
##        0.83        1.03

ISEI Отца значимо Чем выше статус работы отца, тем больше шансов пойти в университет

hoslem.test(data_idad$eawu_max0, fitted(ea_idad), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_idad$eawu_max0, fitted(ea_idad)
## X-squared = 250, df = 8, p-value < 2.2e-16

НО ЭТОЙ МОДЕЛИ НЕЛЬЗЯ ДОВЕРЯТЬ! p-value = 0.04116

roc_ea_idad1 <- predict(ea_idad, newdata = data_idad, type = "response")
roc_ea_idad2 <- roc(data_idad$eawu_max0 ~ roc_ea_idad1, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

EA and class

data_classnum <-df1[c("eawu_max0", "classnum")]
data_classnum <- na.omit(data_classnum)
ea_class <- glm(eawu_max0 ~ classnum, data = data_classnum, family = "binomial")
summary(ea_class)
## 
## Call:
## glm(formula = eawu_max0 ~ classnum, family = "binomial", data = data_classnum)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0074  -0.4437   0.5799   0.9508   0.9508  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.5596     0.1618   3.458 0.000544 ***
## classnum10    1.1381     0.3423   3.324 0.000886 ***
## classnum11    1.3122     0.3498   3.751 0.000176 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 381.81  on 338  degrees of freedom
## Residual deviance: 359.38  on 336  degrees of freedom
## AIC: 365.38
## 
## Number of Fisher Scoring iterations: 4
round(exp(coef(ea_class)), digits = 2)
## (Intercept)  classnum10  classnum11 
##        1.75        3.12        3.71

Чем выше класс, тем больше шанс, что чел пойдет в ВУЗ

hoslem.test(data_classnum$eawu_max0, fitted(ea_class), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_classnum$eawu_max0, fitted(ea_class)
## X-squared = 339, df = 8, p-value < 2.2e-16

p-value = 1

library(pROC)
roc_ea_class1 <- predict(ea_class, newdata = data_classnum, type = "response")
roc_ea_class2 <- roc(data_classnum$eawu_max0 ~ roc_ea_class1, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

BLREGRESSION: MAX EA summary without interacitons

export_summs(ea_gen, ea_gpa, ea_age, ea_mig, ea_medu, ea_dedu, ea_imom, ea_idad, ea_class, scale = TRUE)
Model 1Model 2Model 3Model 4Model 5Model 6Model 7Model 8Model 9
(Intercept)0.80 ***1.27 ***0.94 **0.93 ***1.15 ***1.09 ***1.24 ***1.17 ***0.56 ***
(0.17)   (0.15)   (0.31)  (0.16)   (0.21)   (0.22)   (0.14)   (0.15)   (0.16)   
gender0.58 *                                                         
(0.25)                                                          
gpa       0.96 ***                                                
       (0.17)                                                   
agesch0              0.32                                             
              (0.41)                                            
migeth0                    0.35                                       
                    (0.26)                                      
momedubi                           0.06                                
                           (0.28)                               
dadedubi                                  0.29                         
                                  (0.32)                        
iseimom                                         0.20                  
                                         (0.14)                 
iseidad                                                0.41 **        
                                                (0.16)          
classnum10                                                       1.14 ***
                                                       (0.34)   
classnum11                                                       1.31 ***
                                                       (0.35)   
N335       327       136      341       286       220       295       250       339       
AIC378.19    334.71    154.13   389.38    315.34    239.02    318.06    274.65    365.38    
BIC385.82    342.29    159.95   397.05    322.66    245.81    325.44    281.70    376.86    
Pseudo R20.02    0.18    0.01   0.01    0.00    0.01    0.01    0.04    0.09    
All continuous predictors are mean-centered and scaled by 1 standard deviation. *** p < 0.001; ** p < 0.01; * p < 0.05.
# install.packages("gtsummary")
# library(gtsummary)
# tbl_regression(ea_age, exponentiate = TRUE)

BINARY LOGISTIC REGRESSION: interactions

migcntrl

data_migcntrl <- df1 [c("eawu_max0","gender", "classnum", "migeth0" )]
data_migcntrl <- na.omit(data_migcntrl)
migcntrl <- glm(eawu_max0 ~ gender + classnum + migeth0, data = data_migcntrl, family = "binomial")
summary(migcntrl)
## 
## Call:
## glm(formula = eawu_max0 ~ gender + classnum + migeth0, family = "binomial", 
##     data = data_migcntrl)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3393   0.3661   0.5744   0.7527   1.1643  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.0310     0.2423   0.128   0.8982    
## gender2       0.6020     0.2699   2.231   0.0257 *  
## classnum10    1.0853     0.3477   3.121   0.0018 ** 
## classnum11    1.4481     0.3673   3.942 8.07e-05 ***
## migeth01      0.5880     0.2785   2.112   0.0347 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 373.96  on 332  degrees of freedom
## Residual deviance: 342.55  on 328  degrees of freedom
## AIC: 352.55
## 
## Number of Fisher Scoring iterations: 4
round(exp(coef(migcntrl)), digits = 2)
## (Intercept)     gender2  classnum10  classnum11    migeth01 
##        1.03        1.83        2.96        4.25        1.80

При добавлении контрольных переменных миграционный опыт начинает проявлять значимость

vif(migcntrl)
##              GVIF Df GVIF^(1/(2*Df))
## gender   1.023031  1        1.011450
## classnum 1.016856  2        1.004188
## migeth0  1.020015  1        1.009958

No correlation

pR2(migcntrl)
## fitting null model for pseudo-r2
##           llh       llhNull            G2      McFadden          r2ML 
## -171.27590496 -186.98244925   31.41308859    0.08400010    0.09002086 
##          r2CU 
##    0.13342287
hoslem.test(data_migcntrl$eawu_max0, fitted(migcntrl), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_migcntrl$eawu_max0, fitted(migcntrl)
## X-squared = 333, df = 8, p-value < 2.2e-16
roc_migcntrl1 <- predict(migcntrl, newdata = data_migcntrl, type = "response")
roc_migcntrl2 <- roc(data_migcntrl$eawu_max0 ~ roc_migcntrl1, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

ea_migmomedu

data_migmomedu <- df1 [c("eawu_max0","gender", "classnum", "migeth0", "momedubi")]
data_migmomedu <- na.omit(data_migmomedu)
ea_migmomedu <- glm(eawu_max0 ~ gender + classnum + migeth0 * momedubi, data = data_migmomedu, family = "binomial")
summary(ea_migmomedu)
## 
## Call:
## glm(formula = eawu_max0 ~ gender + classnum + migeth0 * momedubi, 
##     family = "binomial", data = data_migmomedu)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4973   0.3355   0.5316   0.7326   1.2623  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -0.1974     0.3708  -0.532 0.594386    
## gender2              0.6502     0.3139   2.071 0.038357 *  
## classnum10           1.2659     0.4067   3.112 0.001857 ** 
## classnum11           1.4900     0.4075   3.657 0.000256 ***
## migeth01             1.1303     0.4858   2.327 0.019972 *  
## momedubi1            0.4549     0.3842   1.184 0.236411    
## migeth01:momedubi1  -0.9924     0.6384  -1.554 0.120065    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 300.50  on 278  degrees of freedom
## Residual deviance: 270.62  on 272  degrees of freedom
## AIC: 284.62
## 
## Number of Fisher Scoring iterations: 4
round(exp(coef(ea_migmomedu)), digits = 2)
##        (Intercept)            gender2         classnum10         classnum11 
##               0.82               1.92               3.55               4.44 
##           migeth01          momedubi1 migeth01:momedubi1 
##               3.10               1.58               0.37

Мигранты более вероятно пойдут в универ И! Мигранты с высшим образованием матери менее вероятно пойдут в ВУЗ? 0.3 шанса, что они пойдут в ВУЗ

vif(ea_migmomedu)
##                      GVIF Df GVIF^(1/(2*Df))
## gender           1.082054  1        1.040218
## classnum         1.018427  2        1.004575
## migeth0          2.376841  1        1.541701
## momedubi         1.593986  1        1.262531
## migeth0:momedubi 2.937527  1        1.713922
pR2(ea_migmomedu)
## fitting null model for pseudo-r2
##           llh       llhNull            G2      McFadden          r2ML 
## -135.30894627 -150.25239375   29.88689496    0.09945564    0.10158348 
##          r2CU 
##    0.15405190
hoslem.test(data_migmomedu$eawu_max0, fitted(ea_migmomedu), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_migmomedu$eawu_max0, fitted(ea_migmomedu)
## X-squared = 279, df = 8, p-value < 2.2e-16

p-value < 2.2e-16

roc_ea_migmomedu1 <- predict(ea_migmomedu, newdata = data_migmomedu, type = "response")
roc_ea_migmomedu2 <- roc(data_migmomedu$eawu_max0 ~ roc_ea_migmomedu1, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

ea_migdadedu

data_migdadedu <- df1 [c("eawu_max0", "gender", "classnum", "migeth0", "dadedubi")]
data_migdadedu <- na.omit(data_migdadedu)
ea_migdadedu <- glm(eawu_max0 ~ gender + classnum + migeth0 * dadedubi, data = df1, family = "binomial")
summary(ea_migdadedu)
## 
## Call:
## glm(formula = eawu_max0 ~ gender + classnum + migeth0 * dadedubi, 
##     family = "binomial", data = df1)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4765   0.3170   0.4871   0.6829   1.2487  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -0.1660     0.3932  -0.422 0.672882    
## gender2              0.6112     0.3614   1.691 0.090856 .  
## classnum10           0.9480     0.4485   2.114 0.034558 *  
## classnum11           1.6286     0.4917   3.312 0.000926 ***
## migeth01             0.9449     0.4906   1.926 0.054089 .  
## dadedubi1            0.6788     0.4464   1.520 0.128398    
## migeth01:dadedubi1  -0.7318     0.7317  -1.000 0.317254    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 225.81  on 214  degrees of freedom
## Residual deviance: 203.83  on 208  degrees of freedom
##   (144 observations deleted due to missingness)
## AIC: 217.83
## 
## Number of Fisher Scoring iterations: 5
round(exp(coef(ea_migdadedu)), digits = 2)
##        (Intercept)            gender2         classnum10         classnum11 
##               0.85               1.84               2.58               5.10 
##           migeth01          dadedubi1 migeth01:dadedubi1 
##               2.57               1.97               0.48
pR2(ea_migdadedu)
## fitting null model for pseudo-r2
##           llh       llhNull            G2      McFadden          r2ML 
## -101.91633129 -112.90429023   21.97591787    0.09732100    0.09716329 
##          r2CU 
##    0.14944575
hoslem.test(data_migdadedu$eawu_max0, fitted(ea_migdadedu), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_migdadedu$eawu_max0, fitted(ea_migdadedu)
## X-squared = 215, df = 8, p-value < 2.2e-16
# plot(ea_migdadedu)
roc_ea_migdadedu <- predict(ea_migdadedu, newdata = data_migdadedu, type = "response")
roc_ea_migdadedu <- roc(data_migdadedu$eawu_max0 ~ roc_ea_migdadedu, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

Sensitivity - True Positive Rate Specificity - False Positive Rate

ea_migimom

data_migimom <- df1 [c("eawu_max0","gender", "classnum", "migeth0", "iseimom")]
data_migimom <- na.omit(data_migimom)
ea_migimom <- glm(eawu_max0 ~ gender + classnum + migeth0 * iseimom, data = data_migimom, family = "binomial")
summary(ea_migimom)
## 
## Call:
## glm(formula = eawu_max0 ~ gender + classnum + migeth0 * iseimom, 
##     family = "binomial", data = data_migimom)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5011   0.3261   0.5661   0.7008   1.3485  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -0.74279    0.63411  -1.171 0.241446    
## gender2           0.66814    0.30489   2.191 0.028421 *  
## classnum10        0.82094    0.37308   2.200 0.027775 *  
## classnum11        1.41063    0.41028   3.438 0.000586 ***
## migeth01          1.28369    0.92973   1.381 0.167367    
## iseimom           0.02182    0.01319   1.655 0.097993 .  
## migeth01:iseimom -0.01480    0.02172  -0.682 0.495421    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 302.59  on 287  degrees of freedom
## Residual deviance: 277.31  on 281  degrees of freedom
## AIC: 291.31
## 
## Number of Fisher Scoring iterations: 4
round(exp(coef(ea_migimom)), digits = 2)
##      (Intercept)          gender2       classnum10       classnum11 
##             0.48             1.95             2.27             4.10 
##         migeth01          iseimom migeth01:iseimom 
##             3.61             1.02             0.99
hoslem.test(data_migimom$eawu_max0, fitted(ea_migimom), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_migimom$eawu_max0, fitted(ea_migimom)
## X-squared = 288, df = 8, p-value < 2.2e-16
roc_ea_migimom <- predict(ea_migimom, newdata = data_migimom, type = "response")
roc_ea_migimom <- roc(data_migimom$eawu_max0 ~ roc_ea_migimom, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

ea_migidad

data_migidad <- df1 [c("eawu_max0","gender", "classnum", "migeth0", "iseidad")]
data_migidad <- na.omit(data_migidad)
ea_migidad <- glm(eawu_max0 ~ gender + classnum + migeth0 * iseidad, data = data_migidad, family = "binomial")
summary(ea_migidad)
## 
## Call:
## glm(formula = eawu_max0 ~ gender + classnum + migeth0 * iseidad, 
##     family = "binomial", data = data_migidad)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3953   0.2453   0.4770   0.7673   1.6016  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -1.82707    0.80855  -2.260 0.023842 *  
## gender2           0.76593    0.33899   2.259 0.023857 *  
## classnum10        1.24859    0.43472   2.872 0.004076 ** 
## classnum11        1.72775    0.49057   3.522 0.000428 ***
## migeth01          1.25112    1.11177   1.125 0.260442    
## iseidad           0.04346    0.01779   2.443 0.014580 *  
## migeth01:iseidad -0.01867    0.02533  -0.737 0.461112    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 268.71  on 245  degrees of freedom
## Residual deviance: 233.22  on 239  degrees of freedom
## AIC: 247.22
## 
## Number of Fisher Scoring iterations: 5
round(exp(coef(ea_migidad)), digits = 2)
##      (Intercept)          gender2       classnum10       classnum11 
##             0.16             2.15             3.49             5.63 
##         migeth01          iseidad migeth01:iseidad 
##             3.49             1.04             0.98
pR2(ea_migidad)
## fitting null model for pseudo-r2
##          llh      llhNull           G2     McFadden         r2ML         r2CU 
## -116.6090884 -134.3547742   35.4913716    0.1320808    0.1343494    0.2021626
hoslem.test(data_migidad$eawu_max0, fitted(ea_migidad), g = 10)
## Warning in Ops.factor(1, y): '-' not meaningful for factors
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_migidad$eawu_max0, fitted(ea_migidad)
## X-squared = 246, df = 8, p-value < 2.2e-16
roc_ea_migidad <- predict(ea_migidad, newdata = data_migidad, type = "response")
roc_ea_migidad <- roc(data_migidad$eawu_max0 ~ roc_ea_migidad, plot = TRUE, print.auc = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

All models with interaction

export_summs(migcntrl, ea_migmomedu, ea_migdadedu, ea_migimom, ea_migidad, ea_class, scale = TRUE)
Model 1Model 2Model 3Model 4Model 5Model 6
(Intercept)0.03    -0.20    -0.17    0.19    0.12    0.56 ***
(0.24)   (0.37)   (0.39)   (0.28)   (0.30)   (0.16)   
gender0.60 *  0.65 *  0.61    0.67 *  0.77 *         
(0.27)   (0.31)   (0.36)   (0.30)   (0.34)          
classnum101.09 ** 1.27 ** 0.95 *  0.82 *  1.25 ** 1.14 ***
(0.35)   (0.41)   (0.45)   (0.37)   (0.43)   (0.34)   
classnum111.45 ***1.49 ***1.63 ***1.41 ***1.73 ***1.31 ***
(0.37)   (0.41)   (0.49)   (0.41)   (0.49)   (0.35)   
migeth00.59 *  1.13 *  0.94    0.65 *  0.41           
(0.28)   (0.49)   (0.49)   (0.32)   (0.34)          
momedubi       0.45                                
       (0.38)                               
migeth0:momedubi       -0.99                                
       (0.64)                               
dadedubi              0.68                         
              (0.45)                        
migeth0:dadedubi              -0.73                         
              (0.73)                        
iseimom                     0.32                  
                     (0.19)                 
migeth0:iseimom                     -0.22                  
                     (0.32)                 
iseidad                            0.59 *         
                            (0.24)          
migeth0:iseidad                            -0.25           
                            (0.34)          
N333       279       215       288       246       339       
AIC352.55    284.62    217.83    291.31    247.22    365.38    
BIC371.59    310.04    241.43    316.95    271.76    376.86    
Pseudo R20.13    0.15    0.15    0.13    0.20    0.09    
All continuous predictors are mean-centered and scaled by 1 standard deviation. *** p < 0.001; ** p < 0.01; * p < 0.05.