df1 <- dd[c("gpa", "age", "classnum", "v17_2", "v17_a", "v17_b", "v17_2etc", "v21_1", "v21_2", "v21_3", "v21_4", "v21_5", "v21_6", "v21_7", "v21_8etcvar", "momedu", "dadedu", "codenat_parentsbinom", "v48_4_Month", "gender", "v54", "iseimom", "iseidad", "codenat_language")]
head(df1)

gpa	age	classnum	v17_2	v17_a	v17_2etc	v21_2	v21_3	v21_5	v21_6	v21_7	momedu	dadedu	codenat_parentsbinom	v48_4_Month	gender	v54	iseimom	iseidad	codenat_language
3.43	16		7	7		2					3	2	0	156	1	16	31	40	5.17e+03
3.29	16		7	7	персональным фитнес тренером			5			5	4	0		1	16	47	65	1.11e+03
3.43	15	9	3	3						7	4	2	1		1	15	66	34	1.11e+03
4.14	15	9	3	3					6		2	5	1	144	1	15	43	43	1.11e+03
3.71	14	9	7	7	тренер (спорт)	2	3				4	6	0		1	14	59	62	5.18e+03
3.71	16	9	2	2			3				1	6	0	156	2	16	27		6.21e+03

df_ea <- dd[c("v21_1", "v21_2", "v21_3", "v21_4", "v21_5", "v21_6", "v21_7" )]
df_oa <- dd[c("v17_a", "v17_b", "v17_c", "v17_d")]
df_edupar <- dd[c("momedu","dadedu")]
df_iseipar <- dd [c("iseimom", "iseidad")]
df_age <- dd [c("age", "classnum", "v48_4_Month")]

Variabels

# sapply(df1,function(x) sum(is.na(x)))

Gender

1 - мальчик, 2 - девочка

table(df1$gender) # 170 182

## 
##   1   2 
## 170 182

df1$gender <- as.factor(df1$gender)

Migration

Два родителя не в россии - мигрант Хотя бы один в россии - не мигрант

в кодировке 0 - это мигрант, 1 - это не мигрант. здесь я перекодирую и делаю так, чтобы 0 стал не мигрантом , а 1 - мигрантом.

df1$mig0 [df1$codenat_parentsbinom == 0] <- 1
df1$mig0 [df1$codenat_parentsbinom == 1] <- 0
df1$mig0 <- as.factor(df1$mig0)
table(df1$mig0)

## 
##   0   1 
## 206 143

Educaitonal aspirations

df_ea[is.na(df_ea)] <- 0
df_eamax <- as.matrix(df_ea)

df1$ea <- rowMaxs(df_eamax)

table(df1$ea) # тут я сравнила значения в строке и попросила выбрать максимальное. Это - то образовательное намерение, которое меня интересует.

## 
##   0   1   2   3   4   5   6   7 
##  18   6  44  50   6  31  38 166

0 Рабочий рынок труда: колледж или работа 1 Профессионал с высшим образованием

df1$eawu_min0 [df1$v21_3 == 3] <- 1
df1$eawu_min0 [df1$v21_6 == 6] <- 1
df1$eawu_min0 [df1$v21_7 == 7] <- 1
df1$eawu_min0 [df1$v21_1 == 1] <- 0
df1$eawu_min0 [df1$v21_4 == 4] <- 0
df1$eawu_min0 [df1$v21_2 == 2] <- 0
df1$eawu_min0 [df1$v21_5 == 5] <- 0

df1$eawu_max0 [df1$ea == 1] <- 0
df1$eawu_max0 [df1$ea == 4] <- 0
df1$eawu_max0 [df1$ea == 2] <- 0
df1$eawu_max0 [df1$ea == 5] <- 0
df1$eawu_max0 [df1$ea == 7] <- 1
df1$eawu_max0 [df1$ea == 3] <- 1
df1$eawu_max0 [df1$ea == 6] <- 1

Mom’s education

0 - невысшее 1 - высшее (даже если незаконченное)

df1$momedubi [df1$momedu == 1] <- 0
df1$momedubi [df1$momedu == 2] <- 0
df1$momedubi [df1$momedu == 3] <- 1
df1$momedubi [df1$momedu == 4] <- 1
df1$momedubi <- as.factor(df1$momedubi)

Dad’s education

0 - нет высшего 1 - высшее

df1$dadedubi [df1$dadedu == 1] <- 0
df1$dadedubi [df1$dadedu == 2] <- 0
df1$dadedubi [df1$dadedu == 3] <- 1
df1$dadedubi [df1$dadedu == 4] <- 1
df1$dadedubi <- as.factor(df1$dadedubi)

Age of arrival

0 - В школу 1 - не в школу

df1$agesch0 [df1$v48_4_Month <= 72 ] <- 0
df1$agesch0 [df1$v48_4_Month > 72 ] <- 1
df1$agesch0 <- as.factor(df1$agesch0)

SAMPLE

df1$mig0 <- 
  factor(df1$mig0, levels = c(0, 1),
         labels = c("Native born",
                    "Migrant"))
label(df1$mig0)<- "Migration status"

df1$gender <- 
  factor(df1$gender, levels = c(1, 2),
         labels = c("Boy",
                    "Girl"))
label(df1$gender)<- "Gender"

df1$classnum <- 
  factor(df1$classnum, levels = c(9,10,11),
         labels = c("9 class",
                    "10 class",
                    "11 class"))
label(df1$classnum)<- "Class"

df1$momedubi <-
  factor(df1$momedubi, levels = c(0, 1),
         labels = c("Lower than uni",
                    "Higher education"))
label(df1$momedubi) <- "Education of mother"


df1$dadedubi <-
  factor(df1$dadedubi, levels = c(0, 1),
         labels = c("Lower than uni",
                    "Higher education"))
label(df1$dadedubi) <- "Education of father"

label(df1$iseimom) <- "ISEI of mother"
label(df1$iseidad) <- "ISEI of father"

df1$eawu_max0 <- 
  factor(df1$eawu_max0, levels = c(0,1),
         labels = c("Low aspirations - College or Work",
                    "High aspirations - University"))
label(df1$eawu_max0)<- "Educational Aspirations"

table1( ~ gender + classnum  + momedubi + dadedubi +  iseimom + iseidad + gpa + eawu_max0| mig0, data = df1, overall = "Total")

	Native born (N=206)	Migrant (N=143)	Total (N=359)
Gender
Boy	90 (43.7%)	75 (52.4%)	170 (47.4%)
Girl	112 (54.4%)	66 (46.2%)	182 (50.7%)
Missing	4 (1.9%)	2 (1.4%)	7 (1.9%)
Class
9 class	97 (47.1%)	71 (49.7%)	174 (48.5%)
10 class	55 (26.7%)	34 (23.8%)	91 (25.3%)
11 class	54 (26.2%)	36 (25.2%)	92 (25.6%)
Missing	0 (0%)	2 (1.4%)	2 (0.6%)
Education of mother
Lower than uni	74 (35.9%)	57 (39.9%)	133 (37.0%)
Higher education	104 (50.5%)	59 (41.3%)	167 (46.5%)
Missing	28 (13.6%)	27 (18.9%)	59 (16.4%)
Education of father
Lower than uni	59 (28.6%)	56 (39.2%)	117 (32.6%)
Higher education	74 (35.9%)	39 (27.3%)	115 (32.0%)
Missing	73 (35.4%)	48 (33.6%)	127 (35.4%)
ISEI of mother
Mean (SD)	44.2 (14.5)	40.6 (14.9)	42.7 (14.8)
Median [Min, Max]	44.5 [16.0, 66.0]	41.0 [10.0, 66.0]	41.0 [10.0, 66.0]
Missing	22 (10.7%)	26 (18.2%)	52 (14.5%)
ISEI of father
Mean (SD)	44.7 (12.9)	44.6 (13.8)	44.8 (13.3)
Median [Min, Max]	41.0 [20.0, 69.0]	40.0 [20.0, 69.0]	41.0 [20.0, 69.0]
Missing	59 (28.6%)	31 (21.7%)	95 (26.5%)
GPA
Mean (SD)	3.82 (0.559)	3.73 (0.587)	3.78 (0.574)
Median [Min, Max]	3.71 [2.57, 5.00]	3.71 [2.43, 5.00]	3.71 [2.14, 5.00]
Missing	5 (2.4%)	9 (6.3%)	15 (4.2%)
Educational Aspirations
Low aspirations - College or Work	53 (25.7%)	30 (21.0%)	87 (24.2%)
High aspirations - University	141 (68.4%)	108 (75.5%)	254 (70.8%)
Missing	12 (5.8%)	5 (3.5%)	18 (5.0%)

table1( ~ gender + classnum  + momedubi + dadedubi +  iseimom + iseidad + gpa | eawu_max0*mig0, data = df1, overall = "Total")

	Low aspirations - College or Work		High aspirations - University		Total
	Native born (N=53)	Migrant (N=30)	Native born (N=141)	Migrant (N=108)	Native born (N=206)	Migrant (N=143)
Gender
Boy	30 (56.6%)	18 (60.0%)	53 (37.6%)	55 (50.9%)	90 (43.7%)	75 (52.4%)
Girl	22 (41.5%)	11 (36.7%)	85 (60.3%)	52 (48.1%)	112 (54.4%)	66 (46.2%)
Missing	1 (1.9%)	1 (3.3%)	3 (2.1%)	1 (0.9%)	4 (1.9%)	2 (1.4%)
Class
9 class	38 (71.7%)	19 (63.3%)	51 (36.2%)	51 (47.2%)	97 (47.1%)	71 (49.7%)
10 class	9 (17.0%)	4 (13.3%)	43 (30.5%)	27 (25.0%)	55 (26.7%)	34 (23.8%)
11 class	6 (11.3%)	5 (16.7%)	47 (33.3%)	30 (27.8%)	54 (26.2%)	36 (25.2%)
Missing	0 (0%)	2 (6.7%)	0 (0%)	0 (0%)	0 (0%)	2 (1.4%)
Education of mother
Lower than uni	22 (41.5%)	8 (26.7%)	47 (33.3%)	46 (42.6%)	74 (35.9%)	57 (39.9%)
Higher education	22 (41.5%)	14 (46.7%)	76 (53.9%)	45 (41.7%)	104 (50.5%)	59 (41.3%)
Missing	9 (17.0%)	8 (26.7%)	18 (12.8%)	17 (15.7%)	28 (13.6%)	27 (18.9%)
Education of father
Lower than uni	18 (34.0%)	10 (33.3%)	37 (26.2%)	44 (40.7%)	59 (28.6%)	56 (39.2%)
Higher education	14 (26.4%)	8 (26.7%)	55 (39.0%)	30 (27.8%)	74 (35.9%)	39 (27.3%)
Missing	21 (39.6%)	12 (40.0%)	49 (34.8%)	34 (31.5%)	73 (35.4%)	48 (33.6%)
ISEI of mother
Mean (SD)	40.7 (14.0)	40.0 (14.3)	45.2 (14.6)	40.7 (15.2)	44.2 (14.5)	40.6 (14.9)
Median [Min, Max]	32.0 [16.0, 66.0]	36.5 [20.0, 66.0]	46.0 [16.0, 66.0]	41.0 [10.0, 66.0]	44.5 [16.0, 66.0]	41.0 [10.0, 66.0]
Missing	8 (15.1%)	8 (26.7%)	12 (8.5%)	14 (13.0%)	22 (10.7%)	26 (18.2%)
ISEI of father
Mean (SD)	39.7 (11.3)	42.8 (13.2)	46.7 (13.2)	45.2 (14.3)	44.7 (12.9)	44.6 (13.8)
Median [Min, Max]	38.0 [20.0, 65.0]	38.0 [20.0, 65.0]	45.0 [20.0, 69.0]	41.0 [20.0, 69.0]	41.0 [20.0, 69.0]	40.0 [20.0, 69.0]
Missing	17 (32.1%)	5 (16.7%)	39 (27.7%)	25 (23.1%)	59 (28.6%)	31 (21.7%)
GPA
Mean (SD)	3.42 (0.469)	3.50 (0.517)	3.97 (0.521)	3.81 (0.592)	3.82 (0.559)	3.73 (0.587)
Median [Min, Max]	3.29 [2.57, 5.00]	3.43 [2.57, 4.71]	3.86 [2.86, 5.00]	3.71 [2.43, 5.00]	3.71 [2.57, 5.00]	3.71 [2.43, 5.00]
Missing	1 (1.9%)	1 (3.3%)	4 (2.8%)	7 (6.5%)	5 (2.4%)	9 (6.3%)

table(df1$momedubi, df1$mig0)

##                   
##                    Native born Migrant
##   Lower than uni            74      57
##   Higher education         104      59

dfmm <- as.data.frame(table(df1$momedubi, df1$mig0))
dfmm

Var1	Var2	Freq
Lower than uni	Native born	74
Higher education	Native born	104
Lower than uni	Migrant	57
Higher education	Migrant	59

ggbarplot(dfmm, x = "Var2", y = "Freq", color = "Var1",
          position = position_dodge(),
          palette = c("brown", "green"))

table(df1$dadedubi, df1$mig0)

##                   
##                    Native born Migrant
##   Lower than uni            59      56
##   Higher education          74      39

dfdm <- as.data.frame(table(df1$dadedubi, df1$mig0))
dfdm

Var1	Var2	Freq
Lower than uni	Native born	59
Higher education	Native born	74
Lower than uni	Migrant	56
Higher education	Migrant	39

ggbarplot(dfdm, x = "Var2", y = "Freq", color = "Var1",
          position = position_dodge(),
          palette = c("brown", "green"))

BINARY LOGISTIC REGRESSION. MAX EA

df1$eawu_max0 <- as.factor(df1$eawu_max0)
df1$eawu_min0 <- as.factor(df1$eawu_min0)

# sapply(data,function(x) sum(is.na(x)))

Models

EA and migration

data_mig <- df1[c("eawu_max0", "mig0")]
data_mig <- na.omit(data_mig)

chisq.test(data_mig$eawu_max0, data_mig$mig0)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data_mig$eawu_max0 and data_mig$mig0
## X-squared = 1.0582, df = 1, p-value = 0.3036

p-value > 0.05, so there is NO association between v.

ea_mig <- glm(eawu_max0 ~ mig0, data = data_mig, family = "binomial")
summary(ea_mig)

## 
## Call:
## glm(formula = eawu_max0 ~ mig0, family = "binomial", data = data_mig)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7470   0.1224   0.7002   0.7989   0.7989  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.9785     0.1611   6.073 1.26e-09 ***
## mig0Migrant   0.3025     0.2618   1.155    0.248    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 373.39  on 331  degrees of freedom
## Residual deviance: 372.04  on 330  degrees of freedom
## AIC: 376.04
## 
## Number of Fisher Scoring iterations: 4

round(exp(coef(ea_mig)), digits = 2)

## (Intercept) mig0Migrant 
##        2.66        1.35

Наличие миграционного опыта не влияет на аспирации.

pR2(ea_mig)

## fitting null model for pseudo-r2

##           llh       llhNull            G2      McFadden          r2ML 
## -1.860188e+02 -1.866953e+02  1.352973e+00  3.623480e-03  4.066928e-03 
##          r2CU 
##  6.022932e-03

hoslem.test(data_mig$eawu_max0, fitted(ea_mig), g = 10)

## Warning in Ops.factor(1, y): '-' not meaningful for factors

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_mig$eawu_max0, fitted(ea_mig)
## X-squared = 332, df = 8, p-value < 2.2e-16

Тест показывает, что модель не подходит для данных.

roc_ea_mig1 <- predict(ea_mig, newdata = data_mig, type = "response")
roc_ea_mig2 <- roc(data_mig$eawu_max0 ~ roc_ea_mig1, plot = TRUE, print.auc = T)

## Setting levels: control = Low aspirations - College or Work, case = High aspirations - University

## Setting direction: controls < cases

Модель не отличается от угадывания.

EA and gender

data_gen <- df1 [c("eawu_max0", "gender")]
data_gen <- na.omit(data_gen)

chisq.test(data_gen$eawu_max0, data_gen$gender)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data_gen$eawu_max0 and data_gen$gender
## X-squared = 4.7245, df = 1, p-value = 0.02974

chisq.test(data_gen$eawu_max0, data_gen$gender)$std

##                                    data_gen$gender
## data_gen$eawu_max0                        Boy      Girl
##   Low aspirations - College or Work  2.299248 -2.299248
##   High aspirations - University     -2.299248  2.299248

p-value is less than 0.05 = there is a association between variables

ea_gen <- glm(eawu_max0 ~ gender, data = data_gen, family = "binomial")
summary(ea_gen)

## 
## Call:
## glm(formula = eawu_max0 ~ gender, family = "binomial", data = data_gen)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7909  -1.5293   0.6702   0.8624   0.8624  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.7975     0.1703   4.682 2.84e-06 ***
## genderGirl    0.5816     0.2545   2.285   0.0223 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 379.49  on 334  degrees of freedom
## Residual deviance: 374.19  on 333  degrees of freedom
## AIC: 378.19
## 
## Number of Fisher Scoring iterations: 4

round(exp(coef(ea_gen)), digits = 2)

## (Intercept)  genderGirl 
##        2.22        1.79

У девочек выше аспирации, 1,7 шансов к 1 тому, что если человек - девочка, она пойдет в ВУЗ.

pR2(ea_gen)

## fitting null model for pseudo-r2

##           llh       llhNull            G2      McFadden          r2ML 
## -187.09366335 -189.74314189    5.29895709    0.01396350    0.01569334 
##          r2CU 
##    0.02315101

hoslem.test(data_gen$eawu_max0, fitted(ea_gen), g = 10)

## Warning in Ops.factor(1, y): '-' not meaningful for factors

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_gen$eawu_max0, fitted(ea_gen)
## X-squared = 335, df = 8, p-value < 2.2e-16

Model has no predictive power ?!

roc_ea_gen1 <- predict(ea_gen, newdata = data_gen, type = "response")
roc_ea_gen2 <- roc(data_gen$eawu_max0 ~ roc_ea_gen1, plot = TRUE, print.auc = T)

## Setting levels: control = Low aspirations - College or Work, case = High aspirations - University

## Setting direction: controls < cases

Модель не отличается от угадывания

EA and GPA

data_gpa <- df1[c("eawu_max0", "gpa")]
data_gpa <- na.omit(data_gpa)

ea_gpa <- glm(eawu_max0 ~ gpa, data = data_gpa, family = "binomial")
summary(ea_gpa)

## 
## Call:
## glm(formula = eawu_max0 ~ gpa, family = "binomial", data = data_gpa)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5933  -0.9121   0.5339   0.8230   1.6327  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -5.1387     1.0650  -4.825 1.40e-06 ***
## gpa           1.6932     0.2968   5.705 1.16e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 372.63  on 326  degrees of freedom
## Residual deviance: 330.71  on 325  degrees of freedom
## AIC: 334.71
## 
## Number of Fisher Scoring iterations: 5

round(exp(coef(ea_gpa)), digits = 2)

## (Intercept)         gpa 
##        0.01        5.44

Человек с высокой успеваемостью с большей вероятностью пойдет в ВУЗ.

pR2(ea_gpa)

## fitting null model for pseudo-r2

##          llh      llhNull           G2     McFadden         r2ML         r2CU 
## -165.3535912 -186.3144340   41.9216857    0.1125025    0.1203233    0.1769375

hoslem.test(data_gpa$eawu_max0, fitted(ea_gpa), g = 10)

## Warning in Ops.factor(1, y): '-' not meaningful for factors

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_gpa$eawu_max0, fitted(ea_gpa)
## X-squared = 327, df = 8, p-value < 2.2e-16

Предиктивную силу модель как будто имеет в большей степени, потмоу что McFadden не совсем прямо очень маленький

roc_ea_gpa1 <- predict(ea_gpa, newdata = data_gpa, type = "response")
roc_ea_gpa2 <- roc(data_gpa$eawu_max0 ~ roc_ea_gpa1, plot = TRUE, print.auc = T)

## Setting levels: control = Low aspirations - College or Work, case = High aspirations - University

## Setting direction: controls < cases

Модель лучше чем угадывание ура!

EA and age

data_age <- df1 [c("eawu_max0", "agesch0")]
data_age <- na.omit(data_age)

здесь тоже может быть мало наблюдений, потому что не все переезжали. Их тут 136 человек

ea_age <- glm(eawu_max0 ~ agesch0, data = data_age, family = "binomial")
summary(ea_age)

## 
## Call:
## glm(formula = eawu_max0 ~ agesch0, family = "binomial", data = data_age)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7378   0.7066   0.7066   0.8106   0.8106  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   0.9445     0.3150   2.999  0.00271 **
## agesch01      0.3158     0.4084   0.773  0.43934   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 150.72  on 135  degrees of freedom
## Residual deviance: 150.13  on 134  degrees of freedom
## AIC: 154.13
## 
## Number of Fisher Scoring iterations: 4

Возраст незначим

pR2(ea_age)

## fitting null model for pseudo-r2

##           llh       llhNull            G2      McFadden          r2ML 
## -75.062787247 -75.359229144   0.592883794   0.003933717   0.004349951 
##          r2CU 
##   0.006493868

hoslem.test(data_age$eawu_max0, fitted(ea_age), g = 10)

## Warning in Ops.factor(1, y): '-' not meaningful for factors

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_age$eawu_max0, fitted(ea_age)
## X-squared = 136, df = 8, p-value < 2.2e-16

roc_ea_age1 <- predict(ea_age, newdata = data_age, type = "response")
roc_ea_age2 <- roc(data_age$eawu_max0 ~ roc_ea_age1, plot = TRUE, print.auc = T)

## Setting levels: control = Low aspirations - College or Work, case = High aspirations - University

## Setting direction: controls < cases

EA + momedubi

data_medu <- df1[c("eawu_max0", "momedubi")]
data_medu <- na.omit(data_medu)

ea_medu <- glm(eawu_max0 ~ momedubi, data = data_medu, family = "binomial")
summary(ea_medu)

## 
## Call:
## glm(formula = eawu_max0 ~ momedubi, family = "binomial", data = data_medu)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7149   0.7227   0.7227   0.7409   0.7409  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               1.15268    0.20943   5.504 3.71e-08 ***
## momedubiHigher education  0.05668    0.28098   0.202     0.84    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 311.39  on 285  degrees of freedom
## Residual deviance: 311.34  on 284  degrees of freedom
## AIC: 315.34
## 
## Number of Fisher Scoring iterations: 4

round(exp(coef(ea_medu)), digits = 2)

##              (Intercept) momedubiHigher education 
##                     3.17                     1.06

Образование матери незначимо

pR2(ea_medu)

## fitting null model for pseudo-r2

##           llh       llhNull            G2      McFadden          r2ML 
## -1.556722e+02 -1.556925e+02  4.065339e-02  1.305566e-04  1.421346e-04 
##          r2CU 
##  2.142627e-04

hoslem.test(data_medu$eawu_max0, fitted(ea_medu), g = 10)

## Warning in Ops.factor(1, y): '-' not meaningful for factors

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_medu$eawu_max0, fitted(ea_medu)
## X-squared = 286, df = 8, p-value < 2.2e-16

roc_ea_medu1 <- predict(ea_medu, newdata = data_medu, type = "response")
roc_ea_medu2 <- roc(data_medu$eawu_max0 ~ roc_ea_medu1, plot = TRUE, print.auc = T)

## Setting levels: control = Low aspirations - College or Work, case = High aspirations - University

## Setting direction: controls < cases

EA + dadedubi

data_dedu <- df1[c("eawu_max0", "dadedubi")]
data_dedu <- na.omit(data_dedu)

ea_dedu <- glm(eawu_max0 ~ dadedubi, data = data_dedu, family = "binomial")
summary(ea_dedu)

## 
## Call:
## glm(formula = eawu_max0 ~ dadedubi, family = "binomial", data = data_dedu)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7890   0.6715   0.6715   0.7625   0.7625  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                1.0866     0.2185   4.972 6.62e-07 ***
## dadedubiHigher education   0.2882     0.3236   0.891    0.373    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 235.82  on 219  degrees of freedom
## Residual deviance: 235.02  on 218  degrees of freedom
## AIC: 239.02
## 
## Number of Fisher Scoring iterations: 4

round(exp(coef(ea_dedu)), digits = 2)

##              (Intercept) dadedubiHigher education 
##                     2.96                     1.33

Образование отца незначимо

hoslem.test(data_dedu$eawu_max0, fitted(ea_dedu), g = 10)

## Warning in Ops.factor(1, y): '-' not meaningful for factors

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_dedu$eawu_max0, fitted(ea_dedu)
## X-squared = 220, df = 8, p-value < 2.2e-16

roc_ea_dedu1 <- predict(ea_dedu, newdata = data_dedu, type = "response")
roc_ea_dedu2 <- roc(data_dedu$eawu_max0 ~ roc_ea_dedu1, plot = TRUE, print.auc = T)

## Setting levels: control = Low aspirations - College or Work, case = High aspirations - University

## Setting direction: controls < cases

EA + imom

data_imom <- df1[c("eawu_max0", "iseimom")]
data_imom <- na.omit(data_imom)

ea_imom <- glm(eawu_max0 ~ iseimom, data = data_imom, family = "binomial")
summary(ea_imom)

## 
## Call:
## glm(formula = eawu_max0 ~ iseimom, family = "binomial", data = data_imom)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8683   0.6195   0.6961   0.7615   0.8653  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.652930   0.419456   1.557    0.120
## iseimom     0.013643   0.009624   1.418    0.156
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 316.11  on 294  degrees of freedom
## Residual deviance: 314.06  on 293  degrees of freedom
## AIC: 318.06
## 
## Number of Fisher Scoring iterations: 4

round(exp(coef(ea_imom)), digits = 2)

## (Intercept)     iseimom 
##        1.92        1.01

ISEI мамы незначимо

hoslem.test(data_imom$eawu_max0, fitted(ea_imom), g = 10)

## Warning in Ops.factor(1, y): '-' not meaningful for factors

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_imom$eawu_max0, fitted(ea_imom)
## X-squared = NaN, df = 8, p-value = NA

p-value = NA

roc_ea_imom1 <- predict(ea_imom, newdata = data_imom, type = "response")
roc_ea_imom2 <- roc(data_imom$eawu_max0 ~ roc_ea_imom1, plot = TRUE, print.auc = T)

## Setting levels: control = Low aspirations - College or Work, case = High aspirations - University

## Setting direction: controls < cases

EA + idad

data_idad <- df1[c("eawu_max0", "iseidad")]
data_idad <- na.omit(data_idad)

ea_idad <- glm(eawu_max0 ~ iseidad, data = data_idad, family = "binomial")
summary(ea_idad)

## 
## Call:
## glm(formula = eawu_max0 ~ iseidad, family = "binomial", data = data_idad)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9674   0.5279   0.6501   0.8252   1.0056  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -0.18627    0.51177  -0.364  0.71588   
## iseidad      0.03024    0.01160   2.606  0.00916 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 277.82  on 249  degrees of freedom
## Residual deviance: 270.65  on 248  degrees of freedom
## AIC: 274.65
## 
## Number of Fisher Scoring iterations: 4

round(exp(coef(ea_idad)), digits = 2)

## (Intercept)     iseidad 
##        0.83        1.03

ISEI Отца значимо Чем выше статус работы отца, тем больше шансов пойти в университет

hoslem.test(data_idad$eawu_max0, fitted(ea_idad), g = 10)

## Warning in Ops.factor(1, y): '-' not meaningful for factors

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_idad$eawu_max0, fitted(ea_idad)
## X-squared = 250, df = 8, p-value < 2.2e-16

НО ЭТОЙ МОДЕЛИ НЕЛЬЗЯ ДОВЕРЯТЬ! Как и всем.

roc_ea_idad1 <- predict(ea_idad, newdata = data_idad, type = "response")
roc_ea_idad2 <- roc(data_idad$eawu_max0 ~ roc_ea_idad1, plot = TRUE, print.auc = T)

## Setting levels: control = Low aspirations - College or Work, case = High aspirations - University

## Setting direction: controls < cases

EA and class

data_classnum <-df1[c("eawu_max0", "classnum")]
data_classnum <- na.omit(data_classnum)

ea_class <- glm(eawu_max0 ~ classnum, data = data_classnum, family = "binomial")
summary(ea_class)

## 
## Call:
## glm(formula = eawu_max0 ~ classnum, family = "binomial", data = data_classnum)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0074  -0.4437   0.5799   0.9508   0.9508  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        0.5596     0.1618   3.458 0.000544 ***
## classnum10 class   1.1381     0.3423   3.324 0.000886 ***
## classnum11 class   1.3122     0.3498   3.751 0.000176 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 381.81  on 338  degrees of freedom
## Residual deviance: 359.38  on 336  degrees of freedom
## AIC: 365.38
## 
## Number of Fisher Scoring iterations: 4

round(exp(coef(ea_class)), digits = 2)

##      (Intercept) classnum10 class classnum11 class 
##             1.75             3.12             3.71

Чем выше класс, тем больше шанс, что чел пойдет в ВУЗ

hoslem.test(data_classnum$eawu_max0, fitted(ea_class), g = 10)

## Warning in Ops.factor(1, y): '-' not meaningful for factors

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_classnum$eawu_max0, fitted(ea_class)
## X-squared = 339, df = 8, p-value < 2.2e-16

roc_ea_class1 <- predict(ea_class, newdata = data_classnum, type = "response")
roc_ea_class2 <- roc(data_classnum$eawu_max0 ~ roc_ea_class1, plot = TRUE, print.auc = T)

## Setting levels: control = Low aspirations - College or Work, case = High aspirations - University

## Setting direction: controls < cases

BLREGRESSION: MAX EA summary without interacitons

export_summs(ea_gen, ea_gpa, ea_age, ea_mig, ea_medu, ea_dedu, ea_imom, ea_idad, ea_class, scale = TRUE)

	Model 1	Model 2	Model 3	Model 4	Model 5	Model 6	Model 7	Model 8	Model 9
(Intercept)	0.80 ***	1.27 ***	0.94 **	0.98 ***	1.15 ***	1.09 ***	1.24 ***	1.17 ***	0.56 ***
	(0.17)	(0.15)	(0.31)	(0.16)	(0.21)	(0.22)	(0.14)	(0.15)	(0.16)
gender	0.58 *
	(0.25)
gpa		0.96 ***
		(0.17)
agesch0			0.32
			(0.41)
mig0				0.30
				(0.26)
momedubi					0.06
					(0.28)
dadedubi						0.29
						(0.32)
iseimom							0.20
							(0.14)
iseidad								0.41 **
								(0.16)
classnum10 class									1.14 ***
									(0.34)
classnum11 class									1.31 ***
									(0.35)
N	335	327	136	332	286	220	295	250	339
AIC	378.19	334.71	154.13	376.04	315.34	239.02	318.06	274.65	365.38
BIC	385.82	342.29	159.95	383.65	322.66	245.81	325.44	281.70	376.86
Pseudo R2	0.02	0.18	0.01	0.01	0.00	0.01	0.01	0.04	0.09
All continuous predictors are mean-centered and scaled by 1 standard deviation. * p < 0.001; p < 0.01; * p < 0.05.

# install.packages("gtsummary")
# library(gtsummary)
# tbl_regression(ea_age, exponentiate = TRUE)

BINARY LOGISTIC REGRESSION: interactions

migcntrl MAX EA

data_migcntrl <- df1 [c("eawu_max0","gender", "classnum", "mig0" , "agesch0")]
data_migcntrl <- na.omit(data_migcntrl)

migcntrl <- glm(eawu_max0 ~ gender + classnum + mig0, data = data_migcntrl, family = "binomial")
summary(migcntrl)

## 
## Call:
## glm(formula = eawu_max0 ~ gender + classnum + mig0, family = "binomial", 
##     data = data_migcntrl)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0941   0.4825   0.6466   0.6895   1.0173  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)
## (Intercept)        0.3889     0.4695   0.828    0.407
## genderGirl         0.6331     0.4379   1.446    0.148
## classnum10 class   0.7764     0.5304   1.464    0.143
## classnum11 class   0.7590     0.5683   1.335    0.182
## mig0Migrant        0.2934     0.4533   0.647    0.517
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 138.00  on 129  degrees of freedom
## Residual deviance: 132.56  on 125  degrees of freedom
## AIC: 142.56
## 
## Number of Fisher Scoring iterations: 4

round(exp(coef(migcntrl)), digits = 2)

##      (Intercept)       genderGirl classnum10 class classnum11 class 
##             1.48             1.88             2.17             2.14 
##      mig0Migrant 
##             1.34

При добавлении контрольных переменных миграционный опыт начинает проявлять значимость

vif(migcntrl)

##              GVIF Df GVIF^(1/(2*Df))
## gender   1.023063  1        1.011466
## classnum 1.004283  2        1.001069
## mig0     1.024712  1        1.012281

No correlation

pR2(migcntrl)

## fitting null model for pseudo-r2

##          llh      llhNull           G2     McFadden         r2ML         r2CU 
## -66.27881682 -69.00072729   5.44382093   0.03944756   0.04101088   0.06270003

hoslem.test(data_migcntrl$eawu_max0, fitted(migcntrl), g = 10)

## Warning in Ops.factor(1, y): '-' not meaningful for factors

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_migcntrl$eawu_max0, fitted(migcntrl)
## X-squared = 130, df = 8, p-value < 2.2e-16

roc_migcntrl1 <- predict(migcntrl, newdata = data_migcntrl, type = "response")
roc_migcntrl2 <- roc(data_migcntrl$eawu_max0 ~ roc_migcntrl1, plot = TRUE, print.auc = T)

## Setting levels: control = Low aspirations - College or Work, case = High aspirations - University

## Setting direction: controls < cases

ea_migmomedu MAX EA

data_migmomedu <- df1 [c("eawu_max0","gender", "classnum", "mig0", "momedubi")]
data_migmomedu <- na.omit(data_migmomedu)

ea_migmomedu <- glm(eawu_max0 ~ gender + classnum + mig0 * momedubi, data = data_migmomedu, family = "binomial")
summary(ea_migmomedu)

## 
## Call:
## glm(formula = eawu_max0 ~ gender + classnum + mig0 * momedubi, 
##     family = "binomial", data = data_migmomedu)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5224   0.3205   0.5441   0.7157   1.2878  
## 
## Coefficients:
##                                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                           -0.2559     0.3699  -0.692 0.489098    
## genderGirl                             0.6430     0.3165   2.031 0.042219 *  
## classnum10 class                       1.2915     0.4096   3.153 0.001613 ** 
## classnum11 class                       1.4871     0.4094   3.632 0.000281 ***
## mig0Migrant                            1.2645     0.5028   2.515 0.011897 *  
## momedubiHigher education               0.5332     0.3839   1.389 0.164855    
## mig0Migrant:momedubiHigher education  -1.1936     0.6549  -1.823 0.068368 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 294.95  on 272  degrees of freedom
## Residual deviance: 264.37  on 266  degrees of freedom
## AIC: 278.37
## 
## Number of Fisher Scoring iterations: 4

round(exp(coef(ea_migmomedu)), digits = 2)

##                          (Intercept)                           genderGirl 
##                                 0.77                                 1.90 
##                     classnum10 class                     classnum11 class 
##                                 3.64                                 4.42 
##                          mig0Migrant             momedubiHigher education 
##                                 3.54                                 1.70 
## mig0Migrant:momedubiHigher education 
##                                 0.30

Мигранты с высшим образованием матери более вероятно пойдут в универ

vif(ea_migmomedu)

##                   GVIF Df GVIF^(1/(2*Df))
## gender        1.076152  1        1.037377
## classnum      1.018325  2        1.004550
## mig0          2.453925  1        1.566501
## momedubi      1.557608  1        1.248042
## mig0:momedubi 3.055078  1        1.747878

pR2(ea_migmomedu)

## fitting null model for pseudo-r2

##          llh      llhNull           G2     McFadden         r2ML         r2CU 
## -132.1837217 -147.4757309   30.5840183    0.1036917    0.1059820    0.1604468

hoslem.test(data_migmomedu$eawu_max0, fitted(ea_migmomedu), g = 10)

## Warning in Ops.factor(1, y): '-' not meaningful for factors

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_migmomedu$eawu_max0, fitted(ea_migmomedu)
## X-squared = 273, df = 8, p-value < 2.2e-16

roc_ea_migmomedu1 <- predict(ea_migmomedu, newdata = data_migmomedu, type = "response")
roc_ea_migmomedu2 <- roc(data_migmomedu$eawu_max0 ~ roc_ea_migmomedu1, plot = TRUE, print.auc = T)

## Setting levels: control = Low aspirations - College or Work, case = High aspirations - University

## Setting direction: controls < cases

ea_migdadedu MAX EA

data_migdadedu <- df1 [c("eawu_max0", "gender", "classnum", "mig0", "dadedubi")]
data_migdadedu <- na.omit(data_migdadedu)

ea_migdadedu <- glm(eawu_max0 ~ gender + classnum + mig0 * dadedubi, data = df1, family = "binomial")
summary(ea_migdadedu)

## 
## Call:
## glm(formula = eawu_max0 ~ gender + classnum + mig0 * dadedubi, 
##     family = "binomial", data = df1)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4740   0.3099   0.4861   0.6854   1.2757  
## 
## Coefficients:
##                                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                           -0.2282     0.3964  -0.576 0.564843    
## genderGirl                             0.6207     0.3625   1.712 0.086856 .  
## classnum10 class                       1.0041     0.4509   2.227 0.025960 *  
## classnum11 class                       1.6835     0.4933   3.412 0.000644 ***
## mig0Migrant                            0.9363     0.4928   1.900 0.057452 .  
## dadedubiHigher education               0.6210     0.4433   1.401 0.161259    
## mig0Migrant:dadedubiHigher education  -0.5141     0.7504  -0.685 0.493268    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 223.81  on 210  degrees of freedom
## Residual deviance: 200.51  on 204  degrees of freedom
##   (148 observations deleted due to missingness)
## AIC: 214.51
## 
## Number of Fisher Scoring iterations: 5

round(exp(coef(ea_migdadedu)), digits = 2)

##                          (Intercept)                           genderGirl 
##                                 0.80                                 1.86 
##                     classnum10 class                     classnum11 class 
##                                 2.73                                 5.38 
##                          mig0Migrant             dadedubiHigher education 
##                                 2.55                                 1.86 
## mig0Migrant:dadedubiHigher education 
##                                 0.60

pR2(ea_migdadedu)

## fitting null model for pseudo-r2

##          llh      llhNull           G2     McFadden         r2ML         r2CU 
## -100.2569905 -111.9070347   23.3000885    0.1041047    0.1045483    0.1599094

hoslem.test(data_migdadedu$eawu_max0, fitted(ea_migdadedu), g = 10)

## Warning in Ops.factor(1, y): '-' not meaningful for factors

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_migdadedu$eawu_max0, fitted(ea_migdadedu)
## X-squared = 211, df = 8, p-value < 2.2e-16

# plot(ea_migdadedu)

roc_ea_migdadedu <- predict(ea_migdadedu, newdata = data_migdadedu, type = "response")
roc_ea_migdadedu <- roc(data_migdadedu$eawu_max0 ~ roc_ea_migdadedu, plot = TRUE, print.auc = T)

## Setting levels: control = Low aspirations - College or Work, case = High aspirations - University

## Setting direction: controls < cases

Sensitivity - True Positive Rate Specificity - False Positive Rate

ea_migimom MAX EA

data_migimom <- df1 [c("eawu_max0","gender", "classnum", "mig0", "iseimom")]
data_migimom <- na.omit(data_migimom)

ea_migimom <- glm(eawu_max0 ~ gender + classnum + mig0 * iseimom, data = data_migimom, family = "binomial")
summary(ea_migimom)

## 
## Call:
## glm(formula = eawu_max0 ~ gender + classnum + mig0 * iseimom, 
##     family = "binomial", data = data_migimom)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4648   0.3223   0.5677   0.6980   1.3788  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -0.82355    0.62941  -1.308 0.190723    
## genderGirl           0.68354    0.30571   2.236 0.025361 *  
## classnum10 class     0.84392    0.37471   2.252 0.024308 *  
## classnum11 class     1.43746    0.41149   3.493 0.000477 ***
## mig0Migrant          1.52746    0.94224   1.621 0.104995    
## iseimom              0.02261    0.01310   1.726 0.084291 .  
## mig0Migrant:iseimom -0.02013    0.02195  -0.917 0.359144    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 300.09  on 282  degrees of freedom
## Residual deviance: 273.70  on 276  degrees of freedom
## AIC: 287.7
## 
## Number of Fisher Scoring iterations: 4

round(exp(coef(ea_migimom)), digits = 2)

##         (Intercept)          genderGirl    classnum10 class    classnum11 class 
##                0.44                1.98                2.33                4.21 
##         mig0Migrant             iseimom mig0Migrant:iseimom 
##                4.61                1.02                0.98

pR2(ea_migimom)

## fitting null model for pseudo-r2

##           llh       llhNull            G2      McFadden          r2ML 
## -136.85096401 -150.04592407   26.38992012    0.08793948    0.08903482 
##          r2CU 
##    0.13620528

hoslem.test(data_migimom$eawu_max0, fitted(ea_migimom), g = 10)

## Warning in Ops.factor(1, y): '-' not meaningful for factors

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_migimom$eawu_max0, fitted(ea_migimom)
## X-squared = 283, df = 8, p-value < 2.2e-16

roc_ea_migimom <- predict(ea_migimom, newdata = data_migimom, type = "response")
roc_ea_migimom <- roc(data_migimom$eawu_max0 ~ roc_ea_migimom, plot = TRUE, print.auc = T)

## Setting levels: control = Low aspirations - College or Work, case = High aspirations - University

## Setting direction: controls < cases

ea_migidad MAX EA

data_migidad <- df1 [c("eawu_max0","gender", "classnum", "mig0", "iseidad")]
data_migidad <- na.omit(data_migidad)

ea_migidad <- glm(eawu_max0 ~ gender + classnum + mig0 * iseidad, data = data_migidad, family = "binomial")
summary(ea_migidad)

## 
## Call:
## glm(formula = eawu_max0 ~ gender + classnum + mig0 * iseidad, 
##     family = "binomial", data = data_migidad)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4111   0.2442   0.4754   0.7670   1.6233  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.87494    0.81218  -2.309 0.020970 *  
## genderGirl           0.80064    0.34008   2.354 0.018560 *  
## classnum10 class     1.27282    0.43572   2.921 0.003487 ** 
## classnum11 class     1.74727    0.49180   3.553 0.000381 ***
## mig0Migrant          1.34476    1.11999   1.201 0.229871    
## iseidad              0.04346    0.01788   2.431 0.015065 *  
## mig0Migrant:iseidad -0.02033    0.02551  -0.797 0.425472    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 266.54  on 241  degrees of freedom
## Residual deviance: 230.61  on 235  degrees of freedom
## AIC: 244.61
## 
## Number of Fisher Scoring iterations: 5

round(exp(coef(ea_migidad)), digits = 2)

##         (Intercept)          genderGirl    classnum10 class    classnum11 class 
##                0.15                2.23                3.57                5.74 
##         mig0Migrant             iseidad mig0Migrant:iseidad 
##                3.84                1.04                0.98

pR2(ea_migidad)

## fitting null model for pseudo-r2

##          llh      llhNull           G2     McFadden         r2ML         r2CU 
## -115.3061999 -133.2690557   35.9257117    0.1347864    0.1379598    0.2066523

hoslem.test(data_migidad$eawu_max0, fitted(ea_migidad), g = 10)

## Warning in Ops.factor(1, y): '-' not meaningful for factors

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_migidad$eawu_max0, fitted(ea_migidad)
## X-squared = 242, df = 8, p-value < 2.2e-16

roc_ea_migidad <- predict(ea_migidad, newdata = data_migidad, type = "response")
roc_ea_migidad <- roc(data_migidad$eawu_max0 ~ roc_ea_migidad, plot = TRUE, print.auc = T)

## Setting levels: control = Low aspirations - College or Work, case = High aspirations - University

## Setting direction: controls < cases

All models: SES with interaction

export_summs(migcntrl, ea_migmomedu, ea_migdadedu, ea_migimom, ea_migidad,  scale = TRUE)

	Model 1	Model 2	Model 3	Model 4	Model 5
(Intercept)	0.39	-0.26	-0.23	0.14	0.07
	(0.47)	(0.37)	(0.40)	(0.28)	(0.30)
gender	0.63	0.64 *	0.62	0.68 *	0.80 *
	(0.44)	(0.32)	(0.36)	(0.31)	(0.34)
classnum10 class	0.78	1.29 **	1.00 *	0.84 *	1.27 **
	(0.53)	(0.41)	(0.45)	(0.37)	(0.44)
classnum11 class	0.76	1.49 ***	1.68 ***	1.44 ***	1.75 ***
	(0.57)	(0.41)	(0.49)	(0.41)	(0.49)
mig0	0.29	1.26 *	0.94	0.67 *	0.44
	(0.45)	(0.50)	(0.49)	(0.33)	(0.34)
momedubi		0.53
		(0.38)
mig0:momedubi		-1.19
		(0.65)
dadedubi			0.62
			(0.44)
mig0:dadedubi			-0.51
			(0.75)
iseimom				0.33
				(0.19)
mig0:iseimom				-0.30
				(0.32)
iseidad					0.59 *
					(0.24)
mig0:iseidad					-0.27
					(0.34)
N	130	273	211	283	242
AIC	142.56	278.37	214.51	287.70	244.61
BIC	156.90	303.63	237.98	313.22	269.03
Pseudo R2	0.06	0.16	0.16	0.14	0.21
All continuous predictors are mean-centered and scaled by 1 standard deviation. * p < 0.001; p < 0.01; * p < 0.05.

TRY ANOTHER ISEI AND ANOTHER EDU

ISEI of parents

df_iseipar[is.na(df_iseipar)] <- 0

df_iseipar <- as.matrix(df_iseipar)

df1$iseipar <- rowMaxs(df_iseipar)

#высший индекс ISEI среди родителей
table(df1$iseipar)

## 
##  0 16 20 27 31 32 33 34 35 36 37 38 40 41 42 43 45 46 47 49 50 51 53 54 55 58 
## 30  1  2 15 15  8  1 16  2 26  5 16  2 18  4  3  2 10 23 24  1  1  7  5  2  1 
## 59 60 62 63 64 65 66 69 
##  5  1 46 15  2 15 32  3

data_migipar <- df1 [c("eawu_max0","gender", "classnum", "mig0", "iseipar")]
data_migipar <- na.omit(data_migipar)

ea_migipar <- glm(eawu_max0 ~ gender + classnum + mig0 * iseipar, data = data_migipar, family = "binomial")
summary(ea_migipar)

## 
## Call:
## glm(formula = eawu_max0 ~ gender + classnum + mig0 * iseipar, 
##     family = "binomial", data = data_migipar)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4180   0.3324   0.5391   0.7498   1.5195  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -0.775755   0.481277  -1.612 0.106991    
## genderGirl           0.651931   0.278823   2.338 0.019379 *  
## classnum10 class     1.049340   0.352784   2.974 0.002935 ** 
## classnum11 class     1.490809   0.383710   3.885 0.000102 ***
## mig0Migrant          1.108760   0.763128   1.453 0.146247    
## iseipar              0.018648   0.009314   2.002 0.045270 *  
## mig0Migrant:iseipar -0.012703   0.016366  -0.776 0.437660    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 359.93  on 323  degrees of freedom
## Residual deviance: 325.01  on 317  degrees of freedom
## AIC: 339.01
## 
## Number of Fisher Scoring iterations: 4

round(exp(coef(ea_migipar)), digits = 2)

##         (Intercept)          genderGirl    classnum10 class    classnum11 class 
##                0.46                1.92                2.86                4.44 
##         mig0Migrant             iseipar mig0Migrant:iseipar 
##                3.03                1.02                0.99

Education of parents

df_edupar[is.na(df_edupar)] <- 0

df_edupar <- as.matrix(df_edupar)

df1$edupar <- rowMaxs(df_edupar)

#высшее образование среди родителей
table(df1$edupar)

## 
##   0   1   2   3   4   5   6   7 
##   2  25  46  15 138  23  71  39

data_migedupar <- df1 [c("eawu_max0","gender", "classnum", "mig0", "edupar")]
data_migedupar <- na.omit(data_migedupar)

ea_migedupar <- glm(eawu_max0 ~ gender + classnum + mig0 * edupar, data = data_migedupar, family = "binomial")
summary(ea_migedupar)

## 
## Call:
## glm(formula = eawu_max0 ~ gender + classnum + mig0 * edupar, 
##     family = "binomial", data = data_migedupar)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4132   0.2606   0.5480   0.8122   1.2048  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -0.2651     0.5402  -0.491  0.62351    
## genderGirl           0.6351     0.2778   2.286  0.02226 *  
## classnum10 class     1.0038     0.3530   2.844  0.00446 ** 
## classnum11 class     1.5206     0.3850   3.950 7.82e-05 ***
## mig0Migrant          1.9844     0.8279   2.397  0.01653 *  
## edupar               0.0744     0.1056   0.705  0.48102    
## mig0Migrant:edupar  -0.3292     0.1717  -1.917  0.05523 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 359.93  on 323  degrees of freedom
## Residual deviance: 324.95  on 317  degrees of freedom
## AIC: 338.95
## 
## Number of Fisher Scoring iterations: 4

round(exp(coef(ea_migedupar)), digits = 2)

##        (Intercept)         genderGirl   classnum10 class   classnum11 class 
##               0.77               1.89               2.73               4.57 
##        mig0Migrant             edupar mig0Migrant:edupar 
##               7.27               1.08               0.72

export_summs(migcntrl, ea_migipar, ea_migedupar,  scale = TRUE)

	Model 1	Model 2	Model 3
(Intercept)	0.39	0.06	0.05
	(0.47)	(0.25)	(0.25)
gender	0.63	0.65 *	0.64 *
	(0.44)	(0.28)	(0.28)
classnum10 class	0.78	1.05 **	1.00 **
	(0.53)	(0.35)	(0.35)
classnum11 class	0.76	1.49 ***	1.52 ***
	(0.57)	(0.38)	(0.38)
mig0	0.29	0.54	0.58 *
	(0.45)	(0.29)	(0.29)
iseipar		0.33 *
		(0.16)
mig0:iseipar		-0.22
		(0.29)
edupar			0.13
			(0.18)
mig0:edupar			-0.57
			(0.30)
N	130	324	324
AIC	142.56	339.01	338.95
BIC	156.90	365.48	365.42
Pseudo R2	0.06	0.15	0.15
All continuous predictors are mean-centered and scaled by 1 standard deviation. * p < 0.001; p < 0.01; * p < 0.05.

migcntrl MIN EA

data_migcntrl_mea <- df1 [c("eawu_min0","gender", "classnum", "mig0" )]
data_migcntrl_mea <- na.omit(data_migcntrl_mea)

migcntrl_mea <- glm(eawu_min0 ~ gender + classnum + mig0, data = data_migcntrl_mea, family = "binomial")
summary(migcntrl_mea)

## 
## Call:
## glm(formula = eawu_min0 ~ gender + classnum + mig0, family = "binomial", 
##     data = data_migcntrl_mea)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2704  -1.1654   0.6710   0.7602   1.1894  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -0.02829    0.24253  -0.117   0.9071    
## genderGirl        0.52933    0.26213   2.019   0.0434 *  
## classnum10 class  0.76822    0.31807   2.415   0.0157 *  
## classnum11 class  1.40468    0.35758   3.928 8.56e-05 ***
## mig0Migrant       0.59260    0.27135   2.184   0.0290 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 382.87  on 323  degrees of freedom
## Residual deviance: 355.93  on 319  degrees of freedom
## AIC: 365.93
## 
## Number of Fisher Scoring iterations: 4

round(exp(coef(migcntrl_mea)), digits = 2)

##      (Intercept)       genderGirl classnum10 class classnum11 class 
##             0.97             1.70             2.16             4.07 
##      mig0Migrant 
##             1.81

При добавлении контрольных переменных миграционный опыт начинает проявлять значимость

pR2(migcntrl_mea)

## fitting null model for pseudo-r2

##           llh       llhNull            G2      McFadden          r2ML 
## -177.96654782 -191.43288779   26.93267994    0.07034497    0.07976440 
##          r2CU 
##    0.11506056

hoslem.test(data_migcntrl_mea$eawu_min0, fitted(migcntrl_mea), g = 10)

## Warning in Ops.factor(1, y): '-' not meaningful for factors

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_migcntrl_mea$eawu_min0, fitted(migcntrl_mea)
## X-squared = 324, df = 8, p-value < 2.2e-16

roc_migcntrl_mea <- predict(migcntrl_mea, newdata = data_migcntrl_mea, type = "response")
roc_migcntrl_mea <- roc(data_migcntrl_mea$eawu_min0 ~ roc_migcntrl_mea, plot = TRUE, print.auc = T)

## Setting levels: control = 0, case = 1

## Setting direction: controls < cases

ea_migmomedu MIN EA (mea)

data_migmomedu_mea <- df1 [c("eawu_min0","gender", "classnum", "mig0" , "momedubi")]
data_migmomedu_mea <- na.omit(data_migmomedu_mea)

ea_migmomedu_mea <- glm(eawu_min0 ~ gender + classnum + mig0*momedubi, data = data_migmomedu_mea, family = "binomial")
summary(ea_migmomedu_mea)

## 
## Call:
## glm(formula = eawu_min0 ~ gender + classnum + mig0 * momedubi, 
##     family = "binomial", data = data_migmomedu_mea)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4567  -1.0811   0.5714   0.7416   1.2769  
## 
## Coefficients:
##                                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                           -0.2309     0.3615  -0.639 0.523042    
## genderGirl                             0.5795     0.3029   1.913 0.055739 .  
## classnum10 class                       1.0427     0.3727   2.797 0.005153 ** 
## classnum11 class                       1.4632     0.3934   3.720 0.000199 ***
## mig0Migrant                            1.1557     0.4799   2.408 0.016031 *  
## momedubiHigher education               0.3387     0.3688   0.918 0.358475    
## mig0Migrant:momedubiHigher education  -0.9500     0.6263  -1.517 0.129294    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 310.82  on 272  degrees of freedom
## Residual deviance: 282.80  on 266  degrees of freedom
## AIC: 296.8
## 
## Number of Fisher Scoring iterations: 4

round(exp(coef(ea_migmomedu_mea)), digits = 2)

##                          (Intercept)                           genderGirl 
##                                 0.79                                 1.79 
##                     classnum10 class                     classnum11 class 
##                                 2.84                                 4.32 
##                          mig0Migrant             momedubiHigher education 
##                                 3.18                                 1.40 
## mig0Migrant:momedubiHigher education 
##                                 0.39

roc_ea_migmomedu_mea <- predict(ea_migmomedu_mea, newdata = data_migmomedu_mea, type = "response")
roc_ea_migmomedu_mea <- roc(data_migmomedu_mea$eawu_min0 ~ roc_ea_migmomedu_mea, plot = TRUE, print.auc = T)

## Setting levels: control = 0, case = 1

## Setting direction: controls < cases

ea_migdadedu MIN EA (mea)

data_migdadedu_mea <- df1 [c("eawu_min0","gender", "classnum", "mig0" , "dadedubi")]
data_migdadedu_mea <- na.omit(data_migdadedu_mea)

ea_migdadedu_mea <- glm(eawu_min0 ~ gender + classnum + mig0*dadedubi, data = data_migdadedu_mea, family = "binomial")
summary(ea_migdadedu_mea)

## 
## Call:
## glm(formula = eawu_min0 ~ gender + classnum + mig0 * dadedubi, 
##     family = "binomial", data = data_migdadedu_mea)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4217   0.3309   0.5383   0.7632   1.2475  
## 
## Coefficients:
##                                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                           -0.1633     0.3874  -0.421  0.67341    
## genderGirl                             0.4736     0.3479   1.361  0.17343    
## classnum10 class                       0.7741     0.4222   1.834  0.06670 .  
## classnum11 class                       1.5482     0.4654   3.326  0.00088 ***
## mig0Migrant                            1.0192     0.4847   2.103  0.03550 *  
## dadedubiHigher education               0.5112     0.4249   1.203  0.22891    
## mig0Migrant:dadedubiHigher education  -0.6558     0.7210  -0.910  0.36303    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 233.38  on 210  degrees of freedom
## Residual deviance: 213.17  on 204  degrees of freedom
## AIC: 227.17
## 
## Number of Fisher Scoring iterations: 4

round(exp(coef(ea_migdadedu_mea)), digits = 2)

##                          (Intercept)                           genderGirl 
##                                 0.85                                 1.61 
##                     classnum10 class                     classnum11 class 
##                                 2.17                                 4.70 
##                          mig0Migrant             dadedubiHigher education 
##                                 2.77                                 1.67 
## mig0Migrant:dadedubiHigher education 
##                                 0.52

roc_ea_migdadedu_mea <- predict(ea_migdadedu_mea, newdata = data_migdadedu_mea, type = "response")
roc_ea_migdadedu_mea <- roc(data_migdadedu_mea$eawu_min0 ~ roc_ea_migdadedu_mea, plot = TRUE, print.auc = T)

## Setting levels: control = 0, case = 1

## Setting direction: controls < cases

ea_migimom MIN EA (mea)

data_migimom_mea <- df1 [c("eawu_min0","gender", "classnum", "mig0" , "iseimom")]
data_migimom_mea <- na.omit(data_migimom_mea)

ea_migimom_mea <- glm(eawu_min0 ~ gender + classnum + mig0*iseimom, data = data_migimom_mea, family = "binomial")
summary(ea_migimom_mea)

## 
## Call:
## glm(formula = eawu_min0 ~ gender + classnum + mig0 * iseimom, 
##     family = "binomial", data = data_migimom_mea)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3852  -0.3720   0.6038   0.7951   1.3494  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -0.66743    0.60244  -1.108 0.267915    
## genderGirl           0.59177    0.29158   2.029 0.042408 *  
## classnum10 class     0.59891    0.34474   1.737 0.082336 .  
## classnum11 class     1.41339    0.39490   3.579 0.000345 ***
## mig0Migrant          1.41740    0.90322   1.569 0.116583    
## iseimom              0.01698    0.01234   1.376 0.168765    
## mig0Migrant:iseimom -0.01654    0.02082  -0.794 0.426906    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 318.83  on 282  degrees of freedom
## Residual deviance: 294.32  on 276  degrees of freedom
## AIC: 308.32
## 
## Number of Fisher Scoring iterations: 4

round(exp(coef(ea_migimom_mea)), digits = 2)

##         (Intercept)          genderGirl    classnum10 class    classnum11 class 
##                0.51                1.81                1.82                4.11 
##         mig0Migrant             iseimom mig0Migrant:iseimom 
##                4.13                1.02                0.98

ea_migidad MIN EA (mea)

data_migidad_mea <- df1 [c("eawu_min0","gender", "classnum", "mig0" , "iseidad")]
data_migidad_mea <- na.omit(data_migidad_mea)

ea_migidad_mea <- glm(eawu_min0 ~ gender + classnum + mig0*iseidad, data = data_migidad_mea, family = "binomial")
summary(ea_migidad_mea)

## 
## Call:
## glm(formula = eawu_min0 ~ gender + classnum + mig0 * iseidad, 
##     family = "binomial", data = data_migidad_mea)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3167  -1.0575   0.5256   0.8011   1.5251  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.44293    0.76574  -1.884 0.059517 .  
## genderGirl           0.68705    0.32397   2.121 0.033946 *  
## classnum10 class     1.07079    0.40218   2.662 0.007757 ** 
## classnum11 class     1.61601    0.45912   3.520 0.000432 ***
## mig0Migrant          1.12684    1.07375   1.049 0.293975    
## iseidad              0.03274    0.01663   1.969 0.048944 *  
## mig0Migrant:iseidad -0.01535    0.02408  -0.637 0.523833    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 277.53  on 241  degrees of freedom
## Residual deviance: 247.67  on 235  degrees of freedom
## AIC: 261.67
## 
## Number of Fisher Scoring iterations: 4

round(exp(coef(ea_migidad_mea)), digits = 2)

##         (Intercept)          genderGirl    classnum10 class    classnum11 class 
##                0.24                1.99                2.92                5.03 
##         mig0Migrant             iseidad mig0Migrant:iseidad 
##                3.09                1.03                0.98

All models: SES with interaction MIN EA

export_summs(migcntrl_mea, ea_migdadedu_mea, ea_migmomedu_mea, ea_migimom_mea, ea_migidad_mea,  scale = TRUE)

	Model 1	Model 2	Model 3	Model 4	Model 5
(Intercept)	-0.03	-0.16	-0.23	0.06	0.02
	(0.24)	(0.39)	(0.36)	(0.27)	(0.29)
gender	0.53 *	0.47	0.58	0.59 *	0.69 *
	(0.26)	(0.35)	(0.30)	(0.29)	(0.32)
classnum10 class	0.77 *	0.77	1.04 **	0.60	1.07 **
	(0.32)	(0.42)	(0.37)	(0.34)	(0.40)
classnum11 class	1.40 ***	1.55 ***	1.46 ***	1.41 ***	1.62 ***
	(0.36)	(0.47)	(0.39)	(0.39)	(0.46)
mig0	0.59 *	1.02 *	1.16 *	0.71 *	0.44
	(0.27)	(0.48)	(0.48)	(0.31)	(0.33)
dadedubi		0.51
		(0.42)
mig0:dadedubi		-0.66
		(0.72)
momedubi			0.34
			(0.37)
mig0:momedubi			-0.95
			(0.63)
iseimom				0.25
				(0.18)
mig0:iseimom				-0.24
				(0.31)
iseidad					0.44 *
					(0.22)
mig0:iseidad					-0.21
					(0.33)
N	324	211	273	283	242
AIC	365.93	227.17	296.80	308.32	261.67
BIC	384.84	250.63	322.07	333.84	286.09
Pseudo R2	0.12	0.14	0.14	0.12	0.17
All continuous predictors are mean-centered and scaled by 1 standard deviation. * p < 0.001; p < 0.01; * p < 0.05.

EA and gpa

gpa * mig

data_gpamig <- df1 [c("eawu_max0","gender", "classnum", "mig0", "gpa")]
data_gpamig <- na.omit(data_gpamig)

ea_gpamig <- glm(eawu_max0 ~ gender + classnum + mig0 * gpa, data = data_gpamig, family = "binomial")
summary(ea_gpamig)

## 
## Call:
## glm(formula = eawu_max0 ~ gender + classnum + mig0 * gpa, family = "binomial", 
##     data = data_gpamig)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.9333   0.1278   0.4634   0.7252   1.6819  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -7.9671     1.6784  -4.747 2.07e-06 ***
## genderGirl         0.4854     0.3035   1.599  0.10971    
## classnum10 class   1.2110     0.3939   3.075  0.00211 ** 
## classnum11 class   1.1504     0.4114   2.796  0.00517 ** 
## mig0Migrant        5.9876     2.2693   2.639  0.00833 ** 
## gpa                2.2211     0.4631   4.796 1.62e-06 ***
## mig0Migrant:gpa   -1.4954     0.6267  -2.386  0.01702 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 348.11  on 310  degrees of freedom
## Residual deviance: 283.21  on 304  degrees of freedom
## AIC: 297.21
## 
## Number of Fisher Scoring iterations: 5

round(exp(coef(ea_gpamig)), digits = 2)

##      (Intercept)       genderGirl classnum10 class classnum11 class 
##             0.00             1.62             3.36             3.16 
##      mig0Migrant              gpa  mig0Migrant:gpa 
##           398.48             9.22             0.22

pR2(ea_gpamig)

## fitting null model for pseudo-r2

##          llh      llhNull           G2     McFadden         r2ML         r2CU 
## -141.6046564 -174.0574372   64.9055617    0.1864487    0.1883609    0.2796727

hoslem.test(data_gpamig$eawu_max0, fitted(ea_gpamig), g = 10)

## Warning in Ops.factor(1, y): '-' not meaningful for factors

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_gpamig$eawu_max0, fitted(ea_gpamig)
## X-squared = 311, df = 8, p-value < 2.2e-16

roc_ea_gpamig <- predict(ea_gpamig, newdata = data_gpamig, type = "response")
roc_ea_gpamig <- roc(data_gpamig$eawu_max0 ~ roc_ea_gpamig, plot = TRUE, print.auc = T)

## Setting levels: control = Low aspirations - College or Work, case = High aspirations - University

## Setting direction: controls < cases

EA and age

mig * age

data_agemig <- df1 [c("eawu_max0","gender", "classnum", "mig0", "agesch0")]
data_agemig <- na.omit(data_agemig)

ea_agemig <- glm(eawu_max0 ~ gender + classnum + mig0 * agesch0, data = data_agemig, family = "binomial")
summary(ea_agemig)

## 
## Call:
## glm(formula = eawu_max0 ~ gender + classnum + mig0 * agesch0, 
##     family = "binomial", data = data_agemig)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1173   0.4741   0.6335   0.7125   1.2311  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)
## (Intercept)           -0.1254     0.7239  -0.173    0.862
## genderGirl             0.6249     0.4403   1.419    0.156
## classnum10 class       0.7583     0.5334   1.422    0.155
## classnum11 class       0.6865     0.5741   1.196    0.232
## mig0Migrant            0.6985     0.7694   0.908    0.364
## agesch01               0.7420     0.7883   0.941    0.347
## mig0Migrant:agesch01  -0.4973     0.9550  -0.521    0.603
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 138.0  on 129  degrees of freedom
## Residual deviance: 131.5  on 123  degrees of freedom
## AIC: 145.5
## 
## Number of Fisher Scoring iterations: 4

round(exp(coef(ea_agemig)), digits = 2)

##          (Intercept)           genderGirl     classnum10 class 
##                 0.88                 1.87                 2.13 
##     classnum11 class          mig0Migrant             agesch01 
##                 1.99                 2.01                 2.10 
## mig0Migrant:agesch01 
##                 0.61

pR2(ea_agemig)

## fitting null model for pseudo-r2

##          llh      llhNull           G2     McFadden         r2ML         r2CU 
## -65.74860380 -69.00072729   6.50424698   0.04713173   0.04880165   0.07461106

hoslem.test(data_agemig$eawu_max0, fitted(ea_agemig), g = 10)

## Warning in Ops.factor(1, y): '-' not meaningful for factors

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data_agemig$eawu_max0, fitted(ea_agemig)
## X-squared = 130, df = 8, p-value < 2.2e-16

data_monthmig <- df1 [c("eawu_max0","gender", "classnum", "mig0", "v48_4_Month")]
data_monthmig <- na.omit(data_monthmig)

ea_monthmig <- glm(eawu_max0 ~ gender + classnum + mig0 * v48_4_Month, data = data_monthmig, family = "binomial")
summary(ea_monthmig)

## 
## Call:
## glm(formula = eawu_max0 ~ gender + classnum + mig0 * v48_4_Month, 
##     family = "binomial", data = data_monthmig)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1011   0.4666   0.6197   0.7065   1.0405  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)
## (Intercept)             0.2847900  1.0000627   0.285    0.776
## genderGirl              0.6513291  0.4390445   1.484    0.138
## classnum10 class        0.7820118  0.5311631   1.472    0.141
## classnum11 class        0.7604847  0.5695970   1.335    0.182
## mig0Migrant             0.2265833  1.0273029   0.221    0.825
## v48_4_Month             0.0007702  0.0073361   0.105    0.916
## mig0Migrant:v48_4_Month 0.0007822  0.0078331   0.100    0.920
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 138.0  on 129  degrees of freedom
## Residual deviance: 132.1  on 123  degrees of freedom
## AIC: 146.1
## 
## Number of Fisher Scoring iterations: 4

round(exp(coef(ea_monthmig)), digits = 2)

##             (Intercept)              genderGirl        classnum10 class 
##                    1.33                    1.92                    2.19 
##        classnum11 class             mig0Migrant             v48_4_Month 
##                    2.14                    1.25                    1.00 
## mig0Migrant:v48_4_Month 
##                    1.00

roc_ea_monthmig <- predict(ea_monthmig, newdata = data_monthmig, type = "response")
roc_ea_monthmig <- roc(data_monthmig$eawu_max0 ~ roc_ea_monthmig, plot = TRUE, print.auc = T)

## Setting levels: control = Low aspirations - College or Work, case = High aspirations - University

## Setting direction: controls < cases

EA + mig analysis

Adelina

27 04 2021

Variabels

Gender

Migration

Educaitonal aspirations

Mom’s education

Dad’s education

Age of arrival

SAMPLE

BINARY LOGISTIC REGRESSION. MAX EA

Models

EA and migration

EA and gender

EA and GPA

EA and age

EA + momedubi

EA + dadedubi

EA + imom

EA + idad

EA and class

BLREGRESSION: MAX EA summary without interacitons

BINARY LOGISTIC REGRESSION: interactions

migcntrl MAX EA

ea_migmomedu MAX EA

ea_migdadedu MAX EA

ea_migimom MAX EA

ea_migidad MAX EA

All models: SES with interaction

TRY ANOTHER ISEI AND ANOTHER EDU

ISEI of parents

Education of parents

migcntrl MIN EA

ea_migmomedu MIN EA (mea)

ea_migdadedu MIN EA (mea)

ea_migimom MIN EA (mea)

ea_migidad MIN EA (mea)

All models: SES with interaction MIN EA

EA and gpa

gpa * mig

EA and age

mig * age