It is a well-known idea that a family's capital may be related to students' achievement at school: a larger number of books gives them the opportunity to learn more (and children in families with higher cultural capital tend to be taught how to learn), while the availability of gadgets, such as tablets and PCs, may increase a student's productivity (they offer many computing functions that are helpful in math, and internet access can contribute to learning as well). However, computers can also be a serious distraction, while books have been successfully serving humanity for centuries without any computer technology.
In this paper the following question is explored: what is more important for students' math achievement nowadays, books or computers?
TIMSS data for Russia is used.
For more details, a summary table and graphs are given below.
# Full working dataset: home possessions, math score, number of books, gender
# and mother's education. Factor scores are attached to it later on.
rus_full <- rus_ds %>%
  select(
    tabl_own = BSBG06A, tabl_shar = BSBG06B, desk = BSBG06C,
    room = BSBG06D, int = BSBG06E, mobile = BSBG06F, game = BSBG06G,
    music = BSBG06H, car = BSBG06I, big_flat = BSBG06J,
    dishwash = BSBG06K, math_ach = BSMMAT01, books = BSBG04,
    gender = ITSEX, momedu = BSBG07A
  )

# Replace every value of `x` that appears in `from` by the entry of `to` at the
# same position; values not listed in `from` (NA included) are left unchanged.
relabel <- function(x, from, to) {
  x <- as.character(x)
  hit <- match(x, from)
  x[!is.na(hit)] <- to[hit[!is.na(hit)]]
  x
}

rus_full$math_ach <- as.numeric(as.character(rus_full$math_ach))

# Number the book categories so that the alphabetical factor order equals the
# substantive order.
rus_full$books <- as.factor(relabel(
  rus_full$books,
  from = c(
    "0–10 books", "11–25 books", "26–100 books",
    "101–200 books", "More than 200"
  ),
  to = c(
    "1. 0-10 books", "2. 11-25 books", "3. 26-100 books",
    "4. 101-200 books", "5. More than 200"
  )
))

# Collapse mother's education into tertiary / non-tertiary; "Don’t know"
# becomes NA and is dropped together with the other incomplete rows below.
rus_full$momedu <- as.factor(relabel(
  rus_full$momedu,
  from = c(
    "Bachelor’s or equivalent",
    "Postgraduate degree",
    "Don’t know",
    "Upper secondary",
    "Lower secondary",
    "Post-secondary, non-tertiary",
    "Some Primary or Lower secondary or did not go to school",
    "Short-cycle tertiary"
  ),
  to = c(
    "tertiary", "tertiary", NA,
    "non-tertiary", "non-tertiary", "non-tertiary", "non-tertiary",
    "tertiary"
  )
))

rus_full <- na.omit(rus_full)

# dataset for regressions
rus <- rus_full %>%
  select(math_ach, books, tabl_own, tabl_shar, gender, momedu)
summary(rus)
## math_ach books tabl_own tabl_shar
## Min. :282.6 1. 0-10 books : 238 Yes:3287 Yes:3269
## 1st Qu.:486.2 2. 11-25 books :1117 No : 590 No : 608
## Median :543.6 3. 26-100 books :1522
## Mean :541.1 4. 101-200 books: 607
## 3rd Qu.:596.8 5. More than 200: 393
## Max. :819.8
## gender momedu
## Female:1906 non-tertiary:1800
## Male :1971 tertiary :2077
##
##
##
##
# achievement
# Histogram of math achievement with the mean (solid line) and the median
# (dashed line) marked. The subtitle is built from the data instead of being
# typed in by hand, so it cannot drift from what is plotted; values are
# truncated to whole points, which reproduces the labels used so far.
ach <- rus$math_ach
ach_stats <- floor(c(
  mean(ach), median(ach), min(ach),
  quantile(ach, 0.25, names = FALSE),
  quantile(ach, 0.75, names = FALSE),
  max(ach)
))
ach_subtitle <- sprintf(
  paste0(
    "mean=%.0f (white solid line), median=%.0f (white dashed line),\n",
    "min.=%.0f, 1st Qu.=%.0f, 3rd Qu.=%.0f, max.=%.0f"
  ),
  ach_stats[1], ach_stats[2], ach_stats[3],
  ach_stats[4], ach_stats[5], ach_stats[6]
)

ggplot() +
  geom_histogram(
    data = rus,
    aes(x = math_ach),
    binwidth = 30,
    fill = "cornflowerblue",
    col = "black",
    alpha = 0.5
  ) +
  labs(
    subtitle = ach_subtitle,
    y = "Number of respondents",
    x = "Student's achievement in math",
    title = "Distribution of math achievement among the students"
  ) +
  # Constant reference lines are set outside aes(): they are not mapped data.
  geom_vline(xintercept = mean(ach), linetype = "solid", color = "white", size = 1) +
  geom_vline(xintercept = median(ach), linetype = "dashed", color = "white", size = 1) +
  theme_bw()
# availability of tablets and pc
# Percentage bar chart for one Yes/No possession column of `data`.
#   data  - data frame holding the column
#   var   - column name, given as a string
#   title - panel title
#   ylab  - y-axis label (empty by default, for panels to the right of the first)
# Each bar is labelled with its share, rounded to one decimal in every panel
# (the two panels previously used different precisions).
plot_tablet_share <- function(data, var, title, ylab = "") {
  ggplot(data, aes(x = .data[[var]], fill = .data[[var]])) +
    geom_bar(aes(y = prop.table(..count..) * 100), position = "dodge") +
    geom_text(
      aes(
        y = prop.table(..count..) * 100 + 0.5,
        label = paste0(round(prop.table(..count..) * 100, digits = 1), "%")
      ),
      stat = "count",
      position = position_dodge(.9),
      size = 5
    ) +
    labs(y = ylab, x = "", title = title) +
    theme_bw() +
    theme(
      axis.text = element_text(size = 13.5),
      plot.title = element_text(size = 12),
      legend.position = "none"
    ) +
    scale_fill_manual(values = c("skyblue3", "sienna3"))
}

g1.2 <- plot_tablet_share(
  rus, "tabl_own", "child's own",
  ylab = "Proportion of students (%)"
)
g1.3 <- plot_tablet_share(rus, "tabl_shar", "shared")

grid.arrange(
  g1.2, g1.3,
  ncol = 2,
  top = textGrob("Availability of tablets/PC", gp = gpar(fontsize = 17))
)
# books
# Horizontal bar chart of the share of students per books-at-home category;
# the text on each bar is the raw number of respondents in that category.
ggplot(rus, aes(x = books)) +
  geom_bar(
    aes(y = prop.table(..count..) * 100),
    position = "dodge", fill = "cornflowerblue", col = "black"
  ) +
  geom_text(
    aes(y = prop.table(..count..) * 100 + 2.5, label = paste0(..count..)),
    stat = "count", position = position_dodge(.9), size = 5
  ) +
  labs(
    title = "Number of books at home",
    subtitle = "labels represent number of respondents in each category",
    x = "",
    y = "Proportion of students (%)"
  ) +
  coord_flip() +
  theme_bw() +
  theme(
    axis.text = element_text(size = 13.5),
    plot.title = element_text(size = 12),
    legend.position = "none"
  )
# additive models
# Nested additive models of math achievement; each step adds one predictor
# (gender -> books -> own tablet/PC -> shared tablet/PC -> mother's education).
# NOTE(review): the original notes gave adjusted R-squared values of 0.3-0.7.
# These contradict the summary of model1_5 printed below (0.075) and look like
# a dropped zero. Values marked "verify" are presumed and were not re-run.
model1_1 <- lm(math_ach ~ gender, data = rus) # p-value: 0.014, adjusted R-squared: 0.001, AIC: 46046.19
# summary(model1_1)
model1_2 <- lm(math_ach ~ gender + books, data = rus) # p-value < 2.2e-16, adjusted R-squared: presumably 0.03 (verify), AIC: 45930.51
# summary(model1_2)
model1_3 <- lm(math_ach ~ gender + books + tabl_own, data = rus) # p-value < 2.2e-16, adjusted R-squared: presumably 0.03 (verify), AIC: 45920.55
# summary(model1_3)
model1_4 <- lm(math_ach ~ gender + books + tabl_own + tabl_shar, data = rus) # p-value < 2.2e-16, adjusted R-squared: presumably 0.04 (verify), AIC: 45890.11
# summary(model1_4)
model1_5 <- lm(math_ach ~ gender + momedu + books + tabl_own + tabl_shar, data = rus) # p-value < 2.2e-16, adjusted R-squared: 0.075, AIC: 45752.04
summary(model1_5)
##
## Call:
## lm(formula = math_ach ~ gender + momedu + books + tabl_own +
## tabl_shar, data = rus)
##
## Residuals:
## Min 1Q Median 3Q Max
## -269.318 -52.550 2.841 52.187 251.398
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 509.240 5.299 96.104 < 2e-16 ***
## genderMale 8.044 2.480 3.243 0.001192 **
## momedutertiary 30.141 2.562 11.764 < 2e-16 ***
## books2. 11-25 books 1.721 5.477 0.314 0.753304
## books3. 26-100 books 13.196 5.390 2.448 0.014405 *
## books4. 101-200 books 29.045 5.945 4.886 1.07e-06 ***
## books5. More than 200 22.531 6.438 3.500 0.000471 ***
## tabl_ownNo 12.761 3.431 3.719 0.000203 ***
## tabl_sharNo -18.212 3.395 -5.364 8.63e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 76.43 on 3868 degrees of freedom
## Multiple R-squared: 0.07708, Adjusted R-squared: 0.07517
## F-statistic: 40.38 on 8 and 3868 DF, p-value: < 2.2e-16
# Discarded variant with an "is native" predictor, kept for reference only:
# being native did not show significant results, so it was not added to the dataset.
# model1_6 <- lm(math_ach ~ gender + momedu + if_native + books + tabl_av + pc_av, data = rus) # p-value < 2.2e-16, adjusted R-squared: presumably 0.07 (original note read 0.7 - verify), AIC: 45753.96
# summary(model1_6)
# Compare the five nested additive models by AIC and by anova().
AIC(model1_1, model1_2, model1_3, model1_4, model1_5)
anova(model1_1, model1_2, model1_3, model1_4, model1_5)
To explore the relation of books and gadgets to students' achievement, linear regression modeling was used, with math achievement as the outcome. The best model according to adjusted R^2, AIC, ANOVA and the significance of predictors is the one with the following predictors (more details about the other models are available in the code chunk above):
It is a very interesting result that having their own tablet or PC decreases a child's achievement, while the presence of a shared gadget, in contrast, increases it. I suggest that this happens because the use of a child's own tablet is harder to control, so it becomes not only a helpful tool in education but also a strong distraction, while a shared gadget serves the educational function and at the same time does not allow much uncontrolled playing.
This model has an adjusted R-squared of 0.07, which means that it is rather weak, explaining only about 7% of the variability of students' math achievement in Russia; however, it is still significant (p-value < 0.05, and all of the predictors are significant, the only exception being the "11-25 books" category, which does not differ significantly from the reference category "0-10 books"). The model has a couple of outliers (items 1712, 4173, 1442, 3470) and no leverage points.
# Residual diagnostics for the chosen additive model: a QQ plot of the
# residuals; the two lines printed underneath it identify the most extreme
# observations flagged on the plot.
qqPlot(model1_5, main="QQ Plot")
## 1712 4173
## 1412 3387
# Effect plots for every predictor of model1_5.
plot(allEffects(model1_5))
# interaction effect
# Same predictors as model1_5, plus the interaction between number of books
# and mother's education (books * momedu expands to both main effects and
# their interaction).
model5_1 <- lm(math_ach ~ gender + tabl_own + tabl_shar + books * momedu , data = rus) # p-value < 2.2e-16, adjusted R-squared: 0.076 (per the summary output; the original note read 0.75), AIC: 45751.15
summary(model5_1)
##
## Call:
## lm(formula = math_ach ~ gender + tabl_own + tabl_shar + books *
## momedu, data = rus)
##
## Residuals:
## Min 1Q Median 3Q Max
## -270.80 -52.20 2.62 52.15 250.07
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 514.774 6.064 84.884 < 2e-16
## genderMale 8.203 2.480 3.308 0.000949
## tabl_ownNo 12.621 3.431 3.678 0.000238
## tabl_sharNo -18.275 3.396 -5.381 7.85e-08
## books2. 11-25 books -3.980 6.525 -0.610 0.541945
## books3. 26-100 books 9.015 6.506 1.386 0.165929
## books4. 101-200 books 20.780 7.882 2.636 0.008412
## books5. More than 200 1.605 10.160 0.158 0.874527
## momedutertiary 9.388 11.181 0.840 0.401181
## books2. 11-25 books:momedutertiary 21.012 12.099 1.737 0.082536
## books3. 26-100 books:momedutertiary 18.216 11.850 1.537 0.124316
## books4. 101-200 books:momedutertiary 24.810 12.951 1.916 0.055478
## books5. More than 200:momedutertiary 40.275 14.603 2.758 0.005841
##
## (Intercept) ***
## genderMale ***
## tabl_ownNo ***
## tabl_sharNo ***
## books2. 11-25 books
## books3. 26-100 books
## books4. 101-200 books **
## books5. More than 200
## momedutertiary
## books2. 11-25 books:momedutertiary .
## books3. 26-100 books:momedutertiary
## books4. 101-200 books:momedutertiary .
## books5. More than 200:momedutertiary **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 76.39 on 3864 degrees of freedom
## Multiple R-squared: 0.07912, Adjusted R-squared: 0.07626
## F-statistic: 27.67 on 12 and 3864 DF, p-value: < 2.2e-16
# Plot the interaction term (books x mother's education) of model5_1.
plot_model(model5_1, type = "int")
Cultural and educational capital of the family can play a significant role in a child's educational achievements. It may therefore be interesting to see whether there is any interaction effect between these two predictors: mother's education and number of books at home. For that purpose an interaction model has been created. The effects are described below.
The effects of student's gender and of the availability of a tablet or PC remain the same:
gender - shows a positive relation for boys: if the student is male, their level of math achievement increases by 8;
presence of a shared tablet/PC in the family - the absence of a shared tablet or computer at home shows a strong negative relation, decreasing the level of math achievement by 18;
presence of the child's own tablet/PC - the absence of an own tablet increases the student's level of math achievement by about 13.
books - a large number of books (101-200) increases the student's level of math achievement by 21 (the other levels of cultural capital are not significant here);
number of books in interaction with mother's education - a strong positive relation can be seen in the interaction of a large number of books and mother's higher education. In other words, when the mother has tertiary education, large cultural capital (101 books and more) has a stronger positive effect on the child's level of math achievement (by 25 in the case of 101-200 books and by 40 in the case of more than 200 books).
# Compare the additive model with the interaction model.
anova(model1_5, model5_1) # interaction model is not significantly better
AIC(model1_5, model5_1) # AIC: 45752.04 (additive) vs 45751.15 (interaction)
As for the ANOVA comparison of these two models (without the interaction effect (model1_5) and with it (model5_1)), the second model, with the interaction effect, is not significantly better than the additive model. The AIC values are also nearly the same. Therefore I prefer the additive model, because it shows more significant relations of the predictors to the outcome.
For exploratory factor analysis here are the variables, describing home possessions - if students have any of these items at home:
The distributions of students who have and do not have such possessions are described below. In short, the majority of students have their own or a shared tablet/PC, an internet connection, a mobile phone and a study desk. The share of students who have their own room and whose family has a car is higher than the share of those who do not. In contrast, fewer students have a gaming system, musical instruments or a dishwasher at home. The proportion of families who possess a big flat (with 4 or more rooms) or a house is nearly the same as the proportion of those who do not.
# Home-possession items that enter the exploratory factor analysis
# (complete cases only).
r_efa <- rus_full %>%
  select(
    tabl_own, tabl_shar, desk, room, int, mobile,
    game, music, car, big_flat, dishwash
  ) %>%
  na.omit()
summary(r_efa)
## tabl_own tabl_shar desk room int mobile
## Yes:3287 Yes:3269 Yes:3561 Yes:2657 Yes:3747 Yes:3799
## No : 590 No : 608 No : 316 No :1220 No : 130 No : 78
## game music car big_flat dishwash
## Yes:1027 Yes:1445 Yes:2643 Yes:2022 Yes: 807
## No :2850 No :2432 No :1234 No :1855 No :3070
# Percentage bar chart for one Yes/No home-possession column of `data`.
#   data  - data frame holding the column
#   var   - column name, given as a string
#   title - panel title
#   ylab  - y-axis label; "%" is used for the first panel of each grid row,
#           the empty default for all the others
# Replaces eleven copies of the same plotting code, so that all panels are
# guaranteed to share one look.
plot_possession <- function(data, var, title, ylab = "") {
  ggplot(data, aes(x = .data[[var]], fill = .data[[var]])) +
    geom_bar(aes(y = prop.table(..count..) * 100), position = "dodge") +
    labs(y = ylab, x = "", title = title) +
    theme_bw() +
    theme(
      axis.text = element_text(size = 13.5),
      plot.title = element_text(size = 12),
      legend.position = "none"
    ) +
    scale_fill_manual(values = c("skyblue3", "sienna3"))
}

# The individual panels keep their names (g1 ... g11).
g1 <- plot_possession(r_efa, "tabl_own", "own tablet", ylab = "%")
g2 <- plot_possession(r_efa, "tabl_shar", "shared tablet")
g3 <- plot_possession(r_efa, "desk", "study desk")
g4 <- plot_possession(r_efa, "room", "own room")
g5 <- plot_possession(r_efa, "int", "internet connection", ylab = "%")
g6 <- plot_possession(r_efa, "mobile", "own mobile phone")
g7 <- plot_possession(r_efa, "game", "gaming system")
g8 <- plot_possession(r_efa, "music", "musical instrument")
g9 <- plot_possession(r_efa, "car", "car", ylab = "%")
g10 <- plot_possession(r_efa, "big_flat", "big flat or house")
g11 <- plot_possession(r_efa, "dishwash", "dishwasher")

grid.arrange(
  g1, g2, g3, g4, g5, g6, g7, g8, g9, g10, g11,
  ncol = 4, nrow = 3,
  top = textGrob("Home possession", gp = gpar(fontsize = 17))
)
# Correlation matrix of the possession items as estimated by hetcor(); only
# the matrix of correlations is kept from the returned object.
dat.cor <- hetcor(r_efa)$correlations
dat.cor
## tabl_own tabl_shar desk room int
## tabl_own 1.000000000 -0.088599322 0.38394235 0.250554905 0.31860610
## tabl_shar -0.088599322 1.000000000 0.01021489 -0.004006585 0.37995786
## desk 0.383942351 0.010214890 1.00000000 0.515056308 0.41974269
## room 0.250554905 -0.004006585 0.51505631 1.000000000 0.19137937
## int 0.318606100 0.379957861 0.41974269 0.191379372 1.00000000
## mobile 0.266196582 0.340789169 0.38328576 0.173314080 0.59916928
## game 0.286108774 -0.048276398 0.08530682 0.108465304 0.15693711
## music -0.003184618 0.132422113 0.16891958 0.049519921 0.23527040
## car 0.171797039 0.205013374 0.19202006 0.207147124 0.30451817
## big_flat 0.122636493 0.048725002 0.19216726 0.326288791 0.08494608
## dishwash 0.181352622 0.040208731 0.21711667 0.197352498 0.21838467
## mobile game music car big_flat
## tabl_own 0.26619658 0.28610877 -0.003184618 0.1717970 0.12263649
## tabl_shar 0.34078917 -0.04827640 0.132422113 0.2050134 0.04872500
## desk 0.38328576 0.08530682 0.168919584 0.1920201 0.19216726
## room 0.17331408 0.10846530 0.049519921 0.2071471 0.32628879
## int 0.59916928 0.15693711 0.235270400 0.3045182 0.08494608
## mobile 1.00000000 0.27600711 0.293295363 0.2342765 0.04889399
## game 0.27600711 1.00000000 0.109418772 0.2319974 0.13525682
## music 0.29329536 0.10941877 1.000000000 0.1715338 0.06565494
## car 0.23427653 0.23199737 0.171533781 1.0000000 0.34415171
## big_flat 0.04889399 0.13525682 0.065654937 0.3441517 1.00000000
## dishwash 0.40718948 0.38384925 0.225875238 0.3160933 0.24452534
## dishwash
## tabl_own 0.18135262
## tabl_shar 0.04020873
## desk 0.21711667
## room 0.19735250
## int 0.21838467
## mobile 0.40718948
## game 0.38384925
## music 0.22587524
## car 0.31609327
## big_flat 0.24452534
## dishwash 1.00000000
# p-values have to be computed from the item responses themselves.
# cor_pmat() runs a correlation test between the columns of its input, so
# passing the 11 x 11 correlation matrix `dat.cor` treated the 11 correlation
# rows as if they were 11 observations. The items are converted to their
# numeric codes first because the test needs numeric input; the resulting
# tests therefore refer to Pearson correlations of the codes, not to the
# hetcor() estimates.
# NOTE(review): the table printed below came from the old call and has to be
# regenerated when the document is re-knitted.
cor_pmat(as.data.frame(lapply(r_efa, as.numeric)))
## tabl_own tabl_shar desk room int
## tabl_own 0.00000000 0.09780393 0.20761670 0.55423935 0.70863393
## tabl_shar 0.09780393 0.00000000 0.27576565 0.17716178 0.21734690
## desk 0.20761670 0.27576565 0.00000000 0.04703741 0.38711242
## room 0.55423935 0.17716178 0.04703741 0.00000000 0.59717600
## int 0.70863393 0.21734690 0.38711242 0.59717600 0.00000000
## mobile 0.91740210 0.35201942 0.59329511 0.42215594 0.01602295
## game 0.51768209 0.16758546 0.41703096 0.52976729 0.48121784
## music 0.19899940 0.86488447 0.64562441 0.26736234 0.89976557
## car 0.62281340 0.95849956 0.53305301 0.77825075 0.91099613
## big_flat 0.68123457 0.36020564 0.83976905 0.37045533 0.13909631
## dishwash 0.87367539 0.32567215 0.72603523 0.74008512 0.62423038
## mobile game music car big_flat dishwash
## tabl_own 0.91740210 0.5176821 0.1989994 0.6228134 0.68123457 0.8736754
## tabl_shar 0.35201942 0.1675855 0.8648845 0.9584996 0.36020564 0.3256721
## desk 0.59329511 0.4170310 0.6456244 0.5330530 0.83976905 0.7260352
## room 0.42215594 0.5297673 0.2673623 0.7782507 0.37045533 0.7400851
## int 0.01602295 0.4812178 0.8997656 0.9109961 0.13909631 0.6242304
## mobile 0.00000000 0.9661678 0.6220614 0.6255622 0.07017937 0.5490731
## game 0.96616781 0.0000000 0.6329636 0.9849794 0.74290378 0.2045858
## music 0.62206142 0.6329636 0.0000000 0.7167705 0.35111348 0.9245115
## car 0.62556224 0.9849794 0.7167705 0.0000000 0.41379979 0.7142589
## big_flat 0.07017937 0.7429038 0.3511135 0.4137998 0.00000000 0.9385490
## dishwash 0.54907305 0.2045858 0.9245115 0.7142589 0.93854896 0.0000000
# Turn the Yes/No factors into their integer codes (Yes = 1, No = 2), which is
# the numeric input the factor-analysis functions are given.
r_efa <- r_efa %>%
  lapply(as.numeric) %>%
  as.data.frame()

# Parallel analysis as a guide to the number of factors.
fa.parallel(r_efa, fa = "both", n.iter = 100)
## Parallel analysis suggests that the number of factors = 4 and the number of components = 4

# Five-factor maximum-likelihood solution, varimax rotation, cor = "mixed".
fa(
  r_efa,
  nfactors = 5,
  rotate = "varimax",
  fm = "ml",
  cor = "mixed"
)
##
## mixed.cor is deprecated, please use mixedCor.
## Factor Analysis using method = ml
## Call: fa(r = r_efa, nfactors = 5, rotate = "varimax", fm = "ml", cor = "mixed")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML2 ML1 ML3 ML5 ML4 h2 u2 com
## tabl_own 0.03 0.25 0.13 0.09 0.74 0.63 0.37 1.3
## tabl_shar 0.65 -0.09 -0.05 0.13 -0.13 0.46 0.54 1.2
## desk 0.17 0.89 0.10 0.08 0.19 0.88 0.12 1.2
## room 0.01 0.51 0.08 0.34 0.11 0.40 0.60 1.9
## int 0.69 0.25 0.14 0.06 0.28 0.64 0.36 1.7
## mobile 0.62 0.23 0.47 -0.06 0.18 0.69 0.31 2.4
## game 0.01 -0.03 0.50 0.14 0.30 0.36 0.64 1.8
## music 0.26 0.12 0.27 0.03 -0.10 0.17 0.83 2.7
## car 0.26 0.07 0.22 0.50 0.10 0.38 0.62 2.1
## big_flat -0.01 0.15 0.11 0.64 0.02 0.44 0.56 1.2
## dishwash 0.10 0.12 0.69 0.24 0.05 0.56 0.44 1.4
##
## ML2 ML1 ML3 ML5 ML4
## SS loadings 1.46 1.30 1.14 0.89 0.83
## Proportion Var 0.13 0.12 0.10 0.08 0.08
## Cumulative Var 0.13 0.25 0.35 0.44 0.51
## Proportion Explained 0.26 0.23 0.20 0.16 0.15
## Cumulative Proportion 0.26 0.49 0.69 0.85 1.00
##
## Mean item complexity = 1.7
## Test of the hypothesis that 5 factors are sufficient.
##
## The degrees of freedom for the null model are 55 and the objective function was 2.54 with Chi Square of 9816.02
## The degrees of freedom for the model are 10 and the objective function was 0.02
##
## The root mean square of the residuals (RMSR) is 0.01
## The df corrected root mean square of the residuals is 0.03
##
## The harmonic number of observations is 3877 with the empirical chi square 60.93 with prob < 2.4e-09
## The total number of observations was 3877 with Likelihood Chi Square = 94.92 with prob < 5.6e-16
##
## Tucker Lewis Index of factoring reliability = 0.952
## RMSEA index = 0.047 and the 90 % confidence intervals are 0.038 0.056
## BIC = 12.29
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## ML2 ML1 ML3 ML5 ML4
## Correlation of (regression) scores with factors 0.86 0.92 0.79 0.75 0.78
## Multiple R square of scores with factors 0.74 0.84 0.63 0.57 0.60
## Minimum correlation of possible factor scores 0.47 0.69 0.26 0.14 0.20
# Diagram of the same five-factor solution that is printed above.
# The call used to pass `mixedCor` as a bare positional argument. That does
# not set `cor`: the value is matched to the next unnamed formal of fa(), so
# `cor` kept its default and the diagram showed a different model from the
# printed one. `cor = "mixed"` is now spelled out.
fa.diagram(fa(r_efa, nfactors = 5, rotate = "varimax", fm = "ml", cor = "mixed"))
# less successful models with 5 factors
# NOTE(review): the commented calls below that end in a bare `mixedCor` have
# the same problem, so their "low prop var" verdicts presumably describe fits
# on the default correlation rather than on mixed correlations - re-check.
#fa(r_efa, nfactors=5, rotate="none", fm="ml")
#fa(r_efa, nfactors=5, rotate="none", fm="ml", cor="mixed")
#fa(r_efa, nfactors=5, rotate="varimax", fm="ml")
#fa(r_efa, nfactors=5, rotate="oblimin", fm="ml")
#fa(r_efa, nfactors=5, rotate="oblimin", fm="ml", cor="mixed")
#fa(r_efa, nfactors=4, rotate="varimax", fm="ml", mixedCor) # low prop var
# fa(r_efa, nfactors=4, rotate="varimax", fm="ml", cor="mixed") # low TLI
#fa(r_efa, nfactors=5, rotate="varimax", fm="ml", mixedCor) # low prop var
The number of factors suggested by parallel analysis is 4, but I will stick to 5, as a lower number of factors gives a low TLI. Experimenting with mixedCor was not very successful either, as it decreased the proportions of variance significantly;
The model without rotation was bad (worse than the others), and the model with oblimin rotation did not give much better results than the varimax one, so I will stick to varimax rotation.
Cumulative variance: 0.51 - not bad;
Proportion variance: could be better, only 3 out of 5 factors give >0.1 result;
Proportion explained: from 0.26 to 0.15 - not best, but ok;
RMSR, Tucker Lewis Index and RMSEA are ok.
The variable describing the family's possession of a musical instrument did not fit any of the factors, so I have deleted it. Parallel analysis again suggested 4 factors, but just like before 4 factors gave a lower TLI (0.8), while 5 factors gave acceptable results, so I stick to this number.
# "music" does not load clearly on any factor, so the analysis is repeated
# without the musical-instrument item.
r_efa1 <- rus_full %>%
  select(
    tabl_own, tabl_shar, desk, room, int,
    mobile, game, car, big_flat, dishwash
  ) %>%
  na.omit()
# summary(r_efa1)

# Yes/No factors -> integer codes, as for the full item set.
r_efa1 <- r_efa1 %>%
  lapply(as.numeric) %>%
  as.data.frame()

fa.parallel(r_efa1, fa = "both", n.iter = 100)
## Parallel analysis suggests that the number of factors = 4 and the number of components = 4

# Final solution: five factors, maximum likelihood, varimax, cor = "mixed".
fa1 <- fa(
  r_efa1,
  nfactors = 5,
  rotate = "varimax",
  fm = "ml",
  cor = "mixed"
)
##
## mixed.cor is deprecated, please use mixedCor.
fa1
## Factor Analysis using method = ml
## Call: fa(r = r_efa1, nfactors = 5, rotate = "varimax", fm = "ml", cor = "mixed")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML2 ML1 ML3 ML4 ML5 h2 u2 com
## tabl_own 0.04 0.27 0.17 0.07 0.61 0.48 0.52 1.6
## tabl_shar 0.66 -0.09 -0.06 0.14 -0.16 0.49 0.51 1.3
## desk 0.17 0.83 0.08 0.08 0.22 0.77 0.23 1.3
## room 0.00 0.56 0.07 0.32 0.09 0.43 0.57 1.7
## int 0.69 0.26 0.13 0.06 0.32 0.66 0.34 1.8
## mobile 0.63 0.26 0.47 -0.08 0.15 0.71 0.29 2.4
## game 0.00 -0.04 0.53 0.15 0.31 0.40 0.60 1.8
## car 0.25 0.07 0.21 0.52 0.13 0.40 0.60 2.0
## big_flat -0.02 0.18 0.12 0.61 0.00 0.41 0.59 1.3
## dishwash 0.11 0.15 0.68 0.23 0.01 0.55 0.45 1.4
##
## ML2 ML1 ML3 ML4 ML5
## SS loadings 1.41 1.28 1.07 0.86 0.69
## Proportion Var 0.14 0.13 0.11 0.09 0.07
## Cumulative Var 0.14 0.27 0.38 0.46 0.53
## Proportion Explained 0.27 0.24 0.20 0.16 0.13
## Cumulative Proportion 0.27 0.51 0.71 0.87 1.00
##
## Mean item complexity = 1.7
## Test of the hypothesis that 5 factors are sufficient.
##
## The degrees of freedom for the null model are 45 and the objective function was 2.4 with Chi Square of 9275.16
## The degrees of freedom for the model are 5 and the objective function was 0.01
##
## The root mean square of the residuals (RMSR) is 0.01
## The df corrected root mean square of the residuals is 0.03
##
## The harmonic number of observations is 3877 with the empirical chi square 29.04 with prob < 2.3e-05
## The total number of observations was 3877 with Likelihood Chi Square = 57.2 with prob < 4.6e-11
##
## Tucker Lewis Index of factoring reliability = 0.949
## RMSEA index = 0.052 and the 90 % confidence intervals are 0.04 0.064
## BIC = 15.89
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## ML2 ML1 ML3 ML4
## Correlation of (regression) scores with factors 0.86 0.86 0.79 0.75
## Multiple R square of scores with factors 0.74 0.74 0.63 0.56
## Minimum correlation of possible factor scores 0.49 0.48 0.26 0.13
## ML5
## Correlation of (regression) scores with factors 0.69
## Multiple R square of scores with factors 0.48
## Minimum correlation of possible factor scores -0.05
# Path diagram of the final five-factor solution (fa1, fitted without "music").
fa.diagram(fa1)
For now results are the following:
As for interpretation, it is a bit strange – little number of variables explained by each factor (but decreasing the number of factors, as I have already mentioned, spoils the model). So the factors are:
# here I've tried to create a model without the "own tablet" variable, but the results were not promising: the TLI was too small
#r_efa1 <- rus_ds %>% select(BSBG06B, BSBG06C,BSBG06D, BSBG06E, BSBG06F, BSBG06G, BSBG06I, BSBG06J,BSBG06K)
#names(r_efa1) <- c("tabl_shar", "desk", "room", "int", "mobile", "game", "car", "big_flat","dishwash")
#r_efa1 <- na.omit(r_efa1)
#summary(r_efa1)
#r_efa1 <- as.data.frame(lapply(r_efa1, as.numeric))
#fa.parallel(r_efa1, fa="both", n.iter=100)
#fa(r_efa1, nfactors=5, rotate="varimax", fm="ml", cor="mixed")
#fa.diagram(fa(r_efa1, nfactors=5, rotate="varimax", fm="ml", cor="mixed"))
#fa(r_efa1, nfactors=4, rotate="varimax", fm="ml", cor="mixed")
#fa.diagram(fa(r_efa1, nfactors=4, rotate="varimax", fm="ml", cor="mixed"))
Let’s test these factors with Cronbach’s alpha - unfortunately, they are not very good (alpha < 0.7)
# Reliability of the items assigned to factor ML2.
ML2 <- r_efa1[, c("int", "tabl_shar", "mobile"), drop = FALSE]
psych::alpha(ML2, check.keys = TRUE)
##
## Reliability analysis
## Call: psych::alpha(x = ML2, check.keys = TRUE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.27 0.36 0.28 0.16 0.56 0.017 1.1 0.16 0.14
##
## lower alpha upper 95% confidence boundaries
## 0.24 0.27 0.3
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## int 0.13 0.19 0.10 0.10 0.23 0.019 NA 0.10
## tabl_shar 0.36 0.37 0.23 0.23 0.59 0.020 NA 0.23
## mobile 0.20 0.25 0.14 0.14 0.33 0.020 NA 0.14
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## int 3877 0.55 0.69 0.42 0.21 1.0 0.18
## tabl_shar 3877 0.85 0.63 0.26 0.16 1.2 0.36
## mobile 3877 0.46 0.67 0.37 0.19 1.0 0.14
##
## Non missing response frequency for each item
## 1 2 miss
## int 0.97 0.03 0
## tabl_shar 0.84 0.16 0
## mobile 0.98 0.02 0
# Reliability of the items assigned to factor ML1.
ML1 <- r_efa1[, c("desk", "room"), drop = FALSE]
psych::alpha(ML1, check.keys = TRUE)
##
## Reliability analysis
## Call: psych::alpha(x = ML1, check.keys = TRUE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.36 0.4 0.25 0.25 0.66 0.018 1.2 0.3 0.25
##
## lower alpha upper 95% confidence boundaries
## 0.32 0.36 0.39
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## desk 0.249 0.25 0.062 0.25 NA NA 0.249 0.25
## room 0.062 0.25 NA NA NA NA 0.062 0.25
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## desk 3877 0.65 0.79 0.39 0.25 1.1 0.27
## room 3877 0.90 0.79 0.39 0.25 1.3 0.46
##
## Non missing response frequency for each item
## 1 2 miss
## desk 0.92 0.08 0
## room 0.69 0.31 0
# Reliability of the items assigned to factor ML3.
ML3 <- r_efa1[, c("dishwash", "game"), drop = FALSE]
psych::alpha(ML3, check.keys = TRUE)
##
## Reliability analysis
## Call: psych::alpha(x = ML3, check.keys = TRUE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.36 0.37 0.22 0.22 0.58 0.02 1.8 0.33 0.22
##
## lower alpha upper 95% confidence boundaries
## 0.32 0.36 0.4
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## dishwash 0.22 0.22 0.05 0.22 NA NA 0.22 0.22
## game 0.05 0.22 NA NA NA NA 0.05 0.22
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## dishwash 3877 0.76 0.78 0.37 0.22 1.8 0.41
## game 3877 0.80 0.78 0.37 0.22 1.7 0.44
##
## Non missing response frequency for each item
## 1 2 miss
## dishwash 0.21 0.79 0
## game 0.26 0.74 0
# Reliability of the items assigned to factor ML4.
ML4 <- r_efa1[, c("big_flat", "car"), drop = FALSE]
psych::alpha(ML4, check.keys = TRUE)
##
## Reliability analysis
## Call: psych::alpha(x = ML4, check.keys = TRUE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.35 0.35 0.21 0.21 0.55 0.021 1.4 0.38 0.21
##
## lower alpha upper 95% confidence boundaries
## 0.31 0.35 0.39
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## big_flat 0.215 0.21 0.046 0.21 NA NA 0.215 0.21
## car 0.046 0.21 NA NA NA NA 0.046 0.21
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## big_flat 3877 0.80 0.78 0.36 0.21 1.5 0.50
## car 3877 0.76 0.78 0.36 0.21 1.3 0.47
##
## Non missing response frequency for each item
## 1 2 miss
## big_flat 0.52 0.48 0
## car 0.68 0.32 0
First, factor scores have been added to the dataset. Below are all the variables that are going to be used.
# First two columns of the loading matrix of the fitted factor model.
load <- fa1[["loadings"]][, 1:2]

# Factor scores as a data frame, appended column-wise to the survey
# variables so that everything lives in one data frame, datfa.
fascores <- as.data.frame(fa1[["scores"]])
datfa <- cbind(rus_full, fascores)

# Earlier draft, kept for reference (not run):
#datfa1 <- datfa %>% select("math_achiv", "sex_stud", "momedu", "fathedu", "if_native", "ML2", "ML1", "ML3")
#datfa1 = as.data.frame(lapply(datfa1, as.numeric))

names(datfa) # check that the factor-score columns were appended
## [1] "tabl_own" "tabl_shar" "desk" "room" "int"
## [6] "mobile" "game" "music" "car" "big_flat"
## [11] "dishwash" "math_ach" "books" "gender" "momedu"
## [16] "ML2" "ML1" "ML3" "ML4" "ML5"
# Per-column summary of the merged data: survey items, covariates and factor scores.
summary(datfa)
## tabl_own tabl_shar desk room int mobile
## Yes:3287 Yes:3269 Yes:3561 Yes:2657 Yes:3747 Yes:3799
## No : 590 No : 608 No : 316 No :1220 No : 130 No : 78
##
##
##
##
## game music car big_flat dishwash math_ach
## Yes:1027 Yes:1445 Yes:2643 Yes:2022 Yes: 807 Min. :282.6
## No :2850 No :2432 No :1234 No :1855 No :3070 1st Qu.:486.2
## Median :543.6
## Mean :541.1
## 3rd Qu.:596.8
## Max. :819.8
## books gender momedu
## 1. 0-10 books : 238 Female:1906 non-tertiary:1800
## 2. 11-25 books :1117 Male :1971 tertiary :2077
## 3. 26-100 books :1522
## 4. 101-200 books: 607
## 5. More than 200: 393
##
## ML2 ML1 ML3 ML4
## Min. :-0.8655 Min. :-1.1485 Min. :-2.2221 Min. :-2.12996
## 1st Qu.:-0.5125 1st Qu.:-0.5243 1st Qu.:-0.4378 1st Qu.:-0.77700
## Median :-0.3693 Median :-0.3768 Median : 0.3835 Median : 0.03212
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.:-0.1195 3rd Qu.: 0.2546 3rd Qu.: 0.4686 3rd Qu.: 0.54035
## Max. : 7.3101 Max. : 4.7695 Max. : 3.7644 Max. : 1.74560
## ML5
## Min. :-1.3046
## 1st Qu.:-0.4492
## Median :-0.1084
## Mean : 0.0000
## 3rd Qu.: 0.1016
## Max. : 3.3458
Now, let’s come back to our regression model. Quick reminder: we are using an additive model with the following predictors: students’ gender, their mothers’ education, the number of books at home, and students’ possession of a tablet/PC, either their own or shared with other family members. As one factor describes possession of one’s own tablet and this variable was already in our regression model, I have decided to keep it as a separate independent variable, while the shared tablet/PC variable will be dropped, because it is part of one of the factors.
# Baseline additive model: both tablet/PC items enter as separate predictors.
model1_5 <- lm(
  math_ach ~ gender + momedu + books + tabl_own + tabl_shar,
  data = datfa
)
# summary(model1_5) # model p-value < .05, adjusted R^2 = 0.075

# Same covariates, with tabl_shar replaced by the ML2 factor score.
modelefa1 <- lm(
  math_ach ~ gender + momedu + books + tabl_own + ML2,
  data = datfa
)
summary(modelefa1) # model p-value < .05, adjusted R^2 = 0.080
##
## Call:
## lm(formula = math_ach ~ gender + momedu + books + tabl_own +
## ML2, data = datfa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -269.980 -51.966 2.039 51.786 251.379
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 507.398 5.244 96.762 < 2e-16 ***
## genderMale 8.687 2.477 3.507 0.000459 ***
## momedutertiary 29.085 2.563 11.346 < 2e-16 ***
## books2. 11-25 books 1.080 5.464 0.198 0.843381
## books3. 26-100 books 11.926 5.381 2.216 0.026737 *
## books4. 101-200 books 27.451 5.937 4.623 3.90e-06 ***
## books5. More than 200 21.496 6.424 3.346 0.000826 ***
## tabl_ownNo 14.482 3.423 4.231 2.38e-05 ***
## ML2 -8.728 1.228 -7.110 1.38e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 76.22 on 3868 degrees of freedom
## Multiple R-squared: 0.08221, Adjusted R-squared: 0.08031
## F-statistic: 43.31 on 8 and 3868 DF, p-value: < 2.2e-16
# Add the remaining factor scores one at a time, so that each model is
# nested in the next one (modelefa1 < modelefa2 < modelefa3 < modelefa4).
modelefa2 <- update(modelefa1, . ~ . + ML1)
# summary(modelefa2) # model p-value < .05, adjusted R^2 = 0.080 (ML1 is not significant)
modelefa3 <- update(modelefa2, . ~ . + ML3)
# summary(modelefa3) # model p-value < .05, adjusted R^2 = 0.080 (ML1 and ML3 are not significant)
modelefa4 <- update(modelefa3, . ~ . + ML4)
# summary(modelefa4) # model p-value < .05, adjusted R^2 = 0.081 (ML1, ML3 and ML4 are not significant)

# AIC does not require nesting, so it can rank all five models.
AIC(model1_5, modelefa1, modelefa2, modelefa3, modelefa4) # modelefa1 (second model) has the lowest AIC, 44616.43

# The sequential F-test is only valid for nested models. model1_5 and
# modelefa1 differ by swapping tabl_shar for ML2 (same residual df), so
# model1_5 is left out of the comparison.
anova(modelefa1, modelefa2, modelefa3, modelefa4) # none of the added factors significantly improves the fit
I have kept the summary of only the best regression model: the additive model with one factor and the variable showing students’ possession of their own tablet. As for the factors, only one of them gave a significant result in the regression (to see the summaries of all 5 regression models, scroll to the appendix section): the factor representing possession of common electronic gadgets (mobile phone, internet connection, tablet/PC shared with family members), which is negatively related to students’ math achievement. In other words, a one-unit increase in this factor score is associated with a decrease of about 8.7 points in math achievement. Note that the items are coded 1 = Yes and 2 = No, so the direction of the factor score should be checked against the loadings before reading a higher score as "having more gadgets".
Other effects remain the same:
This model has two flagged outliers (observations 1712 and 4173, i.e. rows 1412 and 3387 of the fitted data) and no high-leverage points; it explains 8% of the variability in students’ math achievement (adjusted R^2 = 0.08). The model as a whole is significant (p-value < 2.2e-16).
# QQ plot of the residuals of modelefa1.
# NOTE(review): presumably car::qqPlot; the printed result then lists the
# labelled points (top row: row names, bottom row: positions) - confirm.
qqPlot(modelefa1, main="QQ Plot")
## 1712 4173
## 1412 3387
# Effect plots for the terms of modelefa1 (presumably effects::allEffects - confirm).
plot(allEffects(modelefa1))
Throughout the analysis the following patterns have been discovered:
So, to answer the question from the beginning: for students’ math achievement it is better to pay more attention to books than to gadgets.
P.S. and, dear parents, do not forget about your education either ;)
Here are the regression models with the factor scores added one at a time:
summary(model1_5) # model p-value < .05, adjusted R^2 = 0.075
##
## Call:
## lm(formula = math_ach ~ gender + momedu + books + tabl_own +
## tabl_shar, data = datfa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -269.318 -52.550 2.841 52.187 251.398
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 509.240 5.299 96.104 < 2e-16 ***
## genderMale 8.044 2.480 3.243 0.001192 **
## momedutertiary 30.141 2.562 11.764 < 2e-16 ***
## books2. 11-25 books 1.721 5.477 0.314 0.753304
## books3. 26-100 books 13.196 5.390 2.448 0.014405 *
## books4. 101-200 books 29.045 5.945 4.886 1.07e-06 ***
## books5. More than 200 22.531 6.438 3.500 0.000471 ***
## tabl_ownNo 12.761 3.431 3.719 0.000203 ***
## tabl_sharNo -18.212 3.395 -5.364 8.63e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 76.43 on 3868 degrees of freedom
## Multiple R-squared: 0.07708, Adjusted R-squared: 0.07517
## F-statistic: 40.38 on 8 and 3868 DF, p-value: < 2.2e-16
summary(modelefa1) # model p-value < .05, adjusted R^2 = 0.080 - all predictors are significant (only the 11-25 books level does not differ from the baseline), lowest AIC
##
## Call:
## lm(formula = math_ach ~ gender + momedu + books + tabl_own +
## ML2, data = datfa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -269.980 -51.966 2.039 51.786 251.379
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 507.398 5.244 96.762 < 2e-16 ***
## genderMale 8.687 2.477 3.507 0.000459 ***
## momedutertiary 29.085 2.563 11.346 < 2e-16 ***
## books2. 11-25 books 1.080 5.464 0.198 0.843381
## books3. 26-100 books 11.926 5.381 2.216 0.026737 *
## books4. 101-200 books 27.451 5.937 4.623 3.90e-06 ***
## books5. More than 200 21.496 6.424 3.346 0.000826 ***
## tabl_ownNo 14.482 3.423 4.231 2.38e-05 ***
## ML2 -8.728 1.228 -7.110 1.38e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 76.22 on 3868 degrees of freedom
## Multiple R-squared: 0.08221, Adjusted R-squared: 0.08031
## F-statistic: 43.31 on 8 and 3868 DF, p-value: < 2.2e-16
summary(modelefa2) # model p-value < .05, adjusted R^2 = 0.080 (ML1 is not significant)
##
## Call:
## lm(formula = math_ach ~ gender + momedu + books + tabl_own +
## ML2 + ML1, data = datfa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -269.613 -52.018 2.184 51.890 251.784
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 507.4497 5.2456 96.739 < 2e-16 ***
## genderMale 8.7250 2.4790 3.520 0.000437 ***
## momedutertiary 29.0599 2.5642 11.333 < 2e-16 ***
## books2. 11-25 books 0.9813 5.4684 0.179 0.857595
## books3. 26-100 books 11.7798 5.3916 2.185 0.028960 *
## books4. 101-200 books 27.3012 5.9472 4.591 4.56e-06 ***
## books5. More than 200 21.3137 6.4368 3.311 0.000937 ***
## tabl_ownNo 14.9465 3.5731 4.183 2.94e-05 ***
## ML2 -8.6164 1.2522 -6.881 6.90e-12 ***
## ML1 -0.6308 1.3901 -0.454 0.649994
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 76.23 on 3867 degrees of freedom
## Multiple R-squared: 0.08226, Adjusted R-squared: 0.08012
## F-statistic: 38.51 on 9 and 3867 DF, p-value: < 2.2e-16
summary(modelefa3) # model p-value < .05, adjusted R^2 = 0.080 (ML1 and ML3 are not significant)
##
## Call:
## lm(formula = math_ach ~ gender + momedu + books + tabl_own +
## ML2 + ML1 + ML3, data = datfa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -266.744 -52.145 2.313 51.811 255.100
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 507.0201 5.2544 96.494 < 2e-16 ***
## genderMale 9.1718 2.5003 3.668 0.000247 ***
## momedutertiary 29.5270 2.5868 11.415 < 2e-16 ***
## books2. 11-25 books 1.0122 5.4678 0.185 0.853151
## books3. 26-100 books 11.8093 5.3910 2.191 0.028544 *
## books4. 101-200 books 27.4950 5.9483 4.622 3.92e-06 ***
## books5. More than 200 21.4565 6.4369 3.333 0.000866 ***
## tabl_ownNo 14.2028 3.6142 3.930 8.65e-05 ***
## ML2 -9.0830 1.2980 -6.998 3.06e-12 ***
## ML1 -0.7989 1.3954 -0.572 0.567030
## ML3 2.0461 1.5011 1.363 0.172950
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 76.22 on 3866 degrees of freedom
## Multiple R-squared: 0.0827, Adjusted R-squared: 0.08033
## F-statistic: 34.85 on 10 and 3866 DF, p-value: < 2.2e-16
summary(modelefa4) # model p-value < .05, adjusted R^2 = 0.081 (ML1, ML3 and ML4 are not significant)
##
## Call:
## lm(formula = math_ach ~ gender + momedu + books + tabl_own +
## ML2 + ML1 + ML3 + ML4, data = datfa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -265.70 -51.65 2.26 51.81 256.16
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 506.9827 5.2534 96.505 < 2e-16 ***
## genderMale 9.1802 2.4998 3.672 0.000244 ***
## momedutertiary 29.6254 2.5870 11.452 < 2e-16 ***
## books2. 11-25 books 0.8899 5.4672 0.163 0.870711
## books3. 26-100 books 11.7818 5.3900 2.186 0.028884 *
## books4. 101-200 books 27.5717 5.9473 4.636 3.67e-06 ***
## books5. More than 200 21.6782 6.4371 3.368 0.000765 ***
## tabl_ownNo 14.1501 3.6136 3.916 9.17e-05 ***
## ML2 -9.0906 1.2978 -7.005 2.90e-12 ***
## ML1 -1.1577 1.4130 -0.819 0.412635
## ML3 1.6188 1.5243 1.062 0.288293
## ML4 2.6630 1.6607 1.604 0.108886
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 76.2 on 3865 degrees of freedom
## Multiple R-squared: 0.08331, Adjusted R-squared: 0.0807
## F-statistic: 31.93 on 11 and 3865 DF, p-value: < 2.2e-16
# AIC does not require nesting, so it can rank all five models.
AIC(model1_5, modelefa1, modelefa2, modelefa3, modelefa4) # modelefa1 (second model) has the lowest AIC, 44616.43

# The sequential F-test is only valid for nested models. model1_5 and
# modelefa1 differ by swapping tabl_shar for ML2 (same residual df), so
# model1_5 is left out of the comparison.
anova(modelefa1, modelefa2, modelefa3, modelefa4) # none of the added factors significantly improves the fit