# Open the PISA SQLite database and build a lazy reference to the
# "israel2015b" table, restricted to rows whose COUNTRY value is
# "Hebrew Secular".
pisa_db_path <- "../pisa.sqlite"
# RSQLite creates a new, empty database file when the path does not exist,
# which would only surface later as a confusing "no such table" error.
# Fail early with a clear message instead.
if (!file.exists(pisa_db_path)) {
  stop("PISA database not found: ", pisa_db_path, call. = FALSE)
}
pisadb <- dbConnect(RSQLite::SQLite(), pisa_db_path)
israel2015 <- dplyr::tbl(pisadb, "israel2015b")
secular <- israel2015 %>%
  filter(COUNTRY == "Hebrew Secular")
# Linear trend of PV1SCIE against CLSIZE, one line per MISCED level.
# The legend is in Hebrew: the title reads "mother's education" and each
# ISCED code is mapped to a Hebrew description.
# NOTE(review): "ISCED 3B, C" is labelled "matriculation certificate or
# preparatory programme" while "ISCED 3A, ISCED 4" is labelled "finished high
# school without matriculation" -- these two labels look swapped; confirm
# against the PISA codebook before changing them.
ggplot(secular, aes(y = PV1SCIE, x = CLSIZE, colour = MISCED)) +
  # Use FALSE rather than F: F is an ordinary variable that can be reassigned.
  stat_smooth(method = lm, se = FALSE) +
  scale_colour_discrete(
    name = "השכלת אם",
    breaks = c(
      "ISCED 5A, 6",
      "ISCED 5B",
      "ISCED 3B, C",
      "ISCED 3A, ISCED 4",
      "ISCED 2",
      "ISCED 1",
      "None",
      NA
    ),
    labels = c(
      "תואר אקדמי",
      "לימודי תעודה",
      "תעודת בגרות או מכינה",
      "סיום תיכון ללא בגרות",
      "חטיבת ביניים",
      "יסודי",
      "ללא",
      "אין נתונים"
    )
  )

# Linear trend of PV1SCIE against CLSIZE, one line per ESCS level.
# `colour` is spelled the same way as in the other plots of this script, and
# FALSE replaces the reassignable shorthand F.
ggplot(secular, aes(y = PV1SCIE, x = CLSIZE, colour = ESCS)) +
  stat_smooth(method = lm, se = FALSE)
## Warning: Removed 83 rows containing non-finite values (stat_smooth).

# One-row model summary (R-squared, AIC, BIC, ...) for PV1SCIE ~ CLSIZE.
# The filtered rows are pulled into memory explicitly and the model is fitted
# directly; this replaces the superseded dplyr::do() wrapper and returns the
# same glance() result.
glance(lm(PV1SCIE ~ CLSIZE, data = dplyr::collect(secular)))
## r.squared adj.r.squared sigma statistic p.value df logLik
## 1 0.07593984 0.07565366 99.72629 265.3612 2.130925e-57 2 -19454.04
## AIC BIC deviance df.residual
## 1 38914.08 38932.32 32113481 3229
# One-row model summary for PV1SCIE ~ CLSIZE + MISCED.
# Fitted directly on the collected rows instead of going through the
# superseded dplyr::do(); the glance() result is the same.
glance(lm(PV1SCIE ~ CLSIZE + MISCED, data = dplyr::collect(secular)))
## r.squared adj.r.squared sigma statistic p.value df logLik
## 1 0.1498884 0.1480124 95.20318 79.89639 3.737113e-107 8 -18996.34
## AIC BIC deviance df.residual
## 1 38010.68 38065.27 28749884 3172
# One-row model summary for PV1SCIE ~ ESCS.
# Fitted directly on the collected rows instead of going through the
# superseded dplyr::do(); the glance() result is the same.
glance(lm(PV1SCIE ~ ESCS, data = dplyr::collect(secular)))
## r.squared adj.r.squared sigma statistic p.value df logLik
## 1 0.121756 0.1212189 96.70712 226.6695 6.468483e-93 3 -19605.82
## AIC BIC deviance df.residual
## 1 39219.63 39244.01 30581913 3270
# One-row model summary for PV1SCIE ~ CLSIZE + MISCED + ESCS.
# Fitted directly on the collected rows instead of going through the
# superseded dplyr::do(); the glance() result is the same.
glance(lm(PV1SCIE ~ CLSIZE + MISCED + ESCS, data = dplyr::collect(secular)))
## r.squared adj.r.squared sigma statistic p.value df logLik
## 1 0.1811087 0.1787816 93.3044 77.82491 1.367978e-130 10 -18913.41
## AIC BIC deviance df.residual
## 1 37848.82 37915.52 27570984 3167
# Fit the three-predictor model (CLSIZE, MISCED, ESCS) once, keep it as `fit`,
# and print the full coefficient table.
fit <- lm(
  PV1SCIE ~ CLSIZE + MISCED + ESCS,
  data = secular
)
summary(fit)
##
## Call:
## lm(formula = PV1SCIE ~ CLSIZE + MISCED + ESCS, data = secular)
##
## Residuals:
## Min 1Q Median 3Q Max
## -343.54 -59.25 4.12 65.95 278.17
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 320.5758 20.4283 15.693 < 2e-16 ***
## CLSIZE 4.1255 0.3461 11.920 < 2e-16 ***
## MISCEDISCED 2 34.3992 20.7186 1.660 0.097 .
## MISCEDISCED 3A, ISCED 4 73.5775 16.8208 4.374 1.26e-05 ***
## MISCEDISCED 3B, C 81.5353 17.6213 4.627 3.86e-06 ***
## MISCEDISCED 5A, 6 99.7095 16.9462 5.884 4.43e-09 ***
## MISCEDISCED 5B 78.3586 17.6666 4.435 9.50e-06 ***
## MISCEDNone 20.5950 23.9670 0.859 0.390
## ESCSLow -60.3198 5.5681 -10.833 < 2e-16 ***
## ESCSMedium -35.0599 4.0425 -8.673 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 93.3 on 3167 degrees of freedom
## (137 observations deleted due to missingness)
## Multiple R-squared: 0.1811, Adjusted R-squared: 0.1788
## F-statistic: 77.82 on 9 and 3167 DF, p-value: < 2.2e-16
# Linear trend of PV1SCIE against CLSIZE, one line per SC001Q01TA level.
# FALSE replaces the reassignable shorthand F.
ggplot(secular, aes(y = PV1SCIE, x = CLSIZE, colour = SC001Q01TA)) +
  stat_smooth(method = lm, se = FALSE)
## Warning: Removed 83 rows containing non-finite values (stat_smooth).

# Linear trend of PV1SCIE against CLSIZE coloured by MISCED, split into a
# panel grid with ESCS levels as rows and SC001Q01TA levels as columns.
# FALSE replaces the reassignable shorthand F.
ggplot(secular, aes(y = PV1SCIE, x = CLSIZE, colour = MISCED)) +
  stat_smooth(method = lm, se = FALSE) +
  facet_grid(ESCS ~ SC001Q01TA)
## Warning: Removed 83 rows containing non-finite values (stat_smooth).

# One-row model summary for PV1SCIE ~ CLSIZE + SC001Q01TA.
# Fitted directly on the collected rows instead of going through the
# superseded dplyr::do(); the glance() result is the same.
glance(lm(PV1SCIE ~ CLSIZE + SC001Q01TA, data = dplyr::collect(secular)))
## r.squared adj.r.squared sigma statistic p.value df logLik
## 1 0.1043014 0.1031908 98.22959 93.91445 1.163244e-75 5 -19403.68
## AIC BIC deviance df.residual
## 1 38819.36 38855.84 31127845 3226
# One-row model summary for the largest model in this section:
# PV1SCIE ~ CLSIZE + MISCED + ESCS + SC001Q01TA.
# Fitted directly on the collected rows instead of going through the
# superseded dplyr::do(); the glance() result is the same.
glance(
  lm(
    PV1SCIE ~ CLSIZE + MISCED + ESCS + SC001Q01TA,
    data = dplyr::collect(secular)
  )
)
## r.squared adj.r.squared sigma statistic p.value df logLik
## 1 0.193168 0.1901079 92.65873 63.12585 7.714413e-138 13 -18889.84
## AIC BIC deviance df.residual
## 1 37807.69 37892.58 27164965 3164