# Connect to the PISA SQLite file located one directory above the working
# directory.
pisadb <- dbConnect(
  RSQLite::SQLite(),
  "../pisa.sqlite"
)

# Reference the "israel2015b" table through dplyr.
israel2015 <- dplyr::tbl(pisadb, "israel2015b")

# Keep only the rows whose COUNTRY value is "Hebrew Secular".
secular <- israel2015 %>%
  filter(COUNTRY == "Hebrew Secular")
# PV1SCIE against CLSIZE: one linear fit per MISCED level, drawn without
# confidence bands. The legend title is Hebrew for "mother's education".
#
# Breaks and labels are paired purely by position, so both vectors are listed
# from the highest level down to the missing-value entry. "ISCED 3A, ISCED 4"
# must sit opposite the "matriculation certificate or preparatory programme"
# label, and "ISCED 3B, C" opposite "finished high school without
# matriculation".
# se = FALSE is spelled out because the shorthand F is an ordinary variable
# that can be reassigned.
ggplot(secular, aes(y = PV1SCIE, x = CLSIZE, colour = MISCED)) +
  stat_smooth(method = lm, se = FALSE) +
  scale_colour_discrete(
    name = "השכלת אם",
    breaks = c(
      "ISCED 5A, 6",
      "ISCED 5B",
      "ISCED 3A, ISCED 4",
      "ISCED 3B, C",
      "ISCED 2",
      "ISCED 1",
      "None",
      NA
    ),
    labels = c(
      "תואר אקדמי",
      "לימודי תעודה",
      "תעודת בגרות או מכינה",
      "סיום תיכון ללא בגרות",
      "חטיבת ביניים",
      "יסודי",
      "ללא",
      "אין נתונים"
    )
  )

# PV1SCIE against CLSIZE with one linear fit per ESCS group, no confidence
# bands. FALSE is written out because the shorthand F is an ordinary variable
# that can be reassigned.
ggplot(secular, aes(y = PV1SCIE, x = CLSIZE, color = ESCS)) +
  stat_smooth(method = lm, se = FALSE)
## Warning: Removed 83 rows containing non-finite values (stat_smooth).

# One-row fit summary for the model with CLSIZE as the only predictor.
secular %>%
  do(glance(lm(PV1SCIE ~ CLSIZE, data = .)))
##    r.squared adj.r.squared    sigma statistic      p.value df    logLik
## 1 0.07593984    0.07565366 99.72629  265.3612 2.130925e-57  2 -19454.04
##        AIC      BIC deviance df.residual
## 1 38914.08 38932.32 32113481        3229
# One-row fit summary after adding MISCED to the CLSIZE model.
secular %>%
  do(glance(lm(PV1SCIE ~ CLSIZE + MISCED, data = .)))
##   r.squared adj.r.squared    sigma statistic       p.value df    logLik
## 1 0.1498884     0.1480124 95.20318  79.89639 3.737113e-107  8 -18996.34
##        AIC      BIC deviance df.residual
## 1 38010.68 38065.27 28749884        3172
# One-row fit summary for the model with ESCS as the only predictor.
secular %>%
  do(glance(lm(PV1SCIE ~ ESCS, data = .)))
##   r.squared adj.r.squared    sigma statistic      p.value df    logLik
## 1  0.121756     0.1212189 96.70712  226.6695 6.468483e-93  3 -19605.82
##        AIC      BIC deviance df.residual
## 1 39219.63 39244.01 30581913        3270
# One-row fit summary for the model combining CLSIZE, MISCED and ESCS.
secular %>%
  do(glance(lm(PV1SCIE ~ CLSIZE + MISCED + ESCS, data = .)))
##   r.squared adj.r.squared   sigma statistic       p.value df    logLik
## 1 0.1811087     0.1787816 93.3044  77.82491 1.367978e-130 10 -18913.41
##        AIC      BIC deviance df.residual
## 1 37848.82 37915.52 27570984        3167
# Fit the CLSIZE + MISCED + ESCS model as a named object so its coefficient
# table can be printed with summary().
fit <- lm(
  PV1SCIE ~ CLSIZE + MISCED + ESCS,
  data = secular
)
summary(fit)
## 
## Call:
## lm(formula = PV1SCIE ~ CLSIZE + MISCED + ESCS, data = secular)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -343.54  -59.25    4.12   65.95  278.17 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             320.5758    20.4283  15.693  < 2e-16 ***
## CLSIZE                    4.1255     0.3461  11.920  < 2e-16 ***
## MISCEDISCED 2            34.3992    20.7186   1.660    0.097 .  
## MISCEDISCED 3A, ISCED 4  73.5775    16.8208   4.374 1.26e-05 ***
## MISCEDISCED 3B, C        81.5353    17.6213   4.627 3.86e-06 ***
## MISCEDISCED 5A, 6        99.7095    16.9462   5.884 4.43e-09 ***
## MISCEDISCED 5B           78.3586    17.6666   4.435 9.50e-06 ***
## MISCEDNone               20.5950    23.9670   0.859    0.390    
## ESCSLow                 -60.3198     5.5681 -10.833  < 2e-16 ***
## ESCSMedium              -35.0599     4.0425  -8.673  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 93.3 on 3167 degrees of freedom
##   (137 observations deleted due to missingness)
## Multiple R-squared:  0.1811, Adjusted R-squared:  0.1788 
## F-statistic: 77.82 on 9 and 3167 DF,  p-value: < 2.2e-16
# PV1SCIE against CLSIZE with one linear fit per SC001Q01TA value, no
# confidence bands. FALSE is written out because the shorthand F is an
# ordinary variable that can be reassigned.
ggplot(secular, aes(y = PV1SCIE, x = CLSIZE, colour = SC001Q01TA)) +
  stat_smooth(method = lm, se = FALSE)
## Warning: Removed 83 rows containing non-finite values (stat_smooth).

# PV1SCIE against CLSIZE with one linear fit per MISCED level, split into a
# panel grid: ESCS defines the rows and SC001Q01TA the columns.
# FALSE is written out because the shorthand F is an ordinary variable that
# can be reassigned.
ggplot(secular, aes(y = PV1SCIE, x = CLSIZE, colour = MISCED)) +
  stat_smooth(method = lm, se = FALSE) +
  facet_grid(ESCS ~ SC001Q01TA)
## Warning: Removed 83 rows containing non-finite values (stat_smooth).

# One-row fit summary for the model with CLSIZE and SC001Q01TA as predictors.
secular %>%
  do(glance(lm(PV1SCIE ~ CLSIZE + SC001Q01TA, data = .)))
##   r.squared adj.r.squared    sigma statistic      p.value df    logLik
## 1 0.1043014     0.1031908 98.22959  93.91445 1.163244e-75  5 -19403.68
##        AIC      BIC deviance df.residual
## 1 38819.36 38855.84 31127845        3226
# One-row fit summary for the model with all four predictors:
# CLSIZE, MISCED, ESCS and SC001Q01TA.
secular %>%
  do(glance(lm(PV1SCIE ~ CLSIZE + MISCED + ESCS + SC001Q01TA, data = .)))
##   r.squared adj.r.squared    sigma statistic       p.value df    logLik
## 1  0.193168     0.1901079 92.65873  63.12585 7.714413e-138 13 -18889.84
##        AIC      BIC deviance df.residual
## 1 37807.69 37892.58 27164965        3164