library(lme4)
library(lmerTest)
library(performance)
library(ggplot2)
# Set the working directory that holds the CSV files read by this script.
# The path is a placeholder. Straight ASCII quotes are required here: the
# typographic quotes used previously are a syntax error in R.
setwd("C:/adresse/de/mon/dossier/de/travail")
library(readr)
# Load the semicolon-delimited data set used by the FRAN4 models.
TS <- read_delim(
  "donnees_temps_scol_na.csv",
  delim = ";", escape_double = FALSE, trim_ws = TRUE
)
# First OLS regression: predict the final score (FRAN4) from FRAN1.
modelreg1 <- lm(formula = FRAN4 ~ FRAN1, data = TS)
# Print the coefficient table and the fit statistics.
summary(object = modelreg1)
##
## Call:
## lm(formula = FRAN4 ~ FRAN1, data = TS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.25968 -0.51653 -0.04871 0.47117 2.77318
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.01045 0.03562 -0.294 0.769
## FRAN1 0.64104 0.03571 17.950 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7654 on 460 degrees of freedom
## (28 observations effacées parce que manquantes)
## Multiple R-squared: 0.4119, Adjusted R-squared: 0.4106
## F-statistic: 322.2 on 1 and 460 DF, p-value: < 2.2e-16
# Scatter plot of FRAN4 against FRAN1.
plot(FRAN4 ~ FRAN1, data = TS, pch = 12)
# Overlay the regression line fitted by modelreg1.
abline(reg = modelreg1, lwd = 2, col = "blue")
# OLS regression with the occupation dummies, FILLE and RDBLT added. The "sup"
# category is deliberately left out so that it acts as the reference level.
modelreg2 <- lm(
  formula = FRAN4 ~ FRAN1 + arti + inter + empl + ouvr + autr + FILLE + RDBLT,
  data = TS
)
summary(object = modelreg2)
##
## Call:
## lm(formula = FRAN4 ~ FRAN1 + arti + inter + empl + ouvr + autr +
## FILLE + RDBLT, data = TS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.93111 -0.52044 -0.02968 0.48418 2.47459
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.13007 0.08894 -1.462 0.14433
## FRAN1 0.55742 0.03709 15.031 < 2e-16 ***
## arti -0.10495 0.13897 -0.755 0.45050
## inter 0.31040 0.12471 2.489 0.01317 *
## empl 0.18236 0.11744 1.553 0.12118
## ouvr 0.04123 0.10173 0.405 0.68544
## autr -0.02186 0.14561 -0.150 0.88071
## FILLE 0.20753 0.07050 2.944 0.00341 **
## RDBLT -0.54597 0.12517 -4.362 1.6e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7388 on 453 degrees of freedom
## (28 observations effacées parce que manquantes)
## Multiple R-squared: 0.4604, Adjusted R-squared: 0.4509
## F-statistic: 48.32 on 8 and 453 DF, p-value: < 2.2e-16
Dans ce modèle, pour un garçon dont les parents sont cadres supérieurs et qui n’a jamais redoublé, toutes les variables indicatrices valent 0 : leurs coefficients n’interviennent plus. Il ne reste que la constante (et l’effet du score FRAN1).
# Same regression as modelreg2, with the experimentation indicator (EXPE)
# added as an extra predictor.
modelreg3 <- lm(
  formula = FRAN4 ~ FRAN1 + arti + inter + empl + ouvr + autr + FILLE +
    RDBLT + EXPE,
  data = TS
)
summary(object = modelreg3)
##
## Call:
## lm(formula = FRAN4 ~ FRAN1 + arti + inter + empl + ouvr + autr +
## FILLE + RDBLT + EXPE, data = TS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.85309 -0.53429 -0.02107 0.43922 2.55384
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.19464 0.09115 -2.135 0.03327 *
## FRAN1 0.55425 0.03682 15.054 < 2e-16 ***
## arti -0.11583 0.13796 -0.840 0.40157
## inter 0.29727 0.12384 2.401 0.01677 *
## empl 0.16870 0.11664 1.446 0.14877
## ouvr 0.04540 0.10096 0.450 0.65318
## autr -0.02283 0.14449 -0.158 0.87451
## FILLE 0.20543 0.06996 2.936 0.00349 **
## RDBLT -0.53432 0.12427 -4.300 2.1e-05 ***
## EXPE 0.20658 0.07285 2.836 0.00478 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7331 on 452 degrees of freedom
## (28 observations effacées parce que manquantes)
## Multiple R-squared: 0.4698, Adjusted R-squared: 0.4593
## F-statistic: 44.51 on 9 and 452 DF, p-value: < 2.2e-16
On observe un effet significatif et positif de l’expérimentation. On a donc un effet bénéfique de l’expérimentation sur le score de français au bout de deux ans. Les effets calculés sont marginaux, c’est-à-dire qu’on regarde l’effet propre de chaque variable sur la note de français, les autres variables étant maintenues constantes.
Un modèle vide a une constante de niveau 1 et une constante de niveau 2.
# Empty (intercept-only) multilevel model: one fixed intercept plus a random
# intercept for each CLASSE2 group.
modelM0 <- lmer(
  FRAN4 ~ 1 +      # fixed intercept
    (1 | CLASSE2), # second intercept, estimated at level 2
  REML = FALSE,    # full maximum likelihood; `FALSE` because `F` can be reassigned
  data = TS
)
summary(modelM0)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: FRAN4 ~ 1 + (1 | CLASSE2)
## Data: TS
##
## AIC BIC logLik deviance df.resid
## 1290.7 1303.1 -642.4 1284.7 459
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.95515 -0.67747 -0.01198 0.62331 2.67412
##
## Random effects:
## Groups Name Variance Std.Dev.
## CLASSE2 (Intercept) 0.0967 0.3110
## Residual 0.8898 0.9433
## Number of obs: 462, groups: CLASSE2, 28
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.002226 0.075409 28.609299 0.03 0.977
icc(modelM0) # intraclass correlation coefficient
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.098
## Unadjusted ICC: 0.098
# ANOVA-like table for the random effects (single term deletions).
ranova(modelM0)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## FRAN4 ~ (1 | CLASSE2)
## npar logLik AIC LRT Df Pr(>Chisq)
## <none> 3 -642.36 1290.7
## (1 | CLASSE2) 2 -653.65 1311.3 22.593 1 2.002e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Déviance : on peut considérer cette donnée comme la distance entre notre modèle et le modèle parfait. Il est utile de comparer les déviances pour juger les modèles les uns par rapport aux autres. Plus la déviance se réduit, plus notre modèle s’approche de la “perfection”…
# Random-intercept model with FRAN1 added as a fixed effect.
modelEF1 <- lmer(
  FRAN4 ~ FRAN1 + # fixed effect of the independent variable
    (1 | CLASSE2),
  REML = FALSE, # `FALSE` rather than the reassignable shorthand `F`
  data = TS
)
summary(modelEF1, correlation = FALSE)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: FRAN4 ~ FRAN1 + (1 | CLASSE2)
## Data: TS
##
## AIC BIC logLik deviance df.resid
## 1024.8 1041.4 -508.4 1016.8 458
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7859 -0.7017 -0.0558 0.6336 3.2900
##
## Random effects:
## Groups Name Variance Std.Dev.
## CLASSE2 (Intercept) 0.1256 0.3543
## Residual 0.4801 0.6929
## Number of obs: 462, groups: CLASSE2, 28
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.04056 0.07596 23.02829 -0.534 0.598
## FRAN1 0.66424 0.03448 457.75169 19.262 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
La déviance s’est réduite (de 1284,7 à 1016,8). Il y a 28 classes.
Pour comparer deux modèles, on peut utiliser :
# Compare the two fits (the table reports AIC, BIC, deviance and a chi-square test).
anova(modelM0,modelEF1)
## Data: TS
## Models:
## modelM0: FRAN4 ~ 1 + (1 | CLASSE2)
## modelEF1: FRAN4 ~ FRAN1 + (1 | CLASSE2)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## modelM0 3 1290.7 1303.1 -642.36 1284.7
## modelEF1 4 1024.8 1041.4 -508.42 1016.8 267.88 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Le modelEF1 a une déviance inférieure à celle du modelM0 (1016,8 contre 1284,7) et la différence est significative : le modelEF1 est donc significativement meilleur que le modelM0.
# Empty model with two grouping terms: a random intercept for each
# ECOLE2:CLASSE2 combination and one for each ECOLE2.
model02N <- lmer(
  FRAN4 ~ 1 + (1 | ECOLE2:CLASSE2) + (1 | ECOLE2),
  REML = FALSE, # `FALSE` rather than the reassignable shorthand `F`
  data = TS
)
summary(model02N, correlation = FALSE)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: FRAN4 ~ 1 + (1 | ECOLE2:CLASSE2) + (1 | ECOLE2)
## Data: TS
##
## AIC BIC logLik deviance df.resid
## 1291.8 1308.3 -641.9 1283.8 458
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.95930 -0.67454 -0.01715 0.63499 2.74386
##
## Random effects:
## Groups Name Variance Std.Dev.
## ECOLE2:CLASSE2 (Intercept) 0.06143 0.2478
## ECOLE2 (Intercept) 0.03486 0.1867
## Residual 0.88971 0.9432
## Number of obs: 462, groups: ECOLE2:CLASSE2, 28; ECOLE2, 16
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.01079 0.08398 15.27943 0.128 0.899
ranova(model02N) # test of the random effects
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## FRAN4 ~ (1 | ECOLE2:CLASSE2) + (1 | ECOLE2)
## npar logLik AIC LRT Df Pr(>Chisq)
## <none> 4 -641.90 1291.8
## (1 | ECOLE2:CLASSE2) 3 -644.87 1295.7 5.9379 1 0.01482 *
## (1 | ECOLE2) 3 -642.36 1290.7 0.9200 1 0.33748
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Ici, le niveau classe est intéressant (car significatif) mais le niveau école n’est pas pertinent pour notre modèle.
# Compare the class-only empty model with the one that also has the ECOLE2 term.
anova(modelM0,model02N)
## Data: TS
## Models:
## modelM0: FRAN4 ~ 1 + (1 | CLASSE2)
## model02N: FRAN4 ~ 1 + (1 | ECOLE2:CLASSE2) + (1 | ECOLE2)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## modelM0 3 1290.7 1303.1 -642.36 1284.7
## model02N 4 1291.8 1308.3 -641.90 1283.8 0.92 1 0.3375
Même constat que précédemment, le deuxième modèle n’est pas significativement meilleur que le 1er. Une fois qu’on a pris en compte l’appartenance à la classe, il n’y a pas de différence entre les différentes écoles.
# Random-effects model: both the intercept and the FRAN1 slope vary across
# CLASSE2 groups, and their covariance is estimated.
modelEA1 <- lmer(
  FRAN4 ~ 1 + FRAN1 +
    (1 + FRAN1 | CLASSE2), # random part: 1 = intercept, FRAN1 = slope (+ covariance)
  REML = FALSE, # full maximum likelihood; `FALSE` because `F` can be reassigned
  data = TS, na.action = na.exclude
)
## boundary (singular) fit: see help('isSingular')
summary(modelEA1)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: FRAN4 ~ 1 + FRAN1 + (1 + FRAN1 | CLASSE2)
## Data: TS
##
## AIC BIC logLik deviance df.resid
## 1028.8 1053.6 -508.4 1016.8 456
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7838 -0.7019 -0.0551 0.6329 3.2889
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## CLASSE2 (Intercept) 1.258e-01 0.3546682
## FRAN1 9.686e-07 0.0009842 -1.00
## Residual 4.801e-01 0.6928876
## Number of obs: 462, groups: CLASSE2, 28
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.04065 0.07602 22.12931 -0.535 0.598
## FRAN1 0.66434 0.03449 443.00946 19.262 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## FRAN1 -0.034
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
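La covariance n’est pas identifiable : R n’arrive pas à l’estimer (ajustement singulier). Une corrélation qui vaut exactement 1 ou -1 est le signe de ce problème ; ici, la variance de la pente de FRAN1 est quasiment nulle.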
# Random intercept and random FRAN1 slope written as two separate terms, so
# that no intercept-slope correlation is estimated.
modelEA2 <- lmer(
  FRAN4 ~ FRAN1 +          # fixed effect, whatever the class
    (1 | CLASSE2) +        # variation of the intercept across classes
    (0 + FRAN1 | CLASSE2), # variation of the slopes across classes
  REML = FALSE, # `FALSE` rather than the reassignable shorthand `F`
  data = TS
)
## boundary (singular) fit: see help('isSingular')
summary(modelEA2, correlation = FALSE)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: FRAN4 ~ FRAN1 + (1 | CLASSE2) + (0 + FRAN1 | CLASSE2)
## Data: TS
##
## AIC BIC logLik deviance df.resid
## 1026.8 1047.5 -508.4 1016.8 457
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7859 -0.7017 -0.0558 0.6336 3.2900
##
## Random effects:
## Groups Name Variance Std.Dev.
## CLASSE2 (Intercept) 1.256e-01 3.543e-01
## CLASSE2.1 FRAN1 1.642e-10 1.281e-05
## Residual 4.801e-01 6.929e-01
## Number of obs: 462, groups: CLASSE2, 28
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.04056 0.07596 23.02821 -0.534 0.598
## FRAN1 0.66424 0.03448 457.75084 19.262 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Compare the random-intercept model with the one that adds a random slope.
anova(modelEF1, modelEA2)
## Data: TS
## Models:
## modelEF1: FRAN4 ~ FRAN1 + (1 | CLASSE2)
## modelEA2: FRAN4 ~ FRAN1 + (1 | CLASSE2) + (0 + FRAN1 | CLASSE2)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## modelEF1 4 1024.8 1041.4 -508.42 1016.8
## modelEA2 5 1026.8 1047.5 -508.42 1016.8 0 1 1
# Random-intercept model with the covariates of modelreg2 as fixed effects.
# The stray trailing comma after `data = TS` (an empty extra argument) has
# been removed, and `F` is spelled out as `FALSE`.
modelEF2 <- lmer(
  FRAN4 ~ FRAN1 + arti + inter + empl + ouvr + autr + FILLE + RDBLT +
    (1 | CLASSE2),
  REML = FALSE, data = TS
)
summary(modelEF2)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: FRAN4 ~ FRAN1 + arti + inter + empl + ouvr + autr + FILLE + RDBLT +
## (1 | CLASSE2)
## Data: TS
##
## AIC BIC logLik deviance df.resid
## 1001.8 1047.3 -489.9 979.8 451
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.61644 -0.73027 -0.06293 0.66747 3.13271
##
## Random effects:
## Groups Name Variance Std.Dev.
## CLASSE2 (Intercept) 0.1121 0.3348
## Residual 0.4439 0.6662
## Number of obs: 462, groups: CLASSE2, 28
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.13200 0.10588 96.68841 -1.247 0.21556
## FRAN1 0.58201 0.03576 459.42076 16.278 < 2e-16 ***
## arti -0.03023 0.13050 449.72898 -0.232 0.81690
## inter 0.20061 0.11668 444.74624 1.719 0.08626 .
## empl 0.10588 0.10921 442.11448 0.969 0.33284
## ouvr 0.00653 0.09624 450.46507 0.068 0.94593
## autr 0.02480 0.13567 445.11171 0.183 0.85503
## FILLE 0.20915 0.06621 448.37127 3.159 0.00169 **
## RDBLT -0.54487 0.11644 443.31226 -4.679 3.83e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) FRAN1 arti inter empl ouvr autr FILLE
## FRAN1 0.002
## arti -0.420 0.047
## inter -0.491 -0.113 0.392
## empl -0.505 -0.019 0.393 0.471
## ouvr -0.609 0.034 0.487 0.546 0.566
## autr -0.432 0.018 0.337 0.381 0.407 0.477
## FILLE -0.272 -0.165 -0.062 0.000 -0.034 0.008 0.004
## RDBLT -0.107 0.280 0.010 0.000 -0.028 0.004 -0.018 0.060
Le nouveau modèle est significativement meilleur que le précédent. À la fin, les élèves ayant redoublé ont un score inférieur d’environ 0,5 écart-type à celui des élèves n’ayant pas redoublé, toutes choses égales par ailleurs.
# Same random-intercept model with the experimentation indicator (EXPE) added.
# NOTE(review): this reuses the name `modelEF2`, so the fit without EXPE is
# overwritten; use a distinct name if the two fits must be compared later.
modelEF2 <- lmer(
  FRAN4 ~ FRAN1 + arti + inter + empl + ouvr + autr + FILLE + RDBLT + EXPE +
    (1 | CLASSE2),
  REML = FALSE, # `FALSE` rather than the reassignable shorthand `F`
  data = TS
)
summary(modelEF2)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: FRAN4 ~ FRAN1 + arti + inter + empl + ouvr + autr + FILLE + RDBLT +
## EXPE + (1 | CLASSE2)
## Data: TS
##
## AIC BIC logLik deviance df.resid
## 1001.6 1051.2 -488.8 977.6 450
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.58716 -0.71699 -0.03991 0.68003 3.08368
##
## Random effects:
## Groups Name Variance Std.Dev.
## CLASSE2 (Intercept) 0.09949 0.3154
## Residual 0.44419 0.6665
## Number of obs: 462, groups: CLASSE2, 28
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.20654 0.11467 73.33040 -1.801 0.07578 .
## FRAN1 0.58089 0.03569 460.37691 16.275 < 2e-16 ***
## arti -0.03169 0.13039 450.95324 -0.243 0.80807
## inter 0.20169 0.11663 445.85087 1.729 0.08444 .
## empl 0.10573 0.10918 442.96892 0.968 0.33335
## ouvr 0.01154 0.09619 451.56623 0.120 0.90455
## autr 0.02636 0.13562 446.10375 0.194 0.84598
## FILLE 0.20881 0.06617 449.63055 3.156 0.00171 **
## RDBLT -0.54191 0.11642 444.07261 -4.655 4.29e-06 ***
## EXPE 0.22433 0.14666 22.20696 1.530 0.14024
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) FRAN1 arti inter empl ouvr autr FILLE RDBLT
## FRAN1 0.000
## arti -0.390 0.047
## inter -0.450 -0.114 0.392
## empl -0.460 -0.019 0.393 0.471
## ouvr -0.574 0.034 0.487 0.545 0.566
## autr -0.406 0.017 0.337 0.381 0.407 0.477
## FILLE -0.248 -0.164 -0.062 0.001 -0.034 0.008 0.003
## RDBLT -0.107 0.280 0.010 -0.001 -0.028 0.004 -0.018 0.060
## EXPE -0.428 0.005 0.004 -0.008 -0.012 0.030 0.018 -0.006 0.019
Quand on utilise les MCO, on trouve un effet significatif de l’expérimentation. Ici, l’effet de l’expérimentation n’est plus significatif (p = 0,14). Cela est typique de la différence entre les MCO et les modèles multiniveaux. L’expérimentation est attachée aux classes : ce n’est pas un élément qui distingue l’individu dans la classe. Les MCO la traitent pourtant comme si elle variait entre des élèves indépendants et sous-estiment son erreur standard (0,073), alors que le modèle multiniveau l’évalue au niveau des 28 classes (erreur standard de 0,147, environ 22 degrés de liberté).
On cherche à évaluer s’il est intéressant de réduire le nombre d’élèves par classe (EXPE) pour maximiser le score final (p3sc_global), en fonction du score de début d’année (p1sc_global) et de la PCS. Il existe deux niveaux possibles de groupement :
La classe (classe)
L’école (ETAB)
# Empty model: a fixed intercept plus a random intercept for each `classe`.
# NOTE(review): `CP2` is not created anywhere in the visible part of the
# script; confirm that it is loaded before this point.
modelCP0 <- lmer(
  p3sc_global ~ 1 + (1 | classe),
  REML = FALSE, # `FALSE` rather than the reassignable shorthand `F`
  data = CP2
)
summary(modelCP0, correlation = FALSE)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: p3sc_global ~ 1 + (1 | classe)
## Data: CP2
##
## AIC BIC logLik deviance df.resid
## 2904.8 2919.7 -1449.4 2898.8 1054
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.67752 -0.61272 -0.00894 0.62776 3.04966
##
## Random effects:
## Groups Name Variance Std.Dev.
## classe (Intercept) 0.1901 0.4361
## Residual 0.7937 0.8909
## Number of obs: 1057, groups: classe, 144
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.03397 0.04604 143.77685 0.738 0.462
# Intraclass correlation coefficient of the empty model.
icc(modelCP0)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.193
## Unadjusted ICC: 0.193
# ANOVA-like table for the random effects (single term deletions).
ranova(modelCP0)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## p3sc_global ~ (1 | classe)
## npar logLik AIC LRT Df Pr(>Chisq)
## <none> 3 -1449.4 2904.8
## (1 | classe) 2 -1492.0 2988.0 85.239 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Ici, la variance interclasse représente environ 19 % de la variance totale (ICC = 0,193).
# Empty model with two random intercepts: one per `classe`, one per `ETAB`.
modelCP1 <- lmer(
  p3sc_global ~ 1 + (1 | classe) + (1 | ETAB),
  REML = FALSE, # `FALSE` rather than the reassignable shorthand `F`
  data = CP2
)
summary(modelCP1, correlation = FALSE)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: p3sc_global ~ 1 + (1 | classe) + (1 | ETAB)
## Data: CP2
##
## AIC BIC logLik deviance df.resid
## 2905.7 2925.5 -1448.8 2897.7 1053
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.71737 -0.62571 -0.00694 0.62549 3.04984
##
## Random effects:
## Groups Name Variance Std.Dev.
## classe (Intercept) 0.1556 0.3945
## ETAB (Intercept) 0.0357 0.1889
## Residual 0.7936 0.8909
## Number of obs: 1057, groups: classe, 144; ETAB, 68
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.03123 0.05045 46.18671 0.619 0.539
# Intraclass correlation coefficient with both grouping terms included.
icc(modelCP1)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.194
## Unadjusted ICC: 0.194
# Test each random-effect term by deleting it in turn.
ranova(modelCP1)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## p3sc_global ~ (1 | classe) + (1 | ETAB)
## npar logLik AIC LRT Df Pr(>Chisq)
## <none> 4 -1448.8 2905.7
## (1 | classe) 3 -1466.1 2938.3 34.594 1 4.061e-09 ***
## (1 | ETAB) 3 -1449.4 2904.8 1.121 1 0.2898
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
L’établissement n’a pas d’effet significatif. On ne va donc pas tenir compte du niveau de l’établissement dans l’analyse. Il faut retenir uniquement le niveau “élève” et le niveau “classe”.
On peut commencer à introduire des effets fixes :
# The global score (p3sc_global) is explained by the fixed effect of the first
# global score (p1sc_global), with a random intercept for each `classe`.
modelCP2 <- lmer(
  p3sc_global ~ p1sc_global + (1 | classe),
  REML = FALSE, # `FALSE` rather than the reassignable shorthand `F`
  data = CP2
)
summary(modelCP2, correlation = FALSE)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: p3sc_global ~ p1sc_global + (1 | classe)
## Data: CP2
##
## AIC BIC logLik deviance df.resid
## 2023.8 2043.7 -1007.9 2015.8 1053
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.0275 -0.6112 -0.0237 0.6472 3.7523
##
## Random effects:
## Groups Name Variance Std.Dev.
## classe (Intercept) 0.1374 0.3706
## Residual 0.3267 0.5715
## Number of obs: 1057, groups: classe, 144
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.04552 0.03595 143.50551 -1.266 0.207
## p1sc_global 0.75413 0.02016 1031.74495 37.416 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA-like table for the random effects (single term deletions).
ranova(modelCP2)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## p3sc_global ~ p1sc_global + (1 | classe)
## npar logLik AIC LRT Df Pr(>Chisq)
## <none> 4 -1007.9 2023.8
## (1 | classe) 3 -1092.8 2191.5 169.67 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Ici, on introduit un effet aléatoire:
# Random intercept and random p1sc_global slope per `classe`, with the
# correlation between intercepts and slopes estimated.
modelCP3 <- lmer(
  p3sc_global ~ p1sc_global + (1 + p1sc_global | classe),
  REML = FALSE, # `FALSE` rather than the reassignable shorthand `F`
  data = CP2
)
summary(modelCP3, correlation = FALSE)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: p3sc_global ~ p1sc_global + (1 + p1sc_global | classe)
## Data: CP2
##
## AIC BIC logLik deviance df.resid
## 1999.4 2029.2 -993.7 1987.4 1051
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2731 -0.6188 -0.0089 0.6353 2.8081
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## classe (Intercept) 0.1331 0.3648
## p1sc_global 0.0384 0.1960 0.19
## Residual 0.2946 0.5427
## Number of obs: 1057, groups: classe, 144
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.05622 0.03560 133.86235 -1.579 0.117
## p1sc_global 0.75521 0.02658 129.75968 28.414 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Test the random slope by deleting it from the random-effects term.
ranova(modelCP3)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## p3sc_global ~ p1sc_global + (1 + p1sc_global | classe)
## npar logLik AIC LRT Df
## <none> 6 -993.72 1999.4
## p1sc_global in (1 + p1sc_global | classe) 4 -1007.92 2023.8 28.407 2
## Pr(>Chisq)
## <none>
## p1sc_global in (1 + p1sc_global | classe) 6.785e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Random intercept and random slope entered as separate terms: the slope
# variance is not tied to the intercept (no correlation is estimated).
modelCP4 <- lmer(
  p3sc_global ~ p1sc_global +
    (1 | classe) +
    (0 + p1sc_global | classe),
  REML = FALSE, # `FALSE` rather than the reassignable shorthand `F`
  data = CP2
)
summary(modelCP4, correlation = FALSE)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: p3sc_global ~ p1sc_global + (1 | classe) + (0 + p1sc_global |
## classe)
## Data: CP2
##
## AIC BIC logLik deviance df.resid
## 1998.8 2023.6 -994.4 1988.8 1052
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4730 -0.6230 -0.0054 0.6211 2.8344
##
## Random effects:
## Groups Name Variance Std.Dev.
## classe (Intercept) 0.13502 0.3675
## classe.1 p1sc_global 0.03781 0.1944
## Residual 0.29473 0.5429
## Number of obs: 1057, groups: classe, 144
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.05751 0.03582 134.66575 -1.605 0.111
## p1sc_global 0.75386 0.02651 126.99177 28.439 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Test each random-effect term by deleting it in turn.
ranova(modelCP4)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## p3sc_global ~ p1sc_global + (1 | classe) + (0 + p1sc_global | classe)
## npar logLik AIC LRT Df
## <none> 5 -994.42 1998.8
## (1 | classe) 4 -1068.71 2145.4 148.591 1
## p1sc_global in (0 + p1sc_global | classe) 4 -1007.92 2023.8 27.008 1
## Pr(>Chisq)
## <none>
## (1 | classe) < 2.2e-16 ***
## p1sc_global in (0 + p1sc_global | classe) 2.026e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
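La différence de déviance entre les deux modèles (modelCP3 et modelCP4) est d’environ 1,4 (1988,8 − 1987,4). Avec 1 degré de liberté, il aurait fallu dépasser 3,84 pour que la différence soit significative au seuil de 5 %. Les deux modèles apportent donc la même information. On garde donc le modèle le plus parcimonieux.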
# Read the semicolon-delimited file, then drop every row that contains a
# missing value.
DATACM2 <- read_delim(
  file = "suivicm2_6e_2.csv",
  delim = ";",
  trim_ws = TRUE,
  escape_double = FALSE
)
DATACMNA <- na.omit(object = DATACM2)
Pour faire des modèles multiniveaux, il faut que nos données soient structurées de la même manière que la base DATACMNA. Le tableau est organisé par mesure et non par individu (une ligne par mesure).
# Empty model for the repeated measures: random intercept for each eleve_t3.
model0 <- lmer(
  sout_global ~ 1 + (1 | eleve_t3),
  REML = FALSE, # `FALSE` rather than the reassignable shorthand `F`
  data = DATACMNA
)
summary(model0, correlation = FALSE)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: sout_global ~ 1 + (1 | eleve_t3)
## Data: DATACMNA
##
## AIC BIC logLik deviance df.resid
## 645.1 654.6 -319.5 639.1 176
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.70860 -0.50995 0.04912 0.58871 2.24018
##
## Random effects:
## Groups Name Variance Std.Dev.
## eleve_t3 (Intercept) 1.481 1.217
## Residual 1.243 1.115
## Number of obs: 179, groups: eleve_t3, 62
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 13.0441 0.1758 62.0139 74.18 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
icc(model0) # 54% of the differences are differences between pupils
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.544
## Unadjusted ICC: 0.544
# Add a fixed effect of time to the random-intercept model.
modelT <- lmer(
  sout_global ~ time + (1 | eleve_t3),
  REML = FALSE, # `FALSE` rather than the reassignable shorthand `F`
  data = DATACMNA
)
summary(modelT, correlation = FALSE)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: sout_global ~ time + (1 | eleve_t3)
## Data: DATACMNA
##
## AIC BIC logLik deviance df.resid
## 642.1 654.8 -317.0 634.1 175
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.53769 -0.60823 0.09801 0.57612 2.07586
##
## Random effects:
## Groups Name Variance Std.Dev.
## eleve_t3 (Intercept) 1.475 1.215
## Residual 1.198 1.095
## Number of obs: 179, groups: eleve_t3, 62
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 13.26681 0.20086 102.43177 66.050 <2e-16 ***
## time -0.03795 0.01685 118.49337 -2.252 0.0262 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Intraclass correlation coefficient once time is in the model.
icc(modelT)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.552
## Unadjusted ICC: 0.545
Il y a un effet significatif du temps. Le modèle est plus adapté que le précédent (baisse de la déviance).
# Keep the fixed effect of time and let both the intercept and the time slope
# vary across eleve_t3, with their correlation estimated. (The earlier inline
# comment described only the fixed effect; it was copied from modelT.)
modelT2 <- lmer(
  sout_global ~ time + (1 + time | eleve_t3),
  REML = FALSE, # `FALSE` rather than the reassignable shorthand `F`
  data = DATACMNA
)
summary(modelT2, correlation = FALSE)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: sout_global ~ time + (1 + time | eleve_t3)
## Data: DATACMNA
##
## AIC BIC logLik deviance df.resid
## 634.1 653.2 -311.0 622.1 173
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.10327 -0.40109 0.02212 0.42288 2.00694
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## eleve_t3 (Intercept) 1.97751 1.4062
## time 0.01427 0.1195 -0.42
## Residual 0.68550 0.8279
## Number of obs: 179, groups: eleve_t3, 62
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 13.26756 0.20376 61.10499 65.114 <2e-16 ***
## time -0.03887 0.02000 61.28917 -1.944 0.0565 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Compare the random-intercept model with the random-slope model.
anova(modelT,modelT2)
## Data: DATACMNA
## Models:
## modelT: sout_global ~ time + (1 | eleve_t3)
## modelT2: sout_global ~ time + (1 + time | eleve_t3)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## modelT 4 642.07 654.82 -317.04 634.07
## modelT2 6 634.07 653.20 -311.04 622.07 12 2 0.002478 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Il faut enlever la covariance pour savoir si elle a un pouvoir explicatif dans notre modèle.
# Random intercept and random time slope as separate terms, which fixes the
# intercept-slope covariance at 0.
modelT3 <- lmer(
  sout_global ~ time +
    (1 | eleve_t3) +
    (0 + time | eleve_t3),
  REML = FALSE, # `FALSE` rather than the reassignable shorthand `F`
  data = DATACMNA
)
summary(modelT3, correlation = FALSE)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: sout_global ~ time + (1 | eleve_t3) + (0 + time | eleve_t3)
## Data: DATACMNA
##
## AIC BIC logLik deviance df.resid
## 636.6 652.6 -313.3 626.6 174
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.0055 -0.4402 0.0368 0.4587 1.7827
##
## Random effects:
## Groups Name Variance Std.Dev.
## eleve_t3 (Intercept) 1.497628 1.22378
## eleve_t3.1 time 0.008939 0.09454
## Residual 0.826301 0.90901
## Number of obs: 179, groups: eleve_t3, 62
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 13.26775 0.18881 76.99248 70.270 <2e-16 ***
## time -0.03786 0.01860 75.52243 -2.036 0.0453 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Compare the fits with and without the intercept-slope covariance.
anova(modelT2,modelT3)
## Data: DATACMNA
## Models:
## modelT3: sout_global ~ time + (1 | eleve_t3) + (0 + time | eleve_t3)
## modelT2: sout_global ~ time + (1 + time | eleve_t3)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## modelT3 5 636.62 652.56 -313.31 626.62
## modelT2 6 634.07 653.20 -311.04 622.07 4.5476 1 0.03296 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Le modèle (modelT2) précédent est plus adapté, on garde donc le paramètre de covariance.
# modelT2 with an additional random intercept for each numclasse.
modelT2b <- lmer(
  sout_global ~ time +
    (1 + time | eleve_t3) +
    (1 | numclasse),
  REML = FALSE, # `FALSE` rather than the reassignable shorthand `F`
  data = DATACMNA
)
## boundary (singular) fit: see help('isSingular')
summary(modelT2b, correlation = FALSE)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: sout_global ~ time + (1 + time | eleve_t3) + (1 | numclasse)
## Data: DATACMNA
##
## AIC BIC logLik deviance df.resid
## 636.1 658.4 -311.0 622.1 172
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.10328 -0.40109 0.02213 0.42288 2.00697
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## eleve_t3 (Intercept) 1.978e+00 1.406e+00
## time 1.427e-02 1.195e-01 -0.42
## numclasse (Intercept) 3.757e-10 1.938e-05
## Residual 6.855e-01 8.279e-01
## Number of obs: 179, groups: eleve_t3, 62; numclasse, 13
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 13.26756 0.20376 61.10201 65.113 <2e-16 ***
## time -0.03887 0.02000 61.28833 -1.944 0.0565 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Test each random-effect term by deleting it in turn.
ranova(modelT2b)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## sout_global ~ time + (1 + time | eleve_t3) + (1 | numclasse)
## npar logLik AIC LRT Df Pr(>Chisq)
## <none> 7 -311.04 636.07
## time in (1 + time | eleve_t3) 5 -317.04 644.07 12 2 0.002479 **
## (1 | numclasse) 6 -311.04 634.07 0 1 0.999913
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# modelT2 with resglob added as a fixed effect.
modelT4 <- lmer(
  sout_global ~ time + resglob + (1 + time | eleve_t3),
  REML = FALSE, # `FALSE` rather than the reassignable shorthand `F`
  data = DATACMNA
)
summary(modelT4, correlation = FALSE)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: sout_global ~ time + resglob + (1 + time | eleve_t3)
## Data: DATACMNA
##
## AIC BIC logLik deviance df.resid
## 631.9 654.2 -308.9 617.9 172
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.1044 -0.4494 0.0272 0.4339 2.1148
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## eleve_t3 (Intercept) 1.85478 1.3619
## time 0.01355 0.1164 -0.39
## Residual 0.67940 0.8243
## Number of obs: 179, groups: eleve_t3, 62
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 12.19391 0.55515 142.63318 21.965 <2e-16 ***
## time -0.02867 0.02027 67.33774 -1.415 0.162
## resglob 0.07223 0.03487 149.51327 2.072 0.040 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Load the semicolon-delimited data set used by the logistic models.
OR <- read_delim(
  file = "rossignol_var.csv",
  delim = ";",
  trim_ws = TRUE,
  escape_double = FALSE
)
## Rows: 18482 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## dbl (35): CONS, Numetab, Numeleve, Educprio, fille, pcs1, pcs2, Bourse, Txbo...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ici, on va centrer et réduire les variables de type échelle de mesure qu’on va utiliser dans l’analyse.
# Standardise three variables: subtract the mean and divide by the standard
# deviation. Results are stored in new columns with the suffix `_C`.
OR$is_eleve_C <- with(OR, (is_eleve - mean(is_eleve)) / sd(is_eleve))
OR$MATHSEP_C <- with(OR, (MATHSEP - mean(MATHSEP)) / sd(MATHSEP))
OR$MATHSCC_C <- with(OR, (MATHSCC - mean(MATHSCC)) / sd(MATHSCC))
# Empty logistic multilevel model: a fixed intercept plus a random intercept
# for each Numetab.
Model0 <- glmer(
  formula = recom2GT ~ 1 + (1 | Numetab),
  data = OR,
  family = binomial(link = "logit")
)
summary(object = Model0)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: recom2GT ~ 1 + (1 | Numetab)
## Data: OR
##
## AIC BIC logLik deviance df.resid
## 23310.7 23326.4 -11653.4 23306.7 18480
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.0717 -1.2370 0.6268 0.7308 1.1248
##
## Random effects:
## Groups Name Variance Std.Dev.
## Numetab (Intercept) 0.1346 0.3669
## Number of obs: 18482, groups: Numetab, 173
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.6758 0.0324 20.86 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Intraclass correlation coefficient of the empty logistic model.
icc(Model0)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.039
## Unadjusted ICC: 0.039
D’un collège à l’autre, il y a des fluctuations de la proportion d’élèves auxquels on recommande d’aller en seconde GT.
Ici, on regarde s’il a été proposé aux élèves de faire une seconde GT.
# Logistic random-intercept model with inten2GT added as a fixed effect.
Model1 <- glmer(
  formula = recom2GT ~ inten2GT + (1 | Numetab),
  data = OR,
  family = binomial(link = "logit")
)
summary(object = Model1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: recom2GT ~ inten2GT + (1 | Numetab)
## Data: OR
##
## AIC BIC logLik deviance df.resid
## 13398.9 13422.3 -6696.4 13392.9 18479
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.2041 -0.3604 0.2803 0.3576 5.1161
##
## Random effects:
## Groups Name Variance Std.Dev.
## Numetab (Intercept) 0.346 0.5882
## Number of obs: 18482, groups: Numetab, 173
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.82569 0.05994 -30.46 <2e-16 ***
## inten2GT 4.06166 0.05310 76.49 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## inten2GT -0.531
# Same fixed part as Model1, with the inten2GT slope (and the intercept)
# allowed to vary across Numetab.
Model2 <- glmer(
  formula = recom2GT ~ inten2GT + (inten2GT | Numetab),
  data = OR,
  family = binomial(link = "logit")
)
summary(object = Model2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: recom2GT ~ inten2GT + (inten2GT | Numetab)
## Data: OR
##
## AIC BIC logLik deviance df.resid
## 13298.9 13338.0 -6644.5 13288.9 18477
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.5515 -0.3320 0.2688 0.3696 4.9018
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Numetab (Intercept) 0.6646 0.8152
## inten2GT 0.7751 0.8804 -0.68
## Number of obs: 18482, groups: Numetab, 173
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.94041 0.07845 -24.73 <2e-16 ***
## inten2GT 4.21533 0.08984 46.92 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## inten2GT -0.730
# Compare the random-intercept fit with the random-slope fit.
anova(Model1,Model2)
## Data: OR
## Models:
## Model1: recom2GT ~ inten2GT + (1 | Numetab)
## Model2: recom2GT ~ inten2GT + (inten2GT | Numetab)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## Model1 3 13399 13422 -6696.4 13393
## Model2 5 13299 13338 -6644.5 13289 103.96 2 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Introduction des notes de maths.
# Model2 with the two standardised maths marks added as fixed effects:
#   MATHSEP_C: mark from the brevet exam
#   MATHSCC_C: continuous-assessment mark, which depends on the school
Model5a <- glmer(
  formula = recom2GT ~ inten2GT + MATHSEP_C + MATHSCC_C +
    (inten2GT | Numetab),
  data = OR,
  family = binomial(link = "logit")
)
summary(object = Model5a)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: recom2GT ~ inten2GT + MATHSEP_C + MATHSCC_C + (inten2GT | Numetab)
## Data: OR
##
## AIC BIC logLik deviance df.resid
## 8629.1 8683.9 -4307.6 8615.1 18475
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -59.986 -0.114 0.056 0.200 16.254
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Numetab (Intercept) 1.834 1.354
## inten2GT 1.856 1.363 -0.79
## Number of obs: 18482, groups: Numetab, 173
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.97747 0.11959 -8.173 3e-16 ***
## inten2GT 3.88884 0.13026 29.855 <2e-16 ***
## MATHSEP_C 0.48194 0.05758 8.371 <2e-16 ***
## MATHSCC_C 2.22767 0.06335 35.167 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) int2GT MATHSE
## inten2GT -0.763
## MATHSEP_C 0.067 0.006
## MATHSCC_C -0.007 0.173 -0.549