## This is lavaan 0.6-19
## lavaan is FREE software! Please report any bugs.
data <- read.csv("data.csv",sep = ";")
head(data, 5)
##   school sex age address famsize Pstatus Medu Fedu    Mjob     Fjob reason
## 1     GP   F  18       U     GT3       A    4    4 at_home  teacher course
## 2     GP   F  17       U     GT3       T    1    1 at_home    other course
## 3     GP   F  15       U     LE3       T    1    1 at_home    other  other
## 4     GP   F  15       U     GT3       T    4    2  health services   home
## 5     GP   F  16       U     GT3       T    3    3   other    other   home
##   guardian traveltime studytime failures schoolsup famsup paid activities
## 1   mother          2         2        0       yes     no   no         no
## 2   father          1         2        0        no    yes   no         no
## 3   mother          1         2        3       yes     no  yes         no
## 4   mother          1         3        0        no    yes  yes        yes
## 5   father          1         2        0        no    yes  yes         no
##   nursery higher internet romantic famrel freetime goout Dalc Walc health
## 1     yes    yes       no       no      4        3     4    1    1      3
## 2      no    yes      yes       no      5        3     3    1    1      3
## 3     yes    yes      yes       no      4        3     2    2    3      3
## 4     yes    yes      yes      yes      3        2     2    1    1      5
## 5     yes    yes       no       no      4        3     2    1    2      5
##   absences G1 G2 G3
## 1        6  5  6  6
## 2        4  5  5  6
## 3       10  7  8 10
## 4        2 15 14 15
## 5        4  6 10 10

Attribute Information:

school - student’s school (binary: “GP” - Gabriel Pereira or “MS” - Mousinho da Silveira)
sex - student’s sex (binary: “F” - female or “M” - male)
age - student’s age (numeric: from 15 to 22)
address - student’s home address type (binary: “U” - urban or “R” - rural)
famsize - family size (binary: “LE3” - less or equal to 3 or “GT3” - greater than 3)
Pstatus - parent’s cohabitation status (binary: “T” - living together or “A” - apart)
Medu - mother’s education (numeric: 0 - none, 1 - primary education (4th grade), 2 – 5th to 9th grade, 3 – secondary education or 4 – higher education)
Fedu - father’s education (numeric: 0 - none, 1 - primary education (4th grade), 2 – 5th to 9th grade, 3 – secondary education or 4 – higher education)
Mjob - mother’s job (nominal: “teacher”, “health” care related, civil “services” (e.g. administrative or police), “at_home” or “other”)
Fjob - father’s job (nominal: “teacher”, “health” care related, civil “services” (e.g. administrative or police), “at_home” or “other”)
reason - reason to choose this school (nominal: close to “home”, school “reputation”, “course” preference or “other”)
guardian - student’s guardian (nominal: “mother”, “father” or “other”)
traveltime - home to school travel time (numeric: 1 - <15 min., 2 - 15 to 30 min., 3 - 30 min. to 1 hour, or 4 - >1 hour)
studytime - weekly study time (numeric: 1 - <2 hours, 2 - 2 to 5 hours, 3 - 5 to 10 hours, or 4 - >10 hours)
failures - number of past class failures (numeric: n if 1<=n<3, else 4)
schoolsup - extra educational support (binary: yes or no)
** famsup** - family educational support (binary: yes or no)
paid - extra paid classes within the course subject (Math or Portuguese) (binary: yes or no)
activities - extra-curricular activities (binary: yes or no)
nursery - attended nursery school (binary: yes or no)
higher - wants to take higher education (binary: yes or no)
internet - Internet access at home (binary: yes or no)
romantic - with a romantic relationship (binary: yes or no)
famrel - quality of family relationships (numeric: from 1 - very bad to 5 - excellent)
freetime - free time after school (numeric: from 1 - very low to 5 - very high)
goout - going out with friends (numeric: from 1 - very low to 5 - very high)
Dalc - workday alcohol consumption (numeric: from 1 - very low to 5 - very high)
Walc - weekend alcohol consumption (numeric: from 1 - very low to 5 - very high)
health - current health status (numeric: from 1 - very bad to 5 - very good)
absences - number of school absences (numeric: from 0 to 93)

G1 - first period grade (numeric: from 0 to 20)
G2 - second period grade (numeric: from 0 to 20)
G3 - final grade (numeric: from 0 to 20, output target)

lets check for missing values

colSums(is.na(data))
##     school        sex        age    address    famsize    Pstatus       Medu 
##          0          0          0          0          0          0          0 
##       Fedu       Mjob       Fjob     reason   guardian traveltime  studytime 
##          0          0          0          0          0          0          0 
##   failures  schoolsup     famsup       paid activities    nursery     higher 
##          0          0          0          0          0          0          0 
##   internet   romantic     famrel   freetime      goout       Dalc       Walc 
##          0          0          0          0          0          0          0 
##     health   absences         G1         G2         G3 
##          0          0          0          0          0

No missing values in our dataset.

Confirmatory Factor Analysis:

lets build the CFA model.

CFAmodel <- 'Parents_Demo=~ Medu+Fedu +Mjob+Fjob
             Support_Act=~ schoolsup+famsup+paid+activities
            Personal_life=~romantic+famrel+freetime+goout'
CFAmodel.fitted <- cfa(model = CFAmodel, data = data, std.lv = TRUE,
                       missing = "fiml", mimic = "Mplus")
summary(CFAmodel.fitted)
## lavaan 0.6-19 ended normally after 48 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        39
## 
##   Number of observations                           395
##   Number of missing patterns                         1
## 
## Model Test User Model:
##                                                       
##   Test statistic                                68.453
##   Degrees of freedom                                51
##   P-value (Chi-square)                           0.052
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Observed
##   Observed information based on                Hessian
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   Parents_Demo =~                                     
##     Medu              1.063    0.067   15.873    0.000
##     Fedu              0.696    0.060   11.656    0.000
##     Mjob              0.571    0.063    9.010    0.000
##     Fjob              0.147    0.048    3.041    0.002
##   Support_Act =~                                      
##     schoolsup        -0.028    0.026   -1.086    0.278
##     famsup           -0.305    0.070   -4.385    0.000
##     paid             -0.234    0.055   -4.243    0.000
##     activities       -0.011    0.036   -0.306    0.759
##   Personal_life =~                                    
##     romantic          0.009    0.031    0.275    0.783
##     famrel           -0.179    0.069   -2.592    0.010
##     freetime         -0.738    0.232   -3.180    0.001
##     goout            -0.428    0.143   -2.993    0.003
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   Parents_Demo ~~                                     
##     Support_Act      -0.315    0.082   -3.839    0.000
##     Personal_life    -0.057    0.073   -0.777    0.437
##   Support_Act ~~                                      
##     Personal_life    -0.044    0.099   -0.439    0.661
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .Medu              2.749    0.055   49.977    0.000
##    .Fedu              2.522    0.055   46.111    0.000
##    .Mjob              3.170    0.062   51.386    0.000
##    .Fjob              3.281    0.043   75.609    0.000
##    .schoolsup         1.129    0.017   66.922    0.000
##    .famsup            1.613    0.025   65.794    0.000
##    .paid              1.458    0.025   58.167    0.000
##    .activities        1.509    0.025   59.985    0.000
##    .romantic          1.334    0.024   56.214    0.000
##    .famrel            3.944    0.045   87.537    0.000
##    .freetime          3.235    0.050   64.458    0.000
##    .goout             3.109    0.056   55.571    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .Medu              0.065    0.114    0.568    0.570
##    .Fedu              0.696    0.069   10.086    0.000
##    .Mjob              1.177    0.088   13.312    0.000
##    .Fjob              0.722    0.052   13.941    0.000
##    .schoolsup         0.112    0.008   13.912    0.000
##    .famsup            0.144    0.042    3.456    0.001
##    .paid              0.194    0.027    7.151    0.000
##    .activities        0.250    0.018   14.046    0.000
##    .romantic          0.222    0.016   14.050    0.000
##    .famrel            0.770    0.058   13.295    0.000
##    .freetime          0.450    0.339    1.328    0.184
##    .goout             1.053    0.136    7.751    0.000
##     Parents_Demo      1.000                           
##     Support_Act       1.000                           
##     Personal_life     1.000

lets calculate the standardized estimates.

summary(CFAmodel.fitted, standardized = TRUE)
## lavaan 0.6-19 ended normally after 48 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        39
## 
##   Number of observations                           395
##   Number of missing patterns                         1
## 
## Model Test User Model:
##                                                       
##   Test statistic                                68.453
##   Degrees of freedom                                51
##   P-value (Chi-square)                           0.052
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Observed
##   Observed information based on                Hessian
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Parents_Demo =~                                                       
##     Medu              1.063    0.067   15.873    0.000    1.063    0.972
##     Fedu              0.696    0.060   11.656    0.000    0.696    0.641
##     Mjob              0.571    0.063    9.010    0.000    0.571    0.465
##     Fjob              0.147    0.048    3.041    0.002    0.147    0.171
##   Support_Act =~                                                        
##     schoolsup        -0.028    0.026   -1.086    0.278   -0.028   -0.085
##     famsup           -0.305    0.070   -4.385    0.000   -0.305   -0.627
##     paid             -0.234    0.055   -4.243    0.000   -0.234   -0.469
##     activities       -0.011    0.036   -0.306    0.759   -0.011   -0.022
##   Personal_life =~                                                      
##     romantic          0.009    0.031    0.275    0.783    0.009    0.018
##     famrel           -0.179    0.069   -2.592    0.010   -0.179   -0.200
##     freetime         -0.738    0.232   -3.180    0.001   -0.738   -0.740
##     goout            -0.428    0.143   -2.993    0.003   -0.428   -0.385
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Parents_Demo ~~                                                       
##     Support_Act      -0.315    0.082   -3.839    0.000   -0.315   -0.315
##     Personal_life    -0.057    0.073   -0.777    0.437   -0.057   -0.057
##   Support_Act ~~                                                        
##     Personal_life    -0.044    0.099   -0.439    0.661   -0.044   -0.044
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .Medu              2.749    0.055   49.977    0.000    2.749    2.515
##    .Fedu              2.522    0.055   46.111    0.000    2.522    2.320
##    .Mjob              3.170    0.062   51.386    0.000    3.170    2.586
##    .Fjob              3.281    0.043   75.609    0.000    3.281    3.804
##    .schoolsup         1.129    0.017   66.922    0.000    1.129    3.367
##    .famsup            1.613    0.025   65.794    0.000    1.613    3.310
##    .paid              1.458    0.025   58.167    0.000    1.458    2.927
##    .activities        1.509    0.025   59.985    0.000    1.509    3.018
##    .romantic          1.334    0.024   56.214    0.000    1.334    2.828
##    .famrel            3.944    0.045   87.537    0.000    3.944    4.404
##    .freetime          3.235    0.050   64.458    0.000    3.235    3.243
##    .goout             3.109    0.056   55.571    0.000    3.109    2.796
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .Medu              0.065    0.114    0.568    0.570    0.065    0.054
##    .Fedu              0.696    0.069   10.086    0.000    0.696    0.590
##    .Mjob              1.177    0.088   13.312    0.000    1.177    0.783
##    .Fjob              0.722    0.052   13.941    0.000    0.722    0.971
##    .schoolsup         0.112    0.008   13.912    0.000    0.112    0.993
##    .famsup            0.144    0.042    3.456    0.001    0.144    0.607
##    .paid              0.194    0.027    7.151    0.000    0.194    0.780
##    .activities        0.250    0.018   14.046    0.000    0.250    1.000
##    .romantic          0.222    0.016   14.050    0.000    0.222    1.000
##    .famrel            0.770    0.058   13.295    0.000    0.770    0.960
##    .freetime          0.450    0.339    1.328    0.184    0.450    0.452
##    .goout             1.053    0.136    7.751    0.000    1.053    0.852
##     Parents_Demo      1.000                               1.000    1.000
##     Support_Act       1.000                               1.000    1.000
##     Personal_life     1.000                               1.000    1.000

Results

The above results suggest that majority of our observed factors are statistically significant having p value less than 0.05. However, there are a few factors which are not significant such as schoolsup, activities belonging to supoort_activities latent factor. Similarly, romantic is also not significant belonging to personal life latent factor.We can remove these and re run CFA just to see if our results improve.

r_squared_values <- inspect(CFAmodel.fitted, "rsquare")
print(r_squared_values)
##       Medu       Fedu       Mjob       Fjob  schoolsup     famsup       paid 
##      0.946      0.410      0.217      0.029      0.007      0.393      0.220 
## activities   romantic     famrel   freetime      goout 
##      0.000      0.000      0.040      0.548      0.148

The output above reports the explained variance for the respective observed variable. The higher the value the better it is and vice versa. Lets now visualize our 3 factor fitted model.

semPaths(CFAmodel.fitted, "par", weighted = FALSE, nCharNodes = 7, shapeMan = "rectangle",
         sizeMan = 8, sizeMan2 = 5)

Model Diagnostics

lets examine fit measures to see if our results are reliable.

# Extract a selection of fit measures
selected_fit_measures <- fitMeasures(CFAmodel.fitted, c("chisq","cfi", "rmsea", "srmr"))

# Print selected fit measures
selected_fit_measures
##  chisq    cfi  rmsea   srmr 
## 68.453  0.956  0.029  0.040

Model diagnostic results are convincing. Chi-square test statistic value is lower and its pvalue: p-value for > 0.05 which indicates a good fit, Comparative Fit Index, value close to 1 indicates a good fit.Root Mean Square Error of Approximation is less than 0.08 which also suggests a good model fit and similarly Standardized Root Mean Square Residual value as well.

Lets now re run the estimation by removing observed variables which have higher p values greater than 0.05. In this case i am removing romatic and activities as both had p value equal to 0.7

CFAmodel_1 <- 'Parents_Demo=~ Medu+Fedu +Mjob+Fjob
             Support_Act=~ schoolsup+famsup+paid
            Personal_life=~famrel+freetime+goout'
CFAmodel.fitted_1 <- cfa(model = CFAmodel_1, data = data, std.lv = TRUE,
                       missing = "fiml", mimic = "Mplus")
summary(CFAmodel.fitted_1, standardized = TRUE)
## lavaan 0.6-19 ended normally after 39 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        33
## 
##   Number of observations                           395
##   Number of missing patterns                         1
## 
## Model Test User Model:
##                                                       
##   Test statistic                                47.878
##   Degrees of freedom                                32
##   P-value (Chi-square)                           0.035
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Observed
##   Observed information based on                Hessian
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Parents_Demo =~                                                       
##     Medu              1.064    0.067   15.852    0.000    1.064    0.973
##     Fedu              0.696    0.060   11.640    0.000    0.696    0.640
##     Mjob              0.570    0.063    9.003    0.000    0.570    0.465
##     Fjob              0.147    0.048    3.035    0.002    0.147    0.171
##   Support_Act =~                                                        
##     schoolsup         0.028    0.026    1.075    0.282    0.028    0.084
##     famsup            0.307    0.072    4.256    0.000    0.307    0.629
##     paid              0.234    0.057    4.109    0.000    0.234    0.470
##   Personal_life =~                                                      
##     famrel            0.178    0.069    2.589    0.010    0.178    0.198
##     freetime          0.740    0.236    3.131    0.002    0.740    0.742
##     goout             0.427    0.145    2.945    0.003    0.427    0.384
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Parents_Demo ~~                                                       
##     Support_Act       0.311    0.081    3.836    0.000    0.311    0.311
##     Personal_life     0.057    0.074    0.777    0.437    0.057    0.057
##   Support_Act ~~                                                        
##     Personal_life    -0.046    0.099   -0.462    0.644   -0.046   -0.046
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .Medu              2.749    0.055   49.977    0.000    2.749    2.515
##    .Fedu              2.522    0.055   46.111    0.000    2.522    2.320
##    .Mjob              3.170    0.062   51.386    0.000    3.170    2.586
##    .Fjob              3.281    0.043   75.609    0.000    3.281    3.804
##    .schoolsup         1.129    0.017   66.922    0.000    1.129    3.367
##    .famsup            1.613    0.025   65.794    0.000    1.613    3.310
##    .paid              1.458    0.025   58.167    0.000    1.458    2.927
##    .famrel            3.944    0.045   87.537    0.000    3.944    4.404
##    .freetime          3.235    0.050   64.458    0.000    3.235    3.243
##    .goout             3.109    0.056   55.571    0.000    3.109    2.796
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .Medu              0.064    0.115    0.553    0.580    0.064    0.053
##    .Fedu              0.697    0.069   10.083    0.000    0.697    0.590
##    .Mjob              1.178    0.088   13.310    0.000    1.178    0.784
##    .Fjob              0.722    0.052   13.941    0.000    0.722    0.971
##    .schoolsup         0.112    0.008   13.910    0.000    0.112    0.993
##    .famsup            0.143    0.043    3.314    0.001    0.143    0.604
##    .paid              0.193    0.028    6.938    0.000    0.193    0.779
##    .famrel            0.770    0.058   13.312    0.000    0.770    0.961
##    .freetime          0.448    0.345    1.297    0.195    0.448    0.450
##    .goout             1.054    0.138    7.661    0.000    1.054    0.852
##     Parents_Demo      1.000                               1.000    1.000
##     Support_Act       1.000                               1.000    1.000
##     Personal_life     1.000                               1.000    1.000

lets compare models and see which one is better.

anova(CFAmodel.fitted, CFAmodel.fitted_1)
## Warning: lavaan->lavTestLRT():  
##    some models are based on a different set of observed variables
## 
## Chi-Squared Difference Test
## 
##                   Df     AIC     BIC  Chisq Chisq diff    RMSEA Df diff
## CFAmodel.fitted_1 32  9073.6  9204.9 47.878                            
## CFAmodel.fitted   51 10186.0 10341.2 68.453     20.574 0.014483      19
##                   Pr(>Chisq)
## CFAmodel.fitted_1           
## CFAmodel.fitted       0.3608

Model CFAmodel.fitted_1 seems to be better fitted than the first model as AIC value is low and also the Chisq value.