Installing and loading packages

#install.packages("foreign") # needed to load an SPSS file
#install.packages("psych")
#install.packages("broom")
#install.packages("gclus")
library(broom)
library(psych)
library(foreign)
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## %+%():    ggplot2, psych
## alpha():  ggplot2, psych
## filter(): dplyr, stats
## lag():    dplyr, stats
library(stringr)
library(gclus)
## Loading required package: cluster

Reading Data set

# Import the SPSS file as a data frame (read.spss() comes from the foreign package).
dataset <- read.spss(
  file = "/Users/levi.brackman/Downloads/GG T0 Motivation Scale (1).sav",
  to.data.frame = TRUE
)

View the first few lines of the data

# Preview the first three rows of the raw import.
head(x = dataset, n = 3)
##              StudentID   Semester       t0.MvEn_1       t0.MvEn_2
## 1 2016Yr9001           Semester 2 Agree a lot - 6 Agree a lot - 6
## 2 2016Yr9003           Semester 1       Agree - 5       Agree - 5
## 3 2016Yr9003           Semester 2       Agree - 5       Agree - 5
##            t0.MvEn_3             t0.MvEn_4          t0.MvEn_5
## 1    Agree a lot - 6       Agree a lot - 6    Agree a lot - 6
## 2 Agree a little - 4 Disagree a little - 3          Agree - 5
## 3          Agree - 5    Agree a little - 4 Agree a little - 4
##               t0.MvEn_6 t0.MvEn_7r t0.MvEn_8r t0.MvEn_9r t0.MvEn_10r
## 1 Disagree a little - 3          1          1          1           1
## 2    Agree a little - 4          2          3          4           4
## 3             Agree - 5          3          4          4           3
##   t0.MvEn_11r t0.MvEnThought_Tot t0.MvEnBeh_Tot t0.MvEnMuff_Tot
## 1           1           6.000000       5.000000        1.000000
## 2           5           4.666667       4.000000        3.000000
## 3           5           5.000000       4.333333        3.666667
##   t0.MvEnGuzz_Tot
## 1             1.0
## 2             4.5
## 3             4.0

There are extra characters in the columns that R won't be able to read as numbers, as well as columns that are not needed for the analysis. So I now select the relevant variables, remove the non-numeric characters from the values, and save the result as newdataset

# Keep only the eleven item columns (drop the identifiers and the pre-computed
# sub-scale totals), then strip every non-digit character so that a labelled
# response such as "Agree a lot - 6" becomes "6". The values are still
# character at this point; they are converted to numeric further down.
# A single column-wise call replaces eleven copy-pasted gsub() lines, so a
# typo in one column name can no longer leave an item uncleaned.
# mutate_all() with a plain function works on both old and current dplyr.
newdataset <- dataset %>%
  select(-c(StudentID, Semester, t0.MvEnThought_Tot, t0.MvEnBeh_Tot,
            t0.MvEnMuff_Tot, t0.MvEnGuzz_Tot)) %>%
  mutate_all(function(item) gsub("[^0-9]", "", item))
  

#write_csv(newdataset, "derdataset.csv")
#names(dataset)

Now view dataset again

# Preview the first three rows of the cleaned item columns.
head(x = newdataset, n = 3)
##   t0.MvEn_1 t0.MvEn_2 t0.MvEn_3 t0.MvEn_4 t0.MvEn_5 t0.MvEn_6 t0.MvEn_7r
## 1         6         6         6         6         6         3          1
## 2         5         5         4         3         5         4          2
## 3         5         5         5         4         4         5          3
##   t0.MvEn_8r t0.MvEn_9r t0.MvEn_10r t0.MvEn_11r
## 1          1          1           1           1
## 2          3          4           4           5
## 3          4          4           3           5

Column names have dots in them (not a good convention), so I rename the variables without dots

# Drop the leading "t0." from every column name
# (t0.MvEn_1 -> MvEn_1, ..., t0.MvEn_11r -> MvEn_11r).
names(newdataset) <- sub("^t0\\.", "", names(newdataset))

Convert all items to numeric

# Convert every item column to numeric, one column at a time.
# apply() on a data frame first coerces the whole frame to a character
# matrix; lapply() works column by column and `newdataset[] <-` keeps the
# data-frame structure and column names. as.character() makes the conversion
# safe for factor columns too (as.numeric() on a factor returns level codes).
newdataset[] <- lapply(
  newdataset,
  function(item) as.numeric(as.character(item))
)

Find the maximum response value in order to reverse-score items

# Largest observed response on item 1, ignoring missing values.
max(newdataset[["MvEn_1"]], na.rm = TRUE)
## [1] 6

Get list of variables without quotes

# Print the column names without surrounding quotes.
print(names(newdataset), quote = FALSE)
##  [1] MvEn_1   MvEn_2   MvEn_3   MvEn_4   MvEn_5   MvEn_6   MvEn_7r 
##  [8] MvEn_8r  MvEn_9r  MvEn_10r MvEn_11r

Reverse score columns

# Reverse-score the items whose names end in "r".
# On a scale running from scale_min to scale_max the mirror image of a
# response x is (scale_min + scale_max) - x, here 7 - x, so 1 <-> 6,
# 2 <-> 5 and 3 <-> 4. The earlier 6 - x mapped the responses onto 0-5,
# which put the reversed items on a different scale from items 1-6.
# Correlations and factor loadings do not change under this constant shift.
scale_min <- 1 # assumed lowest response option; confirm against the codebook
scale_max <- 6 # maximum observed response (see the max() check above)
cols <- c("MvEn_7r", "MvEn_8r", "MvEn_9r", "MvEn_10r", "MvEn_11r")
newdataset[cols] <- lapply(newdataset[cols], function(item) {
  (scale_min + scale_max) - item
})
# check
str(newdataset)
## 'data.frame':    346 obs. of  11 variables:
##  $ MvEn_1  : num  6 5 5 5 4 6 5 5 5 6 ...
##  $ MvEn_2  : num  6 5 5 6 3 5 5 4 5 6 ...
##  $ MvEn_3  : num  6 4 5 4 4 6 2 3 5 6 ...
##  $ MvEn_4  : num  6 3 4 3 3 5 5 2 4 6 ...
##  $ MvEn_5  : num  6 5 4 3 3 5 4 2 5 6 ...
##  $ MvEn_6  : num  3 4 5 5 4 5 5 4 4 6 ...
##  $ MvEn_7r : num  6 5 4 4 3 1 6 5 4 6 ...
##  $ MvEn_8r : num  6 4 3 1 3 2 6 4 5 6 ...
##  $ MvEn_9r : num  6 3 3 1 3 2 2 2 5 6 ...
##  $ MvEn_10r: num  6 3 4 4 4 2 3 5 5 6 ...
##  $ MvEn_11r: num  6 2 2 1 4 1 3 3 5 1 ...

Correlations

The darker squares on the plot show strong correlations

# First drop every row that has at least one missing item.
newdataset <- newdataset[complete.cases(newdataset), ]
# Absolute pairwise correlations between the items.
obs <- abs(cor(newdataset))
obs
##              MvEn_1     MvEn_2    MvEn_3     MvEn_4     MvEn_5     MvEn_6
## MvEn_1   1.00000000 0.37941338 0.3762868 0.36680462 0.40900350 0.50961810
## MvEn_2   0.37941338 1.00000000 0.4868319 0.32994776 0.40589754 0.44848627
## MvEn_3   0.37628675 0.48683190 1.0000000 0.52939225 0.48555172 0.40617355
## MvEn_4   0.36680462 0.32994776 0.5293923 1.00000000 0.75324414 0.54633631
## MvEn_5   0.40900350 0.40589754 0.4855517 0.75324414 1.00000000 0.63413629
## MvEn_6   0.50961810 0.44848627 0.4061736 0.54633631 0.63413629 1.00000000
## MvEn_7r  0.05987714 0.08146159 0.1306251 0.18066838 0.12596689 0.08172025
## MvEn_8r  0.07838723 0.09889988 0.1093530 0.08484913 0.07685958 0.13114125
## MvEn_9r  0.34956800 0.11606995 0.0702703 0.01210666 0.03315291 0.20448360
## MvEn_10r 0.15499155 0.09247661 0.1275561 0.14758115 0.19277654 0.26231508
## MvEn_11r 0.30022373 0.19640993 0.1306738 0.14082585 0.16309319 0.35105917
##             MvEn_7r    MvEn_8r    MvEn_9r   MvEn_10r  MvEn_11r
## MvEn_1   0.05987714 0.07838723 0.34956800 0.15499155 0.3002237
## MvEn_2   0.08146159 0.09889988 0.11606995 0.09247661 0.1964099
## MvEn_3   0.13062514 0.10935300 0.07027030 0.12755613 0.1306738
## MvEn_4   0.18066838 0.08484913 0.01210666 0.14758115 0.1408259
## MvEn_5   0.12596689 0.07685958 0.03315291 0.19277654 0.1630932
## MvEn_6   0.08172025 0.13114125 0.20448360 0.26231508 0.3510592
## MvEn_7r  1.00000000 0.26752603 0.12676569 0.17960163 0.2159479
## MvEn_8r  0.26752603 1.00000000 0.36954401 0.31415069 0.3854279
## MvEn_9r  0.12676569 0.36954401 1.00000000 0.46665383 0.5130856
## MvEn_10r 0.17960163 0.31415069 0.46665383 1.00000000 0.5049357
## MvEn_11r 0.21594791 0.38542795 0.51308560 0.50493567 1.0000000
# Some strong correlations, which indicates that there are items that hang
# well together.
# dmat.color() turns the absolute correlations into panel colours and
# order.single() supplies the variable ordering passed to cpairs().
colors <- dmat.color(obs)
ordered <- order.single(cor(newdataset))
cpairs(data = newdataset, order = ordered, panel.colors = colors)

Check the factors

Parallel analysis suggests two, perhaps three, factors. The scree plot suggests two. We also look at the eigenvalues: two components have an eigenvalue greater than 1, which according to the Kaiser criterion means at least two factors.

# Parallel analysis of the factor solution using maximum-likelihood factoring.
parallel <- fa.parallel(x = newdataset, fm = "ml", fa = "fa")

## Parallel analysis suggests that the number of factors =  3  and the number of components =  NA
# Factor eigenvalues returned by the parallel analysis.
parallel[["fa.values"]]
##  [1]  3.20741698  1.31986563  0.26388954  0.11659036 -0.01813535
##  [6] -0.09772622 -0.15827505 -0.21374820 -0.30943751 -0.45869718
## [11] -0.52552008
# Principal components computed on the correlation matrix; the summary lists
# each component's standard deviation and share of variance.
parallel2 <- princomp(x = newdataset, cor = TRUE)
summary(parallel2)
## Importance of components:
##                           Comp.1    Comp.2     Comp.3     Comp.4
## Standard deviation     1.9416218 1.4729576 0.96520913 0.91179133
## Proportion of Variance 0.3427178 0.1972367 0.08469352 0.07557849
## Cumulative Proportion  0.3427178 0.5399545 0.62464800 0.70022650
##                            Comp.5     Comp.6     Comp.7     Comp.8
## Standard deviation     0.85539696 0.79420330 0.74458489 0.66526095
## Proportion of Variance 0.06651854 0.05734172 0.05040061 0.04023383
## Cumulative Proportion  0.76674504 0.82408675 0.87448736 0.91472119
##                            Comp.9    Comp.10    Comp.11
## Standard deviation     0.61227675 0.58401667 0.47128403
## Proportion of Variance 0.03408026 0.03100686 0.02019169
## Cumulative Proportion  0.94880144 0.97980831 1.00000000
plot(parallel2) # results show at least two factors

EFA (Exploratory Factor Analysis) & Fit indices.

Let’s now try and see what actual items make up the individual factors using EFA

Here are the fit indices we look at:

Tucker–Lewis Index (TLI)

Comparative Fit Index (CFI)

Root Mean Squared Error of Approximation (RMSEA)

χ2 (Chi Square) test statistic (while taking into account the fact that its results are affected by the number of parameters in the model as well as the sample size)

TLI and CFI can score anywhere from 0 to 1 where results > .90 are considered an acceptable fit and > .95 represents an excellent fit to the data.

Marsh and colleagues (2010) suggest that while no golden rule exists for RMSEA, a score < .06 reflects a reasonable fit, whilst a RMSEA > .10 represents a poor fit to the data.

Two Factors

# Two-factor exploratory model: maximum-likelihood extraction with an
# oblimin rotation.
twofactor <- fa(r = newdataset, nfactors = 2, rotate = "oblimin", fm = "ml")
## Loading required namespace: GPArotation
print(twofactor)
## Factor Analysis using method =  ml
## Call: fa(r = newdataset, nfactors = 2, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##            ML1   ML2   h2   u2 com
## MvEn_1    0.45 -0.27 0.34 0.66 1.6
## MvEn_2    0.48 -0.11 0.27 0.73 1.1
## MvEn_3    0.61 -0.01 0.37 0.63 1.0
## MvEn_4    0.85  0.09 0.69 0.31 1.0
## MvEn_5    0.89  0.05 0.76 0.24 1.0
## MvEn_6    0.67 -0.22 0.58 0.42 1.2
## MvEn_7r   0.25  0.32 0.13 0.87 1.9
## MvEn_8r   0.02  0.50 0.25 0.75 1.0
## MvEn_9r   0.08  0.73 0.51 0.49 1.0
## MvEn_10r -0.07  0.61 0.40 0.60 1.0
## MvEn_11r -0.06  0.75 0.59 0.41 1.0
## 
##                        ML1  ML2
## SS loadings           2.87 2.01
## Proportion Var        0.26 0.18
## Cumulative Var        0.26 0.44
## Proportion Explained  0.59 0.41
## Cumulative Proportion 0.59 1.00
## 
##  With factor correlations of 
##       ML1   ML2
## ML1  1.00 -0.24
## ML2 -0.24  1.00
## 
## Mean item complexity =  1.2
## Test of the hypothesis that 2 factors are sufficient.
## 
## The degrees of freedom for the null model are  55  and the objective function was  3.89 with Chi Square of  1298.49
## The degrees of freedom for the model are 34  and the objective function was  0.43 
## 
## The root mean square of the residuals (RMSR) is  0.05 
## The df corrected root mean square of the residuals is  0.07 
## 
## The harmonic number of observations is  339 with the empirical chi square  105.47  with prob <  3e-09 
## The total number of observations was  339  with Likelihood Chi Square =  142.3  with prob <  3.3e-15 
## 
## Tucker Lewis Index of factoring reliability =  0.859
## RMSEA index =  0.098  and the 90 % confidence intervals are  0.081 0.114
## BIC =  -55.79
## Fit based upon off diagonal values = 0.97
## Measures of factor score adequacy             
##                                                 ML1  ML2
## Correlation of scores with factors             0.94 0.89
## Multiple R square of scores with factors       0.89 0.79
## Minimum correlation of possible factor scores  0.78 0.59
# Comparative Fit Index from the model and null (baseline) chi-squares:
#   CFI = 1 - max(chisq_m - df_m, 0) / max(chisq_m - df_m, chisq_0 - df_0, 0)
# The max() terms keep the index inside [0, 1]; the unclamped ratio exceeds 1
# whenever the model chi-square is smaller than its degrees of freedom.
# local() keeps the intermediate values out of the global environment.
local({
  model_excess <- max(twofactor$STATISTIC - twofactor$dof, 0)
  null_excess <- max(twofactor$null.chisq - twofactor$null.dof, model_excess)
  # Both excesses zero means the model fits at least as well as expected.
  cfi <- if (null_excess > 0) 1 - model_excess / null_excess else 1
  paste("CFI:", round(cfi, 4))
})
## [1] "CFI: 0.9129"

Three Factors

# Three-factor exploratory model: maximum-likelihood extraction with an
# oblimin rotation.
threefactor <- fa(r = newdataset, nfactors = 3, rotate = "oblimin", fm = "ml")
print(threefactor)
## Factor Analysis using method =  ml
## Call: fa(r = newdataset, nfactors = 3, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##            ML1   ML2   ML3   h2   u2 com
## MvEn_1    0.15 -0.06  0.67 0.59 0.41 1.1
## MvEn_2    0.29  0.01  0.38 0.33 0.67 1.9
## MvEn_3    0.48  0.04  0.25 0.38 0.62 1.5
## MvEn_4    0.85  0.02 -0.02 0.71 0.29 1.0
## MvEn_5    0.89 -0.01  0.00 0.80 0.20 1.0
## MvEn_6    0.54 -0.16  0.28 0.59 0.41 1.7
## MvEn_7r   0.15  0.38  0.18 0.16 0.84 1.8
## MvEn_8r  -0.05  0.56  0.12 0.28 0.72 1.1
## MvEn_9r   0.19  0.64 -0.27 0.55 0.45 1.6
## MvEn_10r -0.15  0.70  0.15 0.48 0.52 1.2
## MvEn_11r -0.03  0.71 -0.09 0.57 0.43 1.0
## 
##                        ML1  ML2  ML3
## SS loadings           2.42 1.92 1.09
## Proportion Var        0.22 0.17 0.10
## Cumulative Var        0.22 0.39 0.49
## Proportion Explained  0.45 0.35 0.20
## Cumulative Proportion 0.45 0.80 1.00
## 
##  With factor correlations of 
##       ML1   ML2   ML3
## ML1  1.00 -0.16  0.43
## ML2 -0.16  1.00 -0.34
## ML3  0.43 -0.34  1.00
## 
## Mean item complexity =  1.4
## Test of the hypothesis that 3 factors are sufficient.
## 
## The degrees of freedom for the null model are  55  and the objective function was  3.89 with Chi Square of  1298.49
## The degrees of freedom for the model are 25  and the objective function was  0.23 
## 
## The root mean square of the residuals (RMSR) is  0.04 
## The df corrected root mean square of the residuals is  0.05 
## 
## The harmonic number of observations is  339 with the empirical chi square  48.11  with prob <  0.0036 
## The total number of observations was  339  with Likelihood Chi Square =  77.68  with prob <  2.6e-07 
## 
## Tucker Lewis Index of factoring reliability =  0.906
## RMSEA index =  0.08  and the 90 % confidence intervals are  0.059 0.099
## BIC =  -67.97
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                 ML1  ML2  ML3
## Correlation of scores with factors             0.94 0.89 0.83
## Multiple R square of scores with factors       0.89 0.79 0.69
## Minimum correlation of possible factor scores  0.78 0.59 0.39
# Comparative Fit Index from the model and null (baseline) chi-squares:
#   CFI = 1 - max(chisq_m - df_m, 0) / max(chisq_m - df_m, chisq_0 - df_0, 0)
# The max() terms keep the index inside [0, 1]; the unclamped ratio exceeds 1
# whenever the model chi-square is smaller than its degrees of freedom.
# local() keeps the intermediate values out of the global environment.
local({
  model_excess <- max(threefactor$STATISTIC - threefactor$dof, 0)
  null_excess <- max(
    threefactor$null.chisq - threefactor$null.dof,
    model_excess
  )
  # Both excesses zero means the model fits at least as well as expected.
  cfi <- if (null_excess > 0) 1 - model_excess / null_excess else 1
  paste("CFI:", round(cfi, 4))
})
## [1] "CFI: 0.9576"