##load packages
library(psych)
library(GPArotation)
library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# data preparation
# Read the T1 survey export and keep only the eight APSI items.
data <- read.csv("~/Psychometric_study_data/allsurveysT1.csv")
APSI <- select(
  data,
  APSI_1, APSI_2, APSI_3, APSI_4, APSI_5, APSI_6, APSI_7, APSI_8
)
# Coerce every item to numeric BEFORE doing arithmetic on it. Going through
# as.character() means a column that was read as a factor is converted by its
# labels (not its internal integer codes), and a character column becomes
# numeric instead of making the subtraction below fail.
APSI[] <- lapply(APSI, function(item) as.numeric(as.character(item)))
# Reverse-score item 6 as 6 - x.
# NOTE(review): this presumes a 1-5 response scale -- confirm against the
# questionnaire.
APSI$APSI_6 <- 6 - APSI$APSI_6
APSI <- tbl_df(APSI)
APSI
## Source: local data frame [757 x 8]
##
## APSI_1 APSI_2 APSI_3 APSI_4 APSI_5 APSI_6 APSI_7 APSI_8
## 1 2 4 4 4 4 2 4 4
## 2 4 3 4 5 4 3 4 4
## 3 3 4 4 3 3 3 4 3
## 4 4 4 5 4 5 2 4 3
## 5 3 3 4 3 4 3 2 3
## 6 3 4 4 4 4 4 5 3
## 7 2 2 4 3 4 2 2 2
## 8 3 3 4 3 5 3 3 1
## 9 4 5 5 4 4 4 4 5
## 10 2 2 2 3 5 3 3 4
## .. ... ... ... ... ... ... ... ...
str(APSI)
## Classes 'tbl_df', 'tbl' and 'data.frame': 757 obs. of 8 variables:
## $ APSI_1: num 2 4 3 4 3 3 2 3 4 2 ...
## $ APSI_2: num 4 3 4 4 3 4 2 3 5 2 ...
## $ APSI_3: num 4 4 4 5 4 4 4 4 5 2 ...
## $ APSI_4: num 4 5 3 4 3 4 3 3 4 3 ...
## $ APSI_5: num 4 4 3 5 4 4 4 5 4 5 ...
## $ APSI_6: num 2 3 3 2 3 4 2 3 4 3 ...
## $ APSI_7: num 4 4 4 4 2 5 2 3 4 3 ...
## $ APSI_8: num 4 4 3 3 3 3 2 1 5 4 ...
# Relabel the eight items with their question numbers ("1" ... "8"), then keep
# only the respondents who answered every item.
names(APSI) <- as.character(1:8)
APSI <- APSI[complete.cases(APSI), ]
##EFA
##number of factors
##parallel analysis and scree plot
# Parallel analysis (with scree plot) using maximum-likelihood factoring,
# reporting factor eigenvalues only (fa = "fa").
# The comparison eigenvalues come from randomly generated data, so the
# suggested number of factors can change from run to run; fixing the seed
# makes the result reproducible.
set.seed(1234)
parallel <- fa.parallel(APSI, fm = "ml", fa = "fa")

## Parallel analysis suggests that the number of factors = 3 and the number of components = NA
# Two factors have eigenvalues greater than one; the scree plot suggests two factors. Parallel analysis suggests 3 factors (per the recorded fa.parallel output above; this note originally said 4).
#eigenvalues (kaiser)
# Print the factor eigenvalues stored in the parallel-analysis result.
parallel[["fa.values"]]
## [1] 3.858285840 0.416709155 0.194720428 0.036566419 0.000247384
## [6] -0.034992665 -0.116611798 -0.499783334
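# Kaiser criterion: eigenvalues over 1 = 2, over .7 = 2. NOTE: the factor eigenvalues printed above show only one value above either cutoff, so these counts should be re-checked.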
# Doing a principal components analysis to see how many factors there might be using that method.
# Deal with NAs (drop incomplete rows) when doing the principal components analysis.
# Principal components analysis on the item correlation matrix (cor = TRUE).
# The model is fitted once and the same object is reused for the printout,
# the summary and the scree plot; previously the identical model was fitted
# twice. Despite its name, `parallel2` holds the princomp fit, not a
# parallel-analysis result. na.omit() drops any row that still contains NA.
parallel2 <- princomp(na.omit(APSI), cor = TRUE)
parallel2
## Call:
## princomp(x = na.omit(APSI), cor = TRUE)
##
## Standard deviations:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7
## 2.0536963 1.1774408 0.8840305 0.6911068 0.5688568 0.5479494 0.5247605
## Comp.8
## 0.4874483
##
## 8 variables and 470 observations.
summary(parallel2)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4
## Standard deviation 2.0536963 1.1774408 0.88403052 0.69110683
## Proportion of Variance 0.5272086 0.1732958 0.09768875 0.05970358
## Cumulative Proportion 0.5272086 0.7005044 0.79819317 0.85789675
## Comp.5 Comp.6 Comp.7 Comp.8
## Standard deviation 0.56885679 0.54794940 0.5247605 0.48744826
## Proportion of Variance 0.04044976 0.03753107 0.0344217 0.02970073
## Cumulative Proportion 0.89834651 0.93587758 0.9702993 1.00000000
# Scree plot of the component variances; results show at least two factors.
plot(parallel2)

#simple structure
# Exploratory two-factor solution on all eight items: maximum-likelihood
# extraction with an oblimin rotation, then print the fitted object.
twofactor <- fa(
  r = APSI,
  nfactors = 2,
  rotate = "oblimin",
  fm = "ml"
)
print(twofactor)
## Factor Analysis using method = ml
## Call: fa(r = APSI, nfactors = 2, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML2 ML1 h2 u2 com
## 1 0.85 -0.04 0.73 0.273 1.0
## 2 0.77 0.06 0.60 0.399 1.0
## 3 0.01 1.00 1.00 0.005 1.0
## 4 0.83 0.01 0.68 0.315 1.0
## 5 0.67 0.17 0.48 0.517 1.1
## 6 -0.21 0.40 0.20 0.803 1.5
## 7 0.79 -0.01 0.63 0.371 1.0
## 8 0.86 -0.07 0.74 0.263 1.0
##
## ML2 ML1
## SS loadings 3.86 1.19
## Proportion Var 0.48 0.15
## Cumulative Var 0.48 0.63
## Proportion Explained 0.76 0.24
## Cumulative Proportion 0.76 1.00
##
## With factor correlations of
## ML2 ML1
## ML2 1.00 0.04
## ML1 0.04 1.00
##
## Mean item complexity = 1.1
## Test of the hypothesis that 2 factors are sufficient.
##
## The degrees of freedom for the null model are 28 and the objective function was 4.28 with Chi Square of 1991.24
## The degrees of freedom for the model are 13 and the objective function was 0.25
##
## The root mean square of the residuals (RMSR) is 0.05
## The df corrected root mean square of the residuals is 0.07
##
## The harmonic number of observations is 470 with the empirical chi square 60.63 with prob < 4e-08
## The total number of observations was 470 with MLE Chi Square = 113.77 with prob < 3.4e-18
##
## Tucker Lewis Index of factoring reliability = 0.889
## RMSEA index = 0.129 and the 90 % confidence intervals are 0.107 0.151
## BIC = 33.78
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## ML2 ML1
## Correlation of scores with factors 0.96 1.00
## Multiple R square of scores with factors 0.92 0.99
## Minimum correlation of possible factor scores 0.84 0.99
# Exploratory three-factor solution on all eight items: maximum-likelihood
# extraction with an oblimin rotation, then print the fitted object.
threefactor <- fa(
  r = APSI,
  nfactors = 3,
  rotate = "oblimin",
  fm = "ml"
)
print(threefactor)
## Factor Analysis using method = ml
## Call: fa(r = APSI, nfactors = 3, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML2 ML1 ML3 h2 u2 com
## 1 0.78 -0.04 0.10 0.72 0.285 1.0
## 2 0.62 0.05 0.21 0.60 0.399 1.2
## 3 -0.03 0.99 0.04 1.00 0.005 1.0
## 4 0.82 0.02 0.01 0.69 0.309 1.0
## 5 0.14 0.10 0.81 0.83 0.167 1.1
## 6 0.11 0.44 -0.46 0.32 0.680 2.1
## 7 0.86 0.01 -0.08 0.66 0.341 1.0
## 8 0.88 -0.05 -0.02 0.75 0.246 1.0
##
## ML2 ML1 ML3
## SS loadings 3.33 1.19 1.04
## Proportion Var 0.42 0.15 0.13
## Cumulative Var 0.42 0.57 0.70
## Proportion Explained 0.60 0.21 0.19
## Cumulative Proportion 0.60 0.81 1.00
##
## With factor correlations of
## ML2 ML1 ML3
## ML2 1.00 0.03 0.62
## ML1 0.03 1.00 0.10
## ML3 0.62 0.10 1.00
##
## Mean item complexity = 1.2
## Test of the hypothesis that 3 factors are sufficient.
##
## The degrees of freedom for the null model are 28 and the objective function was 4.28 with Chi Square of 1991.24
## The degrees of freedom for the model are 7 and the objective function was 0.08
##
## The root mean square of the residuals (RMSR) is 0.02
## The df corrected root mean square of the residuals is 0.04
##
## The harmonic number of observations is 470 with the empirical chi square 9.6 with prob < 0.21
## The total number of observations was 470 with MLE Chi Square = 37.01 with prob < 4.7e-06
##
## Tucker Lewis Index of factoring reliability = 0.939
## RMSEA index = 0.096 and the 90 % confidence intervals are 0.067 0.127
## BIC = -6.06
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## ML2 ML1 ML3
## Correlation of scores with factors 0.96 1.00 0.91
## Multiple R square of scores with factors 0.92 0.99 0.83
## Minimum correlation of possible factor scores 0.84 0.99 0.67
# Exploratory four-factor solution on all eight items: maximum-likelihood
# extraction with an oblimin rotation, then print the fitted object.
fourfactor <- fa(
  r = APSI,
  nfactors = 4,
  rotate = "oblimin",
  fm = "ml"
)
print(fourfactor)
## Factor Analysis using method = ml
## Call: fa(r = APSI, nfactors = 4, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML2 ML3 ML1 ML4 h2 u2 com
## 1 0.44 0.43 -0.05 0.09 0.72 0.284 2.1
## 2 0.03 0.82 0.01 0.06 0.75 0.249 1.0
## 3 0.00 -0.02 1.00 -0.02 1.00 0.005 1.0
## 4 0.59 0.27 0.02 0.05 0.68 0.317 1.4
## 5 0.11 0.32 0.24 0.53 0.69 0.313 2.2
## 6 0.00 0.11 0.28 -0.60 0.45 0.552 1.5
## 7 0.95 -0.09 0.02 0.02 0.80 0.199 1.0
## 8 0.54 0.40 -0.09 -0.02 0.74 0.260 1.9
##
## ML2 ML3 ML1 ML4
## SS loadings 2.19 1.68 1.16 0.80
## Proportion Var 0.27 0.21 0.14 0.10
## Cumulative Var 0.27 0.48 0.63 0.73
## Proportion Explained 0.38 0.29 0.20 0.14
## Cumulative Proportion 0.38 0.66 0.86 1.00
##
## With factor correlations of
## ML2 ML3 ML1 ML4
## ML2 1.00 0.71 0.04 0.40
## ML3 0.71 1.00 0.14 0.43
## ML1 0.04 0.14 1.00 -0.14
## ML4 0.40 0.43 -0.14 1.00
##
## Mean item complexity = 1.5
## Test of the hypothesis that 4 factors are sufficient.
##
## The degrees of freedom for the null model are 28 and the objective function was 4.28 with Chi Square of 1991.24
## The degrees of freedom for the model are 2 and the objective function was 0
##
## The root mean square of the residuals (RMSR) is 0
## The df corrected root mean square of the residuals is 0
##
## The harmonic number of observations is 470 with the empirical chi square 0.04 with prob < 0.98
## The total number of observations was 470 with MLE Chi Square = 0.25 with prob < 0.88
##
## Tucker Lewis Index of factoring reliability = 1.013
## RMSEA index = 0 and the 90 % confidence intervals are NA 0.044
## BIC = -12.06
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## ML2 ML3 ML1 ML4
## Correlation of scores with factors 0.95 0.92 1.00 0.81
## Multiple R square of scores with factors 0.89 0.85 0.99 0.65
## Minimum correlation of possible factor scores 0.79 0.70 0.99 0.31
# Questions 1, 4, 7, 8 seem to talk about purpose, so try them as one factor (question 2 is also included in this first attempt; see the select() call below).
# Subset to columns 1, 2, 4, 7 and 8 (selected by position) and print it.
APSI12478 <- tbl_df(select(APSI, 1, 2, 4, 7, 8))
print(APSI12478)
## Source: local data frame [470 x 5]
##
## 1 2 4 7 8
## 1 2 4 4 4 4
## 2 4 3 5 4 4
## 3 3 4 3 4 3
## 4 4 4 4 4 3
## 5 3 3 3 2 3
## 6 3 4 4 5 3
## 7 2 2 3 2 2
## 8 3 3 3 3 1
## 9 4 5 4 4 5
## 10 2 2 3 3 4
## .. . . . . .
# Single-factor maximum-likelihood model for items 1, 2, 4, 7, 8.
# (The object keeps its "twofactor" prefix, but nfactors = 1 here.)
twofactorAPSI12478 <- fa(
  r = APSI12478,
  nfactors = 1,
  rotate = "varimax",
  fm = "ml"
)
# The TLI is good but the RMSEA is a poor fit.
print(twofactorAPSI12478)
## Factor Analysis using method = ml
## Call: fa(r = APSI12478, nfactors = 1, rotate = "varimax", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 h2 u2 com
## 1 0.85 0.72 0.28 1
## 2 0.76 0.57 0.43 1
## 4 0.83 0.69 0.31 1
## 7 0.80 0.65 0.35 1
## 8 0.86 0.75 0.25 1
##
## ML1
## SS loadings 3.37
## Proportion Var 0.67
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 10 and the objective function was 3.3 with Chi Square of 1538.23
## The degrees of freedom for the model are 5 and the objective function was 0.08
##
## The root mean square of the residuals (RMSR) is 0.03
## The df corrected root mean square of the residuals is 0.05
##
## The harmonic number of observations is 470 with the empirical chi square 9.52 with prob < 0.09
## The total number of observations was 470 with MLE Chi Square = 38.14 with prob < 3.5e-07
##
## Tucker Lewis Index of factoring reliability = 0.957
## RMSEA index = 0.119 and the 90 % confidence intervals are 0.085 0.155
## BIC = 7.38
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## ML1
## Correlation of scores with factors 0.96
## Multiple R square of scores with factors 0.92
## Minimum correlation of possible factor scores 0.83
# CFI (comparative fit index) for the one-factor model of items 1, 2, 4, 7, 8:
#   CFI = 1 - max(chisq_model - df_model, 0) /
#             max(chisq_null - df_null, chisq_model - df_model, 0)
# Every term is taken from THIS model's fit object. The earlier version mixed
# this model's chi-square with the df and null-model chi-square of the
# eight-item `twofactor` solution, which is a different model on different
# data. A model without positive degrees of freedom has no meaningful CFI, so
# NA is returned in that case.
with(twofactorAPSI12478, {
  if (dof <= 0) {
    NA_real_
  } else {
    model_excess <- max(STATISTIC - dof, 0)
    null_excess <- max(null.chisq - null.dof, model_excess)
    1 - model_excess / null_excess
  }
})
# NOTE: the previously recorded result (0.9871953) came from the mixed-model
# formula and no longer applies. With the statistics printed above
# (chi-square 38.14 on 5 df; null chi-square 1538.23 on 10 df) the CFI is
# approximately 0.978.
# Drop question 2 and use only questions 1, 4, 7, 8, which seem to talk about purpose; try them as one factor.
# Subset to columns 1, 4, 7 and 8 (selected by position) and print it.
APSI1478 <- tbl_df(select(APSI, 1, 4, 7, 8))
print(APSI1478)
## Source: local data frame [470 x 4]
##
## 1 4 7 8
## 1 2 4 4 4
## 2 4 5 4 4
## 3 3 3 4 3
## 4 4 4 4 3
## 5 3 3 2 3
## 6 3 4 5 3
## 7 2 3 2 2
## 8 3 3 3 1
## 9 4 4 4 5
## 10 2 3 3 4
## .. . . . .
# Single-factor maximum-likelihood model for items 1, 4, 7, 8.
# (The object keeps its "twofactor" prefix, but nfactors = 1 here.)
twofactorAPSI1478 <- fa(
  r = APSI1478,
  nfactors = 1,
  rotate = "varimax",
  fm = "ml"
)
# Clearly shows that this is all one factor.
print(twofactorAPSI1478)
## Factor Analysis using method = ml
## Call: fa(r = APSI1478, nfactors = 1, rotate = "varimax", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 h2 u2 com
## 1 0.83 0.69 0.31 1
## 4 0.83 0.69 0.31 1
## 7 0.83 0.69 0.31 1
## 8 0.86 0.73 0.27 1
##
## ML1
## SS loadings 2.8
## Proportion Var 0.7
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 6 and the objective function was 2.5 with Chi Square of 1164.86
## The degrees of freedom for the model are 2 and the objective function was 0
##
## The root mean square of the residuals (RMSR) is 0.01
## The df corrected root mean square of the residuals is 0.01
##
## The harmonic number of observations is 470 with the empirical chi square 0.28 with prob < 0.87
## The total number of observations was 470 with MLE Chi Square = 1.55 with prob < 0.46
##
## Tucker Lewis Index of factoring reliability = 1.001
## RMSEA index = 0 and the 90 % confidence intervals are NA 0.085
## BIC = -10.76
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## ML1
## Correlation of scores with factors 0.95
## Multiple R square of scores with factors 0.90
## Minimum correlation of possible factor scores 0.81
# CFI (comparative fit index) for the one-factor model of items 1, 4, 7, 8:
#   CFI = 1 - max(chisq_model - df_model, 0) /
#             max(chisq_null - df_null, chisq_model - df_model, 0)
# Every term is taken from THIS model's fit object. The earlier version mixed
# this model's chi-square with the df and null-model chi-square of the
# eight-item `twofactor` solution, and it was not bounded, so it could exceed
# the CFI maximum of 1. A model without positive degrees of freedom has no
# meaningful CFI, so NA is returned in that case.
with(twofactorAPSI1478, {
  if (dof <= 0) {
    NA_real_
  } else {
    model_excess <- max(STATISTIC - dof, 0)
    null_excess <- max(null.chisq - null.dof, model_excess)
    1 - model_excess / null_excess
  }
})
# NOTE: the previously recorded result (1.005834) came from the mixed-model,
# unbounded formula and no longer applies. With the statistics printed above
# (chi-square 1.55 on 2 df, i.e. chi-square below its df) the CFI is 1.
# Question 6 is negatively worded but talks about fit in the world; question 2 talks about knowing "who I am". See whether they fit into one factor.
# Subset to columns 2 and 6 (selected by position) and print it.
APSI26 <- tbl_df(select(APSI, 2, 6))
print(APSI26)
## Source: local data frame [470 x 2]
##
## 2 6
## 1 4 2
## 2 3 3
## 3 4 3
## 4 4 2
## 5 3 3
## 6 4 4
## 7 2 2
## 8 3 3
## 9 5 4
## 10 2 3
## .. . .
# Single-factor maximum-likelihood model for items 2 and 6.
# (The object keeps its "twofactor" prefix, but nfactors = 1 here.)
twofactorAPSI26 <- fa(
  r = APSI26,
  nfactors = 1,
  rotate = "varimax",
  fm = "ml"
)
# Original note: seems to be a good fit.
# NOTE(review): the recorded output reports model df = -1 and loadings of
# -0.35 / 0.35 (h2 = 0.12), so the fit indices may not be informative for this
# two-item model -- confirm before relying on them.
print(twofactorAPSI26)
## Factor Analysis using method = ml
## Call: fa(r = APSI26, nfactors = 1, rotate = "varimax", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 h2 u2 com
## 2 -0.35 0.12 0.88 1
## 6 0.35 0.12 0.88 1
##
## ML1
## SS loadings 0.25
## Proportion Var 0.12
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 1 and the objective function was 0.02 with Chi Square of 7.08
## The degrees of freedom for the model are -1 and the objective function was 0
##
## The root mean square of the residuals (RMSR) is 0
## The df corrected root mean square of the residuals is NA
##
## The harmonic number of observations is 470 with the empirical chi square 0 with prob < NA
## The total number of observations was 470 with MLE Chi Square = 0 with prob < NA
##
## Tucker Lewis Index of factoring reliability = 1.165
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## ML1
## Correlation of scores with factors 0.47
## Multiple R square of scores with factors 0.22
## Minimum correlation of possible factor scores -0.56
# CFI (comparative fit index) for the one-factor model of items 2 and 6:
#   CFI = 1 - max(chisq_model - df_model, 0) /
#             max(chisq_null - df_null, chisq_model - df_model, 0)
# Every term is taken from THIS model's fit object. The earlier version mixed
# this model's chi-square with the df and null-model chi-square of the
# eight-item `twofactor` solution. A model without positive degrees of freedom
# has no meaningful CFI, so NA is returned in that case.
with(twofactorAPSI26, {
  if (dof <= 0) {
    NA_real_
  } else {
    model_excess <- max(STATISTIC - dof, 0)
    null_excess <- max(null.chisq - null.dof, model_excess)
    1 - model_excess / null_excess
  }
})
# NOTE: the previously recorded result (1.006622) came from the mixed-model
# formula and no longer applies. The output printed above reports -1 degrees
# of freedom for this model, so the expression now evaluates to NA.
# Questions 3 and 5 talk about morals and values and could be one factor.
# Subset to columns 3 and 5 (selected by position) and print it.
APSI35 <- tbl_df(select(APSI, 3, 5))
print(APSI35)
## Source: local data frame [470 x 2]
##
## 3 5
## 1 4 4
## 2 4 4
## 3 4 3
## 4 5 5
## 5 4 4
## 6 4 4
## 7 4 4
## 8 4 5
## 9 5 4
## 10 2 5
## .. . .
# Single-factor maximum-likelihood model for items 3 and 5.
# (The object keeps its "twofactor" prefix, but nfactors = 1 here.)
twofactorAPSI35 <- fa(
  r = APSI35,
  nfactors = 1,
  rotate = "varimax",
  fm = "ml"
)
# Original note: seems to load well on one factor.
# NOTE(review): the recorded output reports model df = -1 and loadings of
# 0.45 (h2 = 0.2), so the fit indices may not be informative for this
# two-item model -- confirm before relying on them.
print(twofactorAPSI35)
## Factor Analysis using method = ml
## Call: fa(r = APSI35, nfactors = 1, rotate = "varimax", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 h2 u2 com
## 3 0.45 0.2 0.8 1
## 5 0.45 0.2 0.8 1
##
## ML1
## SS loadings 0.4
## Proportion Var 0.2
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 1 and the objective function was 0.04 with Chi Square of 19.12
## The degrees of freedom for the model are -1 and the objective function was 0
##
## The root mean square of the residuals (RMSR) is 0
## The df corrected root mean square of the residuals is NA
##
## The harmonic number of observations is 470 with the empirical chi square 0 with prob < NA
## The total number of observations was 470 with MLE Chi Square = 0 with prob < NA
##
## Tucker Lewis Index of factoring reliability = 1.055
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## ML1
## Correlation of scores with factors 0.58
## Multiple R square of scores with factors 0.33
## Minimum correlation of possible factor scores -0.33
# CFI (comparative fit index) for the one-factor model of items 3 and 5:
#   CFI = 1 - max(chisq_model - df_model, 0) /
#             max(chisq_null - df_null, chisq_model - df_model, 0)
# Every term is taken from THIS model's fit object. The earlier version mixed
# this model's chi-square with the df and null-model chi-square of the
# eight-item `twofactor` solution. A model without positive degrees of freedom
# has no meaningful CFI, so NA is returned in that case.
with(twofactorAPSI35, {
  if (dof <= 0) {
    NA_real_
  } else {
    model_excess <- max(STATISTIC - dof, 0)
    null_excess <- max(null.chisq - null.dof, model_excess)
    1 - model_excess / null_excess
  }
})
# NOTE: the previously recorded result (1.006622) came from the mixed-model
# formula and no longer applies. The output printed above reports -1 degrees
# of freedom for this model, so the expression now evaluates to NA.
#What are the factors
#Factor 1 (questions 1, 4, 7, 8) is purpose
#Factor 2 (questions 3,5) is moral values
#Factor 3 (questions 2,6) fit in the world, knowledge of who I am