##load packages
library(psych)
library(GPArotation)
library(plyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# data preparation
# Read the survey export and keep only the eight APSI item columns.
data <- read.csv("~/Psychometric_study_data/allsurveysT1.csv")
APSI <- select(
  data,
  APSI_1, APSI_2, APSI_3, APSI_4, APSI_5, APSI_6, APSI_7, APSI_8
)
# Coerce each item to numeric column by column *before* doing arithmetic on it,
# so the reverse-scoring below never sees character or factor input. Columns
# that are already numeric pass through unchanged; anything else is converted
# via its text representation (the same values apply(..., as.numeric) gave).
APSI[] <- lapply(APSI, function(item) {
  if (is.numeric(item)) as.numeric(item) else as.numeric(as.character(item))
})
# Reverse-score item 6 (6 - x).
# NOTE(review): this presumes a 1-5 response scale -- confirm.
APSI$APSI_6 <- 6 - APSI$APSI_6
# NOTE(review): tbl_df() is deprecated in dplyr >= 1.0.0 in favour of
# as_tibble(); it is kept here so the recorded output below still matches.
APSI <- tbl_df(APSI)
APSI
## Source: local data frame [757 x 8]
## 
##    APSI_1 APSI_2 APSI_3 APSI_4 APSI_5 APSI_6 APSI_7 APSI_8
## 1       2      4      4      4      4      2      4      4
## 2       4      3      4      5      4      3      4      4
## 3       3      4      4      3      3      3      4      3
## 4       4      4      5      4      5      2      4      3
## 5       3      3      4      3      4      3      2      3
## 6       3      4      4      4      4      4      5      3
## 7       2      2      4      3      4      2      2      2
## 8       3      3      4      3      5      3      3      1
## 9       4      5      5      4      4      4      4      5
## 10      2      2      2      3      5      3      3      4
## ..    ...    ...    ...    ...    ...    ...    ...    ...
str(APSI)
## Classes 'tbl_df', 'tbl' and 'data.frame':    757 obs. of  8 variables:
##  $ APSI_1: num  2 4 3 4 3 3 2 3 4 2 ...
##  $ APSI_2: num  4 3 4 4 3 4 2 3 5 2 ...
##  $ APSI_3: num  4 4 4 5 4 4 4 4 5 2 ...
##  $ APSI_4: num  4 5 3 4 3 4 3 3 4 3 ...
##  $ APSI_5: num  4 4 3 5 4 4 4 5 4 5 ...
##  $ APSI_6: num  2 3 3 2 3 4 2 3 4 3 ...
##  $ APSI_7: num  4 4 4 4 2 5 2 3 4 3 ...
##  $ APSI_8: num  4 4 3 3 3 3 2 1 5 4 ...
# Shorten the column names to the bare item numbers "1".."8"; the factor
# loadings printed later are labelled with these names.
colnames(APSI) <- as.character(1:8)
# Keep only respondents who answered all eight items.
APSI <- APSI[complete.cases(APSI), ]


##EFA
##number of factors
##parallel analysis and scree plot
# fa.parallel() draws the scree plot and returns the eigenvalues used below.
parallel <- fa.parallel(
  APSI,
  fm = "ml",
  fa = "fa"
)

## Parallel analysis suggests that the number of factors =  3  and the number of components =  NA
# Analyst note: two factors have eigenvalues greater than one; the scree plot
# says there are two factors; parallel analysis suggests 4 factors.
# NOTE(review): the recorded message above reports 3 factors, not 4 -- confirm
# which run the note refers to.
#eigenvalues (kaiser)
parallel$fa.values
## [1]  3.858285840  0.416709155  0.194720428  0.036566419  0.000247384
## [6] -0.034992665 -0.116611798 -0.499783334
# Analyst note: over 1 = 2, over .7 = 2.
# NOTE(review): in the factor eigenvalues printed above only the first (3.86)
# exceeds 1 or .7 -- confirm whether the note refers to other eigenvalues.
# Principal components analysis, to see how many factors that method suggests.
# na.omit() drops any rows with missing values before the PCA.
# The PCA is fitted once and the same object is printed, summarised and
# plotted; the identical princomp() call used to be evaluated twice.
# NOTE(review): the name `parallel2` is kept for compatibility even though the
# object is a PCA fit, not a parallel analysis.
parallel2 <- princomp(na.omit(APSI), cor = TRUE)
parallel2
## Call:
## princomp(x = na.omit(APSI), cor = TRUE)
## 
## Standard deviations:
##    Comp.1    Comp.2    Comp.3    Comp.4    Comp.5    Comp.6    Comp.7 
## 2.0536963 1.1774408 0.8840305 0.6911068 0.5688568 0.5479494 0.5247605 
##    Comp.8 
## 0.4874483 
## 
##  8  variables and  470 observations.
summary(parallel2)
## Importance of components:
##                           Comp.1    Comp.2     Comp.3     Comp.4
## Standard deviation     2.0536963 1.1774408 0.88403052 0.69110683
## Proportion of Variance 0.5272086 0.1732958 0.09768875 0.05970358
## Cumulative Proportion  0.5272086 0.7005044 0.79819317 0.85789675
##                            Comp.5     Comp.6    Comp.7     Comp.8
## Standard deviation     0.56885679 0.54794940 0.5247605 0.48744826
## Proportion of Variance 0.04044976 0.03753107 0.0344217 0.02970073
## Cumulative Proportion  0.89834651 0.93587758 0.9702993 1.00000000
# Scree-style plot of the component variances.
plot(parallel2) ## results show at least two factors

#simple structure
# Two-factor maximum-likelihood solution with an oblique (oblimin) rotation
# on all eight items; the bare name on the last line prints the fit.
twofactor <- fa(
  r = APSI,
  nfactors = 2,
  rotate = "oblimin",
  fm = "ml"
)
twofactor
## Factor Analysis using method =  ml
## Call: fa(r = APSI, nfactors = 2, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##     ML2   ML1   h2    u2 com
## 1  0.85 -0.04 0.73 0.273 1.0
## 2  0.77  0.06 0.60 0.399 1.0
## 3  0.01  1.00 1.00 0.005 1.0
## 4  0.83  0.01 0.68 0.315 1.0
## 5  0.67  0.17 0.48 0.517 1.1
## 6 -0.21  0.40 0.20 0.803 1.5
## 7  0.79 -0.01 0.63 0.371 1.0
## 8  0.86 -0.07 0.74 0.263 1.0
## 
##                        ML2  ML1
## SS loadings           3.86 1.19
## Proportion Var        0.48 0.15
## Cumulative Var        0.48 0.63
## Proportion Explained  0.76 0.24
## Cumulative Proportion 0.76 1.00
## 
##  With factor correlations of 
##      ML2  ML1
## ML2 1.00 0.04
## ML1 0.04 1.00
## 
## Mean item complexity =  1.1
## Test of the hypothesis that 2 factors are sufficient.
## 
## The degrees of freedom for the null model are  28  and the objective function was  4.28 with Chi Square of  1991.24
## The degrees of freedom for the model are 13  and the objective function was  0.25 
## 
## The root mean square of the residuals (RMSR) is  0.05 
## The df corrected root mean square of the residuals is  0.07 
## 
## The harmonic number of observations is  470 with the empirical chi square  60.63  with prob <  4e-08 
## The total number of observations was  470  with MLE Chi Square =  113.77  with prob <  3.4e-18 
## 
## Tucker Lewis Index of factoring reliability =  0.889
## RMSEA index =  0.129  and the 90 % confidence intervals are  0.107 0.151
## BIC =  33.78
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                 ML2  ML1
## Correlation of scores with factors             0.96 1.00
## Multiple R square of scores with factors       0.92 0.99
## Minimum correlation of possible factor scores  0.84 0.99
# Three-factor maximum-likelihood solution, oblimin rotation, all eight items.
threefactor <- fa(
  r = APSI,
  nfactors = 3,
  rotate = "oblimin",
  fm = "ml"
)
threefactor
## Factor Analysis using method =  ml
## Call: fa(r = APSI, nfactors = 3, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##     ML2   ML1   ML3   h2    u2 com
## 1  0.78 -0.04  0.10 0.72 0.285 1.0
## 2  0.62  0.05  0.21 0.60 0.399 1.2
## 3 -0.03  0.99  0.04 1.00 0.005 1.0
## 4  0.82  0.02  0.01 0.69 0.309 1.0
## 5  0.14  0.10  0.81 0.83 0.167 1.1
## 6  0.11  0.44 -0.46 0.32 0.680 2.1
## 7  0.86  0.01 -0.08 0.66 0.341 1.0
## 8  0.88 -0.05 -0.02 0.75 0.246 1.0
## 
##                        ML2  ML1  ML3
## SS loadings           3.33 1.19 1.04
## Proportion Var        0.42 0.15 0.13
## Cumulative Var        0.42 0.57 0.70
## Proportion Explained  0.60 0.21 0.19
## Cumulative Proportion 0.60 0.81 1.00
## 
##  With factor correlations of 
##      ML2  ML1  ML3
## ML2 1.00 0.03 0.62
## ML1 0.03 1.00 0.10
## ML3 0.62 0.10 1.00
## 
## Mean item complexity =  1.2
## Test of the hypothesis that 3 factors are sufficient.
## 
## The degrees of freedom for the null model are  28  and the objective function was  4.28 with Chi Square of  1991.24
## The degrees of freedom for the model are 7  and the objective function was  0.08 
## 
## The root mean square of the residuals (RMSR) is  0.02 
## The df corrected root mean square of the residuals is  0.04 
## 
## The harmonic number of observations is  470 with the empirical chi square  9.6  with prob <  0.21 
## The total number of observations was  470  with MLE Chi Square =  37.01  with prob <  4.7e-06 
## 
## Tucker Lewis Index of factoring reliability =  0.939
## RMSEA index =  0.096  and the 90 % confidence intervals are  0.067 0.127
## BIC =  -6.06
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                 ML2  ML1  ML3
## Correlation of scores with factors             0.96 1.00 0.91
## Multiple R square of scores with factors       0.92 0.99 0.83
## Minimum correlation of possible factor scores  0.84 0.99 0.67
# Four-factor maximum-likelihood solution, oblimin rotation, all eight items.
fourfactor <- fa(
  r = APSI,
  nfactors = 4,
  rotate = "oblimin",
  fm = "ml"
)
fourfactor
## Factor Analysis using method =  ml
## Call: fa(r = APSI, nfactors = 4, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##    ML2   ML3   ML1   ML4   h2    u2 com
## 1 0.44  0.43 -0.05  0.09 0.72 0.284 2.1
## 2 0.03  0.82  0.01  0.06 0.75 0.249 1.0
## 3 0.00 -0.02  1.00 -0.02 1.00 0.005 1.0
## 4 0.59  0.27  0.02  0.05 0.68 0.317 1.4
## 5 0.11  0.32  0.24  0.53 0.69 0.313 2.2
## 6 0.00  0.11  0.28 -0.60 0.45 0.552 1.5
## 7 0.95 -0.09  0.02  0.02 0.80 0.199 1.0
## 8 0.54  0.40 -0.09 -0.02 0.74 0.260 1.9
## 
##                        ML2  ML3  ML1  ML4
## SS loadings           2.19 1.68 1.16 0.80
## Proportion Var        0.27 0.21 0.14 0.10
## Cumulative Var        0.27 0.48 0.63 0.73
## Proportion Explained  0.38 0.29 0.20 0.14
## Cumulative Proportion 0.38 0.66 0.86 1.00
## 
##  With factor correlations of 
##      ML2  ML3   ML1   ML4
## ML2 1.00 0.71  0.04  0.40
## ML3 0.71 1.00  0.14  0.43
## ML1 0.04 0.14  1.00 -0.14
## ML4 0.40 0.43 -0.14  1.00
## 
## Mean item complexity =  1.5
## Test of the hypothesis that 4 factors are sufficient.
## 
## The degrees of freedom for the null model are  28  and the objective function was  4.28 with Chi Square of  1991.24
## The degrees of freedom for the model are 2  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0 
## The df corrected root mean square of the residuals is  0 
## 
## The harmonic number of observations is  470 with the empirical chi square  0.04  with prob <  0.98 
## The total number of observations was  470  with MLE Chi Square =  0.25  with prob <  0.88 
## 
## Tucker Lewis Index of factoring reliability =  1.013
## RMSEA index =  0  and the 90 % confidence intervals are  NA 0.044
## BIC =  -12.06
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                 ML2  ML3  ML1  ML4
## Correlation of scores with factors             0.95 0.92 1.00 0.81
## Multiple R square of scores with factors       0.89 0.85 0.99 0.65
## Minimum correlation of possible factor scores  0.79 0.70 0.99 0.31
# Questions 1, 4, 7 and 8 seem to talk about purpose, so try them as one
# factor. The selection below also includes question 2.
# The integers pick columns by position, which matches the "1".."8" names.
APSI12478 <- tbl_df(select(APSI, 1, 2, 4, 7, 8))
APSI12478
## Source: local data frame [470 x 5]
## 
##    1 2 4 7 8
## 1  2 4 4 4 4
## 2  4 3 5 4 4
## 3  3 4 3 4 3
## 4  4 4 4 4 3
## 5  3 3 3 2 3
## 6  3 4 4 5 3
## 7  2 2 3 2 2
## 8  3 3 3 3 1
## 9  4 5 4 4 5
## 10 2 2 3 3 4
## .. . . . . .
# One-factor maximum-likelihood model for items 1, 2, 4, 7, 8. The object name
# says "twofactor" but a single factor is extracted (nfactors = 1).
twofactorAPSI12478 <- fa(
  r = APSI12478,
  nfactors = 1,
  rotate = "varimax",
  fm = "ml"
)
twofactorAPSI12478 # The TLI is good but the RMSEA is a poor fit
## Factor Analysis using method =  ml
## Call: fa(r = APSI12478, nfactors = 1, rotate = "varimax", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##    ML1   h2   u2 com
## 1 0.85 0.72 0.28   1
## 2 0.76 0.57 0.43   1
## 4 0.83 0.69 0.31   1
## 7 0.80 0.65 0.35   1
## 8 0.86 0.75 0.25   1
## 
##                 ML1
## SS loadings    3.37
## Proportion Var 0.67
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  10  and the objective function was  3.3 with Chi Square of  1538.23
## The degrees of freedom for the model are 5  and the objective function was  0.08 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.05 
## 
## The harmonic number of observations is  470 with the empirical chi square  9.52  with prob <  0.09 
## The total number of observations was  470  with MLE Chi Square =  38.14  with prob <  3.5e-07 
## 
## Tucker Lewis Index of factoring reliability =  0.957
## RMSEA index =  0.119  and the 90 % confidence intervals are  0.085 0.155
## BIC =  7.38
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                 ML1
## Correlation of scores with factors             0.96
## Multiple R square of scores with factors       0.92
## Minimum correlation of possible factor scores  0.83
#CFI
# Comparative fit index for the one-factor model on items 1, 2, 4, 7, 8:
#   1 - max(chisq - df, 0) / max(null.chisq - null.df, chisq - df, 0)
# Every term must come from this fit. The earlier version combined this
# model's chi-square with the dof and null-model statistics of the eight-item
# `twofactor` solution.
local({
  fit <- twofactorAPSI12478
  if (fit$dof <= 0) {
    # CFI is undefined without positive model degrees of freedom.
    NA_real_
  } else {
    model_excess <- max(fit$STATISTIC - fit$dof, 0)
    null_excess <- max(fit$null.chisq - fit$null.dof, model_excess)
    1 - model_excess / null_excess
  }
})
# The previously recorded value (0.9871953) came from the mixed-model formula.
# From the statistics printed above (chi-square 38.14 on 5 df; null model
# 1538.23 on 10 df) the corrected value is about 0.978; re-run to refresh.
# Drop question 2 and use only questions 1, 4, 7 and 8, which seem to talk
# about purpose; try them as one factor.
# The integers pick columns by position, which matches the "1".."8" names.
APSI1478 <- tbl_df(select(APSI, 1, 4, 7, 8))
APSI1478
## Source: local data frame [470 x 4]
## 
##    1 4 7 8
## 1  2 4 4 4
## 2  4 5 4 4
## 3  3 3 4 3
## 4  4 4 4 3
## 5  3 3 2 3
## 6  3 4 5 3
## 7  2 3 2 2
## 8  3 3 3 1
## 9  4 4 4 5
## 10 2 3 3 4
## .. . . . .
# One-factor maximum-likelihood model for items 1, 4, 7, 8. The object name
# says "twofactor" but a single factor is extracted (nfactors = 1).
twofactorAPSI1478 <- fa(
  r = APSI1478,
  nfactors = 1,
  rotate = "varimax",
  fm = "ml"
)
twofactorAPSI1478 # clearly shows that this is all one factor
## Factor Analysis using method =  ml
## Call: fa(r = APSI1478, nfactors = 1, rotate = "varimax", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##    ML1   h2   u2 com
## 1 0.83 0.69 0.31   1
## 4 0.83 0.69 0.31   1
## 7 0.83 0.69 0.31   1
## 8 0.86 0.73 0.27   1
## 
##                ML1
## SS loadings    2.8
## Proportion Var 0.7
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  6  and the objective function was  2.5 with Chi Square of  1164.86
## The degrees of freedom for the model are 2  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0.01 
## The df corrected root mean square of the residuals is  0.01 
## 
## The harmonic number of observations is  470 with the empirical chi square  0.28  with prob <  0.87 
## The total number of observations was  470  with MLE Chi Square =  1.55  with prob <  0.46 
## 
## Tucker Lewis Index of factoring reliability =  1.001
## RMSEA index =  0  and the 90 % confidence intervals are  NA 0.085
## BIC =  -10.76
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                 ML1
## Correlation of scores with factors             0.95
## Multiple R square of scores with factors       0.90
## Minimum correlation of possible factor scores  0.81
#CFI
# Comparative fit index for the one-factor model on items 1, 4, 7, 8:
#   1 - max(chisq - df, 0) / max(null.chisq - null.df, chisq - df, 0)
# Every term must come from this fit. The earlier version combined this
# model's chi-square with the dof and null-model statistics of the eight-item
# `twofactor` solution, which is how it produced a value above 1.
local({
  fit <- twofactorAPSI1478
  if (fit$dof <= 0) {
    # CFI is undefined without positive model degrees of freedom.
    NA_real_
  } else {
    model_excess <- max(fit$STATISTIC - fit$dof, 0)
    null_excess <- max(fit$null.chisq - fit$null.dof, model_excess)
    1 - model_excess / null_excess
  }
})
# The previously recorded value (1.005834) came from the mixed-model formula.
# From the statistics printed above (chi-square 1.55 on 2 df; null model
# 1164.86 on 6 df) the corrected, bounded value is 1; re-run to refresh.
# Question 6 is negatively worded but talks about fit in the world; question 2
# talks about knowing "who I am". See whether the two fit into one factor.
# The integers pick columns by position, which matches the "1".."8" names.
APSI26 <- tbl_df(select(APSI, 2, 6))
APSI26
## Source: local data frame [470 x 2]
## 
##    2 6
## 1  4 2
## 2  3 3
## 3  4 3
## 4  4 2
## 5  3 3
## 6  4 4
## 7  2 2
## 8  3 3
## 9  5 4
## 10 2 3
## .. . .
# One-factor maximum-likelihood model for items 2 and 6 (nfactors = 1 despite
# the "twofactor" object name).
twofactorAPSI26 <- fa(
  r = APSI26,
  nfactors = 1,
  rotate = "varimax",
  fm = "ml"
)
# Analyst note: seems to be a good fit.
# NOTE(review): the recorded output reports -1 model degrees of freedom and NA
# fit probabilities, so fit cannot be assessed from this model; the two
# loadings also have opposite signs (-0.35 and 0.35) even though item 6 was
# reverse-scored -- confirm the scoring direction.
twofactorAPSI26
## Factor Analysis using method =  ml
## Call: fa(r = APSI26, nfactors = 1, rotate = "varimax", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##     ML1   h2   u2 com
## 2 -0.35 0.12 0.88   1
## 6  0.35 0.12 0.88   1
## 
##                 ML1
## SS loadings    0.25
## Proportion Var 0.12
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  1  and the objective function was  0.02 with Chi Square of  7.08
## The degrees of freedom for the model are -1  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0 
## The df corrected root mean square of the residuals is  NA 
## 
## The harmonic number of observations is  470 with the empirical chi square  0  with prob <  NA 
## The total number of observations was  470  with MLE Chi Square =  0  with prob <  NA 
## 
## Tucker Lewis Index of factoring reliability =  1.165
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                  ML1
## Correlation of scores with factors              0.47
## Multiple R square of scores with factors        0.22
## Minimum correlation of possible factor scores  -0.56
#CFI
# Comparative fit index for the one-factor model on items 2 and 6:
#   1 - max(chisq - df, 0) / max(null.chisq - null.df, chisq - df, 0)
# Every term must come from this fit. The earlier version combined this
# model's chi-square with the dof and null-model statistics of the eight-item
# `twofactor` solution.
local({
  fit <- twofactorAPSI26
  if (fit$dof <= 0) {
    # CFI is undefined without positive model degrees of freedom.
    NA_real_
  } else {
    model_excess <- max(fit$STATISTIC - fit$dof, 0)
    null_excess <- max(fit$null.chisq - fit$null.dof, model_excess)
    1 - model_excess / null_excess
  }
})
# The previously recorded value (1.006622) came from the mixed-model formula.
# The output printed above reports -1 degrees of freedom for this two-item
# model, so the corrected code is expected to return NA; re-run to refresh.
# Questions 3 and 5 talk about morals and values and could be one factor.
# The integers pick columns by position, which matches the "1".."8" names.
APSI35 <- tbl_df(select(APSI, 3, 5))
APSI35
## Source: local data frame [470 x 2]
## 
##    3 5
## 1  4 4
## 2  4 4
## 3  4 3
## 4  5 5
## 5  4 4
## 6  4 4
## 7  4 4
## 8  4 5
## 9  5 4
## 10 2 5
## .. . .
# One-factor maximum-likelihood model for items 3 and 5 (nfactors = 1 despite
# the "twofactor" object name).
twofactorAPSI35 <- fa(
  r = APSI35,
  nfactors = 1,
  rotate = "varimax",
  fm = "ml"
)
# Analyst note: seems to load well on one factor.
# NOTE(review): the recorded output reports -1 model degrees of freedom and NA
# fit probabilities, so fit cannot be assessed from this model; both loadings
# are 0.45.
twofactorAPSI35
## Factor Analysis using method =  ml
## Call: fa(r = APSI35, nfactors = 1, rotate = "varimax", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##    ML1  h2  u2 com
## 3 0.45 0.2 0.8   1
## 5 0.45 0.2 0.8   1
## 
##                ML1
## SS loadings    0.4
## Proportion Var 0.2
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  1  and the objective function was  0.04 with Chi Square of  19.12
## The degrees of freedom for the model are -1  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0 
## The df corrected root mean square of the residuals is  NA 
## 
## The harmonic number of observations is  470 with the empirical chi square  0  with prob <  NA 
## The total number of observations was  470  with MLE Chi Square =  0  with prob <  NA 
## 
## Tucker Lewis Index of factoring reliability =  1.055
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                  ML1
## Correlation of scores with factors              0.58
## Multiple R square of scores with factors        0.33
## Minimum correlation of possible factor scores  -0.33
#CFI
# Comparative fit index for the one-factor model on items 3 and 5:
#   1 - max(chisq - df, 0) / max(null.chisq - null.df, chisq - df, 0)
# Every term must come from this fit. The earlier version combined this
# model's chi-square with the dof and null-model statistics of the eight-item
# `twofactor` solution.
local({
  fit <- twofactorAPSI35
  if (fit$dof <= 0) {
    # CFI is undefined without positive model degrees of freedom.
    NA_real_
  } else {
    model_excess <- max(fit$STATISTIC - fit$dof, 0)
    null_excess <- max(fit$null.chisq - fit$null.dof, model_excess)
    1 - model_excess / null_excess
  }
})
# The previously recorded value (1.006622) came from the mixed-model formula.
# The output printed above reports -1 degrees of freedom for this two-item
# model, so the corrected code is expected to return NA; re-run to refresh.
#What are the factors
#Factor 1 (questions 1, 4, 7, 8) is purpose
#Factor 2 (questions 3,5) is moral values
#Factor 3 (questions 2,6) fit in the world, knowledge of who I am