##load packages
library(psych)
library(GPArotation)
library(plyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# data preparation
# Read the time-1 survey export and keep only the nine PWB items.
data <- read.csv("~/Psychometric_study_data/allsurveysT1.csv")
PWB <- select(data, PWB_1, PWB_2, PWB_3, PWB_4, PWB_5, PWB_6, PWB_7, PWB_8, PWB_9)

# Coerce every item to numeric BEFORE reverse-scoring: arithmetic on a factor
# yields NA (and on a character column an error), so the conversion has to
# come first. lapply() works column by column, unlike apply(), which turns the
# data frame into a matrix and collapses a one-row frame into a plain vector.
PWB <- data.frame(lapply(PWB, function(item) {
  if (is.numeric(item)) as.numeric(item) else as.numeric(as.character(item))
}))

# Reverse-score the reverse-keyed items in a single vectorised step.
# NOTE(review): 7 - x assumes a 1-6 response scale -- confirm against the
# questionnaire.
reverse_keyed <- c("PWB_1", "PWB_2", "PWB_3", "PWB_4", "PWB_9")
PWB[reverse_keyed] <- 7 - PWB[reverse_keyed]

PWB <- tbl_df(PWB)
PWB
## Source: local data frame [757 x 9]
## 
##    PWB_1 PWB_2 PWB_3 PWB_4 PWB_5 PWB_6 PWB_7 PWB_8 PWB_9
## 1      4     3     5     2     4     5     4     3     6
## 2      4     5     5     2     2     5     3     2     5
## 3      5     6     5     6     1     4     6     3     6
## 4      2     2     4     4     3     4     5     4     4
## 5      2     2     3     3     4     3     2     3     4
## 6      5     4     6     5     3     4     3     4     6
## 7      2     2     5     2     1     4     3     3     3
## 8      6     6     5     1     2     4     4     4     6
## 9      5     5     5     5     1     5     5     5     6
## 10     6     6     3     3     2     6     6     3     6
## ..   ...   ...   ...   ...   ...   ...   ...   ...   ...
# Inspect the structure (column types) of the prepared item set.
str(object = PWB)
## Classes 'tbl_df', 'tbl' and 'data.frame':    757 obs. of  9 variables:
##  $ PWB_1: num  4 4 5 2 2 5 2 6 5 6 ...
##  $ PWB_2: num  3 5 6 2 2 4 2 6 5 6 ...
##  $ PWB_3: num  5 5 5 4 3 6 5 5 5 3 ...
##  $ PWB_4: num  2 2 6 4 3 5 2 1 5 3 ...
##  $ PWB_5: num  4 2 1 3 4 3 1 2 1 2 ...
##  $ PWB_6: num  5 5 4 4 3 4 4 4 5 6 ...
##  $ PWB_7: num  4 3 6 5 2 3 3 4 5 6 ...
##  $ PWB_8: num  3 2 3 4 3 4 3 4 5 3 ...
##  $ PWB_9: num  6 5 6 4 4 6 3 6 6 6 ...
# Relabel the nine items with their bare item numbers ("1" ... "9"), then keep
# only the rows that have no missing value on any item.
names(PWB) <- as.character(1:9)
PWB <- PWB[complete.cases(PWB), ]


## EFA
## number of factors
## parallel analysis and scree plot
# fa.parallel() compares the observed eigenvalues with those obtained from
# randomly generated/resampled data, so the suggested number of factors can
# change from run to run. Fix the RNG seed so the result is reproducible.
set.seed(1234)
parallel <- fa.parallel(PWB, fm = "ml", fa = "fa")

## Parallel analysis suggests that the number of factors =  3  and the number of components =  NA
# Two factors were judged to have eigenvalues greater than one, and the scree
# plot suggests two factors. Parallel analysis (see the output above) suggests
# 3 factors.
# Eigenvalues of the factor solution (for the Kaiser criterion), taken from
# the parallel-analysis result.
parallel[["fa.values"]]
## [1]  2.764589905  0.925824683  0.367705915  0.031000693 -0.006112395
## [6] -0.150861796 -0.275610741 -0.317569437 -0.606030196
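# Eigenvalues over 1 = 2, over .7 = 2
# (NOTE: of the factor eigenvalues printed above, only the first exceeds 1; the second is 0.93.)
# Doing a principal components analysis to see how many factors there might be using that method
# Deal with NAs when doing the principal component analysis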
# Principal components analysis on the correlation matrix; rows with missing
# values are dropped first. The fitted object is auto-printed.
princomp(x = na.omit(PWB), cor = TRUE)
## Call:
## princomp(x = na.omit(PWB), cor = TRUE)
## 
## Standard deviations:
##    Comp.1    Comp.2    Comp.3    Comp.4    Comp.5    Comp.6    Comp.7 
## 1.8258502 1.3343875 1.0554192 0.8256147 0.7860937 0.6451339 0.6328747 
##    Comp.8    Comp.9 
## 0.5906555 0.5537025 
## 
##  9  variables and  471 observations.
# Keep the fitted PCA (correlation-based, missing rows dropped) and show the
# per-component standard deviation and proportion of variance.
parallel2 <- princomp(x = na.omit(PWB), cor = TRUE)
summary(object = parallel2)
## Importance of components:
##                           Comp.1    Comp.2    Comp.3     Comp.4     Comp.5
## Standard deviation     1.8258502 1.3343875 1.0554192 0.82561467 0.78609375
## Proportion of Variance 0.3704143 0.1978433 0.1237677 0.07573773 0.06866038
## Cumulative Proportion  0.3704143 0.5682577 0.6920254 0.76776312 0.83642349
##                           Comp.6     Comp.7     Comp.8     Comp.9
## Standard deviation     0.6451339 0.63287472 0.59065550 0.55370250
## Proportion of Variance 0.0462442 0.04450338 0.03876377 0.03406516
## Cumulative Proportion  0.8826677 0.92717107 0.96593484 1.00000000
# Plot the fitted PCA; results show at least two factors.
plot(x = parallel2)

# Simple structure
# Two-factor maximum-likelihood solution with an oblique (oblimin) rotation,
# fitted to all nine items; the fit is auto-printed.
twofactor <- fa(r = PWB, nfactors = 2, rotate = "oblimin", fm = "ml")
twofactor
## Factor Analysis using method =  ml
## Call: fa(r = PWB, nfactors = 2, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##     ML1   ML2   h2   u2 com
## 1  0.68 -0.25 0.48 0.52 1.3
## 2  0.40  0.13 0.18 0.82 1.2
## 3  0.80 -0.02 0.64 0.36 1.0
## 4  0.46  0.41 0.42 0.58 2.0
## 5 -0.81 -0.04 0.67 0.33 1.0
## 6  0.60  0.11 0.39 0.61 1.1
## 7 -0.07  0.87 0.74 0.26 1.0
## 8  0.10  0.72 0.54 0.46 1.0
## 9  0.36  0.10 0.15 0.85 1.1
## 
##                        ML1  ML2
## SS loadings           2.66 1.55
## Proportion Var        0.30 0.17
## Cumulative Var        0.30 0.47
## Proportion Explained  0.63 0.37
## Cumulative Proportion 0.63 1.00
## 
##  With factor correlations of 
##      ML1  ML2
## ML1 1.00 0.12
## ML2 0.12 1.00
## 
## Mean item complexity =  1.2
## Test of the hypothesis that 2 factors are sufficient.
## 
## The degrees of freedom for the null model are  36  and the objective function was  3 with Chi Square of  1399.69
## The degrees of freedom for the model are 19  and the objective function was  0.32 
## 
## The root mean square of the residuals (RMSR) is  0.06 
## The df corrected root mean square of the residuals is  0.09 
## 
## The harmonic number of observations is  471 with the empirical chi square  138.64  with prob <  3.3e-20 
## The total number of observations was  471  with MLE Chi Square =  147.52  with prob <  6.6e-22 
## 
## Tucker Lewis Index of factoring reliability =  0.821
## RMSEA index =  0.121  and the 90 % confidence intervals are  0.102 0.138
## BIC =  30.58
## Fit based upon off diagonal values = 0.96
## Measures of factor score adequacy             
##                                                 ML1  ML2
## Correlation of scores with factors             0.93 0.90
## Multiple R square of scores with factors       0.86 0.82
## Minimum correlation of possible factor scores  0.72 0.63
# Three-factor maximum-likelihood solution with oblimin rotation on all nine
# items; the fit is auto-printed.
threefactor <- fa(r = PWB, nfactors = 3, rotate = "oblimin", fm = "ml")
threefactor
## Factor Analysis using method =  ml
## Call: fa(r = PWB, nfactors = 3, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##     ML2   ML3   ML1   h2    u2 com
## 1  0.58 -0.28  0.21 0.50 0.500 1.7
## 2 -0.01  0.02  1.00 1.00 0.005 1.0
## 3  0.83 -0.02 -0.03 0.67 0.331 1.0
## 4  0.38  0.38  0.19 0.43 0.572 2.5
## 5 -0.80 -0.03 -0.04 0.67 0.331 1.0
## 6  0.68  0.13 -0.13 0.45 0.553 1.1
## 7 -0.08  0.85  0.05 0.72 0.278 1.0
## 8  0.12  0.73 -0.02 0.56 0.442 1.1
## 9  0.22  0.06  0.30 0.20 0.803 1.9
## 
##                        ML2  ML3  ML1
## SS loadings           2.41 1.52 1.26
## Proportion Var        0.27 0.17 0.14
## Cumulative Var        0.27 0.44 0.58
## Proportion Explained  0.46 0.29 0.24
## Cumulative Proportion 0.46 0.76 1.00
## 
##  With factor correlations of 
##      ML2  ML3  ML1
## ML2 1.00 0.11 0.35
## ML3 0.11 1.00 0.12
## ML1 0.35 0.12 1.00
## 
## Mean item complexity =  1.4
## Test of the hypothesis that 3 factors are sufficient.
## 
## The degrees of freedom for the null model are  36  and the objective function was  3 with Chi Square of  1399.69
## The degrees of freedom for the model are 12  and the objective function was  0.09 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.05 
## 
## The harmonic number of observations is  471 with the empirical chi square  24.05  with prob <  0.02 
## The total number of observations was  471  with MLE Chi Square =  42.44  with prob <  2.8e-05 
## 
## Tucker Lewis Index of factoring reliability =  0.933
## RMSEA index =  0.074  and the 90 % confidence intervals are  0.05 0.098
## BIC =  -31.42
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                 ML2  ML3  ML1
## Correlation of scores with factors             0.93 0.90 1.00
## Multiple R square of scores with factors       0.86 0.81 0.99
## Minimum correlation of possible factor scores  0.72 0.62 0.99
# Four-factor maximum-likelihood solution with oblimin rotation on all nine
# items; the fit is auto-printed.
fourfactor <- fa(r = PWB, nfactors = 4, rotate = "oblimin", fm = "ml")
fourfactor
## Factor Analysis using method =  ml
## Call: fa(r = PWB, nfactors = 4, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##     ML1   ML2   ML3   ML4   h2   u2 com
## 1  0.12 -0.20  0.37  0.51 0.61 0.39 2.3
## 2 -0.01  0.03  0.83 -0.01 0.68 0.32 1.0
## 3  0.85 -0.10 -0.03  0.08 0.75 0.25 1.0
## 4  0.57  0.29  0.20 -0.22 0.53 0.47 2.1
## 5 -0.56 -0.03 -0.12 -0.26 0.64 0.36 1.5
## 6  0.26  0.24 -0.09  0.57 0.56 0.44 1.9
## 7 -0.08  0.86  0.04 -0.01 0.72 0.28 1.0
## 8  0.07  0.73 -0.02  0.06 0.56 0.44 1.0
## 9  0.13  0.04  0.40  0.02 0.24 0.76 1.2
## 
##                        ML1  ML2  ML3  ML4
## SS loadings           1.75 1.49 1.16 0.89
## Proportion Var        0.19 0.17 0.13 0.10
## Cumulative Var        0.19 0.36 0.49 0.59
## Proportion Explained  0.33 0.28 0.22 0.17
## Cumulative Proportion 0.33 0.61 0.83 1.00
## 
##  With factor correlations of 
##      ML1   ML2  ML3   ML4
## ML1 1.00  0.21 0.43  0.55
## ML2 0.21  1.00 0.14 -0.12
## ML3 0.43  0.14 1.00  0.17
## ML4 0.55 -0.12 0.17  1.00
## 
## Mean item complexity =  1.5
## Test of the hypothesis that 4 factors are sufficient.
## 
## The degrees of freedom for the null model are  36  and the objective function was  3 with Chi Square of  1399.69
## The degrees of freedom for the model are 6  and the objective function was  0.02 
## 
## The root mean square of the residuals (RMSR) is  0.01 
## The df corrected root mean square of the residuals is  0.03 
## 
## The harmonic number of observations is  471 with the empirical chi square  4.69  with prob <  0.58 
## The total number of observations was  471  with MLE Chi Square =  8.85  with prob <  0.18 
## 
## Tucker Lewis Index of factoring reliability =  0.987
## RMSEA index =  0.032  and the 90 % confidence intervals are  NA 0.073
## BIC =  -28.08
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                 ML1  ML2  ML3  ML4
## Correlation of scores with factors             0.92 0.90 0.87 0.83
## Multiple R square of scores with factors       0.85 0.81 0.75 0.69
## Minimum correlation of possible factor scores  0.69 0.63 0.51 0.38
# Questions 1, 3, 5, 6 and 9 seem to form one factor; they all talk about
# plans or the lack of plans. Pull those columns out and show them.
PWBWO15 <- tbl_df(select(PWB, 1, 3, 5, 6, 9))
PWBWO15
## Source: local data frame [471 x 5]
## 
##    1 3 5 6 9
## 1  4 5 4 5 6
## 2  4 5 2 5 5
## 3  5 5 1 4 6
## 4  2 4 3 4 4
## 5  2 3 4 3 4
## 6  5 6 3 4 6
## 7  2 5 1 4 3
## 8  6 5 2 4 6
## 9  5 5 1 5 6
## 10 6 3 2 6 6
## .. . . . . .
# Single-factor maximum-likelihood model for the item subset in PWBWO15
# (despite the "twofactor" prefix in the object name, nfactors is 1).
twofactorWO15 <- fa(r = PWBWO15, nfactors = 1, rotate = "oblimin", fm = "ml")
twofactorWO15
## Factor Analysis using method =  ml
## Call: fa(r = PWBWO15, nfactors = 1, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##     ML1   h2   u2 com
## 1  0.63 0.39 0.61   1
## 3  0.80 0.64 0.36   1
## 5 -0.83 0.69 0.31   1
## 6  0.64 0.41 0.59   1
## 9  0.34 0.11 0.89   1
## 
##                 ML1
## SS loadings    2.24
## Proportion Var 0.45
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  10  and the objective function was  1.49 with Chi Square of  697.51
## The degrees of freedom for the model are 5  and the objective function was  0.02 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.05 
## 
## The harmonic number of observations is  471 with the empirical chi square  10.84  with prob <  0.055 
## The total number of observations was  471  with MLE Chi Square =  11.66  with prob <  0.04 
## 
## Tucker Lewis Index of factoring reliability =  0.981
## RMSEA index =  0.054  and the 90 % confidence intervals are  0.011 0.094
## BIC =  -19.11
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                 ML1
## Correlation of scores with factors             0.92
## Multiple R square of scores with factors       0.85
## Minimum correlation of possible factor scores  0.69
# CFI for the one-factor model fitted to PWBWO15; should be slightly higher
# than the TLI.
#   CFI = 1 - max(chisq_m - df_m, 0) / max(chisq_m - df_m, chisq_0 - df_0, 0)
# Every term must come from the SAME fit. The earlier version combined this
# model's chi-square with the degrees of freedom and null-model chi-square of
# the nine-item `twofactor` fit, which produced an impossible CFI above 1.
# NOTE: any recorded output following this expression predates the fix;
# re-run the script to refresh it.
local({
  fit <- twofactorWO15
  model_excess <- max(fit[["STATISTIC"]] - fit[["dof"]], 0)
  null_excess <- max(fit[["null.chisq"]] - fit[["null.dof"]], model_excess)
  if (is.na(null_excess)) {
    NA_real_
  } else if (null_excess > 0) {
    1 - model_excess / null_excess
  } else {
    1
  }
})
## [1] 1.005381
# Questions 7 and 8 seem to form one factor; they both talk about being
# active or wandering aimlessly. Pull those columns out and show them.
PWB78 <- tbl_df(select(PWB, 7, 8))
PWB78
## Source: local data frame [471 x 2]
## 
##    7 8
## 1  4 3
## 2  3 2
## 3  6 3
## 4  5 4
## 5  2 3
## 6  3 4
## 7  3 3
## 8  4 4
## 9  5 5
## 10 6 3
## .. . .
# Single-factor maximum-likelihood model for the two items in PWB78
# (despite the "twofactor" prefix in the object name, nfactors is 1).
twofactor78 <- fa(r = PWB78, nfactors = 1, rotate = "oblimin", fm = "ml")
twofactor78
## Factor Analysis using method =  ml
## Call: fa(r = PWB78, nfactors = 1, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##    ML1   h2   u2 com
## 7 0.79 0.62 0.38   1
## 8 0.79 0.62 0.38   1
## 
##                 ML1
## SS loadings    1.23
## Proportion Var 0.62
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  1  and the objective function was  0.48 with Chi Square of  224.1
## The degrees of freedom for the model are -1  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0 
## The df corrected root mean square of the residuals is  NA 
## 
## The harmonic number of observations is  471 with the empirical chi square  0  with prob <  NA 
## The total number of observations was  471  with MLE Chi Square =  0  with prob <  NA 
## 
## Tucker Lewis Index of factoring reliability =  1.004
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                 ML1
## Correlation of scores with factors             0.87
## Multiple R square of scores with factors       0.76
## Minimum correlation of possible factor scores  0.53
# CFI for the one-factor model fitted to PWB78; should be slightly higher
# than the TLI.
#   CFI = 1 - max(chisq_m - df_m, 0) / max(chisq_m - df_m, chisq_0 - df_0, 0)
# The earlier version was copy-pasted and still read `twofactorWO15` and
# `twofactor`, so it simply re-printed the previous model's value. All terms
# now come from `twofactor78`.
# NOTE(review): a one-factor model with only two indicators has negative
# degrees of freedom (under-identified), so this index is not really
# interpretable -- confirm whether it should be reported at all.
# NOTE: any recorded output following this expression predates the fix;
# re-run the script to refresh it.
local({
  fit <- twofactor78
  model_excess <- max(fit[["STATISTIC"]] - fit[["dof"]], 0)
  null_excess <- max(fit[["null.chisq"]] - fit[["null.dof"]], model_excess)
  if (is.na(null_excess)) {
    NA_real_
  } else if (null_excess > 0) {
    1 - model_excess / null_excess
  } else {
    1
  }
})
## [1] 1.005381
# Questions 2 and 9 seem to form one factor: they load together on the same
# factor in the three- and four-factor solutions, while item 8 belongs with
# item 7. The earlier version selected items 2 and 8, contradicting the
# PWB29 / twofactor29 object names.
# NOTE: the recorded output that follows was produced with the old (2, 8)
# selection; re-run the script to refresh it.
PWB29 <- select(PWB, 2, 9)
PWB29 <- tbl_df(PWB29)
PWB29
## Source: local data frame [471 x 2]
## 
##    2 8
## 1  3 3
## 2  5 2
## 3  6 3
## 4  2 4
## 5  2 3
## 6  4 4
## 7  2 3
## 8  6 4
## 9  5 5
## 10 6 3
## .. . .
# Single-factor maximum-likelihood model for the two items in PWB29
# (despite the "twofactor" prefix in the object name, nfactors is 1).
twofactor29 <- fa(r = PWB29, nfactors = 1, rotate = "oblimin", fm = "ml")
twofactor29
## Factor Analysis using method =  ml
## Call: fa(r = PWB29, nfactors = 1, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##    ML1   h2   u2 com
## 2 0.34 0.12 0.88   1
## 8 0.34 0.12 0.88   1
## 
##                 ML1
## SS loadings    0.24
## Proportion Var 0.12
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  1  and the objective function was  0.01 with Chi Square of  6.65
## The degrees of freedom for the model are -1  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0 
## The df corrected root mean square of the residuals is  NA 
## 
## The harmonic number of observations is  471 with the empirical chi square  0  with prob <  NA 
## The total number of observations was  471  with MLE Chi Square =  0  with prob <  NA 
## 
## Tucker Lewis Index of factoring reliability =  1.177
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                  ML1
## Correlation of scores with factors              0.46
## Multiple R square of scores with factors        0.21
## Minimum correlation of possible factor scores  -0.58
# CFI for the one-factor model fitted to PWB29; should be slightly higher
# than the TLI.
#   CFI = 1 - max(chisq_m - df_m, 0) / max(chisq_m - df_m, chisq_0 - df_0, 0)
# The earlier version took the degrees of freedom and null-model values from
# the nine-item `twofactor` fit. All terms now come from `twofactor29`.
# NOTE(review): a one-factor model with only two indicators has negative
# degrees of freedom (under-identified), so this index is not really
# interpretable -- confirm whether it should be reported at all.
# NOTE: any recorded output following this expression predates the fix;
# re-run the script to refresh it.
local({
  fit <- twofactor29
  model_excess <- max(fit[["STATISTIC"]] - fit[["dof"]], 0)
  null_excess <- max(fit[["null.chisq"]] - fit[["null.dof"]], model_excess)
  if (is.na(null_excess)) {
    NA_real_
  } else if (null_excess > 0) {
    1 - model_excess / null_excess
  } else {
    1
  }
})
## [1] 1.013933
##reliability
#alpha(PWB[,c(1,2,3,4,5,6,7,8,9)])
#alpha(PWB[,c(5,9,10,14)])
#What are the factors
#Factor 1 is positive emotions
#Factor 2 is negative emotions