##########################################
# Psychometric properties of online versions
# of FTND and PHQ-9
# Autor: Henrique Gomide
# Licença: GNU GPL 3.0
##########################################

# Load libraries
library(car)
library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:car':
## 
##     logit
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
# Open user data
study2 <- read.csv("~/vst-analise/R/data/lwt.csv", stringsAsFactors = FALSE)

# What percentage of users who gave consent?
round(prop.table(table(study2$authorize_data)),3)[2]*100
##    1 
## 82.5
# How many people have authorized?
nrow(subset(study2, study2$authorize_data == 1))
## [1] 1275
# Subset with people who agreed to join the study
study2 <- subset(study2, study2$authorize_data == 1)

# Number of users

##########################
# Sample description ----
##########################

## Sex distribuction in percent; F = Female, M = Male
round(prop.table(table(study2$gender)),3)*100
## 
##    F    M 
## 65.6 34.4
## Age - Mean and standard deviation ====
study2$birth <- as.Date(study2$birth)
study2$age <- 2017 - as.numeric(format(study2$birth, "%Y"))
study2$age  <- ifelse(study2$age >= 18, study2$age, NA)
options(digits = 3)
cat("Mean :", mean(study2$age, na.rm = TRUE))
## Mean : 44
cat("Standard Deviation :", sd(study2$age, na.rm = TRUE))
## Standard Deviation : 11.6
## Motivation to quit
## How many people answered 
prop.table(table(!is.na(study2$ladder)))[2]
##  TRUE 
## 0.485
## In a ten-point scale...
table(cut(study2$ladder, 4))
## 
## (0.991,3.25]   (3.25,5.5]   (5.5,7.75]    (7.75,10] 
##            8          161          229          220
cat("Mean :", mean(study2$ladder, na.rm = TRUE))
## Mean : 6.79
cat("Standard Deviation :", sd(study2$ladder, na.rm = TRUE))
## Standard Deviation : 1.57
# Conclusion: The smokers seemed to be medium to highly motivated to quit Ladder score = 6.8 (sd = 1.6). 
## FTND
## Create sum of questionnaire
study2$ftnd_4r <- Recode(study2$ftnd_4, "0:10 = 1; 11:20 = 2; 21:30 = 3; 31:151 = 4")
study2$ftnd.score <- study2$ftnd_1 + study2$ftnd_2 + study2$ftnd_3 + study2$ftnd_4r + study2$ftnd_5 + study2$ftnd_6 - 6
## How many answered
prop.table(table(!is.na(study2$ftnd.score)))[2]
##  TRUE 
## 0.246
### Dependence Level
study2$ftnd.level <- Recode(study2$ftnd.score, "0:2 = 'Very low'; 3:4 = 'Low'; 5 = 'Medium'; 6:7 = 'High'; 8:10 = 'Very High'")
study2$ftnd.level <- factor(study2$ftnd.level, levels = c("Very low", "Low", "Medium", "High", "Very High"))
cbind(round(prop.table(table(study2$ftnd.level))*100,3)) 
##           [,1]
## Very low  12.7
## Low       19.4
## Medium    17.2
## High      34.1
## Very High 16.6
cat("Mean :", mean(study2$ftnd.score, na.rm = TRUE))
## Mean : 5.42
cat("Standard Deviation :", sd(study2$ftnd.score, na.rm = TRUE))
## Standard Deviation : 2.28
# The FTND score was 5.4 (sd = 2.4). 

## PHQ9
study2$phq.score <- rowSums(study2[,35:43]) - 9
## How many answered
prop.table(table(!is.na(study2$phq.score)))[2]
## TRUE 
## 0.27
## Score
cat("Mean :", mean(study2$phq.score, na.rm = TRUE))
## Mean : 9.92
cat("Standard Deviation :", sd(study2$phq.score, na.rm = TRUE))
## Standard Deviation : 6.61
## Severity index
study2$phq.severity <- Recode(study2$phq.score, "0:4 = 'No depression'; 5:9 = 'Mild'; 10:14 = 'Moderate'; 15:19 = 'Moderately severe'; 20:27 = 'Severe'")
study2$phq.severity <- factor(study2$phq.severity, levels = c("No depression", "Mild", "Moderate", "Moderately severe", "Severe"))
cbind(round(prop.table(table(study2$phq.severity))*100,3)) 
##                   [,1]
## No depression     23.3
## Mild              33.1
## Moderate          19.8
## Moderately severe 13.1
## Severe            10.8
## Major Depressive diagnosis
study2[,84:91] <- sapply(study2[,35:42], function(x) ifelse(x > 3, 1,0))
names(study2)[84:91] <- paste0("phq.", seq_len(8),"r")
study2$phq_9r <- ifelse(study2$phq_9 > 2, 1, 0)
study2$phq_mdd <- ifelse(rowSums(study2[,84:92]) > 0, "Yes" ,"No")
cbind(round(prop.table(table(study2$phq_mdd))*100,3))
##     [,1]
## Yes  100
# Time spent on intervention
ggplot(study2, aes(meanTimebySection)) + geom_histogram(binwidth = 60)
## Warning: Removed 44 rows containing non-finite values (stat_bin).

summary(as.numeric(study2$meanTimebySection)/60)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     0.0     3.4     7.6     9.9    13.7    78.7      44
describe(as.numeric(study2$meanTimebySection)/60)
##    vars    n mean   sd median trimmed  mad min  max range skew kurtosis
## X1    1 1231 9.94 9.36   7.57    8.56 7.51   0 78.7  78.7 1.98     6.72
##      se
## X1 0.27
# the median of time spent was 6.7. The distribuction shape is highly skewed. Inspect outliers

# Number of sessions
ggplot(study2, aes(nsessions)) + geom_histogram(binwidth = 3)
## Warning: Removed 44 rows containing non-finite values (stat_bin).

summary(study2$nsessions)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     1.0     1.0     1.0     1.3     1.0    90.0      44
describe(study2$nsessions)
##    vars    n mean   sd median trimmed mad min max range skew kurtosis   se
## X1    1 1231 1.28 3.02      1    1.03   0   1  90    89   25      678 0.09
# 98% of users acessed the intervention once 
# One user "joycesilva01@gmail.com" visited 60 pages!!

# Number of pages visited ====
ggplot(study2, aes(pagesVisited)) + geom_histogram(binwidth = 15)
## Warning: Removed 44 rows containing non-finite values (stat_bin).

summary(study2$pagesVisited)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##       1       7      12      15      18     681      44
describe(study2$pagesVisited)
##    vars    n mean   sd median trimmed mad min max range skew kurtosis   se
## X1    1 1231 14.9 27.2     12    12.5 8.9   1 681   680 18.7      420 0.77
quantile(study2$pagesVisited, c(.25,.5,.95), na.rm = TRUE)
## 25% 50% 95% 
##   7  12  34
# 25% of users acessed less than 7 pages; 50% less than 12; 5% more than 33

# Have you tried to quit before? ====
ggplot(study2, aes(factor(tentou_parar))) + geom_bar()

cbind(round(prop.table(table(study2$tentou_parar))*100,3))
##   [,1]
## 0 53.3
## 1 46.7
# Number of cigarettes smoked
describe(study2$ftnd_4)
##    vars   n mean   sd median trimmed mad min max range skew kurtosis   se
## X1    1 317 18.3 10.2     18      17 8.9   2  60    58 1.38     2.69 0.57
#######################
# FTND -----
#######################

########################
## Exploratory data analysis =====
########################
# Subset only with all cases completed
ftnd <- subset(study2, !is.na(study2$ftnd.score))
ftnd <- ftnd[, c("ftnd_1", "ftnd_2", "ftnd_3", "ftnd_4r", "ftnd_5", "ftnd_6")]

# Inspect Scatter Plot Matrix
pairs.panels(ftnd)

# Inspect Outliers
outlier(ftnd)

##    14    97   153   154   158   162   163   291   294   305   329   330 
##  5.96 10.45  4.74  3.49  5.55  6.74  7.63  4.59  4.59 10.45  6.74  8.59 
##   332   335   336   337   340   347   351   352   353   355   358   360 
##  6.74  6.60  9.29  7.57  5.66  6.49  9.43  6.74 11.32  5.14  2.93  4.73 
##   361   368   375   378   380   381   384   385   387   388   389   392 
##  2.93 11.32  9.72  3.28  3.52  9.47  7.30  7.07  4.73  3.28  9.09  8.89 
##   393   394   395   396   397   399   400   401   402   403   404   405 
##  2.93  9.29  2.93  8.63  4.88  3.56  4.08  5.80  4.17  3.56  3.28  3.28 
##   408   409   411   413   418   420   424   429   434   437   438   439 
##  3.84  2.93  2.93  2.93  4.17  5.01  5.35  6.40  2.93  4.08  4.73  2.91 
##   440   442   443   445   448   450   453   454   456   459   460   461 
##  4.73  3.28  4.59  3.49  5.99  3.12  7.20  5.44  5.17  6.67  3.56  5.01 
##   463   464   465   466   467   468   471   473   474   475   476   477 
##  4.73  5.14  3.12  5.30  3.28  9.72 11.58  3.84  8.92  7.24  3.52  7.71 
##   479   481   482   483   485   490   491   494   496   499   500   502 
## 10.99 12.53  5.63  3.56  8.61  3.28  6.16  5.89  3.28  8.89  5.86  5.37 
##   508   511   512   514   515   517   521   522   523   524   530   533 
##  5.68  3.12  8.41  4.17  2.91  7.20  7.20  6.06  5.14  3.84  5.63  8.64 
##   534   535   536   537   540   541   544   545   547   548   549   555 
##  6.30 12.53  6.40  6.74  7.24  2.93  9.77  9.24  3.52  3.12 12.40  7.24 
##   558   566   570   575   581   598   622   626   640   651   652   668 
##  2.93  7.71  4.88  9.77  3.52  5.66  3.28  9.43  5.89  2.91  9.09  6.25 
##   674   679   684   695   701   705   744   749   751   752   754   762 
##  3.28  7.30  3.28  8.92  6.30 10.14  8.14  9.77  7.20  8.64 12.51  3.49 
##   783   808   820   823   854   899   905   913   930   951   955   960 
##  5.01  5.89  7.22  6.77  3.12  3.49  6.16  4.17  6.45  6.60  7.55  7.24 
##   965   969   971   991  1002  1010  1014  1027  1029  1031  1033  1037 
##  6.40  2.93  6.42  5.35  8.89  3.49  5.66  5.35  4.17  5.68  9.77  2.91 
##  1039  1042  1051  1058  1062  1067  1075  1079  1110  1117  1121  1122 
##  8.04  5.86  7.63  3.84  6.48  5.44  3.86  3.49  6.45  3.86  7.57  4.88 
##  1123  1136  1139  1140  1143  1144  1147  1162  1163  1165  1168  1170 
##  4.88  4.73  3.49  5.35 11.58 10.31  7.24  5.99  5.99  3.28  5.99  6.66 
##  1173  1174  1178  1182  1185  1189  1192  1198  1202  1206  1207  1209 
## 11.28  4.88 10.02  3.28 10.83  5.55  5.35  8.92  6.06  8.41  3.56  5.63 
##  1219  1223  1224  1226  1231  1233  1235  1237  1241  1242  1245  1246 
##  4.74  4.17  4.59 10.04  9.09 12.85  5.92  8.63  8.41  3.49  3.28  6.66 
##  1249  1256  1258  1259  1260  1263  1271  1274  1276  1278  1289  1291 
##  7.89  5.44  4.73  3.52  5.63  8.59  3.52  5.92  2.91  5.01  7.91  7.45 
##  1294  1296  1298  1307  1312  1318  1325  1330  1331  1332  1333  1334 
##  5.96  3.49  3.49  9.98  3.12  4.40  6.16  9.80  8.64  3.52  8.10  8.10 
##  1336  1337  1339  1341  1350  1351  1353  1357  1358  1359  1365  1368 
##  4.60  7.18  4.40  5.92  6.48  4.93  6.67  3.28  2.91  9.72  7.20  6.33 
##  1370  1379  1380  1384  1385  1388  1394  1400  1405  1409  1413  1416 
##  4.93  7.89  5.66  6.40  3.12  2.91  6.25  5.30  4.08  5.30  7.24  7.30 
##  1418  1421  1422  1423  1429  1430  1431  1433  1436  1437  1442  1444 
##  6.60  6.74  6.33  5.30  5.89  5.14  3.52  7.24  3.28  3.52  2.91  6.74 
##  1447  1449  1455  1458  1462  1464  1465  1467  1469  1475  1485  1488 
##  7.71  4.98  9.77  5.66  5.66  7.71  8.84  6.74  6.60  6.30  8.61  3.28 
##  1489  1491  1494  1496  1497  1499  1506  1507  1508  1509  1510  1519 
##  3.28  4.73  4.73  2.93  7.49  6.48  6.48  3.52  7.71  6.60  4.17  6.67 
##  1521  1523  1526  1527  1528  1529  1530  1531  1537  1539  1542  1543 
##  5.37  2.93  3.12  4.08  4.59  7.24  9.43  3.56  5.96  6.48  3.28  4.73 
##  1544  1546 
##  3.28  4.08
# Inspect Correlations; we may compare this to 
r <- lowerCor(ftnd)
##         ftn_1 ftn_2 ftn_3 ftn_4 ftn_5 ftn_6
## ftnd_1  1.00                               
## ftnd_2  0.27  1.00                         
## ftnd_3  0.32  0.06  1.00                   
## ftnd_4r 0.38  0.19  0.08  1.00             
## ftnd_5  0.30  0.08  0.32  0.11  1.00       
## ftnd_6  0.29  0.26  0.05  0.25  0.04  1.00
corPlot(r)

# Parallel analysis
fa.parallel(ftnd)

## Parallel analysis suggests that the number of factors =  2  and the number of components =  2
##########################
# Classic Theory
# FTND - EFA ----
##########################

KMO(ftnd)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = ftnd)
## Overall MSA =  0.68
## MSA for each item = 
##  ftnd_1  ftnd_2  ftnd_3 ftnd_4r  ftnd_5  ftnd_6 
##    0.66    0.73    0.64    0.70    0.67    0.71
bartlett.test(ftnd)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  ftnd
## Bartlett's K-squared = 400, df = 5, p-value <2e-16
# EFA --------
efaModel0 <- fa(ftnd, nfactors = 2,  fm="minres", rotate = "oblimin")
## Loading required namespace: GPArotation
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate =
## rotate, : I am sorry, to do these rotations requires the GPArotation
## package to be installed
print(efaModel0, cut = .3)
## Factor Analysis using method =  minres
## Call: fa(r = ftnd, nfactors = 2, rotate = "oblimin", fm = "minres")
## Standardized loadings (pattern matrix) based upon correlation matrix
##          MR1   MR2   h2   u2 com
## ftnd_1  0.76       0.57 0.43 1.0
## ftnd_2  0.37       0.19 0.81 1.7
## ftnd_3  0.41  0.40 0.33 0.67 2.0
## ftnd_4r 0.46       0.25 0.75 1.4
## ftnd_5  0.40  0.37 0.30 0.70 2.0
## ftnd_6  0.41 -0.34 0.28 0.72 1.9
## 
##                        MR1  MR2
## SS loadings           1.43 0.51
## Proportion Var        0.24 0.08
## Cumulative Var        0.24 0.32
## Proportion Explained  0.74 0.26
## Cumulative Proportion 0.74 1.00
## 
## Mean item complexity =  1.7
## Test of the hypothesis that 2 factors are sufficient.
## 
## The degrees of freedom for the null model are  15  and the objective function was  0.66 with Chi Square of  204
## The degrees of freedom for the model are 4  and the objective function was  0.01 
## 
## The root mean square of the residuals (RMSR) is  0.02 
## The df corrected root mean square of the residuals is  0.03 
## 
## The harmonic number of observations is  314 with the empirical chi square  2.54  with prob <  0.64 
## The total number of observations was  314  with Likelihood Chi Square =  2.68  with prob <  0.61 
## 
## Tucker Lewis Index of factoring reliability =  1.03
## RMSEA index =  0  and the 90 % confidence intervals are  0 0.071
## BIC =  -20.3
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                 MR1   MR2
## Correlation of scores with factors             0.85  0.65
## Multiple R square of scores with factors       0.72  0.42
## Minimum correlation of possible factor scores  0.43 -0.17
plot(efaModel0)

fa.diagram(efaModel0)

# Reliability - Cronbach's Alfa; = .61; This shitty result has been found in literature.
psych::alpha(ftnd)
## 
## Reliability analysis   
## Call: psych::alpha(x = ftnd)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd
##        0.6       0.6    0.59       0.2 1.5 0.031  1.9 0.38
## 
##  lower alpha upper     95% confidence boundaries
## 0.54 0.6 0.66 
## 
##  Reliability if an item is dropped:
##         raw_alpha std.alpha G6(smc) average_r  S/N alpha se
## ftnd_1       0.43      0.46    0.43      0.14 0.85    0.048
## ftnd_2       0.57      0.58    0.56      0.21 1.36    0.033
## ftnd_3       0.58      0.58    0.55      0.22 1.39    0.033
## ftnd_4r      0.55      0.55    0.53      0.20 1.24    0.034
## ftnd_5       0.58      0.58    0.55      0.22 1.37    0.033
## ftnd_6       0.57      0.57    0.55      0.21 1.34    0.034
## 
##  Item statistics 
##           n raw.r std.r r.cor r.drop mean   sd
## ftnd_1  314  0.81  0.74  0.70   0.56  3.1 0.96
## ftnd_2  314  0.48  0.54  0.37   0.29  1.4 0.49
## ftnd_3  314  0.47  0.53  0.37   0.28  1.6 0.48
## ftnd_4r 314  0.67  0.58  0.44   0.36  2.0 0.87
## ftnd_5  314  0.48  0.53  0.38   0.29  1.5 0.50
## ftnd_6  314  0.48  0.55  0.39   0.32  1.8 0.43
## 
## Non missing response frequency for each item
##            1    2    3   4 miss
## ftnd_1  0.11 0.10 0.39 0.4    0
## ftnd_2  0.60 0.40 0.00 0.0    0
## ftnd_3  0.37 0.63 0.00 0.0    0
## ftnd_4r 0.26 0.54 0.11 0.1    0
## ftnd_5  0.50 0.50 0.00 0.0    0
## ftnd_6  0.24 0.76 0.00 0.0    0
factorOne <- ftnd[,c(1,2,4,6)]
factorTwo <- ftnd[,c(3,5)]
psych::alpha(factorOne)
## 
## Reliability analysis   
## Call: psych::alpha(x = factorOne)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd
##       0.57       0.6    0.54      0.27 1.5 0.034  2.1 0.48
## 
##  lower alpha upper     95% confidence boundaries
## 0.5 0.57 0.64 
## 
##  Reliability if an item is dropped:
##         raw_alpha std.alpha G6(smc) average_r  S/N alpha se
## ftnd_1       0.42      0.48    0.38      0.23 0.92    0.051
## ftnd_2       0.54      0.57    0.47      0.31 1.32    0.039
## ftnd_4r      0.46      0.53    0.43      0.27 1.14    0.044
## ftnd_6       0.53      0.54    0.45      0.28 1.16    0.041
## 
##  Item statistics 
##           n raw.r std.r r.cor r.drop mean   sd
## ftnd_1  314  0.81  0.72  0.58   0.46  3.1 0.96
## ftnd_2  314  0.54  0.64  0.42   0.32  1.4 0.49
## ftnd_4r 314  0.75  0.67  0.50   0.40  2.0 0.87
## ftnd_6  314  0.55  0.67  0.48   0.36  1.8 0.43
## 
## Non missing response frequency for each item
##            1    2    3   4 miss
## ftnd_1  0.11 0.10 0.39 0.4    0
## ftnd_2  0.60 0.40 0.00 0.0    0
## ftnd_4r 0.26 0.54 0.11 0.1    0
## ftnd_6  0.24 0.76 0.00 0.0    0
psych::alpha(factorTwo)
## Warning in matrix(unlist(drop.item), ncol = 8, byrow = TRUE): data length
## [12] is not a sub-multiple or multiple of the number of columns [8]
## 
## Reliability analysis   
## Call: psych::alpha(x = factorTwo)
## 
##   raw_alpha std.alpha G6(smc) average_r  S/N   ase mean  sd
##       0.48      0.48    0.32      0.32 0.94 0.058  1.6 0.4
## 
##  lower alpha upper     95% confidence boundaries
## 0.37 0.48 0.6 
## 
##  Reliability if an item is dropped:
##        raw_alpha std.alpha G6(smc) average_r  S/N alpha se
## ftnd_3      0.32      0.32     0.1      0.32   NA       NA
## ftnd_5      0.10      0.32      NA        NA 0.32    0.032
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean   sd
## ftnd_3 314  0.80  0.81  0.46   0.32  1.6 0.48
## ftnd_5 314  0.82  0.81  0.46   0.32  1.5 0.50
## 
## Non missing response frequency for each item
##           1    2 miss
## ftnd_3 0.37 0.63    0
## ftnd_5 0.50 0.50    0
rm(factorOne,factorTwo)

# What to show and Discuss
# 1. Sample characteristics
# 2. Table 1 - FTND Correlation Matrix 
# 3. Table 2 of Figure 1 - FTND EFA result
# Alpha was just horrific like previous studies. KMO was quite bad as well.


#######################
# PHQ-9 -----
#######################

########################
## Exploratory data analysis =====
########################
# Subset only with all cases completed
phq <- subset(study2, !is.na(study2$phq.score))
phq <- phq[, c("phq_1", "phq_2", "phq_3", "phq_4", "phq_5", "phq_6","phq_7", "phq_8", "phq_9")]


# Inspect Scatter Plot Matrix
pairs.panels(phq)

# Inspect Outliers
outlier(phq)

##     14     97    153    158    163    254    291    294    305    329 
##  8.604  3.431  2.829  4.137 10.980 13.835  3.726  9.655 13.721  3.690 
##    330    332    335    336    339    345    347    351    352    353 
## 15.975 25.770 20.800  4.225  7.489 20.993 26.917 12.526 11.359 18.035 
##    355    358    360    361    367    368    375    378    379    380 
##  2.732  4.903  6.663 15.053  3.878  3.041 17.961  8.284 15.809  3.540 
##    381    384    385    387    388    389    392    393    394    395 
## 10.510  4.954 35.106 19.007  7.908  4.748 12.302  3.300  8.888 10.330 
##    396    397    399    400    401    402    403    404    405    408 
##  5.657  3.113 17.476  3.726  6.868  4.789  4.644  7.496  9.000  3.282 
##    409    411    418    420    424    429    434    437    438    439 
##  7.452  6.641 10.265 18.907  7.122  1.412  4.129 17.381  3.646  5.345 
##    440    443    445    447    448    450    452    453    454    456 
##  5.241  7.925  7.542  7.340  3.276 13.693 10.439  5.193  3.878  3.269 
##    459    460    461    463    464    465    466    467    468    471 
## 16.668  3.779 15.642 12.956  7.198 16.856 24.297 12.526  0.831 14.914 
##    473    474    475    476    477    479    481    482    485    490 
## 33.926  2.841 14.909 30.582  7.259 15.863  4.954  3.957  6.350 11.266 
##    491    494    496    498    499    500    502    508    511    512 
##  5.665  4.159 15.642  3.988  2.829 18.311  3.847  1.486 18.081  5.350 
##    514    515    517    521    522    523    524    530    533    534 
## 11.389 10.201 14.708 22.142  5.127  5.658 22.677  3.384  6.053  5.308 
##    535    536    537    540    541    545    547    548    549    555 
## 18.193  9.907  3.878  4.690  6.457 14.722  6.353  5.566 11.236 12.405 
##    558    566    570    575    581    626    651    652    668    674 
## 17.408  3.276  3.690  7.997 20.720  3.690 25.813  4.377  3.957  1.412 
##    679    684    687    695    701    723    744    749    751    752 
##  3.690 15.362  9.319 19.038  3.726 12.577  5.401 16.744  2.922  8.317 
##    754    757    762    783    808    820    823    899    905    906 
## 44.162  1.486  2.922  1.840  2.938  1.840  5.270  9.372  6.529 11.709 
##    913    930    931    951    955    960    965    969    971    991 
##  3.862  4.590  5.879 18.137 17.838  3.113  6.353  4.759 17.564  4.387 
##   1002   1011   1014   1022   1027   1029   1031   1033   1037   1039 
## 18.370 16.648  0.831 16.609  6.366  3.540  3.847  9.629 15.766  3.646 
##   1047   1051   1058   1062   1067   1071   1075   1079   1093   1110 
##  4.789  3.462  3.705  7.092  3.646  9.853  9.224  6.438 16.281  0.831 
##   1117   1121   1122   1123   1129   1133   1136   1138   1139   1140 
## 29.545  2.938  2.971  2.971  9.031  6.181 12.828  4.543  2.753  3.690 
##   1143   1144   1147   1162   1163   1165   1168   1169   1170   1173 
##  7.178  3.646  3.540  5.658  5.658  2.922  3.282 13.330  2.829  1.412 
##   1174   1175   1176   1178   1182   1183   1185   1189   1192   1198 
## 12.638  1.412  1.412  3.779  5.350 11.427  1.486  4.931 11.780 26.071 
##   1202   1206   1207   1209   1219   1220   1223   1224   1226   1230 
## 13.877  1.840  8.695 17.184  6.098  8.389  5.910 12.555  3.462  6.735 
##   1231   1233   1235   1236   1237   1241   1242   1245   1246   1252 
## 21.694  4.159 14.204 16.364  7.443 30.116  3.888  1.412  7.934 11.757 
##   1256   1258   1259   1260   1263   1272   1273   1274   1276   1278 
##  4.540  3.554  5.728  2.922  6.157 13.946 13.946 12.673  3.057 11.335 
##   1289   1291   1294   1296   1298   1307   1310   1312   1313   1316 
##  0.831  3.690  3.847  0.831  6.157 11.666 12.953 10.330 18.158  0.831 
##   1318   1325   1330   1331   1332   1333   1334   1336   1337   1339 
##  3.467 10.707 21.050  2.922  4.301  5.350  5.350 11.807 13.891  8.556 
##   1340   1341   1350   1351   1353   1357   1358   1359   1368   1370 
## 23.099 13.557 30.631 12.698  4.996 21.627  3.957  1.840 12.220  9.881 
##   1372   1379   1380   1384   1385   1388   1393   1394   1400   1403 
##  3.726  5.333  1.840  5.075 25.454  4.540 14.283 17.258  4.510  7.845 
##   1405   1409   1410   1413   1414   1416   1418   1421   1422   1423 
##  4.357  7.046  2.887 16.331 16.568  3.878  2.229 15.731 24.890  8.604 
##   1429   1430   1431   1433   1436   1437   1441   1442   1444   1447 
##  1.486  5.781  6.022  5.187 14.291  7.157  5.268 12.064  7.347  6.589 
##   1449   1453   1455   1458   1462   1464   1465   1467   1469   1471 
##  6.539 10.182  0.831  3.705  0.831 15.949  6.948 14.399 12.685  2.922 
##   1475   1478   1484   1485   1487   1488   1489   1491   1494   1496 
##  5.194  6.157  2.922 14.087  9.081  4.494 15.346  3.726 10.821  4.716 
##   1497   1499   1506   1507   1508   1509   1510   1515   1519   1520 
##  2.922  2.942  5.703  3.690  8.112 12.615  7.996  4.052  2.922  4.047 
##   1521   1523   1526   1527   1528   1529   1530   1531   1537   1539 
##  1.412 20.206  1.486 10.628  8.783  4.159 14.745 10.844 21.627 11.757 
##   1542   1543   1544   1546 
##  2.229  0.831 11.501  1.412
# Inspect Correlations; we may compare this to 
r <- lowerCor(phq)
##       phq_1 phq_2 phq_3 phq_4 phq_5 phq_6 phq_7 phq_8 phq_9
## phq_1 1.00                                                 
## phq_2 0.72  1.00                                           
## phq_3 0.54  0.56  1.00                                     
## phq_4 0.65  0.61  0.55  1.00                               
## phq_5 0.44  0.48  0.45  0.56  1.00                         
## phq_6 0.53  0.69  0.46  0.62  0.56  1.00                   
## phq_7 0.49  0.53  0.43  0.53  0.49  0.61  1.00             
## phq_8 0.45  0.58  0.46  0.53  0.44  0.56  0.66  1.00       
## phq_9 0.41  0.48  0.35  0.33  0.31  0.44  0.33  0.40  1.00
corPlot(phq)

# Parallel analysis
fa.parallel(phq)

## Parallel analysis suggests that the number of factors =  3  and the number of components =  1
vss(phq)

## 
## Very Simple Structure
## Call: vss(x = phq)
## VSS complexity 1 achieves a maximimum of 0.91  with  1  factors
## VSS complexity 2 achieves a maximimum of 0.93  with  2  factors
## 
## The Velicer MAP achieves a minimum of 0.03  with  1  factors 
## BIC achieves a minimum of  NA  with  2  factors
## Sample Size adjusted BIC achieves a minimum of  NA  with  4  factors
## 
## Statistics by number of factors 
##   vss1 vss2   map dof   chisq    prob sqresid  fit RMSEA   BIC SABIC
## 1 0.91 0.00 0.032  27 1.6e+02 4.4e-21     2.4 0.91 0.121   3.6  89.3
## 2 0.51 0.93 0.063  19 8.6e+01 1.6e-10     2.0 0.93 0.102 -24.8  35.4
## 3 0.36 0.77 0.085  12 4.6e+01 8.2e-06     1.7 0.94 0.091 -24.5  13.5
## 4 0.35 0.68 0.124   6 1.3e+01 4.3e-02     1.5 0.95 0.059 -22.0  -3.0
## 5 0.29 0.56 0.191   1 4.0e+00 4.4e-02     1.3 0.95 0.096  -1.8   1.4
## 6 0.33 0.55 0.293  -3 4.7e-02      NA     1.1 0.96    NA    NA    NA
## 7 0.31 0.57 0.523  -6 2.0e-08      NA     1.1 0.96    NA    NA    NA
## 8 0.31 0.59 1.000  -8 0.0e+00      NA     1.0 0.96    NA    NA    NA
##   complex  eChisq    SRMR eCRMS  eBIC
## 1     1.0 6.3e+01 5.1e-02 0.058 -94.4
## 2     1.7 2.7e+01 3.3e-02 0.046 -83.6
## 3     2.1 1.2e+01 2.2e-02 0.039 -57.7
## 4     2.3 3.6e+00 1.2e-02 0.030 -31.4
## 5     2.5 1.2e+00 6.9e-03 0.041  -4.7
## 6     2.4 1.0e-02 6.4e-04    NA    NA
## 7     2.7 2.8e-09 3.3e-07    NA    NA
## 8     2.8 8.6e-18 1.9e-11    NA    NA
##########################
# Classic Theory
# PHQ-9 - EFA ----
##########################

KMO(phq)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = phq)
## Overall MSA =  0.89
## MSA for each item = 
## phq_1 phq_2 phq_3 phq_4 phq_5 phq_6 phq_7 phq_8 phq_9 
##  0.86  0.87  0.95  0.91  0.94  0.89  0.88  0.88  0.94
bartlett.test(phq)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  phq
## Bartlett's K-squared = 30, df = 8, p-value = 4e-04
# EFA --------
efaModel0 <- fa(phq, nfactors = 1,  fm="minres")
print(efaModel0, cut = .3)
## Factor Analysis using method =  minres
## Call: fa(r = phq, nfactors = 1, fm = "minres")
## Standardized loadings (pattern matrix) based upon correlation matrix
##        MR1   h2   u2 com
## phq_1 0.75 0.56 0.44   1
## phq_2 0.83 0.70 0.30   1
## phq_3 0.66 0.43 0.57   1
## phq_4 0.78 0.61 0.39   1
## phq_5 0.65 0.42 0.58   1
## phq_6 0.79 0.63 0.37   1
## phq_7 0.72 0.51 0.49   1
## phq_8 0.71 0.51 0.49   1
## phq_9 0.52 0.27 0.73   1
## 
##                 MR1
## SS loadings    4.63
## Proportion Var 0.51
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  36  and the objective function was  4.8 with Chi Square of  1629
## The degrees of freedom for the model are 27  and the objective function was  0.48 
## 
## The root mean square of the residuals (RMSR) is  0.05 
## The df corrected root mean square of the residuals is  0.06 
## 
## The harmonic number of observations is  344 with the empirical chi square  63.3  with prob <  9.5e-05 
## The total number of observations was  344  with Likelihood Chi Square =  161  with prob <  4.4e-21 
## 
## Tucker Lewis Index of factoring reliability =  0.887
## RMSEA index =  0.121  and the 90 % confidence intervals are  0.103 0.139
## BIC =  3.63
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                 MR1
## Correlation of scores with factors             0.96
## Multiple R square of scores with factors       0.91
## Minimum correlation of possible factor scores  0.83
plot(efaModel0)

fa.diagram(efaModel0)

# Reliability - Cronbach's Alfa; = .61;
psych::alpha(phq)
## 
## Reliability analysis   
## Call: psych::alpha(x = phq)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd
##        0.9       0.9    0.91      0.51 9.2 0.0078  2.1 0.73
## 
##  lower alpha upper     95% confidence boundaries
## 0.89 0.9 0.92 
## 
##  Reliability if an item is dropped:
##       raw_alpha std.alpha G6(smc) average_r S/N alpha se
## phq_1      0.89      0.89    0.89      0.50 8.0   0.0088
## phq_2      0.88      0.88    0.88      0.48 7.5   0.0093
## phq_3      0.90      0.89    0.90      0.52 8.5   0.0084
## phq_4      0.89      0.89    0.89      0.49 7.8   0.0091
## phq_5      0.90      0.90    0.90      0.52 8.6   0.0083
## phq_6      0.89      0.89    0.89      0.49 7.7   0.0092
## phq_7      0.89      0.89    0.89      0.50 8.2   0.0087
## phq_8      0.89      0.89    0.89      0.50 8.2   0.0087
## phq_9      0.90      0.90    0.91      0.54 9.5   0.0079
## 
##  Item statistics 
##         n raw.r std.r r.cor r.drop mean   sd
## phq_1 344  0.77  0.77  0.75   0.70  2.2 0.95
## phq_2 344  0.84  0.84  0.83   0.78  2.2 0.96
## phq_3 344  0.71  0.71  0.66   0.62  2.3 1.00
## phq_4 344  0.80  0.80  0.77   0.74  2.5 0.96
## phq_5 344  0.71  0.70  0.65   0.62  2.2 1.04
## phq_6 344  0.82  0.81  0.79   0.75  2.3 1.06
## phq_7 344  0.76  0.75  0.72   0.68  2.1 1.00
## phq_8 344  0.76  0.75  0.72   0.68  1.8 1.01
## phq_9 344  0.58  0.60  0.52   0.49  1.4 0.82
## 
## Non missing response frequency for each item
##          1    2    3    4 miss
## phq_1 0.22 0.47 0.18 0.14    0
## phq_2 0.26 0.47 0.13 0.14    0
## phq_3 0.23 0.46 0.14 0.18    0
## phq_4 0.12 0.49 0.17 0.22    0
## phq_5 0.30 0.37 0.17 0.16    0
## phq_6 0.27 0.36 0.18 0.18    0
## phq_7 0.35 0.37 0.16 0.12    0
## phq_8 0.49 0.31 0.10 0.11    0
## phq_9 0.76 0.12 0.06 0.05    0