##########################################
# Psychometric properties of online versions
# of FTND and PHQ-9
# Autor: Henrique Gomide
# Licença: GNU GPL 3.0
##########################################
# Load libraries
library(car)
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:car':
##
## logit
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
# Open user data
study2 <- read.csv("~/vst-analise/R/data/lwt.csv", stringsAsFactors = FALSE)
# What percentage of users who gave consent?
round(prop.table(table(study2$authorize_data)),3)[2]*100
## 1
## 82.5
# How many people have authorized?
nrow(subset(study2, study2$authorize_data == 1))
## [1] 1275
# Subset with people who agreed to join the study
study2 <- subset(study2, study2$authorize_data == 1)
# Number of users
##########################
# Sample description ----
##########################
## Sex distribuction in percent; F = Female, M = Male
round(prop.table(table(study2$gender)),3)*100
##
## F M
## 65.6 34.4
## Age - Mean and standard deviation ====
study2$birth <- as.Date(study2$birth)
study2$age <- 2017 - as.numeric(format(study2$birth, "%Y"))
study2$age <- ifelse(study2$age >= 18, study2$age, NA)
options(digits = 3)
cat("Mean :", mean(study2$age, na.rm = TRUE))
## Mean : 44
cat("Standard Deviation :", sd(study2$age, na.rm = TRUE))
## Standard Deviation : 11.6
## Motivation to quit
## How many people answered
prop.table(table(!is.na(study2$ladder)))[2]
## TRUE
## 0.485
## In a ten-point scale...
table(cut(study2$ladder, 4))
##
## (0.991,3.25] (3.25,5.5] (5.5,7.75] (7.75,10]
## 8 161 229 220
cat("Mean :", mean(study2$ladder, na.rm = TRUE))
## Mean : 6.79
cat("Standard Deviation :", sd(study2$ladder, na.rm = TRUE))
## Standard Deviation : 1.57
# Conclusion: The smokers seemed to be medium to highly motivated to quit Ladder score = 6.8 (sd = 1.6).
## FTND
## Create sum of questionnaire
study2$ftnd_4r <- Recode(study2$ftnd_4, "0:10 = 1; 11:20 = 2; 21:30 = 3; 31:151 = 4")
study2$ftnd.score <- study2$ftnd_1 + study2$ftnd_2 + study2$ftnd_3 + study2$ftnd_4r + study2$ftnd_5 + study2$ftnd_6 - 6
## How many answered
prop.table(table(!is.na(study2$ftnd.score)))[2]
## TRUE
## 0.246
### Dependence Level
study2$ftnd.level <- Recode(study2$ftnd.score, "0:2 = 'Very low'; 3:4 = 'Low'; 5 = 'Medium'; 6:7 = 'High'; 8:10 = 'Very High'")
study2$ftnd.level <- factor(study2$ftnd.level, levels = c("Very low", "Low", "Medium", "High", "Very High"))
cbind(round(prop.table(table(study2$ftnd.level))*100,3))
## [,1]
## Very low 12.7
## Low 19.4
## Medium 17.2
## High 34.1
## Very High 16.6
cat("Mean :", mean(study2$ftnd.score, na.rm = TRUE))
## Mean : 5.42
cat("Standard Deviation :", sd(study2$ftnd.score, na.rm = TRUE))
## Standard Deviation : 2.28
# The FTND score was 5.4 (sd = 2.4).
## PHQ9
study2$phq.score <- rowSums(study2[,35:43]) - 9
## How many answered
prop.table(table(!is.na(study2$phq.score)))[2]
## TRUE
## 0.27
## Score
cat("Mean :", mean(study2$phq.score, na.rm = TRUE))
## Mean : 9.92
cat("Standard Deviation :", sd(study2$phq.score, na.rm = TRUE))
## Standard Deviation : 6.61
## Severity index
study2$phq.severity <- Recode(study2$phq.score, "0:4 = 'No depression'; 5:9 = 'Mild'; 10:14 = 'Moderate'; 15:19 = 'Moderately severe'; 20:27 = 'Severe'")
study2$phq.severity <- factor(study2$phq.severity, levels = c("No depression", "Mild", "Moderate", "Moderately severe", "Severe"))
cbind(round(prop.table(table(study2$phq.severity))*100,3))
## [,1]
## No depression 23.3
## Mild 33.1
## Moderate 19.8
## Moderately severe 13.1
## Severe 10.8
## Major Depressive diagnosis
study2[,84:91] <- sapply(study2[,35:42], function(x) ifelse(x > 3, 1,0))
names(study2)[84:91] <- paste0("phq.", seq_len(8),"r")
study2$phq_9r <- ifelse(study2$phq_9 > 2, 1, 0)
study2$phq_mdd <- ifelse(rowSums(study2[,84:92]) > 0, "Yes" ,"No")
cbind(round(prop.table(table(study2$phq_mdd))*100,3))
## [,1]
## Yes 100
# Time spent on intervention
ggplot(study2, aes(meanTimebySection)) + geom_histogram(binwidth = 60)
## Warning: Removed 44 rows containing non-finite values (stat_bin).

summary(as.numeric(study2$meanTimebySection)/60)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0 3.4 7.6 9.9 13.7 78.7 44
describe(as.numeric(study2$meanTimebySection)/60)
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 1231 9.94 9.36 7.57 8.56 7.51 0 78.7 78.7 1.98 6.72
## se
## X1 0.27
# the median of time spent was 6.7. The distribuction shape is highly skewed. Inspect outliers
# Number of sessions
ggplot(study2, aes(nsessions)) + geom_histogram(binwidth = 3)
## Warning: Removed 44 rows containing non-finite values (stat_bin).

summary(study2$nsessions)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.0 1.0 1.0 1.3 1.0 90.0 44
describe(study2$nsessions)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 1231 1.28 3.02 1 1.03 0 1 90 89 25 678 0.09
# 98% of users acessed the intervention once
# One user "joycesilva01@gmail.com" visited 60 pages!!
# Number of pages visited ====
ggplot(study2, aes(pagesVisited)) + geom_histogram(binwidth = 15)
## Warning: Removed 44 rows containing non-finite values (stat_bin).

summary(study2$pagesVisited)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1 7 12 15 18 681 44
describe(study2$pagesVisited)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 1231 14.9 27.2 12 12.5 8.9 1 681 680 18.7 420 0.77
quantile(study2$pagesVisited, c(.25,.5,.95), na.rm = TRUE)
## 25% 50% 95%
## 7 12 34
# 25% of users acessed less than 7 pages; 50% less than 12; 5% more than 33
# Have you tried to quit before? ====
ggplot(study2, aes(factor(tentou_parar))) + geom_bar()

cbind(round(prop.table(table(study2$tentou_parar))*100,3))
## [,1]
## 0 53.3
## 1 46.7
# Number of cigarettes smoked
describe(study2$ftnd_4)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 317 18.3 10.2 18 17 8.9 2 60 58 1.38 2.69 0.57
#######################
# FTND -----
#######################
########################
## Exploratory data analysis =====
########################
# Subset only with all cases completed
ftnd <- subset(study2, !is.na(study2$ftnd.score))
ftnd <- ftnd[, c("ftnd_1", "ftnd_2", "ftnd_3", "ftnd_4r", "ftnd_5", "ftnd_6")]
# Inspect Scatter Plot Matrix
pairs.panels(ftnd)

# Inspect Outliers
outlier(ftnd)

## 14 97 153 154 158 162 163 291 294 305 329 330
## 5.96 10.45 4.74 3.49 5.55 6.74 7.63 4.59 4.59 10.45 6.74 8.59
## 332 335 336 337 340 347 351 352 353 355 358 360
## 6.74 6.60 9.29 7.57 5.66 6.49 9.43 6.74 11.32 5.14 2.93 4.73
## 361 368 375 378 380 381 384 385 387 388 389 392
## 2.93 11.32 9.72 3.28 3.52 9.47 7.30 7.07 4.73 3.28 9.09 8.89
## 393 394 395 396 397 399 400 401 402 403 404 405
## 2.93 9.29 2.93 8.63 4.88 3.56 4.08 5.80 4.17 3.56 3.28 3.28
## 408 409 411 413 418 420 424 429 434 437 438 439
## 3.84 2.93 2.93 2.93 4.17 5.01 5.35 6.40 2.93 4.08 4.73 2.91
## 440 442 443 445 448 450 453 454 456 459 460 461
## 4.73 3.28 4.59 3.49 5.99 3.12 7.20 5.44 5.17 6.67 3.56 5.01
## 463 464 465 466 467 468 471 473 474 475 476 477
## 4.73 5.14 3.12 5.30 3.28 9.72 11.58 3.84 8.92 7.24 3.52 7.71
## 479 481 482 483 485 490 491 494 496 499 500 502
## 10.99 12.53 5.63 3.56 8.61 3.28 6.16 5.89 3.28 8.89 5.86 5.37
## 508 511 512 514 515 517 521 522 523 524 530 533
## 5.68 3.12 8.41 4.17 2.91 7.20 7.20 6.06 5.14 3.84 5.63 8.64
## 534 535 536 537 540 541 544 545 547 548 549 555
## 6.30 12.53 6.40 6.74 7.24 2.93 9.77 9.24 3.52 3.12 12.40 7.24
## 558 566 570 575 581 598 622 626 640 651 652 668
## 2.93 7.71 4.88 9.77 3.52 5.66 3.28 9.43 5.89 2.91 9.09 6.25
## 674 679 684 695 701 705 744 749 751 752 754 762
## 3.28 7.30 3.28 8.92 6.30 10.14 8.14 9.77 7.20 8.64 12.51 3.49
## 783 808 820 823 854 899 905 913 930 951 955 960
## 5.01 5.89 7.22 6.77 3.12 3.49 6.16 4.17 6.45 6.60 7.55 7.24
## 965 969 971 991 1002 1010 1014 1027 1029 1031 1033 1037
## 6.40 2.93 6.42 5.35 8.89 3.49 5.66 5.35 4.17 5.68 9.77 2.91
## 1039 1042 1051 1058 1062 1067 1075 1079 1110 1117 1121 1122
## 8.04 5.86 7.63 3.84 6.48 5.44 3.86 3.49 6.45 3.86 7.57 4.88
## 1123 1136 1139 1140 1143 1144 1147 1162 1163 1165 1168 1170
## 4.88 4.73 3.49 5.35 11.58 10.31 7.24 5.99 5.99 3.28 5.99 6.66
## 1173 1174 1178 1182 1185 1189 1192 1198 1202 1206 1207 1209
## 11.28 4.88 10.02 3.28 10.83 5.55 5.35 8.92 6.06 8.41 3.56 5.63
## 1219 1223 1224 1226 1231 1233 1235 1237 1241 1242 1245 1246
## 4.74 4.17 4.59 10.04 9.09 12.85 5.92 8.63 8.41 3.49 3.28 6.66
## 1249 1256 1258 1259 1260 1263 1271 1274 1276 1278 1289 1291
## 7.89 5.44 4.73 3.52 5.63 8.59 3.52 5.92 2.91 5.01 7.91 7.45
## 1294 1296 1298 1307 1312 1318 1325 1330 1331 1332 1333 1334
## 5.96 3.49 3.49 9.98 3.12 4.40 6.16 9.80 8.64 3.52 8.10 8.10
## 1336 1337 1339 1341 1350 1351 1353 1357 1358 1359 1365 1368
## 4.60 7.18 4.40 5.92 6.48 4.93 6.67 3.28 2.91 9.72 7.20 6.33
## 1370 1379 1380 1384 1385 1388 1394 1400 1405 1409 1413 1416
## 4.93 7.89 5.66 6.40 3.12 2.91 6.25 5.30 4.08 5.30 7.24 7.30
## 1418 1421 1422 1423 1429 1430 1431 1433 1436 1437 1442 1444
## 6.60 6.74 6.33 5.30 5.89 5.14 3.52 7.24 3.28 3.52 2.91 6.74
## 1447 1449 1455 1458 1462 1464 1465 1467 1469 1475 1485 1488
## 7.71 4.98 9.77 5.66 5.66 7.71 8.84 6.74 6.60 6.30 8.61 3.28
## 1489 1491 1494 1496 1497 1499 1506 1507 1508 1509 1510 1519
## 3.28 4.73 4.73 2.93 7.49 6.48 6.48 3.52 7.71 6.60 4.17 6.67
## 1521 1523 1526 1527 1528 1529 1530 1531 1537 1539 1542 1543
## 5.37 2.93 3.12 4.08 4.59 7.24 9.43 3.56 5.96 6.48 3.28 4.73
## 1544 1546
## 3.28 4.08
# Inspect Correlations; we may compare this to
r <- lowerCor(ftnd)
## ftn_1 ftn_2 ftn_3 ftn_4 ftn_5 ftn_6
## ftnd_1 1.00
## ftnd_2 0.27 1.00
## ftnd_3 0.32 0.06 1.00
## ftnd_4r 0.38 0.19 0.08 1.00
## ftnd_5 0.30 0.08 0.32 0.11 1.00
## ftnd_6 0.29 0.26 0.05 0.25 0.04 1.00
corPlot(r)

# Parallel analysis
fa.parallel(ftnd)

## Parallel analysis suggests that the number of factors = 2 and the number of components = 2
##########################
# Classic Theory
# FTND - EFA ----
##########################
KMO(ftnd)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = ftnd)
## Overall MSA = 0.68
## MSA for each item =
## ftnd_1 ftnd_2 ftnd_3 ftnd_4r ftnd_5 ftnd_6
## 0.66 0.73 0.64 0.70 0.67 0.71
bartlett.test(ftnd)
##
## Bartlett test of homogeneity of variances
##
## data: ftnd
## Bartlett's K-squared = 400, df = 5, p-value <2e-16
# EFA --------
efaModel0 <- fa(ftnd, nfactors = 2, fm="minres", rotate = "oblimin")
## Loading required namespace: GPArotation
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate =
## rotate, : I am sorry, to do these rotations requires the GPArotation
## package to be installed
print(efaModel0, cut = .3)
## Factor Analysis using method = minres
## Call: fa(r = ftnd, nfactors = 2, rotate = "oblimin", fm = "minres")
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 MR2 h2 u2 com
## ftnd_1 0.76 0.57 0.43 1.0
## ftnd_2 0.37 0.19 0.81 1.7
## ftnd_3 0.41 0.40 0.33 0.67 2.0
## ftnd_4r 0.46 0.25 0.75 1.4
## ftnd_5 0.40 0.37 0.30 0.70 2.0
## ftnd_6 0.41 -0.34 0.28 0.72 1.9
##
## MR1 MR2
## SS loadings 1.43 0.51
## Proportion Var 0.24 0.08
## Cumulative Var 0.24 0.32
## Proportion Explained 0.74 0.26
## Cumulative Proportion 0.74 1.00
##
## Mean item complexity = 1.7
## Test of the hypothesis that 2 factors are sufficient.
##
## The degrees of freedom for the null model are 15 and the objective function was 0.66 with Chi Square of 204
## The degrees of freedom for the model are 4 and the objective function was 0.01
##
## The root mean square of the residuals (RMSR) is 0.02
## The df corrected root mean square of the residuals is 0.03
##
## The harmonic number of observations is 314 with the empirical chi square 2.54 with prob < 0.64
## The total number of observations was 314 with Likelihood Chi Square = 2.68 with prob < 0.61
##
## Tucker Lewis Index of factoring reliability = 1.03
## RMSEA index = 0 and the 90 % confidence intervals are 0 0.071
## BIC = -20.3
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## MR1 MR2
## Correlation of scores with factors 0.85 0.65
## Multiple R square of scores with factors 0.72 0.42
## Minimum correlation of possible factor scores 0.43 -0.17
plot(efaModel0)

fa.diagram(efaModel0)

# Reliability - Cronbach's Alfa; = .61; This shitty result has been found in literature.
psych::alpha(ftnd)
##
## Reliability analysis
## Call: psych::alpha(x = ftnd)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.6 0.6 0.59 0.2 1.5 0.031 1.9 0.38
##
## lower alpha upper 95% confidence boundaries
## 0.54 0.6 0.66
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## ftnd_1 0.43 0.46 0.43 0.14 0.85 0.048
## ftnd_2 0.57 0.58 0.56 0.21 1.36 0.033
## ftnd_3 0.58 0.58 0.55 0.22 1.39 0.033
## ftnd_4r 0.55 0.55 0.53 0.20 1.24 0.034
## ftnd_5 0.58 0.58 0.55 0.22 1.37 0.033
## ftnd_6 0.57 0.57 0.55 0.21 1.34 0.034
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## ftnd_1 314 0.81 0.74 0.70 0.56 3.1 0.96
## ftnd_2 314 0.48 0.54 0.37 0.29 1.4 0.49
## ftnd_3 314 0.47 0.53 0.37 0.28 1.6 0.48
## ftnd_4r 314 0.67 0.58 0.44 0.36 2.0 0.87
## ftnd_5 314 0.48 0.53 0.38 0.29 1.5 0.50
## ftnd_6 314 0.48 0.55 0.39 0.32 1.8 0.43
##
## Non missing response frequency for each item
## 1 2 3 4 miss
## ftnd_1 0.11 0.10 0.39 0.4 0
## ftnd_2 0.60 0.40 0.00 0.0 0
## ftnd_3 0.37 0.63 0.00 0.0 0
## ftnd_4r 0.26 0.54 0.11 0.1 0
## ftnd_5 0.50 0.50 0.00 0.0 0
## ftnd_6 0.24 0.76 0.00 0.0 0
factorOne <- ftnd[,c(1,2,4,6)]
factorTwo <- ftnd[,c(3,5)]
psych::alpha(factorOne)
##
## Reliability analysis
## Call: psych::alpha(x = factorOne)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.57 0.6 0.54 0.27 1.5 0.034 2.1 0.48
##
## lower alpha upper 95% confidence boundaries
## 0.5 0.57 0.64
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## ftnd_1 0.42 0.48 0.38 0.23 0.92 0.051
## ftnd_2 0.54 0.57 0.47 0.31 1.32 0.039
## ftnd_4r 0.46 0.53 0.43 0.27 1.14 0.044
## ftnd_6 0.53 0.54 0.45 0.28 1.16 0.041
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## ftnd_1 314 0.81 0.72 0.58 0.46 3.1 0.96
## ftnd_2 314 0.54 0.64 0.42 0.32 1.4 0.49
## ftnd_4r 314 0.75 0.67 0.50 0.40 2.0 0.87
## ftnd_6 314 0.55 0.67 0.48 0.36 1.8 0.43
##
## Non missing response frequency for each item
## 1 2 3 4 miss
## ftnd_1 0.11 0.10 0.39 0.4 0
## ftnd_2 0.60 0.40 0.00 0.0 0
## ftnd_4r 0.26 0.54 0.11 0.1 0
## ftnd_6 0.24 0.76 0.00 0.0 0
psych::alpha(factorTwo)
## Warning in matrix(unlist(drop.item), ncol = 8, byrow = TRUE): data length
## [12] is not a sub-multiple or multiple of the number of columns [8]
##
## Reliability analysis
## Call: psych::alpha(x = factorTwo)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.48 0.48 0.32 0.32 0.94 0.058 1.6 0.4
##
## lower alpha upper 95% confidence boundaries
## 0.37 0.48 0.6
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## ftnd_3 0.32 0.32 0.1 0.32 NA NA
## ftnd_5 0.10 0.32 NA NA 0.32 0.032
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## ftnd_3 314 0.80 0.81 0.46 0.32 1.6 0.48
## ftnd_5 314 0.82 0.81 0.46 0.32 1.5 0.50
##
## Non missing response frequency for each item
## 1 2 miss
## ftnd_3 0.37 0.63 0
## ftnd_5 0.50 0.50 0
rm(factorOne,factorTwo)
# What to show and Discuss
# 1. Sample characteristics
# 2. Table 1 - FTND Correlation Matrix
# 3. Table 2 of Figure 1 - FTND EFA result
# Alpha was just horrific like previous studies. KMO was quite bad as well.
#######################
# PHQ-9 -----
#######################
########################
## Exploratory data analysis =====
########################
# Subset only with all cases completed
phq <- subset(study2, !is.na(study2$phq.score))
phq <- phq[, c("phq_1", "phq_2", "phq_3", "phq_4", "phq_5", "phq_6","phq_7", "phq_8", "phq_9")]
# Inspect Scatter Plot Matrix
pairs.panels(phq)

# Inspect Outliers
outlier(phq)

## 14 97 153 158 163 254 291 294 305 329
## 8.604 3.431 2.829 4.137 10.980 13.835 3.726 9.655 13.721 3.690
## 330 332 335 336 339 345 347 351 352 353
## 15.975 25.770 20.800 4.225 7.489 20.993 26.917 12.526 11.359 18.035
## 355 358 360 361 367 368 375 378 379 380
## 2.732 4.903 6.663 15.053 3.878 3.041 17.961 8.284 15.809 3.540
## 381 384 385 387 388 389 392 393 394 395
## 10.510 4.954 35.106 19.007 7.908 4.748 12.302 3.300 8.888 10.330
## 396 397 399 400 401 402 403 404 405 408
## 5.657 3.113 17.476 3.726 6.868 4.789 4.644 7.496 9.000 3.282
## 409 411 418 420 424 429 434 437 438 439
## 7.452 6.641 10.265 18.907 7.122 1.412 4.129 17.381 3.646 5.345
## 440 443 445 447 448 450 452 453 454 456
## 5.241 7.925 7.542 7.340 3.276 13.693 10.439 5.193 3.878 3.269
## 459 460 461 463 464 465 466 467 468 471
## 16.668 3.779 15.642 12.956 7.198 16.856 24.297 12.526 0.831 14.914
## 473 474 475 476 477 479 481 482 485 490
## 33.926 2.841 14.909 30.582 7.259 15.863 4.954 3.957 6.350 11.266
## 491 494 496 498 499 500 502 508 511 512
## 5.665 4.159 15.642 3.988 2.829 18.311 3.847 1.486 18.081 5.350
## 514 515 517 521 522 523 524 530 533 534
## 11.389 10.201 14.708 22.142 5.127 5.658 22.677 3.384 6.053 5.308
## 535 536 537 540 541 545 547 548 549 555
## 18.193 9.907 3.878 4.690 6.457 14.722 6.353 5.566 11.236 12.405
## 558 566 570 575 581 626 651 652 668 674
## 17.408 3.276 3.690 7.997 20.720 3.690 25.813 4.377 3.957 1.412
## 679 684 687 695 701 723 744 749 751 752
## 3.690 15.362 9.319 19.038 3.726 12.577 5.401 16.744 2.922 8.317
## 754 757 762 783 808 820 823 899 905 906
## 44.162 1.486 2.922 1.840 2.938 1.840 5.270 9.372 6.529 11.709
## 913 930 931 951 955 960 965 969 971 991
## 3.862 4.590 5.879 18.137 17.838 3.113 6.353 4.759 17.564 4.387
## 1002 1011 1014 1022 1027 1029 1031 1033 1037 1039
## 18.370 16.648 0.831 16.609 6.366 3.540 3.847 9.629 15.766 3.646
## 1047 1051 1058 1062 1067 1071 1075 1079 1093 1110
## 4.789 3.462 3.705 7.092 3.646 9.853 9.224 6.438 16.281 0.831
## 1117 1121 1122 1123 1129 1133 1136 1138 1139 1140
## 29.545 2.938 2.971 2.971 9.031 6.181 12.828 4.543 2.753 3.690
## 1143 1144 1147 1162 1163 1165 1168 1169 1170 1173
## 7.178 3.646 3.540 5.658 5.658 2.922 3.282 13.330 2.829 1.412
## 1174 1175 1176 1178 1182 1183 1185 1189 1192 1198
## 12.638 1.412 1.412 3.779 5.350 11.427 1.486 4.931 11.780 26.071
## 1202 1206 1207 1209 1219 1220 1223 1224 1226 1230
## 13.877 1.840 8.695 17.184 6.098 8.389 5.910 12.555 3.462 6.735
## 1231 1233 1235 1236 1237 1241 1242 1245 1246 1252
## 21.694 4.159 14.204 16.364 7.443 30.116 3.888 1.412 7.934 11.757
## 1256 1258 1259 1260 1263 1272 1273 1274 1276 1278
## 4.540 3.554 5.728 2.922 6.157 13.946 13.946 12.673 3.057 11.335
## 1289 1291 1294 1296 1298 1307 1310 1312 1313 1316
## 0.831 3.690 3.847 0.831 6.157 11.666 12.953 10.330 18.158 0.831
## 1318 1325 1330 1331 1332 1333 1334 1336 1337 1339
## 3.467 10.707 21.050 2.922 4.301 5.350 5.350 11.807 13.891 8.556
## 1340 1341 1350 1351 1353 1357 1358 1359 1368 1370
## 23.099 13.557 30.631 12.698 4.996 21.627 3.957 1.840 12.220 9.881
## 1372 1379 1380 1384 1385 1388 1393 1394 1400 1403
## 3.726 5.333 1.840 5.075 25.454 4.540 14.283 17.258 4.510 7.845
## 1405 1409 1410 1413 1414 1416 1418 1421 1422 1423
## 4.357 7.046 2.887 16.331 16.568 3.878 2.229 15.731 24.890 8.604
## 1429 1430 1431 1433 1436 1437 1441 1442 1444 1447
## 1.486 5.781 6.022 5.187 14.291 7.157 5.268 12.064 7.347 6.589
## 1449 1453 1455 1458 1462 1464 1465 1467 1469 1471
## 6.539 10.182 0.831 3.705 0.831 15.949 6.948 14.399 12.685 2.922
## 1475 1478 1484 1485 1487 1488 1489 1491 1494 1496
## 5.194 6.157 2.922 14.087 9.081 4.494 15.346 3.726 10.821 4.716
## 1497 1499 1506 1507 1508 1509 1510 1515 1519 1520
## 2.922 2.942 5.703 3.690 8.112 12.615 7.996 4.052 2.922 4.047
## 1521 1523 1526 1527 1528 1529 1530 1531 1537 1539
## 1.412 20.206 1.486 10.628 8.783 4.159 14.745 10.844 21.627 11.757
## 1542 1543 1544 1546
## 2.229 0.831 11.501 1.412
# Inspect Correlations; we may compare this to
r <- lowerCor(phq)
## phq_1 phq_2 phq_3 phq_4 phq_5 phq_6 phq_7 phq_8 phq_9
## phq_1 1.00
## phq_2 0.72 1.00
## phq_3 0.54 0.56 1.00
## phq_4 0.65 0.61 0.55 1.00
## phq_5 0.44 0.48 0.45 0.56 1.00
## phq_6 0.53 0.69 0.46 0.62 0.56 1.00
## phq_7 0.49 0.53 0.43 0.53 0.49 0.61 1.00
## phq_8 0.45 0.58 0.46 0.53 0.44 0.56 0.66 1.00
## phq_9 0.41 0.48 0.35 0.33 0.31 0.44 0.33 0.40 1.00
corPlot(phq)

# Parallel analysis
fa.parallel(phq)

## Parallel analysis suggests that the number of factors = 3 and the number of components = 1
vss(phq)

##
## Very Simple Structure
## Call: vss(x = phq)
## VSS complexity 1 achieves a maximimum of 0.91 with 1 factors
## VSS complexity 2 achieves a maximimum of 0.93 with 2 factors
##
## The Velicer MAP achieves a minimum of 0.03 with 1 factors
## BIC achieves a minimum of NA with 2 factors
## Sample Size adjusted BIC achieves a minimum of NA with 4 factors
##
## Statistics by number of factors
## vss1 vss2 map dof chisq prob sqresid fit RMSEA BIC SABIC
## 1 0.91 0.00 0.032 27 1.6e+02 4.4e-21 2.4 0.91 0.121 3.6 89.3
## 2 0.51 0.93 0.063 19 8.6e+01 1.6e-10 2.0 0.93 0.102 -24.8 35.4
## 3 0.36 0.77 0.085 12 4.6e+01 8.2e-06 1.7 0.94 0.091 -24.5 13.5
## 4 0.35 0.68 0.124 6 1.3e+01 4.3e-02 1.5 0.95 0.059 -22.0 -3.0
## 5 0.29 0.56 0.191 1 4.0e+00 4.4e-02 1.3 0.95 0.096 -1.8 1.4
## 6 0.33 0.55 0.293 -3 4.7e-02 NA 1.1 0.96 NA NA NA
## 7 0.31 0.57 0.523 -6 2.0e-08 NA 1.1 0.96 NA NA NA
## 8 0.31 0.59 1.000 -8 0.0e+00 NA 1.0 0.96 NA NA NA
## complex eChisq SRMR eCRMS eBIC
## 1 1.0 6.3e+01 5.1e-02 0.058 -94.4
## 2 1.7 2.7e+01 3.3e-02 0.046 -83.6
## 3 2.1 1.2e+01 2.2e-02 0.039 -57.7
## 4 2.3 3.6e+00 1.2e-02 0.030 -31.4
## 5 2.5 1.2e+00 6.9e-03 0.041 -4.7
## 6 2.4 1.0e-02 6.4e-04 NA NA
## 7 2.7 2.8e-09 3.3e-07 NA NA
## 8 2.8 8.6e-18 1.9e-11 NA NA
##########################
# Classic Theory
# PHQ-9 - EFA ----
##########################
KMO(phq)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = phq)
## Overall MSA = 0.89
## MSA for each item =
## phq_1 phq_2 phq_3 phq_4 phq_5 phq_6 phq_7 phq_8 phq_9
## 0.86 0.87 0.95 0.91 0.94 0.89 0.88 0.88 0.94
bartlett.test(phq)
##
## Bartlett test of homogeneity of variances
##
## data: phq
## Bartlett's K-squared = 30, df = 8, p-value = 4e-04
# EFA --------
efaModel0 <- fa(phq, nfactors = 1, fm="minres")
print(efaModel0, cut = .3)
## Factor Analysis using method = minres
## Call: fa(r = phq, nfactors = 1, fm = "minres")
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## phq_1 0.75 0.56 0.44 1
## phq_2 0.83 0.70 0.30 1
## phq_3 0.66 0.43 0.57 1
## phq_4 0.78 0.61 0.39 1
## phq_5 0.65 0.42 0.58 1
## phq_6 0.79 0.63 0.37 1
## phq_7 0.72 0.51 0.49 1
## phq_8 0.71 0.51 0.49 1
## phq_9 0.52 0.27 0.73 1
##
## MR1
## SS loadings 4.63
## Proportion Var 0.51
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 36 and the objective function was 4.8 with Chi Square of 1629
## The degrees of freedom for the model are 27 and the objective function was 0.48
##
## The root mean square of the residuals (RMSR) is 0.05
## The df corrected root mean square of the residuals is 0.06
##
## The harmonic number of observations is 344 with the empirical chi square 63.3 with prob < 9.5e-05
## The total number of observations was 344 with Likelihood Chi Square = 161 with prob < 4.4e-21
##
## Tucker Lewis Index of factoring reliability = 0.887
## RMSEA index = 0.121 and the 90 % confidence intervals are 0.103 0.139
## BIC = 3.63
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## MR1
## Correlation of scores with factors 0.96
## Multiple R square of scores with factors 0.91
## Minimum correlation of possible factor scores 0.83
plot(efaModel0)

fa.diagram(efaModel0)

# Reliability - Cronbach's Alfa; = .61;
psych::alpha(phq)
##
## Reliability analysis
## Call: psych::alpha(x = phq)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.9 0.9 0.91 0.51 9.2 0.0078 2.1 0.73
##
## lower alpha upper 95% confidence boundaries
## 0.89 0.9 0.92
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## phq_1 0.89 0.89 0.89 0.50 8.0 0.0088
## phq_2 0.88 0.88 0.88 0.48 7.5 0.0093
## phq_3 0.90 0.89 0.90 0.52 8.5 0.0084
## phq_4 0.89 0.89 0.89 0.49 7.8 0.0091
## phq_5 0.90 0.90 0.90 0.52 8.6 0.0083
## phq_6 0.89 0.89 0.89 0.49 7.7 0.0092
## phq_7 0.89 0.89 0.89 0.50 8.2 0.0087
## phq_8 0.89 0.89 0.89 0.50 8.2 0.0087
## phq_9 0.90 0.90 0.91 0.54 9.5 0.0079
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## phq_1 344 0.77 0.77 0.75 0.70 2.2 0.95
## phq_2 344 0.84 0.84 0.83 0.78 2.2 0.96
## phq_3 344 0.71 0.71 0.66 0.62 2.3 1.00
## phq_4 344 0.80 0.80 0.77 0.74 2.5 0.96
## phq_5 344 0.71 0.70 0.65 0.62 2.2 1.04
## phq_6 344 0.82 0.81 0.79 0.75 2.3 1.06
## phq_7 344 0.76 0.75 0.72 0.68 2.1 1.00
## phq_8 344 0.76 0.75 0.72 0.68 1.8 1.01
## phq_9 344 0.58 0.60 0.52 0.49 1.4 0.82
##
## Non missing response frequency for each item
## 1 2 3 4 miss
## phq_1 0.22 0.47 0.18 0.14 0
## phq_2 0.26 0.47 0.13 0.14 0
## phq_3 0.23 0.46 0.14 0.18 0
## phq_4 0.12 0.49 0.17 0.22 0
## phq_5 0.30 0.37 0.17 0.16 0
## phq_6 0.27 0.36 0.18 0.18 0
## phq_7 0.35 0.37 0.16 0.12 0
## phq_8 0.49 0.31 0.10 0.11 0
## phq_9 0.76 0.12 0.06 0.05 0