This example goes with a question posted to the lavaan google group. This code depends on a dataframe called mydata. It is a multivariate ordinal dataset simulated from the the MultiOrd package. The simulation takes a long time to run, so this file just brings in the 9 variable dataframe (n=500) dataframe from a gist. The gist is basically a dput of the dataframe that is too long for this document.
Full R code for this example here.
First, load packages.
library(devtools) # to run script from gist and bring in data
library(lavaan)
library(semPlot)
library(knitr)
library(ggplot2)
library(reshape2)
library(psych)
Then read in the data.
# read in data from gist
# gist is at https://gist.github.com/ericpgreen/7091485
# this creates data frame mydata
gist <- "https://gist.github.com/ericpgreen/7091485/raw/f4daec526bd69557874035b3c175b39cf6395408/simord.R"
source_url(gist, sha1="da165a61f147592e6a25cf2f0dcaa85027605290")
# if you get an error "SHA-1 hash of downloaded file does not match expected value"
# it means the gist has been modified
# run source_url(gist) to get new sha1 and replace string in quotes
# list of variables
items <- c("v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9")
# head
head(mydata)
## v1 v2 v3 v4 v5 v6 v7 v8 v9
## 1 3 4 3 4 3 3 4 4 3
## 2 2 1 2 2 4 3 2 1 3
## 3 1 3 4 4 4 2 1 2 2
## 4 1 2 1 2 1 2 1 3 2
## 5 3 3 4 4 1 1 2 4 1
## 6 2 2 2 2 2 2 1 1 1
Run some descriptives and plot histograms.
# alpha
alpha(mydata)
##
## Reliability analysis
## Call: alpha(x = mydata)
##
## raw_alpha std.alpha G6(smc) average_r mean sd
## 0.82 0.82 0.84 0.34 2.5 0.66
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r
## v1 0.81 0.81 0.82 0.34
## v2 0.81 0.81 0.82 0.34
## v3 0.81 0.80 0.82 0.34
## v4 0.78 0.79 0.81 0.32
## v5 0.80 0.81 0.82 0.35
## v6 0.80 0.81 0.82 0.34
## v7 0.79 0.80 0.81 0.33
## v8 0.80 0.81 0.82 0.35
## v9 0.81 0.81 0.83 0.35
##
## Item statistics
## n r r.cor r.drop mean sd
## v1 500 0.63 0.57 0.47 2.4 0.77
## v2 500 0.62 0.56 0.46 2.4 0.78
## v3 500 0.64 0.59 0.49 2.4 0.77
## v4 500 0.75 0.73 0.66 2.7 1.17
## v5 500 0.61 0.55 0.51 2.9 1.14
## v6 500 0.63 0.57 0.53 2.8 1.15
## v7 500 0.69 0.65 0.61 2.4 1.13
## v8 500 0.61 0.55 0.49 2.4 1.13
## v9 500 0.60 0.53 0.48 2.4 1.11
##
## Non missing response frequency for each item
## 1 2 3 4 miss
## v1 0.12 0.43 0.39 0.06 0
## v2 0.09 0.47 0.35 0.09 0
## v3 0.11 0.44 0.38 0.07 0
## v4 0.21 0.21 0.21 0.37 0
## v5 0.18 0.19 0.23 0.40 0
## v6 0.19 0.19 0.22 0.40 0
## v7 0.29 0.20 0.28 0.22 0
## v8 0.31 0.17 0.31 0.21 0
## v9 0.29 0.22 0.29 0.21 0
# cor
cor(mydata)
## v1 v2 v3 v4 v5 v6 v7 v8 v9
## v1 1.0000 0.5057 0.5419 0.4104 0.1895 0.2248 0.2670 0.2257 0.2640
## v2 0.5057 1.0000 0.5424 0.4277 0.1689 0.2181 0.2385 0.2637 0.2202
## v3 0.5419 0.5424 1.0000 0.4607 0.2350 0.2115 0.2126 0.2562 0.2466
## v4 0.4104 0.4277 0.4607 1.0000 0.5277 0.5191 0.4566 0.2659 0.2935
## v5 0.1895 0.1689 0.2350 0.5277 1.0000 0.5420 0.3989 0.2590 0.2048
## v6 0.2248 0.2181 0.2115 0.5191 0.5420 1.0000 0.4324 0.2285 0.2697
## v7 0.2670 0.2385 0.2126 0.4566 0.3989 0.4324 1.0000 0.5160 0.4697
## v8 0.2257 0.2637 0.2562 0.2659 0.2590 0.2285 0.5160 1.0000 0.5156
## v9 0.2640 0.2202 0.2466 0.2935 0.2048 0.2697 0.4697 0.5156 1.0000
# histograms
mydata.melt <- melt(mydata)
ggplot(mydata.melt, aes(x = value)) +
facet_wrap(~variable,scales = "free_x") +
geom_density() +
xlim(1, 4)
Model A is one first-order factor.
mod.A <- '
f0 =~ v1 + v2 + v3 + v4 + v5 + v6 + v7 + v8 + v9
'
fit.mod.A <- cfa(mod.A, data = mydata, ordered=items)
semPaths(fit.mod.A, "std", title = FALSE,
nCharNodes=0, edge.label.cex=0.6)
Model B is 3 first order correlated factors (f1, f2, f3).
mod.B <- '
f1 =~ v1 + v2 + v3
f2 =~ v4 + v5 + v6
f3 =~ v7 + v8 + v9
'
fit.mod.B <- cfa(mod.B, data = mydata, ordered=items)
semPaths(fit.mod.B, "std", title = FALSE,
nCharNodes=0, edge.label.cex=0.6)
Model C is 3 first order correlated factors (f1, f2, f3) with two cross-loading items on f1 and f2.
mod.C <- '
f1 =~ v1 + v2 + v3 + v4
f2 =~ v4 + v5 + v6 + v7
f3 =~ v7 + v8 + v9
'
fit.mod.C <- cfa(mod.C, data = mydata, ordered=items)
semPaths(fit.mod.C, "std", title = FALSE,
nCharNodes=0, edge.label.cex=0.6)
Model D is a hierarchical model with 1 second-order factor (g) and 3 first-order uncorrelated factors (f1, f2, f3).
mod.D <- '
g =~ f1 + f2 + f3
f1 =~ v1 + v2 + v3
f2 =~ v4 + v5 + v6
f3 =~ v7 + v8 + v9
'
fit.mod.D <- cfa(mod.D, data = mydata, ordered=items, std.lv=TRUE)
semPaths(fit.mod.D, "std", title = FALSE,
nCharNodes=0, edge.label.cex=0.6)
Model E is a bifactor model with 1 general factor (g) and 3 first-order uncorrelated factors (f1, f2, f3).
mod.E <- '
g =~ v1 + v2 + v3 + v4 + v5 + v6 + v7 + v8 + v9
f1 =~ v1 + v2 + v3
f2 =~ v4 + v5 + v6
f3 =~ v7 + v8 + v9
'
fit.mod.E <- cfa(mod.E, data = mydata, ordered=items, orthogonal=TRUE, std.lv=TRUE)
semPaths(fit.mod.E, "std", title = FALSE,
nCharNodes=0, edge.label.cex=0.6, rotate=2)
# rotate does not seem to be working
# know how to flip so g is on left and f1, f2, f3 on right?