Reproducible example

This example goes with a question posted to the lavaan google group. This code depends on a dataframe called mydata. It is a multivariate ordinal dataset simulated from the the MultiOrd package. The simulation takes a long time to run, so this file just brings in the 9 variable dataframe (n=500) dataframe from a gist. The gist is basically a dput of the dataframe that is too long for this document.

Full R code for this example here.

First, load packages.

  library(devtools) # to run script from gist and bring in data
  library(lavaan)
  library(semPlot)
  library(knitr)
  library(ggplot2)
  library(reshape2)
  library(psych)

Then read in the data.

# read in data from gist
# gist is at https://gist.github.com/ericpgreen/7091485
# this creates data frame mydata
  gist <- "https://gist.github.com/ericpgreen/7091485/raw/f4daec526bd69557874035b3c175b39cf6395408/simord.R"
  source_url(gist, sha1="da165a61f147592e6a25cf2f0dcaa85027605290")
  # if you get an error "SHA-1 hash of downloaded file does not match expected value"
  # it means the gist has been modified
  # run source_url(gist) to get new sha1 and replace string in quotes
# list of variables
  items <- c("v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9")
# head
  head(mydata)

##   v1 v2 v3 v4 v5 v6 v7 v8 v9
## 1  3  4  3  4  3  3  4  4  3
## 2  2  1  2  2  4  3  2  1  3
## 3  1  3  4  4  4  2  1  2  2
## 4  1  2  1  2  1  2  1  3  2
## 5  3  3  4  4  1  1  2  4  1
## 6  2  2  2  2  2  2  1  1  1

Run some descriptives and plot histograms.

# alpha
  alpha(mydata)

## 
## Reliability analysis   
## Call: alpha(x = mydata)
## 
##   raw_alpha std.alpha G6(smc) average_r mean   sd
##       0.82      0.82    0.84      0.34  2.5 0.66
## 
##  Reliability if an item is dropped:
##    raw_alpha std.alpha G6(smc) average_r
## v1      0.81      0.81    0.82      0.34
## v2      0.81      0.81    0.82      0.34
## v3      0.81      0.80    0.82      0.34
## v4      0.78      0.79    0.81      0.32
## v5      0.80      0.81    0.82      0.35
## v6      0.80      0.81    0.82      0.34
## v7      0.79      0.80    0.81      0.33
## v8      0.80      0.81    0.82      0.35
## v9      0.81      0.81    0.83      0.35
## 
##  Item statistics 
##      n    r r.cor r.drop mean   sd
## v1 500 0.63  0.57   0.47  2.4 0.77
## v2 500 0.62  0.56   0.46  2.4 0.78
## v3 500 0.64  0.59   0.49  2.4 0.77
## v4 500 0.75  0.73   0.66  2.7 1.17
## v5 500 0.61  0.55   0.51  2.9 1.14
## v6 500 0.63  0.57   0.53  2.8 1.15
## v7 500 0.69  0.65   0.61  2.4 1.13
## v8 500 0.61  0.55   0.49  2.4 1.13
## v9 500 0.60  0.53   0.48  2.4 1.11
## 
## Non missing response frequency for each item
##       1    2    3    4 miss
## v1 0.12 0.43 0.39 0.06    0
## v2 0.09 0.47 0.35 0.09    0
## v3 0.11 0.44 0.38 0.07    0
## v4 0.21 0.21 0.21 0.37    0
## v5 0.18 0.19 0.23 0.40    0
## v6 0.19 0.19 0.22 0.40    0
## v7 0.29 0.20 0.28 0.22    0
## v8 0.31 0.17 0.31 0.21    0
## v9 0.29 0.22 0.29 0.21    0

# cor
  cor(mydata)

##        v1     v2     v3     v4     v5     v6     v7     v8     v9
## v1 1.0000 0.5057 0.5419 0.4104 0.1895 0.2248 0.2670 0.2257 0.2640
## v2 0.5057 1.0000 0.5424 0.4277 0.1689 0.2181 0.2385 0.2637 0.2202
## v3 0.5419 0.5424 1.0000 0.4607 0.2350 0.2115 0.2126 0.2562 0.2466
## v4 0.4104 0.4277 0.4607 1.0000 0.5277 0.5191 0.4566 0.2659 0.2935
## v5 0.1895 0.1689 0.2350 0.5277 1.0000 0.5420 0.3989 0.2590 0.2048
## v6 0.2248 0.2181 0.2115 0.5191 0.5420 1.0000 0.4324 0.2285 0.2697
## v7 0.2670 0.2385 0.2126 0.4566 0.3989 0.4324 1.0000 0.5160 0.4697
## v8 0.2257 0.2637 0.2562 0.2659 0.2590 0.2285 0.5160 1.0000 0.5156
## v9 0.2640 0.2202 0.2466 0.2935 0.2048 0.2697 0.4697 0.5156 1.0000

# histograms
  mydata.melt <- melt(mydata)
  ggplot(mydata.melt, aes(x = value)) + 
      facet_wrap(~variable,scales = "free_x") +
      geom_density() +
      xlim(1, 4)

plot of chunk des

Model A

Model A is one first-order factor.

  mod.A <- '
  f0 =~ v1 + v2 + v3 + v4 + v5 + v6 + v7 + v8 + v9
  '
  fit.mod.A <- cfa(mod.A, data = mydata, ordered=items)
  semPaths(fit.mod.A, "std", title = FALSE, 
           nCharNodes=0, edge.label.cex=0.6)

plot of chunk a

Model B

Model B is 3 first order correlated factors (f1, f2, f3).

  mod.B <- '
  f1 =~ v1 + v2 + v3
  f2 =~ v4 + v5 + v6
  f3 =~ v7 + v8 + v9
  '
  fit.mod.B <- cfa(mod.B, data = mydata, ordered=items)
  semPaths(fit.mod.B, "std", title = FALSE,
           nCharNodes=0, edge.label.cex=0.6)

plot of chunk b

Model C

Model C is 3 first order correlated factors (f1, f2, f3) with two cross-loading items on f1 and f2.

  mod.C <- '
  f1 =~ v1 + v2 + v3 + v4
  f2 =~ v4 + v5 + v6 + v7
  f3 =~ v7 + v8 + v9
  '
  fit.mod.C <- cfa(mod.C, data = mydata, ordered=items)
  semPaths(fit.mod.C, "std", title = FALSE,
           nCharNodes=0, edge.label.cex=0.6)

plot of chunk c

Model D

Model D is a hierarchical model with 1 second-order factor (g) and 3 first-order uncorrelated factors (f1, f2, f3).

  mod.D <- '
  g =~ f1 + f2 + f3
  f1 =~ v1 + v2 + v3
  f2 =~ v4 + v5 + v6
  f3 =~ v7 + v8 + v9
  '
  fit.mod.D <- cfa(mod.D, data = mydata, ordered=items, std.lv=TRUE)
  semPaths(fit.mod.D, "std", title = FALSE,
           nCharNodes=0, edge.label.cex=0.6)

plot of chunk d

Model E

Model E is a bifactor model with 1 general factor (g) and 3 first-order uncorrelated factors (f1, f2, f3).

  mod.E <- '
  g =~ v1 + v2 + v3 + v4 + v5 + v6 + v7 + v8 + v9
  f1 =~ v1 + v2 + v3
  f2 =~ v4 + v5 + v6
  f3 =~ v7 + v8 + v9
  '
  fit.mod.E <- cfa(mod.E, data = mydata, ordered=items, orthogonal=TRUE, std.lv=TRUE)
  semPaths(fit.mod.E, "std", title = FALSE,
           nCharNodes=0, edge.label.cex=0.6, rotate=2)

plot of chunk e

  # rotate does not seem to be working
  # know how to flip so g is on left and f1, f2, f3 on right?