Setting up environment
library(lavaan)
## This is lavaan 0.6-15
## lavaan is FREE software! Please report any bugs.
#install.packages("simsem")
library(simsem)
##
## #################################################################
## This is simsem 0.5-16
## simsem is BETA software! Please report any bugs.
## simsem was first developed at the University of Kansas Center for
## Research Methods and Data Analysis, under NSF Grant 1053160.
## #################################################################
##
## Attaching package: 'simsem'
## The following object is masked from 'package:lavaan':
##
## inspect
PART I – POWER TO DETECT A PATH WITHIN A MODEL
Remember the image. We’re assuming all loadings are .7 We’re trying to determine how much power we have to detect an additional path that goes from G to D, and we’re thinking that path will have a loading of .3 Use Wright’s tracing rules to determine the residuals This is what will generate a var/cov matrix based on these values
pop.model.3 <- '
# loadings
G =~ .7*A + .7*B + .7*C + .3*D
H =~ .7*D + .7*E + .7*F
# factor correlation
G~~.6*H
# factor variances
G~~1*G
H~~1*H
# residual variances
A~~.51*A
B~~.51*B
C~~.51*C
D~~.168*D
E~~.51*E
F~~.51*F
'
Again, this is the model we believe to be true in the population It includes our path of interest
analysis.model <- '
G =~ A + B + C + D
H =~ D + E + F
G~~H
'
pop.fit.3 <- cfa(pop.model.3, do.fit=FALSE)
fitted(pop.fit.3)
## $cov
## A B C D E F
## A 1.000
## B 0.490 1.000
## C 0.490 0.490 1.000
## D 0.504 0.504 0.504 1.000
## E 0.294 0.294 0.294 0.616 1.000
## F 0.294 0.294 0.294 0.616 0.490 1.000
Time to see what our power is to detect a path from Latent variable G to observed variable, D with a loading of .3 This will take about 5 minutes #10,000 repetition - when you are looking for a specific path
specific.power.30 <- sim(nRep=10000, generate=pop.model.3, model=analysis.model,
n =500, lavaanfun = "cfa",
seed=45687, std.lv=TRUE, multicore=TRUE)
## Progress tracker is not available when 'multicore' is TRUE.
summaryParam(specific.power.30,detail=TRUE)
## Estimate Average Estimate SD Average SE Power (Not equal 0) Std Est
## G=~A 0.6988132 0.04415652 0.04382669 1.0000 0.6995609
## G=~B 0.6989687 0.04364761 0.04382949 1.0000 0.6996647
## G=~C 0.6987137 0.04385288 0.04382331 1.0000 0.6995242
## G=~D 0.2964885 0.05716986 0.05747629 0.9872 0.2970717
## H=~D 0.7017493 0.05771198 0.05808784 1.0000 0.7029176
## H=~E 0.6986445 0.04375163 0.04379744 1.0000 0.6995417
## H=~F 0.6989524 0.04394722 0.04380980 1.0000 0.6996201
## G~~H 0.6005593 0.05002394 0.05045988 1.0000 0.6005593
## A~~A 0.5075572 0.04331729 0.04297652 1.0000 0.5096606
## B~~B 0.5075179 0.04274313 0.04297987 1.0000 0.5095307
## C~~C 0.5075211 0.04300292 0.04296809 1.0000 0.5097147
## D~~D 0.1646987 0.03314954 0.03348459 0.9901 0.1657041
## E~~E 0.5074439 0.04355436 0.04291511 1.0000 0.5096994
## F~~F 0.5075750 0.04270081 0.04293660 1.0000 0.5095836
## Std Est SD Std Ave SE Average Param Average Bias Coverage Rel Bias
## G=~A 0.03088771 0.03063585 0.700 -0.0011867992 0.9519 -0.0016954274
## G=~B 0.03063840 0.03063075 0.700 -0.0010312771 0.9524 -0.0014732531
## G=~C 0.03084230 0.03063607 0.700 -0.0012863188 0.9479 -0.0018375982
## G=~D 0.05673269 0.05696961 0.300 -0.0035115087 0.9587 -0.0117050289
## H=~D 0.05211240 0.05272338 0.700 0.0017493066 0.9578 0.0024990095
## H=~E 0.03069302 0.03060362 0.700 -0.0013555417 0.9523 -0.0019364881
## H=~F 0.03079379 0.03059994 0.700 -0.0010476217 0.9535 -0.0014966025
## G~~H 0.05002394 0.05045988 0.600 0.0005592597 0.9484 0.0009320995
## A~~A 0.04309727 0.04274996 0.510 -0.0024427884 0.9471 -0.0047897812
## B~~B 0.04274486 0.04275058 0.510 -0.0024821108 0.9495 -0.0048668839
## C~~C 0.04301210 0.04274769 0.510 -0.0024788875 0.9469 -0.0048605638
## D~~D 0.03452925 0.03476510 0.168 -0.0033012608 0.9595 -0.0196503620
## E~~E 0.04282431 0.04270485 0.510 -0.0025560536 0.9447 -0.0050118699
## F~~F 0.04293443 0.04270331 0.510 -0.0024249553 0.9494 -0.0047548144
## Std Bias Rel SE Bias Not Cover Below Not Cover Above Average CI Width
## G=~A -0.02687710 -0.0074695642 0.0212 0.0269 0.1717975
## G=~B -0.02362734 0.0041669175 0.0214 0.0262 0.1718084
## G=~C -0.02933260 -0.0006743765 0.0216 0.0305 0.1717842
## G=~D -0.06142237 0.0053599819 0.0280 0.0133 0.2253029
## H=~D 0.03031098 0.0065127995 0.0138 0.0284 0.2277002
## H=~E -0.03098266 0.0010471231 0.0213 0.0264 0.1716828
## H=~F -0.02383818 -0.0031269422 0.0207 0.0258 0.1717313
## G~~H 0.01117984 0.0087146186 0.0333 0.0183 0.1977991
## A~~A -0.05639292 -0.0078669736 0.0139 0.0390 0.1684648
## B~~B -0.05807041 0.0055386595 0.0133 0.0372 0.1684780
## C~~C -0.05764463 -0.0008100706 0.0144 0.0387 0.1684318
## D~~D -0.09958693 0.0101073298 0.0216 0.0189 0.1312572
## E~~E -0.05868651 -0.0146770670 0.0165 0.0388 0.1682242
## F~~F -0.05678944 0.0055218068 0.0136 0.0370 0.1683084
## SD CI Width
## G=~A 0.005033007
## G=~B 0.005004818
## G=~C 0.005004626
## G=~D 0.034457945
## H=~D 0.033542098
## H=~E 0.005250623
## H=~F 0.005189163
## G~~H 0.011945888
## A~~A 0.010120980
## B~~B 0.010072388
## C~~C 0.010082137
## D~~D 0.015958901
## E~~E 0.010550943
## F~~F 0.010432364
Look at the fourth row. That’s the path we want with the appropriate estimate (.296) which basically rounds up to .3 - this assumes that this is true power - standardize because it is not bigger than 1 We then look at the power column and we see that given a sample size of 500 after fitting models 10000, we have a power of .987. This tells us that given that sample size, we will most likely find that path with that particular loading, assuming this is the true model.
#Part I.2a misspecified analysis model (fit ‘false’ model to test misspecified missing path) What if we know that there is an extra path, but we’re not sure where in the model that path will be? We need to start with our ‘false’ model - used to create the model
analysis.model.mis <- '
G =~ A + B + C
H =~ D + E + F
G~~H
'
model.power.30.mis <- sim(nRep=10000, generate=pop.model.3, model=analysis.model.mis, n =500,
lavaanfun = "cfa", seed=45687, std.lv=TRUE, multicore=TRUE)
## Progress tracker is not available when 'multicore' is TRUE.
summaryParam(model.power.30.mis,detail=TRUE)
## Estimate Average Estimate SD Average SE Power (Not equal 0) Std Est
## G=~A 0.69881368 0.04421515 0.04389758 1.0000 0.69956159
## G=~B 0.69897442 0.04367388 0.04390046 1.0000 0.69967139
## G=~C 0.69871708 0.04392163 0.04389424 1.0000 0.69952704
## H=~D 0.96854576 0.03887911 0.03811152 1.0000 0.97022082
## H=~E 0.63652993 0.04405659 0.04216897 1.0000 0.63726495
## H=~F 0.63687977 0.04393614 0.04218019 1.0000 0.63742413
## G~~H 0.73536581 0.03091875 0.03310596 1.0000 0.73536581
## A~~A 0.50755139 0.04344714 0.04311723 1.0000 0.50965420
## B~~B 0.50750768 0.04286671 0.04312092 1.0000 0.50951783
## C~~C 0.50751037 0.04310328 0.04310906 1.0000 0.50970558
## D~~D 0.05781554 0.04104469 0.03877937 0.3523 0.05821405
## E~~E 0.59035093 0.04337528 0.04133856 1.0000 0.59286551
## F~~F 0.59049463 0.04307409 0.04135367 1.0000 0.59265671
## Std Est SD Std Ave SE Average Param Average Bias Coverage Rel Bias
## G=~A 0.03097536 0.03073754 0.700 -0.001186316 0.9537 -0.001694737
## G=~B 0.03069543 0.03073238 0.700 -0.001025581 0.9521 -0.001465115
## G=~C 0.03092622 0.03073781 0.700 -0.001282921 0.9472 -0.001832744
## H=~D 0.02139030 0.02021603 0.700 0.268545756 0.0000 0.383636794
## H=~E 0.03206209 0.02993737 0.700 -0.063470073 0.6604 -0.090671533
## H=~F 0.03215373 0.02992973 0.700 -0.063120227 0.6683 -0.090171753
## G~~H 0.03091875 0.03310596 0.600 0.135365806 0.0225 0.225609677
## A~~A 0.04321892 0.04289163 0.510 -0.002448606 0.9474 -0.004801189
## B~~B 0.04283108 0.04289280 0.510 -0.002492325 0.9488 -0.004886911
## C~~C 0.04313148 0.04288965 0.510 -0.002489629 0.9472 -0.004881625
## D~~D 0.04150896 0.03923751 0.168 -0.110184462 0.1834 -0.655859890
## E~~E 0.04075900 0.03805276 0.510 0.080350926 0.5183 0.157550836
## F~~F 0.04084202 0.03805164 0.510 0.080494627 0.5102 0.157832601
## Std Bias Rel SE Bias Not Cover Below Not Cover Above Average CI Width
## G=~A -0.02683052 -0.0071825191 0.0206 0.0257 0.1720753
## G=~B -0.02348270 0.0051877784 0.0213 0.0266 0.1720866
## G=~C -0.02920932 -0.0006235491 0.0219 0.0309 0.1720623
## H=~D 6.90719956 -0.0197429833 1.0000 0.0000 0.1493944
## H=~E -1.44064887 -0.0428452274 0.0006 0.3390 0.1652993
## H=~F -1.43663582 -0.0399659423 0.0006 0.3311 0.1653433
## G~~H 4.37811403 0.0707404876 0.9775 0.0000 0.1297730
## A~~A -0.05635828 -0.0075932609 0.0134 0.0392 0.1690165
## B~~B -0.05814127 0.0059303806 0.0136 0.0376 0.1690309
## C~~C -0.05775961 0.0001339320 0.0146 0.0382 0.1689844
## D~~D -2.68449958 -0.0551915786 0.0000 0.8166 0.1520123
## E~~E 1.85245895 -0.0469557725 0.4814 0.0003 0.1620442
## F~~F 1.86874808 -0.0399410416 0.4896 0.0002 0.1621034
## SD CI Width
## G=~A 0.005058064
## G=~B 0.005034377
## G=~C 0.005031285
## H=~D 0.003856873
## H=~E 0.004673697
## H=~F 0.004635082
## G~~H 0.008294174
## A~~A 0.010169121
## B~~B 0.010127705
## C~~C 0.010129709
## D~~D 0.013184726
## E~~E 0.009841546
## F~~F 0.009789254
We’re now going to fit our ‘false’ model to the simulated data we got from our ‘true’ model and get a sense of misfit. The degree of misfit is essentially our chi-square cut-off, which will become our estimate of NCP, which we will next use to determine our power #give me the cut off
chi.cutoff <- getCutoff(specific.power.30, alpha = 0.05, usedFit="chisq")
chi.cutoff
## chisq
## 95% 14.21405
Let’s calculate how much power we have to reject the misspecified (false) based on our chi-square cutoff
getPowerFit(model.power.30.mis, cutoff=chi.cutoff)
## chisq
## 0.8406
Now we know that we have power of .84 to detect a path, even if we don’t know where that path will be
Chandra mentioned she’s not sure in which situations you would need to do this
Part I.2b misspecified analysis model beyond chisq, power for other fit stats #good power to estimate power on other fit indices - cfi power of .8 to gives a model that has # a good fit to detect a path that will still give us good fit indices - how much power we have to detect that will still give us good fit indices - if we find a path in the model it will have ideal CFI and TFI - AIC/BIC NOT really intreprtable
mult.cutoff <- getCutoff(specific.power.30, alpha = 0.05)
getPowerFit(model.power.30.mis, cutoff = mult.cutoff)
## chisq cfi tli aic bic rmsea srmr
## 0.8406 0.8100 0.7676 0.0732 0.0651 0.7607 0.8505
We can also see how much power we have based on other fit indices We see that we have a good amount of power in terms of CFI and SRMR. Power based on RMSEA and TLI isn’t horrible, but isn’t great Power based on AIC and BIC is horrible Not sure how common it is to use the other fit indices to determine power. The most common is chi-square
Play around the N and see how the results vary
PART II – POWER TO REJECT A MODEL
First we’ll go over how to figure out how much power we have to reject a model Remember, we go based on RMSEA We’ll need to create a function because R doesn’t have something built-in
rmsea.power <- function(df,n,alpha=.05,rmsea0=.10,rmseaa=.05)
{
# alpha is significance level
# rmsea0 is RMSEA under null
# rmseaa is RMSEA under alternative
# ncp0 is ncp under null
# ncpa is ncp under alternative
#
#Compute power
if(rmsea0 < rmseaa) {
#NCP
ncp0 <- (n-1)*df*rmsea0^2
ncpa <- (n-1)*df*rmseaa^2
#critical value
cval <- qchisq(p=alpha,df=df,ncp=ncp0,lower.tail=FALSE)
# power
power <- pchisq(q=cval,df=df,ncp=ncpa,lower.tail=FALSE)
}
if(rmsea0 > rmseaa) {
#NCP
ncp0 <- (n-1)*df*rmsea0^2
ncpa <- (n-1)*df*rmseaa^2
#critical value
cval <- qchisq(p=alpha,df=df,ncp=ncp0,lower.tail=TRUE)
# power
power <- pchisq(q=cval,df=df,ncp=ncpa,lower.tail=TRUE)
}
return(power)
}
Now that we’ve set up our function, we can go through the example in Appendix G We want to know how much power we have to reject a model given that we have a sample size of 300 and 5 degrees of freedom - #rmsea0 under the null hypothesis 0.51 not the best power
rmsea.power(alpha=.05,n=300,rmsea0=.1,rmseaa=.05,df=7)
## [1] 0.6358331
We can see that we don’t have a lot of power. Remember we typically want power of .8
So, since we don’t have a lot of power with a sample size of 300, let’s figure out how large of a sample size we need to increase our power to .8 Let’s set up another function first. This one is a little more complex
rmsea.n <- function(alpha=.05,power=.80,rmsea0=.1,rmseaa=.05,df)
{
# rmsea0 is RMSEA under null
# rmseaa is RMSEA under alternative
# alpha is significance level
# powd is desired power
powd <- power
#initialize values
powa <- 0.0
n <- 0
#begin loop for finding initial level of n
while (powa<powd) {
n <- n+100
ncp0 <- (n-1)*df*rmsea0^2
ncpa <- (n-1)*df*rmseaa^2
#compute power
if(rmsea0<rmseaa) {
cval <- qchisq(alpha,df,ncp=ncp0,lower.tail=FALSE)
powa <- pchisq(cval,df,ncp=ncpa,lower.tail=FALSE)
}
else {
cval <- qchisq(alpha,df,ncp=ncp0,lower.tail=TRUE)
powa <- pchisq(cval,df,ncp=ncpa,lower.tail=TRUE)
}
}
#begin loop for interval halving
dir <- -1
newn <- n
intv <- 200
powdiff <- powa - powd
while (powdiff>.001) {
intv <- intv*.5
newn <- newn + dir*intv*.5
ncp0 <- (newn-1)*df*rmsea0^2
ncpa <- (newn-1)*df*rmseaa^2
#compute power
if(rmsea0<rmseaa) {
cval <- qchisq(alpha,df,ncp=ncp0,lower.tail=FALSE)
powa <- pchisq(cval,df,ncp=ncpa,lower.tail=FALSE)
}
else {
cval <- qchisq(alpha,df,ncp=ncp0,lower.tail=TRUE)
powa <- pchisq(cval,df,ncp=ncpa,lower.tail=TRUE)
}
powdiff <- abs(powa-powd)
if (powa<powd) {
dir <- 1
}
if (powa>powd) {
dir <- -1
}
}
minn <- newn
return(round(minn))
}
Let’s now do the example in Appendix G - does not include the sample size — we want .8 power ….
rmsea.n(alpha=.05,power=.8,rmsea0=.1,rmseaa=.05,df=7)
## [1] 428
We see we need a couple hundred more
Okay, now let’s go through finding power to reject the model in your document First, where did we get our df? Look at the model, consider all paths (including the path from G to D; assume we’re working with a var/cov matrix and assume we’re also calculating residuals)
How much power do we have to reject the model?
rmsea.power(alpha=.05,n=500,rmsea0=.1,rmseaa=.05,df=7)
## [1] 0.8607617
We’re doing pretty well! What does this tell us? If the fit is good in the population (RMSEA ~ .05), we have a high probability of .86 with our sample size (n=500) of being able to reject the hypothesis that our model is bad (RMSEA ≥ .10)
But now let’s assume we’re doing a pre-registration and we want to know how many people we need to recruit to achieve power of .8 # sample size for 7 df and power of .8
rmsea.n(alpha=.05, power=.8, rmsea0=.1, rmseaa=.05, df=7)
## [1] 428
That’s a lot of people!
rmsea.power(alpha=.05, n=200, rmsea0=.1, rmseaa=.05, df=5)
## [1] 0.3512089
rmsea.n(alpha=.05, power=.8, rmsea0=.1, rmseaa=.05, df=5)
## [1] 569