This is an example of how to use survey design weights with linear mixed models using the lmer()
function. It follows the logic of:
[“Carle, 2009”] (http://www.biomedcentral.com/1471-2288/9/49)
library(lme4)
## Loading required package: Matrix
## Loading required package: Rcpp
library(car)
#load brfss from github
library(RCurl)
## Loading required package: bitops
url<-"https://raw.githubusercontent.com/coreysparks/data/master/brfss_11.csv"
brfss_11<-getURL(url)
brfss_11<-read.csv(textConnection(brfss_11), header=T, dec=",")
nams<-names(brfss_11)
newnames<-gsub("_", "", nams)
names(brfss_11)<-tolower(newnames)
brfss_11$statefip<-sprintf("%02d", brfss_11$state )
brfss_11$cofip<-sprintf("%03d", brfss_11$cnty )
brfss_11$cofips<-paste(brfss_11$statefip, brfss_11$cofip, sep="")
#just for brevity, I just select TX respondents with non missing weights
brfss_11<-brfss_11[is.na(brfss_11$cntywt)==F,]
#insurance
brfss_11$ins<-ifelse(brfss_11$hlthpln1==1,1,0)
#smoking currently
brfss_11$smoke<-recode(brfss_11$smoker3, recodes="1:2=1; 3:4=0; else=NA")
#low physical activity
brfss_11$lowact<-recode(brfss_11$pacat, recodes="1:2=0; 3:4=1; else=NA")
#high blood pressure
brfss_11$hbp<-recode(brfss_11$bphigh4, recodes="1=1; 2:4=0; else=NA")
#high cholesterol
brfss_11$hc<-recode(brfss_11$toldhi2, recodes="1=1; 2=0; else=NA")
#bmi
brfss_11$bmi<-ifelse(is.na(brfss_11$bmi5)==T, NA, brfss_11$bmi5/100)
#poor or fair health
brfss_11$badhealth<-ifelse(brfss_11$genhlth %in% c(4,5),1,0)
#race
brfss_11$black<-recode(brfss_11$racegr2, recodes="2=1; 9=NA; else=0", as.factor.result=T)
brfss_11$white<-recode(brfss_11$racegr2, recodes="1=1; 9=NA; else=0", as.factor.result=T)
brfss_11$other<-recode(brfss_11$racegr2, recodes="3:4=1; 9=NA; else=0", as.factor.result=T)
brfss_11$hispanic<-recode(brfss_11$racegr2, recodes="5=1; 9=NA; else=0", as.factor.result=T)
#have a personal doctor?
brfss_11$doc<-recode(brfss_11$persdoc2, recodes="1:2=1; 3=0; else=NA", as.factor.result=F)
#needed care in last year but couldn't get it because of cost
brfss_11$medacc<-recode(brfss_11$medcost, recodes="1=1;2=0;else=NA")
#education level
brfss_11$lths<-recode(brfss_11$educa, recodes="1:3=1;9=NA; else=0", as.factor.result=F)
brfss_11$coll<-recode(brfss_11$educa, recodes="5:6=1;9=NA; else=0", as.factor.result=F)
#marital status
brfss_11$marst<-recode(brfss_11$marital, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA", as.factor.result=T)
brfss_11$marst<-relevel(brfss_11$marst, ref='married')
#income
brfss_11$inc<-as.factor(ifelse(brfss_11$incomg==9, NA, brfss_11$incomg))
#Age cut into intervals
brfss_11$agec<-cut(brfss_11$age, breaks=c(0,24,39,59,79,99))
Now we will begin fitting the multilevel regression model with the county that the person lives in being the higher level
brfss_11$bmiz<-scale(brfss_11$bmi, center=T, scale=T)
brfss_11$agez<-scale(brfss_11$age, center=T, scale=T)
Here is an example of using weights that sum to the county-specific sample size make an id that is the combination of state and strata
brfss_11$cntywt<-as.numeric(as.character(brfss_11$cntywt))
brfss_11$sampleid<-paste(brfss_11$state, brfss_11$ststr)
#within each sampling unit, sum the weights
wts<-tapply(brfss_11$cntywt,brfss_11$sampleid,sum)
#make a data frame from this
wts<-data.frame(id=names(unlist(wts)), wt=unlist(wts))
#get the unique sampling location ids'
t1<-as.data.frame(table(brfss_11$sampleid))
#put all of this into a data set
wts2<-data.frame(ids=wts$id, sumwt=wts$wt, jn=t1$Freq)
#merge all of this back to the original data file
brfss_11<-merge(brfss_11, wts2, by.x="sampleid", by.y="ids", all.x=T)
#In the new data set, multiply the original weight by the fraction of the
#sampling unit total population each person represents
brfss_11$swts<-brfss_11$cntywt*(brfss_11$jn/brfss_11$sumwt)
hist(brfss_11$swts)
hist(brfss_11$cntywt)
names(brfss_11)
## [1] "sampleid" "x" "state" "precall" "fmonth"
## [6] "intvid" "seqno" "psu" "ctelenum" "cellfon"
## [11] "pvtresid" "genhlth" "physhlth" "menthlth" "poorhlth"
## [16] "hlthpln1" "persdoc2" "medcost" "checkup1" "bphigh4"
## [21] "bpmeds" "bloodcho" "cholchk" "toldhi2" "cvdinfr4"
## [26] "cvdcrhd4" "cvdstrk3" "asthma3" "asthnow" "chcscncr"
## [31] "chcocncr" "chccopd" "havarth3" "addepev2" "chckidny"
## [36] "chcvison" "diabete3" "smoke100" "smokday2" "stopsmk2"
## [41] "lastsmk2" "usenow3" "age" "hispanc2" "mrace"
## [46] "orace2" "veteran3" "marital" "children" "educa"
## [51] "employ" "income2" "weight2" "height3" "numhhol2"
## [56] "numphon2" "cpdemo1" "cpdemo2" "cpdemo3" "cpdemo4"
## [61] "renthom1" "sex" "pregnant" "fruitju1" "fruit1"
## [66] "fvbeans" "fvgreen" "fvorang" "vegetab1" "exerany2"
## [71] "exract01" "exeroft1" "exerhmm1" "exract02" "exeroft2"
## [76] "exerhmm2" "strength" "qlactlm2" "useequip" "lmtjoin3"
## [81] "arthdis2" "arthsocl" "joinpain" "seatbelt" "flushot5"
## [86] "flshtmy2" "imfvplac" "pneuvac3" "alcday5" "avedrnk2"
## [91] "drnk3ge5" "maxdrnks" "hivtst6" "hivtstd3" "hivrisk3"
## [96] "ststr" "impage" "imprace" "impnph" "rfhlth"
## [101] "hcvu651" "rfhype5" "cholchk.1" "rfchol" "ltasth1"
## [106] "casthm1" "asthms1" "drdxar1" "smoker3" "rfsmok3"
## [111] "mraceorg" "mraceasc" "prace" "mrace.1" "race2"
## [116] "raceg2" "racegr2" "raceg" "cnrace" "cnracec"
## [121] "ageg5yr" "age65yr" "ageg" "htin4" "htm4"
## [126] "wtkg3" "bmi5" "bmi5cat" "rfbmi5" "chldcnt"
## [131] "educag" "incomg" "ftjuda1" "frutda1" "beanday"
## [136] "grenday" "orngday" "vegeda1" "misfrtn" "misvegn"
## [141] "frtresp" "vegresp" "frutsum" "vegesum" "frt16"
## [146] "veg23" "fruitex" "vegetex" "totinda" "metval1"
## [151] "metval2" "maxvo2" "fc60" "actint1" "actint2"
## [156] "padur1" "padur2" "pafreq1" "pafreq2" "minact1"
## [161] "minact2" "strfreq" "pamiss" "pamin1" "pamin2"
## [166] "pamin" "pavigm1" "pavigm2" "pavigmn" "pacat"
## [171] "paindex" "pa150r1" "pa300r1" "pa3002l" "pastrng"
## [176] "parec" "pastaer" "rfseat2" "rfseat3" "flshot5"
## [181] "pneumo2" "drnkany5" "drocdy3" "rfbing5" "drnkdy4"
## [186] "drnkmo4" "rfdrhv4" "rfdrmn4" "rfdrwm4" "aidtst3"
## [191] "cntywt" "cnty" "endofrec" "badhealth" "black"
## [196] "white" "other" "hispanic" "ins" "inc"
## [201] "smoke" "hivrisk" "lowact" "doc" "medacc"
## [206] "educ" "marst" "agec" "bmi" "statefip"
## [211] "cofip" "cofips" "hbp" "hc" "lths"
## [216] "coll" "bmiz" "agez" "sumwt" "jn"
## [221] "swts"
Then we fit the multilevel model to specify a random intercept model for counties, we add a model term that is (1|cofips)
Here I fit the model with no weights
fit.mixnowt<-lmer(bmiz~agez+lths+coll+black+hispanic+other+(1|cofips), brfss_11)
summary(fit.mixnowt)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## bmiz ~ agez + lths + coll + black + hispanic + other + (1 | cofips)
## Data: brfss_11
##
## REML criterion at convergence: 19664
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.473 -0.671 -0.152 0.494 8.990
##
## Random effects:
## Groups Name Variance Std.Dev.
## cofips (Intercept) 0.00263 0.0513
## Residual 0.96982 0.9848
## Number of obs: 6991, groups: cofips, 9
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.0236 0.0323 -0.73
## agez -0.0242 0.0127 -1.90
## lths 0.1160 0.0464 2.50
## coll -0.0635 0.0296 -2.15
## black1 0.4817 0.0424 11.35
## hispanic1 0.1731 0.0362 4.79
## other1 -0.1679 0.0523 -3.21
##
## Correlation of Fixed Effects:
## (Intr) agez lths coll black1 hspnc1
## agez -0.126
## lths -0.356 -0.040
## coll -0.714 0.065 0.427
## black1 -0.214 0.116 -0.045 0.091
## hispanic1 -0.275 0.253 -0.242 0.148 0.213
## other1 -0.112 0.137 -0.030 -0.023 0.118 0.147
Here I fit the model using the standardized sample weights
fit.mix<-lmer(bmiz~agez+lths+coll+black+hispanic+other+(1|cofips), brfss_11, weights=swts)
summary(fit.mix)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## bmiz ~ agez + lths + coll + black + hispanic + other + (1 | cofips)
## Data: brfss_11
## Weights: swts
##
## REML criterion at convergence: 21933
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.271 -0.565 -0.145 0.393 12.811
##
## Random effects:
## Groups Name Variance Std.Dev.
## cofips (Intercept) 0.0106 0.103
## Residual 1.0256 1.013
## Number of obs: 6991, groups: cofips, 9
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.0108 0.0442 -0.24
## agez 0.0523 0.0131 4.01
## lths 0.0394 0.0432 0.91
## coll -0.0278 0.0296 -0.94
## black1 0.4859 0.0388 12.53
## hispanic1 0.2297 0.0332 6.91
## other1 -0.2447 0.0504 -4.86
##
## Correlation of Fixed Effects:
## (Intr) agez lths coll black1 hspnc1
## agez 0.040
## lths -0.264 -0.046
## coll -0.506 0.008 0.435
## black1 -0.195 0.065 -0.020 0.096
## hispanic1 -0.234 0.214 -0.235 0.158 0.271
## other1 -0.092 0.092 -0.030 -0.038 0.163 0.183
Here I fit the model using the state and stratum as random effects to mimic the sample design
fit.mix.samp<-lmer(bmiz~agez+lths+coll+black+hispanic+other+(1|ststr)+(1|cofips)
, brfss_11, weights=swts)
summary(fit.mix.samp)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## bmiz ~ agez + lths + coll + black + hispanic + other + (1 | ststr) +
## (1 | cofips)
## Data: brfss_11
## Weights: swts
##
## REML criterion at convergence: 21923
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.357 -0.559 -0.142 0.397 12.833
##
## Random effects:
## Groups Name Variance Std.Dev.
## ststr (Intercept) 0.00941 0.097
## cofips (Intercept) 0.01435 0.120
## Residual 1.02154 1.011
## Number of obs: 6991, groups: ststr, 29; cofips, 9
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.0027 0.0542 0.05
## agez 0.0460 0.0134 3.42
## lths 0.0421 0.0432 0.98
## coll -0.0264 0.0296 -0.89
## black1 0.4883 0.0388 12.58
## hispanic1 0.2356 0.0334 7.06
## other1 -0.2416 0.0504 -4.79
##
## Correlation of Fixed Effects:
## (Intr) agez lths coll black1 hspnc1
## agez 0.042
## lths -0.210 -0.043
## coll -0.412 0.011 0.435
## black1 -0.165 0.060 -0.021 0.096
## hispanic1 -0.193 0.191 -0.235 0.158 0.270
## other1 -0.083 0.088 -0.031 -0.036 0.164 0.184