Example of using survey design weights in lmer

This is an example of how to use survey design weights with linear mixed models using the lmer() function. It follows the logic of:

[“Carle, 2009”] (http://www.biomedcentral.com/1471-2288/9/49)

library(lme4)

## Loading required package: Matrix
## Loading required package: Rcpp

library(car)

#load brfss from github
library(RCurl)

## Loading required package: bitops

url<-"https://raw.githubusercontent.com/coreysparks/data/master/brfss_11.csv"
brfss_11<-getURL(url)
brfss_11<-read.csv(textConnection(brfss_11), header=T, dec=",")
nams<-names(brfss_11)
newnames<-gsub("_", "", nams)
names(brfss_11)<-tolower(newnames)

brfss_11$statefip<-sprintf("%02d", brfss_11$state )
brfss_11$cofip<-sprintf("%03d", brfss_11$cnty )
brfss_11$cofips<-paste(brfss_11$statefip, brfss_11$cofip, sep="")

#just for brevity, I just select TX respondents with non missing weights
brfss_11<-brfss_11[is.na(brfss_11$cntywt)==F,]
#insurance
brfss_11$ins<-ifelse(brfss_11$hlthpln1==1,1,0)
#smoking currently
brfss_11$smoke<-recode(brfss_11$smoker3, recodes="1:2=1; 3:4=0; else=NA")
#low physical activity
brfss_11$lowact<-recode(brfss_11$pacat, recodes="1:2=0; 3:4=1; else=NA")
#high blood pressure
brfss_11$hbp<-recode(brfss_11$bphigh4, recodes="1=1; 2:4=0; else=NA")
#high cholesterol
brfss_11$hc<-recode(brfss_11$toldhi2, recodes="1=1; 2=0; else=NA")
#bmi
brfss_11$bmi<-ifelse(is.na(brfss_11$bmi5)==T, NA, brfss_11$bmi5/100)
#poor or fair health
brfss_11$badhealth<-ifelse(brfss_11$genhlth %in% c(4,5),1,0)

#race
brfss_11$black<-recode(brfss_11$racegr2, recodes="2=1; 9=NA; else=0", as.factor.result=T)
brfss_11$white<-recode(brfss_11$racegr2, recodes="1=1; 9=NA; else=0", as.factor.result=T)
brfss_11$other<-recode(brfss_11$racegr2, recodes="3:4=1; 9=NA; else=0", as.factor.result=T)
brfss_11$hispanic<-recode(brfss_11$racegr2, recodes="5=1; 9=NA; else=0", as.factor.result=T)

#have a personal doctor?
brfss_11$doc<-recode(brfss_11$persdoc2, recodes="1:2=1; 3=0; else=NA", as.factor.result=F)

#needed care in last year but couldn't get it because of cost
brfss_11$medacc<-recode(brfss_11$medcost, recodes="1=1;2=0;else=NA")

#education level
brfss_11$lths<-recode(brfss_11$educa, recodes="1:3=1;9=NA; else=0", as.factor.result=F)
brfss_11$coll<-recode(brfss_11$educa, recodes="5:6=1;9=NA; else=0", as.factor.result=F)

#marital status
brfss_11$marst<-recode(brfss_11$marital, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA", as.factor.result=T)
brfss_11$marst<-relevel(brfss_11$marst, ref='married')

#income
brfss_11$inc<-as.factor(ifelse(brfss_11$incomg==9, NA, brfss_11$incomg))

#Age cut into intervals
brfss_11$agec<-cut(brfss_11$age, breaks=c(0,24,39,59,79,99))

Now we will begin fitting the multilevel regression model with the county that the person lives in being the higher level

brfss_11$bmiz<-scale(brfss_11$bmi, center=T, scale=T)
brfss_11$agez<-scale(brfss_11$age, center=T, scale=T)

Here is an example of using weights that sum to the county-specific sample size make an id that is the combination of state and strata

brfss_11$cntywt<-as.numeric(as.character(brfss_11$cntywt))
brfss_11$sampleid<-paste(brfss_11$state, brfss_11$ststr)
#within each sampling unit, sum the weights
wts<-tapply(brfss_11$cntywt,brfss_11$sampleid,sum)
#make a data frame from this
wts<-data.frame(id=names(unlist(wts)), wt=unlist(wts))
#get the unique sampling location ids'
t1<-as.data.frame(table(brfss_11$sampleid))
#put all of this into a data set
wts2<-data.frame(ids=wts$id, sumwt=wts$wt, jn=t1$Freq)
#merge all of this back to the original data file
brfss_11<-merge(brfss_11, wts2, by.x="sampleid", by.y="ids", all.x=T)
#In the new data set, multiply the original weight by the fraction of the
#sampling unit total population each person represents
brfss_11$swts<-brfss_11$cntywt*(brfss_11$jn/brfss_11$sumwt)
hist(brfss_11$swts)

plot of chunk unnamed-chunk-3

hist(brfss_11$cntywt)

plot of chunk unnamed-chunk-3

names(brfss_11)

##   [1] "sampleid"  "x"         "state"     "precall"   "fmonth"   
##   [6] "intvid"    "seqno"     "psu"       "ctelenum"  "cellfon"  
##  [11] "pvtresid"  "genhlth"   "physhlth"  "menthlth"  "poorhlth" 
##  [16] "hlthpln1"  "persdoc2"  "medcost"   "checkup1"  "bphigh4"  
##  [21] "bpmeds"    "bloodcho"  "cholchk"   "toldhi2"   "cvdinfr4" 
##  [26] "cvdcrhd4"  "cvdstrk3"  "asthma3"   "asthnow"   "chcscncr" 
##  [31] "chcocncr"  "chccopd"   "havarth3"  "addepev2"  "chckidny" 
##  [36] "chcvison"  "diabete3"  "smoke100"  "smokday2"  "stopsmk2" 
##  [41] "lastsmk2"  "usenow3"   "age"       "hispanc2"  "mrace"    
##  [46] "orace2"    "veteran3"  "marital"   "children"  "educa"    
##  [51] "employ"    "income2"   "weight2"   "height3"   "numhhol2" 
##  [56] "numphon2"  "cpdemo1"   "cpdemo2"   "cpdemo3"   "cpdemo4"  
##  [61] "renthom1"  "sex"       "pregnant"  "fruitju1"  "fruit1"   
##  [66] "fvbeans"   "fvgreen"   "fvorang"   "vegetab1"  "exerany2" 
##  [71] "exract01"  "exeroft1"  "exerhmm1"  "exract02"  "exeroft2" 
##  [76] "exerhmm2"  "strength"  "qlactlm2"  "useequip"  "lmtjoin3" 
##  [81] "arthdis2"  "arthsocl"  "joinpain"  "seatbelt"  "flushot5" 
##  [86] "flshtmy2"  "imfvplac"  "pneuvac3"  "alcday5"   "avedrnk2" 
##  [91] "drnk3ge5"  "maxdrnks"  "hivtst6"   "hivtstd3"  "hivrisk3" 
##  [96] "ststr"     "impage"    "imprace"   "impnph"    "rfhlth"   
## [101] "hcvu651"   "rfhype5"   "cholchk.1" "rfchol"    "ltasth1"  
## [106] "casthm1"   "asthms1"   "drdxar1"   "smoker3"   "rfsmok3"  
## [111] "mraceorg"  "mraceasc"  "prace"     "mrace.1"   "race2"    
## [116] "raceg2"    "racegr2"   "raceg"     "cnrace"    "cnracec"  
## [121] "ageg5yr"   "age65yr"   "ageg"      "htin4"     "htm4"     
## [126] "wtkg3"     "bmi5"      "bmi5cat"   "rfbmi5"    "chldcnt"  
## [131] "educag"    "incomg"    "ftjuda1"   "frutda1"   "beanday"  
## [136] "grenday"   "orngday"   "vegeda1"   "misfrtn"   "misvegn"  
## [141] "frtresp"   "vegresp"   "frutsum"   "vegesum"   "frt16"    
## [146] "veg23"     "fruitex"   "vegetex"   "totinda"   "metval1"  
## [151] "metval2"   "maxvo2"    "fc60"      "actint1"   "actint2"  
## [156] "padur1"    "padur2"    "pafreq1"   "pafreq2"   "minact1"  
## [161] "minact2"   "strfreq"   "pamiss"    "pamin1"    "pamin2"   
## [166] "pamin"     "pavigm1"   "pavigm2"   "pavigmn"   "pacat"    
## [171] "paindex"   "pa150r1"   "pa300r1"   "pa3002l"   "pastrng"  
## [176] "parec"     "pastaer"   "rfseat2"   "rfseat3"   "flshot5"  
## [181] "pneumo2"   "drnkany5"  "drocdy3"   "rfbing5"   "drnkdy4"  
## [186] "drnkmo4"   "rfdrhv4"   "rfdrmn4"   "rfdrwm4"   "aidtst3"  
## [191] "cntywt"    "cnty"      "endofrec"  "badhealth" "black"    
## [196] "white"     "other"     "hispanic"  "ins"       "inc"      
## [201] "smoke"     "hivrisk"   "lowact"    "doc"       "medacc"   
## [206] "educ"      "marst"     "agec"      "bmi"       "statefip" 
## [211] "cofip"     "cofips"    "hbp"       "hc"        "lths"     
## [216] "coll"      "bmiz"      "agez"      "sumwt"     "jn"       
## [221] "swts"

Then we fit the multilevel model to specify a random intercept model for counties, we add a model term that is (1|cofips)

Here I fit the model with no weights

fit.mixnowt<-lmer(bmiz~agez+lths+coll+black+hispanic+other+(1|cofips), brfss_11)
summary(fit.mixnowt)

## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## bmiz ~ agez + lths + coll + black + hispanic + other + (1 | cofips)
##    Data: brfss_11
## 
## REML criterion at convergence: 19664
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -2.473 -0.671 -0.152  0.494  8.990 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  cofips   (Intercept) 0.00263  0.0513  
##  Residual             0.96982  0.9848  
## Number of obs: 6991, groups:  cofips, 9
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  -0.0236     0.0323   -0.73
## agez         -0.0242     0.0127   -1.90
## lths          0.1160     0.0464    2.50
## coll         -0.0635     0.0296   -2.15
## black1        0.4817     0.0424   11.35
## hispanic1     0.1731     0.0362    4.79
## other1       -0.1679     0.0523   -3.21
## 
## Correlation of Fixed Effects:
##           (Intr) agez   lths   coll   black1 hspnc1
## agez      -0.126                                   
## lths      -0.356 -0.040                            
## coll      -0.714  0.065  0.427                     
## black1    -0.214  0.116 -0.045  0.091              
## hispanic1 -0.275  0.253 -0.242  0.148  0.213       
## other1    -0.112  0.137 -0.030 -0.023  0.118  0.147

Here I fit the model using the standardized sample weights

fit.mix<-lmer(bmiz~agez+lths+coll+black+hispanic+other+(1|cofips), brfss_11, weights=swts)
summary(fit.mix)

## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## bmiz ~ agez + lths + coll + black + hispanic + other + (1 | cofips)
##    Data: brfss_11
## Weights: swts
## 
## REML criterion at convergence: 21933
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -4.271 -0.565 -0.145  0.393 12.811 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  cofips   (Intercept) 0.0106   0.103   
##  Residual             1.0256   1.013   
## Number of obs: 6991, groups:  cofips, 9
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  -0.0108     0.0442   -0.24
## agez          0.0523     0.0131    4.01
## lths          0.0394     0.0432    0.91
## coll         -0.0278     0.0296   -0.94
## black1        0.4859     0.0388   12.53
## hispanic1     0.2297     0.0332    6.91
## other1       -0.2447     0.0504   -4.86
## 
## Correlation of Fixed Effects:
##           (Intr) agez   lths   coll   black1 hspnc1
## agez       0.040                                   
## lths      -0.264 -0.046                            
## coll      -0.506  0.008  0.435                     
## black1    -0.195  0.065 -0.020  0.096              
## hispanic1 -0.234  0.214 -0.235  0.158  0.271       
## other1    -0.092  0.092 -0.030 -0.038  0.163  0.183

Here I fit the model using the state and stratum as random effects to mimic the sample design

fit.mix.samp<-lmer(bmiz~agez+lths+coll+black+hispanic+other+(1|ststr)+(1|cofips)
                   , brfss_11, weights=swts)
summary(fit.mix.samp)

## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## bmiz ~ agez + lths + coll + black + hispanic + other + (1 | ststr) +  
##     (1 | cofips)
##    Data: brfss_11
## Weights: swts
## 
## REML criterion at convergence: 21923
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -4.357 -0.559 -0.142  0.397 12.833 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ststr    (Intercept) 0.00941  0.097   
##  cofips   (Intercept) 0.01435  0.120   
##  Residual             1.02154  1.011   
## Number of obs: 6991, groups:  ststr, 29; cofips, 9
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   0.0027     0.0542    0.05
## agez          0.0460     0.0134    3.42
## lths          0.0421     0.0432    0.98
## coll         -0.0264     0.0296   -0.89
## black1        0.4883     0.0388   12.58
## hispanic1     0.2356     0.0334    7.06
## other1       -0.2416     0.0504   -4.79
## 
## Correlation of Fixed Effects:
##           (Intr) agez   lths   coll   black1 hspnc1
## agez       0.042                                   
## lths      -0.210 -0.043                            
## coll      -0.412  0.011  0.435                     
## black1    -0.165  0.060 -0.021  0.096              
## hispanic1 -0.193  0.191 -0.235  0.158  0.270       
## other1    -0.083  0.088 -0.031 -0.036  0.164  0.184