This is an example of how to use survey design weights with linear mixed models using the lmer() function. It follows the logic of:

[“Carle, 2009”] (http://www.biomedcentral.com/1471-2288/9/49)

library(lme4)
## Loading required package: Matrix
## Loading required package: Rcpp
library(car)

#load brfss from github
library(RCurl)
## Loading required package: bitops
url<-"https://raw.githubusercontent.com/coreysparks/data/master/brfss_11.csv"
brfss_11<-getURL(url)
brfss_11<-read.csv(textConnection(brfss_11), header=T, dec=",")
nams<-names(brfss_11)
newnames<-gsub("_", "", nams)
names(brfss_11)<-tolower(newnames)

brfss_11$statefip<-sprintf("%02d", brfss_11$state )
brfss_11$cofip<-sprintf("%03d", brfss_11$cnty )
brfss_11$cofips<-paste(brfss_11$statefip, brfss_11$cofip, sep="")

#just for brevity, I just select TX respondents with non missing weights
brfss_11<-brfss_11[is.na(brfss_11$cntywt)==F,]
#insurance
brfss_11$ins<-ifelse(brfss_11$hlthpln1==1,1,0)
#smoking currently
brfss_11$smoke<-recode(brfss_11$smoker3, recodes="1:2=1; 3:4=0; else=NA")
#low physical activity
brfss_11$lowact<-recode(brfss_11$pacat, recodes="1:2=0; 3:4=1; else=NA")
#high blood pressure
brfss_11$hbp<-recode(brfss_11$bphigh4, recodes="1=1; 2:4=0; else=NA")
#high cholesterol
brfss_11$hc<-recode(brfss_11$toldhi2, recodes="1=1; 2=0; else=NA")
#bmi
brfss_11$bmi<-ifelse(is.na(brfss_11$bmi5)==T, NA, brfss_11$bmi5/100)
#poor or fair health
brfss_11$badhealth<-ifelse(brfss_11$genhlth %in% c(4,5),1,0)

#race
brfss_11$black<-recode(brfss_11$racegr2, recodes="2=1; 9=NA; else=0", as.factor.result=T)
brfss_11$white<-recode(brfss_11$racegr2, recodes="1=1; 9=NA; else=0", as.factor.result=T)
brfss_11$other<-recode(brfss_11$racegr2, recodes="3:4=1; 9=NA; else=0", as.factor.result=T)
brfss_11$hispanic<-recode(brfss_11$racegr2, recodes="5=1; 9=NA; else=0", as.factor.result=T)

#have a personal doctor?
brfss_11$doc<-recode(brfss_11$persdoc2, recodes="1:2=1; 3=0; else=NA", as.factor.result=F)

#needed care in last year but couldn't get it because of cost
brfss_11$medacc<-recode(brfss_11$medcost, recodes="1=1;2=0;else=NA")

#education level
brfss_11$lths<-recode(brfss_11$educa, recodes="1:3=1;9=NA; else=0", as.factor.result=F)
brfss_11$coll<-recode(brfss_11$educa, recodes="5:6=1;9=NA; else=0", as.factor.result=F)

#marital status
brfss_11$marst<-recode(brfss_11$marital, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA", as.factor.result=T)
brfss_11$marst<-relevel(brfss_11$marst, ref='married')

#income
brfss_11$inc<-as.factor(ifelse(brfss_11$incomg==9, NA, brfss_11$incomg))

#Age cut into intervals
brfss_11$agec<-cut(brfss_11$age, breaks=c(0,24,39,59,79,99))

Now we will begin fitting the multilevel regression model with the county that the person lives in being the higher level

brfss_11$bmiz<-scale(brfss_11$bmi, center=T, scale=T)
brfss_11$agez<-scale(brfss_11$age, center=T, scale=T)

Here is an example of using weights that sum to the county-specific sample size make an id that is the combination of state and strata

brfss_11$cntywt<-as.numeric(as.character(brfss_11$cntywt))
brfss_11$sampleid<-paste(brfss_11$state, brfss_11$ststr)
#within each sampling unit, sum the weights
wts<-tapply(brfss_11$cntywt,brfss_11$sampleid,sum)
#make a data frame from this
wts<-data.frame(id=names(unlist(wts)), wt=unlist(wts))
#get the unique sampling location ids'
t1<-as.data.frame(table(brfss_11$sampleid))
#put all of this into a data set
wts2<-data.frame(ids=wts$id, sumwt=wts$wt, jn=t1$Freq)
#merge all of this back to the original data file
brfss_11<-merge(brfss_11, wts2, by.x="sampleid", by.y="ids", all.x=T)
#In the new data set, multiply the original weight by the fraction of the
#sampling unit total population each person represents
brfss_11$swts<-brfss_11$cntywt*(brfss_11$jn/brfss_11$sumwt)
hist(brfss_11$swts)

plot of chunk unnamed-chunk-3

hist(brfss_11$cntywt)

plot of chunk unnamed-chunk-3

names(brfss_11)
##   [1] "sampleid"  "x"         "state"     "precall"   "fmonth"   
##   [6] "intvid"    "seqno"     "psu"       "ctelenum"  "cellfon"  
##  [11] "pvtresid"  "genhlth"   "physhlth"  "menthlth"  "poorhlth" 
##  [16] "hlthpln1"  "persdoc2"  "medcost"   "checkup1"  "bphigh4"  
##  [21] "bpmeds"    "bloodcho"  "cholchk"   "toldhi2"   "cvdinfr4" 
##  [26] "cvdcrhd4"  "cvdstrk3"  "asthma3"   "asthnow"   "chcscncr" 
##  [31] "chcocncr"  "chccopd"   "havarth3"  "addepev2"  "chckidny" 
##  [36] "chcvison"  "diabete3"  "smoke100"  "smokday2"  "stopsmk2" 
##  [41] "lastsmk2"  "usenow3"   "age"       "hispanc2"  "mrace"    
##  [46] "orace2"    "veteran3"  "marital"   "children"  "educa"    
##  [51] "employ"    "income2"   "weight2"   "height3"   "numhhol2" 
##  [56] "numphon2"  "cpdemo1"   "cpdemo2"   "cpdemo3"   "cpdemo4"  
##  [61] "renthom1"  "sex"       "pregnant"  "fruitju1"  "fruit1"   
##  [66] "fvbeans"   "fvgreen"   "fvorang"   "vegetab1"  "exerany2" 
##  [71] "exract01"  "exeroft1"  "exerhmm1"  "exract02"  "exeroft2" 
##  [76] "exerhmm2"  "strength"  "qlactlm2"  "useequip"  "lmtjoin3" 
##  [81] "arthdis2"  "arthsocl"  "joinpain"  "seatbelt"  "flushot5" 
##  [86] "flshtmy2"  "imfvplac"  "pneuvac3"  "alcday5"   "avedrnk2" 
##  [91] "drnk3ge5"  "maxdrnks"  "hivtst6"   "hivtstd3"  "hivrisk3" 
##  [96] "ststr"     "impage"    "imprace"   "impnph"    "rfhlth"   
## [101] "hcvu651"   "rfhype5"   "cholchk.1" "rfchol"    "ltasth1"  
## [106] "casthm1"   "asthms1"   "drdxar1"   "smoker3"   "rfsmok3"  
## [111] "mraceorg"  "mraceasc"  "prace"     "mrace.1"   "race2"    
## [116] "raceg2"    "racegr2"   "raceg"     "cnrace"    "cnracec"  
## [121] "ageg5yr"   "age65yr"   "ageg"      "htin4"     "htm4"     
## [126] "wtkg3"     "bmi5"      "bmi5cat"   "rfbmi5"    "chldcnt"  
## [131] "educag"    "incomg"    "ftjuda1"   "frutda1"   "beanday"  
## [136] "grenday"   "orngday"   "vegeda1"   "misfrtn"   "misvegn"  
## [141] "frtresp"   "vegresp"   "frutsum"   "vegesum"   "frt16"    
## [146] "veg23"     "fruitex"   "vegetex"   "totinda"   "metval1"  
## [151] "metval2"   "maxvo2"    "fc60"      "actint1"   "actint2"  
## [156] "padur1"    "padur2"    "pafreq1"   "pafreq2"   "minact1"  
## [161] "minact2"   "strfreq"   "pamiss"    "pamin1"    "pamin2"   
## [166] "pamin"     "pavigm1"   "pavigm2"   "pavigmn"   "pacat"    
## [171] "paindex"   "pa150r1"   "pa300r1"   "pa3002l"   "pastrng"  
## [176] "parec"     "pastaer"   "rfseat2"   "rfseat3"   "flshot5"  
## [181] "pneumo2"   "drnkany5"  "drocdy3"   "rfbing5"   "drnkdy4"  
## [186] "drnkmo4"   "rfdrhv4"   "rfdrmn4"   "rfdrwm4"   "aidtst3"  
## [191] "cntywt"    "cnty"      "endofrec"  "badhealth" "black"    
## [196] "white"     "other"     "hispanic"  "ins"       "inc"      
## [201] "smoke"     "hivrisk"   "lowact"    "doc"       "medacc"   
## [206] "educ"      "marst"     "agec"      "bmi"       "statefip" 
## [211] "cofip"     "cofips"    "hbp"       "hc"        "lths"     
## [216] "coll"      "bmiz"      "agez"      "sumwt"     "jn"       
## [221] "swts"

Then we fit the multilevel model to specify a random intercept model for counties, we add a model term that is (1|cofips)

Here I fit the model with no weights

fit.mixnowt<-lmer(bmiz~agez+lths+coll+black+hispanic+other+(1|cofips), brfss_11)
summary(fit.mixnowt)
## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## bmiz ~ agez + lths + coll + black + hispanic + other + (1 | cofips)
##    Data: brfss_11
## 
## REML criterion at convergence: 19664
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -2.473 -0.671 -0.152  0.494  8.990 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  cofips   (Intercept) 0.00263  0.0513  
##  Residual             0.96982  0.9848  
## Number of obs: 6991, groups:  cofips, 9
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  -0.0236     0.0323   -0.73
## agez         -0.0242     0.0127   -1.90
## lths          0.1160     0.0464    2.50
## coll         -0.0635     0.0296   -2.15
## black1        0.4817     0.0424   11.35
## hispanic1     0.1731     0.0362    4.79
## other1       -0.1679     0.0523   -3.21
## 
## Correlation of Fixed Effects:
##           (Intr) agez   lths   coll   black1 hspnc1
## agez      -0.126                                   
## lths      -0.356 -0.040                            
## coll      -0.714  0.065  0.427                     
## black1    -0.214  0.116 -0.045  0.091              
## hispanic1 -0.275  0.253 -0.242  0.148  0.213       
## other1    -0.112  0.137 -0.030 -0.023  0.118  0.147

Here I fit the model using the standardized sample weights

fit.mix<-lmer(bmiz~agez+lths+coll+black+hispanic+other+(1|cofips), brfss_11, weights=swts)
summary(fit.mix)
## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## bmiz ~ agez + lths + coll + black + hispanic + other + (1 | cofips)
##    Data: brfss_11
## Weights: swts
## 
## REML criterion at convergence: 21933
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -4.271 -0.565 -0.145  0.393 12.811 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  cofips   (Intercept) 0.0106   0.103   
##  Residual             1.0256   1.013   
## Number of obs: 6991, groups:  cofips, 9
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  -0.0108     0.0442   -0.24
## agez          0.0523     0.0131    4.01
## lths          0.0394     0.0432    0.91
## coll         -0.0278     0.0296   -0.94
## black1        0.4859     0.0388   12.53
## hispanic1     0.2297     0.0332    6.91
## other1       -0.2447     0.0504   -4.86
## 
## Correlation of Fixed Effects:
##           (Intr) agez   lths   coll   black1 hspnc1
## agez       0.040                                   
## lths      -0.264 -0.046                            
## coll      -0.506  0.008  0.435                     
## black1    -0.195  0.065 -0.020  0.096              
## hispanic1 -0.234  0.214 -0.235  0.158  0.271       
## other1    -0.092  0.092 -0.030 -0.038  0.163  0.183

Here I fit the model using the state and stratum as random effects to mimic the sample design

fit.mix.samp<-lmer(bmiz~agez+lths+coll+black+hispanic+other+(1|ststr)+(1|cofips)
                   , brfss_11, weights=swts)
summary(fit.mix.samp)
## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## bmiz ~ agez + lths + coll + black + hispanic + other + (1 | ststr) +  
##     (1 | cofips)
##    Data: brfss_11
## Weights: swts
## 
## REML criterion at convergence: 21923
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -4.357 -0.559 -0.142  0.397 12.833 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ststr    (Intercept) 0.00941  0.097   
##  cofips   (Intercept) 0.01435  0.120   
##  Residual             1.02154  1.011   
## Number of obs: 6991, groups:  ststr, 29; cofips, 9
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   0.0027     0.0542    0.05
## agez          0.0460     0.0134    3.42
## lths          0.0421     0.0432    0.98
## coll         -0.0264     0.0296   -0.89
## black1        0.4883     0.0388   12.58
## hispanic1     0.2356     0.0334    7.06
## other1       -0.2416     0.0504   -4.79
## 
## Correlation of Fixed Effects:
##           (Intr) agez   lths   coll   black1 hspnc1
## agez       0.042                                   
## lths      -0.210 -0.043                            
## coll      -0.412  0.011  0.435                     
## black1    -0.165  0.060 -0.021  0.096              
## hispanic1 -0.193  0.191 -0.235  0.158  0.270       
## other1    -0.083  0.088 -0.031 -0.036  0.164  0.184