This example will cover the use of R functions for fitting Ordinal and Multinomial logit models to complex survey data.

For this example I am using 2011 CDC Behavioral Risk Factor Surveillance System (BRFSS) SMART county data. Link

#load brfss
library(car)
library(survey)
## Loading required package: grid
## 
## Attaching package: 'survey'
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(nnet)
load("~/Google Drive/dem7283/data/brfss_11.Rdata")

#The names in the data are very ugly, so I make them less ugly
nams<-names(brfss_11)
head(nams, n=10)
##  [1] "_STATE"   "precall"  "fmonth"   "intvid"   "seqno"    "_PSU"    
##  [7] "ctelenum" "cellfon"  "pvtresid" "genhlth"
#we see some names are lower case, some are upper and some have a little _ in the first position. This is a nightmare.
newnames<-gsub(pattern = "_",replacement =  "",x =  nams)
names(brfss_11)<-tolower(newnames)

#Our outcome this time is the full spectrum of reported health 
brfss_11$health<-as.factor(ifelse(brfss_11$genhlth%in%c(7,9),NA, brfss_11$genhlth))
brfss_11$health<-relevel(brfss_11$health, ref="1")
brfss_11$health2<-ifelse(brfss_11$genhlth%in%c(7,9),NA, brfss_11$genhlth)

#race/ethnicity
brfss_11$black<-recode(brfss_11$racegr2, recodes="2=1; 9=NA; else=0")
brfss_11$white<-recode(brfss_11$racegr2, recodes="1=1; 9=NA; else=0")
brfss_11$other<-recode(brfss_11$racegr2, recodes="3:4=1; 9=NA; else=0")
brfss_11$hispanic<-recode(brfss_11$racegr2, recodes="5=1; 9=NA; else=0")
brfss_11$race_group<-recode(brfss_11$racegr2, recodes="1='NH white'; 2='NH black'; 3:4='NH other';5='hispanic'; else=NA", as.factor.result = T)
brfss_11$race_group<-relevel(brfss_11$race_group, ref = 'NH white')
#insurance
brfss_11$ins<-ifelse(brfss_11$hlthpln1==1,1,0)

#income grouping
brfss_11$inc<-ifelse(brfss_11$incomg==9, NA, brfss_11$incomg)

#education level
brfss_11$educ<-recode(brfss_11$educa, recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA", as.factor.result=T)
#brfss_11$educ<-relevel(brfss_11$educ, ref='0Prim')

#employment
brfss_11$employ<-recode(brfss_11$employ, recodes="1:2='Employed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA", as.factor.result=T)
brfss_11$employ<-relevel(brfss_11$employ, ref='Employed')

#marital status
brfss_11$marst<-recode(brfss_11$marital, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA", as.factor.result=T)
brfss_11$marst<-relevel(brfss_11$marst, ref='married')

#Age cut into intervals
brfss_11$agec<-cut(brfss_11$age, breaks=c(0,24,39,59,79,99))

Analysis

First, we will subset our data to have complete cases for our variables in our model and make our survey design object

#First we tell R our survey design
options(survey.lonely.psu = "adjust")
brfss_11<-brfss_11[complete.cases(brfss_11$health,brfss_11$black,brfss_11$educ,brfss_11$agec) ,]
des<-svydesign(ids=~psu, strata=~ststr, weights=~cntywt, data = brfss_11[is.na(brfss_11$cntywt)==F,], nest=T )

Ordinal Regression example

To fit an ordinal logit to survey data in R, we use the svyolr fucntion in the survey library.

#Here I fit three nested models for the health outcome
fit.solr1<-svyolr(health~black+hispanic+other,des)
fit.solr2<-svyolr(health~black+hispanic+other+educ,des)
fit.solr3<-svyolr(health~black+hispanic+other+educ+agec,des)

#Show summary of each model and calculate the AIC for each model, even though the AIC is probably not right
summary(fit.solr1);fit.solr1$deviance+2*length(fit.solr1$coefficients)
## Call:
## svyolr(health ~ black + hispanic + other, des)
## 
## Coefficients:
##              Value Std. Error  t value
## black    0.5702469 0.02572036 22.17103
## hispanic 0.7846439 0.02690469 29.16384
## other    0.1228688 0.03274422  3.75238
## 
## Intercepts:
##     Value     Std. Error t value  
## 1|2   -1.1699    0.0106  -110.4291
## 2|3    0.3294    0.0091    36.1502
## 3|4    1.8947    0.0131   144.3386
## 4|5    3.4507    0.0234   147.1528
## [1] 679738
summary(fit.solr2);fit.solr2$deviance+2*length(fit.solr2$coefficients)
## Call:
## svyolr(health ~ black + hispanic + other + educ, des)
## 
## Coefficients:
##                   Value Std. Error    t value
## black         0.4010183 0.02643104  15.172247
## hispanic      0.2945284 0.02811464  10.475980
## other         0.1915769 0.03358538   5.704175
## educ1somehs  -0.5697689 0.06236792  -9.135609
## educ2hsgrad  -1.1224331 0.05581058 -20.111476
## educ3somecol -1.4822546 0.05559810 -26.660166
## educ4colgrad -2.0072167 0.05569360 -36.040348
## 
## Intercepts:
##     Value    Std. Error t value 
## 1|2  -2.7462   0.0560   -49.0412
## 2|3  -1.1811   0.0548   -21.5468
## 3|4   0.4766   0.0542     8.7979
## 4|5   2.0842   0.0560    37.2026
## [1] 662336.4
summary(fit.solr3);fit.solr3$deviance+2*length(fit.solr3$coefficients)
## Call:
## svyolr(health ~ black + hispanic + other + educ + agec, des)
## 
## Coefficients:
##                   Value Std. Error    t value
## black         0.4888241 0.02653683  18.420591
## hispanic      0.4946935 0.02886040  17.140914
## other         0.3158002 0.03337465   9.462275
## educ1somehs  -0.3880685 0.06185141  -6.274207
## educ2hsgrad  -0.9585043 0.05537993 -17.307793
## educ3somecol -1.2865151 0.05537298 -23.233625
## educ4colgrad -1.8636030 0.05552767 -33.561702
## agec(24,39]   0.2968264 0.03256324   9.115383
## agec(39,59]   0.6399445 0.03061534  20.902738
## agec(59,79]   1.0194199 0.03106605  32.814598
## agec(79,99]   1.2943649 0.04064550  31.845220
## 
## Intercepts:
##     Value    Std. Error t value 
## 1|2  -2.0040   0.0623   -32.1667
## 2|3  -0.4064   0.0615    -6.6080
## 3|4   1.2919   0.0609    21.2009
## 4|5   2.9265   0.0619    47.2479
## [1] 653886.4
#"Examine" proportional odds assumption by fitting logits for each change
ex1<-svyglm(I(health2>1)~black+hispanic+other+educ+agec,des, family="binomial")
## Warning: non-integer #successes in a binomial glm!
ex2<-svyglm(I(health2>2)~black+hispanic+other+educ+agec,des, family="binomial")
## Warning: non-integer #successes in a binomial glm!
ex3<-svyglm(I(health2>3)~black+hispanic+other+educ+agec,des, family="binomial")
## Warning: non-integer #successes in a binomial glm!
ex4<-svyglm(I(health2>4)~black+hispanic+other+educ+agec,des, family="binomial")
## Warning: non-integer #successes in a binomial glm!
#Just a a rough guide, I plot the coefficients of each model. If the proportional odds
#assumption is ok, they all should be "approximately" the same values, they are NOT!!
plot(coef(ex1)[-1], ylim=c(-2, 3), type="l", ylab="Beta", main=c("Comparison of betas for", " proportional odds assumption"))
lines(coef(ex2)[-1], col=2) #not bad
lines(coef(ex3)[-1], col=3) # again, not bad
lines(coef(ex4)[-1], col=4) #not as good, the effect of age are very different for the change between

#fair or better -> poor health


#Just print the odds ratios, 
round(exp(round(rbind(coef(ex1)[-1], coef(ex2)[-1], coef(ex3)[-1], coef(ex4)[-1]),2)),3)
##      black hispanic other educ1somehs educ2hsgrad educ3somecol
## [1,] 1.405    1.310 1.162       0.756       0.566        0.432
## [2,] 1.733    1.878 1.537       0.712       0.403        0.284
## [3,] 1.733    1.768 1.419       0.705       0.395        0.278
## [4,] 1.284    1.083 1.323       0.763       0.375        0.259
##      educ4colgrad agec(24,39] agec(39,59] agec(59,79] agec(79,99]
## [1,]        0.262       1.271       1.632       2.340       3.456
## [2,]        0.154       1.336       1.804       2.612       3.525
## [3,]        0.129       1.600       2.945       4.306       5.366
## [4,]        0.110       2.293       6.297       8.846       9.300

Multinomial Model

Unfortunately, in R there is no multinomial survey design model, so we have to trick it, by creating some replicate weights. This is what Thomas Lumley (the author of the survey package) says to do in this case (and in cases where the type of model can’t be fit otherwise) WARNING, THIS LOOKS SCREWY

#create a bootstrap replicate weight set
des2<-as.svrepdesign(des, type="bootstrap" , replicates=10)
#Now I fit the model, this is screwy looking
mfit<-withReplicates(des2, quote(coef(multinom(health~black+hispanic+other+educ+agec,weights=.weights,trace=F ))))
#get the betas
mfitcoef<-data.frame(matrix(attr(attr(mfit, "var"), "means")[-1:-4], nrow=4, ncol=11, byrow=F))
names(mfitcoef)<-names(coef(ex1)[-1])

#odds ratios
round(exp(mfitcoef), 3)
##   black hispanic other educ1somehs educ2hsgrad educ3somecol educ4colgrad
## 1 1.048    0.897 0.905       1.049       1.206        1.107        0.879
## 2 1.637    1.603 1.430       0.883       0.613        0.433        0.229
## 3 2.302    2.306 1.514       0.628       0.327        0.195        0.070
## 4 1.838    1.537 1.529       0.589       0.220        0.119        0.036
##   agec(24,39] agec(39,59] agec(59,79] agec(79,99]
## 1       1.100       1.250       1.507       1.893
## 2       1.281       1.612       2.435       3.733
## 3       1.643       3.118       5.817       9.968
## 4       2.529       9.283      18.331      27.412
#get the covariance matrix for the betas
vcov<-matrix(attr(mfit, "var"), nrow=48, ncol=48)

#get the z tests by using the coefficients and the standard errors (diag(vcov))
z<-as.vector(mfitcoef/sqrt(diag(vcov)[-1:-4]))
round(z, 2)
##   black hispanic other educ1somehs educ2hsgrad educ3somecol educ4colgrad
## 1  1.02    -2.38 -1.47        0.25        0.90         0.48        -0.60
## 2 14.41     7.29  6.05       -0.60       -2.24        -3.90        -6.72
## 3 17.43    15.64  4.15       -2.28       -5.88        -8.20       -13.21
## 4 10.98     5.13  3.43       -3.01       -7.43       -10.12       -13.74
##   agec(24,39] agec(39,59] agec(59,79] agec(79,99]
## 1        1.74        6.02        9.96       11.41
## 2        3.90        9.50       22.48       15.29
## 3        5.97       12.32       20.01       28.30
## 4        5.96       15.70       25.33       22.17
#p values
pvals<-round((1-pnorm(abs(as.matrix(z)),0,1))*2,4)
round(pvals, 3)
##      black hispanic other educ1somehs educ2hsgrad educ3somecol
## [1,] 0.309    0.017 0.141       0.801       0.366        0.633
## [2,] 0.000    0.000 0.000       0.548       0.025        0.000
## [3,] 0.000    0.000 0.000       0.022       0.000        0.000
## [4,] 0.000    0.000 0.001       0.003       0.000        0.000
##      educ4colgrad agec(24,39] agec(39,59] agec(59,79] agec(79,99]
## [1,]         0.55       0.082           0           0           0
## [2,]         0.00       0.000           0           0           0
## [3,]         0.00       0.000           0           0           0
## [4,]         0.00       0.000           0           0           0