Professor C. Sparks

In this homework, we will perform multi-level analysis using multi-level data (individual and higher level variables). The higher level variable will be metropolitan and micropolitan statistical area (mmsa) from BRFSS 2016 dataset.

Does the number of hours of sleep for individuals vary between MMSAs while account for sociodemographic factors such as education, marital staus, labor force participation, and exercise?

The continuous outcome: number of hours of sleep Predictors: mmsa (higher level) college education (individual level) marital status (individual level) labor force participation (individual level) exercise (individual level)

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
load("~/Documents/1A_DEM Doctorate/DEM 7283/brfss16_mmsa.Rdata")
names(brfss16m)
##   [1] "DISPCODE" "HHADULT"  "GENHLTH"  "PHYSHLTH" "MENTHLTH" "POORHLTH"
##   [7] "HLTHPLN1" "PERSDOC2" "MEDCOST"  "CHECKUP1" "EXERANY2" "SLEPTIM1"
##  [13] "CVDINFR4" "CVDCRHD4" "CVDSTRK3" "ASTHMA3"  "ASTHNOW"  "CHCSCNCR"
##  [19] "CHCOCNCR" "CHCCOPD1" "HAVARTH3" "ADDEPEV2" "CHCKIDNY" "DIABETE3"
##  [25] "DIABAGE2" "LASTDEN3" "RMVTETH3" "SEX"      "MARITAL"  "EDUCA"   
##  [31] "RENTHOM1" "NUMHHOL2" "NUMPHON2" "VETERAN3" "EMPLOY1"  "CHILDREN"
##  [37] "INCOME2"  "INTERNET" "WEIGHT2"  "HEIGHT3"  "PREGNANT" "DEAF"    
##  [43] "BLIND"    "DECIDE"   "DIFFWALK" "DIFFDRES" "DIFFALON" "SMOKE100"
##  [49] "SMOKDAY2" "STOPSMK2" "LASTSMK2" "USENOW3"  "ECIGARET" "ECIGNOW" 
##  [55] "ALCDAY5"  "AVEDRNK2" "DRNK3GE5" "MAXDRNKS" "FLUSHOT6" "FLSHTMY2"
##  [61] "PNEUVAC3" "TETANUS"  "FALL12MN" "FALLINJ2" "SEATBELT" "DRNKDRI2"
##  [67] "HADMAM"   "HOWLONG"  "HADPAP2"  "LASTPAP2" "HPVTEST"  "HPLSTTST"
##  [73] "HADHYST2" "PCPSAAD2" "PCPSADI1" "PCPSARE1" "PSATEST1" "PSATIME" 
##  [79] "PCPSARS1" "BLDSTOOL" "LSTBLDS3" "HADSIGM3" "HADSGCO1" "LASTSIG3"
##  [85] "HIVTST6"  "HIVTSTD3" "HIVRISK4" "_STSTR"   "_IMPSEX"  "_RFHLTH" 
##  [91] "_PHYS14D" "_MENT14D" "_HCVU651" "_TOTINDA" "_MICHD"   "_LTASTH1"
##  [97] "_CASTHM1" "_ASTHMS1" "_DRDXAR1" "_EXTETH2" "_ALTETH2" "_DENVST2"
## [103] "_PRACE1"  "_MRACE1"  "_HISPANC" "_RACE"    "_RACEG21" "_RACEGR3"
## [109] "_AGEG5YR" "_AGE65YR" "_AGE80"   "_AGE_G"   "WTKG3"    "_BMI5"   
## [115] "_BMI5CAT" "_RFBMI5"  "_EDUCAG"  "_INCOMG"  "_SMOKER3" "_RFSMOK3"
## [121] "_ECIGSTS" "_CURECIG" "DRNKANY5" "_RFBING5" "_DRNKWEK" "_RFDRHV5"
## [127] "_FLSHOT6" "_PNEUMO2" "_RFSEAT2" "_RFSEAT3" "_DRNKDRV" "_RFMAM2Y"
## [133] "_MAM5021" "_RFPAP33" "_RFPSA21" "_RFBLDS3" "_COL10YR" "_HFOB3YR"
## [139] "_FS5YR"   "_FOBTFS"  "_CRCREC"  "_AIDTST3" "_MMSA"    "_MMSAWT" 
## [145] "SEQNO"    "MMSANAME"
Clean up names
newnames<-tolower(gsub(pattern = "_", replacement = "", x= names(brfss16m))) 
names(brfss16m)<-newnames
names(brfss16m)
##   [1] "dispcode" "hhadult"  "genhlth"  "physhlth" "menthlth" "poorhlth"
##   [7] "hlthpln1" "persdoc2" "medcost"  "checkup1" "exerany2" "sleptim1"
##  [13] "cvdinfr4" "cvdcrhd4" "cvdstrk3" "asthma3"  "asthnow"  "chcscncr"
##  [19] "chcocncr" "chccopd1" "havarth3" "addepev2" "chckidny" "diabete3"
##  [25] "diabage2" "lastden3" "rmvteth3" "sex"      "marital"  "educa"   
##  [31] "renthom1" "numhhol2" "numphon2" "veteran3" "employ1"  "children"
##  [37] "income2"  "internet" "weight2"  "height3"  "pregnant" "deaf"    
##  [43] "blind"    "decide"   "diffwalk" "diffdres" "diffalon" "smoke100"
##  [49] "smokday2" "stopsmk2" "lastsmk2" "usenow3"  "ecigaret" "ecignow" 
##  [55] "alcday5"  "avedrnk2" "drnk3ge5" "maxdrnks" "flushot6" "flshtmy2"
##  [61] "pneuvac3" "tetanus"  "fall12mn" "fallinj2" "seatbelt" "drnkdri2"
##  [67] "hadmam"   "howlong"  "hadpap2"  "lastpap2" "hpvtest"  "hplsttst"
##  [73] "hadhyst2" "pcpsaad2" "pcpsadi1" "pcpsare1" "psatest1" "psatime" 
##  [79] "pcpsars1" "bldstool" "lstblds3" "hadsigm3" "hadsgco1" "lastsig3"
##  [85] "hivtst6"  "hivtstd3" "hivrisk4" "ststr"    "impsex"   "rfhlth"  
##  [91] "phys14d"  "ment14d"  "hcvu651"  "totinda"  "michd"    "ltasth1" 
##  [97] "casthm1"  "asthms1"  "drdxar1"  "exteth2"  "alteth2"  "denvst2" 
## [103] "prace1"   "mrace1"   "hispanc"  "race"     "raceg21"  "racegr3" 
## [109] "ageg5yr"  "age65yr"  "age80"    "ageg"     "wtkg3"    "bmi5"    
## [115] "bmi5cat"  "rfbmi5"   "educag"   "incomg"   "smoker3"  "rfsmok3" 
## [121] "ecigsts"  "curecig"  "drnkany5" "rfbing5"  "drnkwek"  "rfdrhv5" 
## [127] "flshot6"  "pneumo2"  "rfseat2"  "rfseat3"  "drnkdrv"  "rfmam2y" 
## [133] "mam5021"  "rfpap33"  "rfpsa21"  "rfblds3"  "col10yr"  "hfob3yr" 
## [139] "fs5yr"    "fobtfs"   "crcrec"   "aidtst3"  "mmsa"     "mmsawt"  
## [145] "seqno"    "mmsaname"
Recode variables
#outcome variable: Number of hours of sleep
brfss16m$sleephrs<-car::recode(brfss16m$sleptim1, recodes = '77:99=NA', as.factor=F)

#z-scale outcome variable of number hours of sleep
brfss16m$sleephrsz<-as.numeric(scale(brfss16m$sleephrs, center=T, scale=T))

#recode the number of hours of sleep to binary
brfss16m$shortsleep<-Recode(brfss16m$sleptim1, recodes ="1:6='short sleep'; 7:24='normal sleep'; else=NA")

#recode self rated health. Any day interfered with ADL then poor, all else good
brfss16m$mentpoor<-Recode(brfss16m$menthlth, recodes ="1:30='poor'; 88='good'; else=NA")
brfss16m$physpoor<-Recode(brfss16m$physhlth, recodes ="1:30='poor'; 88='good'; else=NA")

#recode general health.  Good and excelent are good, all else less good
brfss16m$genpoor<-Recode(brfss16m$genhlth, recodes ="4:5='0poor'; 1:3='1good'; else=NA")

#recode sex variable to male yes or no
brfss16m$male<-Recode(brfss16m$sex, recodes="1='1male'; 0='0female'; else=NA", as.factor=T)

#recode 5 level race/ethnicity to individuala variables
brfss16m$black<-Recode(brfss16m$racegr3, recodes="2='black';9=NA; else='non black'")
brfss16m$white<-Recode(brfss16m$racegr3, recodes="1='white'; 9=NA; else='non white'")
brfss16m$other<-Recode(brfss16m$racegr3, recodes="3:4='other'; 9=NA; else='non other'")
brfss16m$hispanic<-Recode(brfss16m$racegr3, recodes="5='hispanic'; 9=NA; else='non hispanic'")

##Education
#recode college education
brfss16m$college<-Recode(brfss16m$educag, recodes="1='college'; 2:6='no college'; else=NA")

#income
#recode poverty
brfss16m$poverty<-Recode(brfss16m$incomeg, recodes="1:2='poverty'; 3:5='no poverty'; else=NA")
## Warning: Unknown or uninitialised column: 'incomeg'.
#recode employment
brfss16m$employyes<-Recode(brfss16m$employ1, recodes="1:2='employed'; 3:4='not employed';  else=NA")

#recode laborforce etc
brfss16m$laborforce<-Recode(brfss16m$employ1, recodes="1:4='in lab force'; 5:8='not in lab force';  else=NA")
brfss16m$student<-Recode(brfss16m$employ1, recodes="6=1; 9=NA;  else=0")
brfss16m$homemaker<-Recode(brfss16m$employ1, recodes="5=1; 9=NA;  else=0")
brfss16m$retired<-Recode(brfss16m$employ1, recodes="7=1; 9=NA;  else=0")
brfss16m$unable<-Recode(brfss16m$employ1, recodes="8=1; 9=NA;  else=0")

#recode veteran status
brfss16m$vetyes<-Recode(brfss16m$veteran3, recodes="1='vet'; 2='not vet'; else=NA")

#recode marital status
brfss16m$married<-Recode(brfss16m$marital, recodes="1='married'; 2:6='not married'; else=NA")

##Chronic diseases 

#recode asthma ever
brfss16m$asthma<-Recode(brfss16m$asthma3, recodes="1='asthma'; 2='no asthma'; else=NA")

#recode heart attack ever
brfss16m$hrattack<-Recode(brfss16m$cvdinfr4, recodes="1='heart attack'; 2='no ha'; else=NA")

#recode COPD ever
brfss16m$copd<-Recode(brfss16m$chccopd1, recodes="1='copd'; 2='no copd'; else=NA")

#recode diabetes ever
brfss16m$dm<-Recode(brfss16m$diabete3, recodes="1='diabetes'; 2='no dm'; else=NA")

#recode obese ever
brfss16m$obese<-Recode(brfss16m$bmi5cat, recodes="1='obese'; 2='not obese'; else=NA")

#recode depression ever
brfss16m$depression<-Recode(brfss16m$addepev2, recodes="1='depression'; 2:6='no depression'; else=NA")

##Behaviors
#recode exercise recently
brfss16m$exercise<-Recode(brfss16m$exerany2, recodes="1='exercise'; 2:6='no exercise'; else=NA")
data.frame(name=table(brfss16m$mmsaname), id=unique(brfss16m$mmsa))
##                                                                         name.Var1
## 1                      Albany-Schenectady-Troy, NY, Metropolitan Statistical Area
## 2                                  Albuquerque, NM, Metropolitan Statistical Area
## 3                Allentown-Bethlehem-Easton, PA-NJ, Metropolitan Statistical Area
## 4                                    Anchorage, AK, Metropolitan Statistical Area
## 5                Atlanta-Sandy Springs-Roswell, GA, Metropolitan Statistical Area
## 6                   Augusta-Richmond County, GA-SC, Metropolitan Statistical Area
## 7                            Austin-Round Rock, TX, Metropolitan Statistical Area
## 8                    Baltimore-Columbia-Towson, MD, Metropolitan Statistical Area
## 9                                  Baton Rouge, LA, Metropolitan Statistical Area
## 10                                     Beckley, WV, Metropolitan Statistical Area
## 11                                   Berlin, NH-VT, Micropolitan Statistical Area
## 12                                    Billings, MT, Metropolitan Statistical Area
## 13                                  Binghamton, NY, Metropolitan Statistical Area
## 14                           Birmingham-Hoover, AL, Metropolitan Statistical Area
## 15                                    Bismarck, ND, Metropolitan Statistical Area
## 16                                  Boise City, ID, Metropolitan Statistical Area
## 17                                              Boston, MA, Metropolitan Division
## 18           Buffalo-Cheektowaga-Niagara Falls, NY, Metropolitan Statistical Area
## 19                 Burlington-South Burlington, VT, Metropolitan Statistical Area
## 20                         Cambridge-Newton-Framingham, MA, Metropolitan Division
## 21                                              Camden, NJ, Metropolitan Division
## 22                                Cedar Rapids, IA, Metropolitan Statistical Area
## 23                 Charleston-North Charleston, SC, Metropolitan Statistical Area
## 24                                  Charleston, WV, Metropolitan Statistical Area
## 25               Charlotte-Concord-Gastonia, NC-SC, Metropolitan Statistical Area
## 26                              Chattanooga, TN-GA, Metropolitan Statistical Area
## 27              Chicago-Naperville-Elgin, IL-IN-WI, Metropolitan Statistical Area
## 28                            Cincinnati, OH-KY-IN, Metropolitan Statistical Area
## 29                        Claremont-Lebanon, NH-VT, Micropolitan Statistical Area
## 30                            Cleveland-Elyria, OH, Metropolitan Statistical Area
## 31                       College Station-Bryan, TX, Metropolitan Statistical Area
## 32                            Colorado Springs, CO, Metropolitan Statistical Area
## 33                                    Columbia, SC, Metropolitan Statistical Area
## 34                                    Columbus, OH, Metropolitan Statistical Area
## 35                              Corpus Christi, TX, Metropolitan Statistical Area
## 36          Crestview-Fort Walton Beach-Destin, FL, Metropolitan Statistical Area
## 37                               Cumberland, MD-WV, Metropolitan Statistical Area
## 38                                 Dallas-Plano-Irving, TX, Metropolitan Division
## 39                                      Dayton, OH, Metropolitan Statistical Area
## 40          Deltona-Daytona Beach-Ormond Beach, FL, Metropolitan Statistical Area
## 41                      Denver-Aurora-Lakewood, CO, Metropolitan Statistical Area
## 42                  Des Moines-West Des Moines, IA, Metropolitan Statistical Area
## 43                                   Duluth, MN-WI, Metropolitan Statistical Area
## 44                       Dutchess County-Putnam County, NY, Metropolitan Division
## 45                                     El Paso, TX, Metropolitan Statistical Area
## 46                                    Fargo, ND-MN, Metropolitan Statistical Area
## 47           Fayetteville-Springdale-Rogers, AR-MO, Metropolitan Statistical Area
## 48                                  Fort Wayne, IN, Metropolitan Statistical Area
## 49                                Fort Worth-Arlington, TX, Metropolitan Division
## 50                                 Gainesville, FL, Metropolitan Statistical Area
## 51                                 Glens Falls, NY, Metropolitan Statistical Area
## 52                              Grand Forks, ND-MN, Metropolitan Statistical Area
## 53                                Grand Island, NE, Metropolitan Statistical Area
## 54                        Grand Rapids-Wyoming, MI, Metropolitan Statistical Area
## 55                 Greenville-Anderson-Mauldin, SC, Metropolitan Statistical Area
## 56                   Hagerstown-Martinsburg, MD-WV, Metropolitan Statistical Area
## 57        Hartford-West Hartford-East Hartford, CT, Metropolitan Statistical Area
## 58        Hilton Head Island-Bluffton-Beaufort, SC, Metropolitan Statistical Area
## 59            Houston-The Woodlands-Sugar Land, TX, Metropolitan Statistical Area
## 60                    Huntington-Ashland, WV-KY-OH, Metropolitan Statistical Area
## 61                Indianapolis-Carmel-Anderson, IN, Metropolitan Statistical Area
## 62                                     Jackson, MS, Metropolitan Statistical Area
## 63                                Jacksonville, FL, Metropolitan Statistical Area
## 64                              Kansas City, MO-KS, Metropolitan Statistical Area
## 65                Kingsport-Bristol-Bristol, TN-VA, Metropolitan Statistical Area
## 66                                   Knoxville, TN, Metropolitan Statistical Area
## 67                        Lansing-East Lansing, MI, Metropolitan Statistical Area
## 68                                     Lincoln, NE, Metropolitan Statistical Area
## 69        Little Rock-North Little Rock-Conway, AR, Metropolitan Statistical Area
## 70                                    Logan, UT-ID, Metropolitan Statistical Area
## 71              Los Angeles-Long Beach-Anaheim, CA, Metropolitan Statistical Area
## 72              Louisville/Jefferson County, KY-IN, Metropolitan Statistical Area
## 73                               Memphis, TN-MS-AR, Metropolitan Statistical Area
## 74       Miami-Fort Lauderdale-West Palm Beach, FL, Metropolitan Statistical Area
## 75               Milwaukee-Waukesha-West Allis, WI, Metropolitan Statistical Area
## 76         Minneapolis-St. Paul-Bloomington, MN-WI, Metropolitan Statistical Area
## 77                                       Minot, ND, Micropolitan Statistical Area
## 78       Montgomery County-Bucks County-Chester County, PA, Metropolitan Division
## 79   Myrtle Beach-Conway-North Myrtle Beach, SC-NC, Metropolitan Statistical Area
## 80  Nashville-Davidson--Murfreesboro--Franklin, TN, Metropolitan Statistical Area
## 81                        Nassau County-Suffolk County, NY, Metropolitan Division
## 82                        New Orleans-Metairie, LA, Metropolitan Statistical Area
## 83                New York-Jersey City-White Plains, NY-NJ, Metropolitan Division
## 84                                           Newark, NJ-PA, Metropolitan Division
## 85                                     Norfolk, NE, Micropolitan Statistical Area
## 86                                North Platte, NE, Micropolitan Statistical Area
## 87               North Port-Sarasota-Bradenton, FL, Metropolitan Statistical Area
## 88                            Oakland-Hayward-Berkeley, CA, Metropolitan Division
## 89                            Ogden-Clearfield, UT, Metropolitan Statistical Area
## 90                               Oklahoma City, OK, Metropolitan Statistical Area
## 91                     Omaha-Council Bluffs, NE-IA, Metropolitan Statistical Area
## 92                   Orlando-Kissimmee-Sanford, FL, Metropolitan Statistical Area
## 93                                 Panama City, FL, Metropolitan Statistical Area
## 94                  Pensacola-Ferry Pass-Brent, FL, Metropolitan Statistical Area
## 95                                        Philadelphia, PA, Metropolitan Division
## 96                     Phoenix-Mesa-Scottsdale, AZ, Metropolitan Statistical Area
## 97                                  Pittsburgh, PA, Metropolitan Statistical Area
## 98                              Port St. Lucie, FL, Metropolitan Statistical Area
## 99                     Portland-South Portland, ME, Metropolitan Statistical Area
## 100            Portland-Vancouver-Hillsboro, OR-WA, Metropolitan Statistical Area
## 101                      Providence-Warwick, RI-MA, Metropolitan Statistical Area
## 102                                 Provo-Orem, UT, Metropolitan Statistical Area
## 103                                    Raleigh, NC, Metropolitan Statistical Area
## 104                                 Rapid City, SD, Metropolitan Statistical Area
## 105                                       Reno, NV, Metropolitan Statistical Area
## 106                                   Richmond, VA, Metropolitan Statistical Area
## 107           Riverside-San Bernardino-Ontario, CA, Metropolitan Statistical Area
## 108                                  Rochester, MN, Metropolitan Statistical Area
## 109                                  Rochester, NY, Metropolitan Statistical Area
## 110                 Rockingham County-Strafford County, NH, Metropolitan Division
## 111        Sacramento--Roseville--Arden-Arcade, CA, Metropolitan Statistical Area
## 112                                      Salem, OR, Metropolitan Statistical Area
## 113                               Salisbury, MD-DE, Metropolitan Statistical Area
## 114                             Salt Lake City, UT, Metropolitan Statistical Area
## 115                  San Antonio-New Braunfels, TX, Metropolitan Statistical Area
## 116     San Francisco-Redwood City-South San Francisco, CA, Metropolitan Division
## 117             San Jose-Sunnyvale-Santa Clara, CA, Metropolitan Statistical Area
## 118                   San Juan-Carolina-Caguas, PR, Metropolitan Statistical Area
## 119                                Scottsbluff, NE, Micropolitan Statistical Area
## 120                           Seattle-Bellevue-Everett, WA, Metropolitan Division
## 121                  Silver Spring-Frederick-Rockville, MD, Metropolitan Division
## 122                           Sioux City, IA-NE-SD, Metropolitan Statistical Area
## 123                                Sioux Falls, SD, Metropolitan Statistical Area
## 124                                Spartanburg, SC, Metropolitan Statistical Area
## 125                     Spokane-Spokane Valley, WA, Metropolitan Statistical Area
## 126                                Springfield, MA, Metropolitan Statistical Area
## 127                                  St. Cloud, MN, Metropolitan Statistical Area
## 128                               St. Louis, MO-IL, Metropolitan Statistical Area
## 129                                   Syracuse, NY, Metropolitan Statistical Area
## 130                                Tallahassee, FL, Metropolitan Statistical Area
## 131            Tampa-St. Petersburg-Clearwater, FL, Metropolitan Statistical Area
## 132                                     Toledo, OH, Metropolitan Statistical Area
## 133                                     Topeka, KS, Metropolitan Statistical Area
## 134                                      Tulsa, OK, Metropolitan Statistical Area
## 135                                 Tuscaloosa, AL, Metropolitan Statistical Area
## 136                                 Utica-Rome, NY, Metropolitan Statistical Area
## 137     Virginia Beach-Norfolk-Newport News, VA-NC, Metropolitan Statistical Area
## 138                       Warren-Troy-Farmington Hills, MI, Metropolitan Division
## 139           Washington-Arlington-Alexandria, DC-VA-MD-WV, Metropolitan Division
## 140                              Wichita Falls, TX, Metropolitan Statistical Area
## 141                                    Wichita, KS, Metropolitan Statistical Area
## 142                                   Wilmington, DE-MD-NJ, Metropolitan Division
## 143                               Worcester, MA-CT, Metropolitan Statistical Area
##     name.Freq    id
## 1        2901 10580
## 2        1300 10740
## 3         675 10900
## 4        1028 11260
## 5        2444 12060
## 6         849 12260
## 7        1567 12420
## 8        6718 12580
## 9         623 12940
## 10        543 13220
## 11        527 13620
## 12        578 13740
## 13        909 13780
## 14       1219 13820
## 15       1049 13900
## 16       1361 14260
## 17       2369 14454
## 18       1227 15380
## 19       1555 15540
## 20       2813 15764
## 21       1141 15804
## 22        515 16300
## 23       1407 16620
## 24        940 16700
## 25       1819 16740
## 26        532 16860
## 27       4209 16980
## 28       1766 17140
## 29       1806 17200
## 30       1036 17460
## 31        527 17780
## 32       1614 17820
## 33       1259 17900
## 34       1915 18140
## 35        539 18580
## 36       1063 18880
## 37        517 19060
## 38       1433 19124
## 39        592 19380
## 40       1192 19660
## 41       6600 19740
## 42       1302 19780
## 43        941 20260
## 44       1092 20524
## 45        541 21340
## 46       1221 22020
## 47        795 22220
## 48        651 23060
## 49        528 23104
## 50        892 23540
## 51       1132 24020
## 52        549 24220
## 53        731 24260
## 54       1037 24340
## 55       1456 24860
## 56       1141 25180
## 57       3757 25540
## 58        668 25940
## 59       1843 26420
## 60       1334 26580
## 61       3331 26900
## 62        596 27140
## 63       3015 27260
## 64       4398 28140
## 65        586 28700
## 66        695 28940
## 67        618 29620
## 68       1567 30700
## 69       1182 30780
## 70        569 30860
## 71       3512 31080
## 72       3668 31140
## 73        925 32820
## 74       2312 33100
## 75       1271 33340
## 76       8557 33460
## 77        635 33500
## 78        598 33874
## 79        913 34820
## 80        997 34980
## 81       1188 35004
## 82       1433 35084
## 83       8338 35380
## 84       2498 35614
## 85        665 35740
## 86        524 35820
## 87       1144 35840
## 88        809 36084
## 89       2431 36260
## 90       1964 36420
## 91       3234 36540
## 92       2688 36740
## 93       1098 37460
## 94       1105 37860
## 95        920 37964
## 96       5352 38060
## 97       1559 38300
## 98        993 38860
## 99       2740 38900
## 100      3244 38940
## 101      6240 39300
## 102      1503 39340
## 103       618 39580
## 104       956 39660
## 105      1389 39900
## 106      1417 40060
## 107      1119 40140
## 108       651 40340
## 109      2986 40380
## 110      1575 40484
## 111       824 40900
## 112       551 41060
## 113      2419 41180
## 114      3039 41420
## 115       558 41540
## 116       510 41620
## 117       529 41700
## 118      3738 41884
## 119       593 41940
## 120      5121 41980
## 121      3306 42420
## 122       905 42644
## 123      1073 43524
## 124       527 43580
## 125      1331 43620
## 126       814 43900
## 127       656 44060
## 128      2226 44140
## 129      1684 45060
## 130      2096 45220
## 131      2557 45300
## 132       705 45780
## 133      1162 45820
## 134      1624 46140
## 135       548 46220
## 136      1008 46540
## 137      1706 47260
## 138      2459 47664
## 139      9170 47894
## 140       527 48620
## 141      2567 48660
## 142      2292 48864
## 143      1572 49340
length(table(brfss16m$mmsa))
## [1] 143
subdb <- brfss16m %>%
  dplyr::select(sleephrs, sleephrsz, college, married, laborforce, exercise, mmsa, mmsaname) %>%
  filter(complete.cases(.))

To begin with, we need to determine if there is variation between the msa’s to justify proceeding with multi-level modeling

fit.an<-lm(sleephrsz~as.factor(mmsa), subdb)
anova(fit.an)
## Analysis of Variance Table
## 
## Response: sleephrsz
##                     Df Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(mmsa)    142    907  6.3893  6.4555 < 2.2e-16 ***
## Residuals       241464 238985  0.9897                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#summary(fit.an)

There is significant variance between msa’s; therefore proceeding with multi-level modeling is justified.

1. Estimate of random intercept model using mmsa only.

Variance and Interclass Correlation (ICC) results below.

library(lme4)
library(lmerTest)
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
library(arm)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## arm (Version 1.10-1, built: 2018-4-12)
## Working directory is /Users/AlexMora/Documents/1A_DEM Doctorate/DEM 7283
## 
## Attaching package: 'arm'
## The following object is masked from 'package:car':
## 
##     logit
fit1<-glmer(sleephrsz~(1|mmsa), data=subdb, na.action = na.omit)
## Warning in glmer(sleephrsz ~ (1 | mmsa), data = subdb, na.action =
## na.omit): calling glmer() with family=gaussian (identity link) as a
## shortcut to lmer() is deprecated; please call lmer() directly
arm::display(fit1, detail=T)
## lme4::lmer(formula = sleephrsz ~ (1 | mmsa), data = subdb, na.action = na.omit)
##             coef.est coef.se t value
## (Intercept) 0.01     0.01    1.23   
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  mmsa     (Intercept) 0.06    
##  Residual             0.99    
## ---
## number of obs: 241607, groups: mmsa, 143
## AIC = 683407, DIC = 683383.5
## deviance = 683392.2
summary(fit1 <- lmer(sleephrsz ~ (1|mmsa),data=subdb, na.action = na.omit))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: sleephrsz ~ (1 | mmsa)
##    Data: subdb
## 
## REML criterion at convergence: 683400.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.2972 -0.6992 -0.0165  0.6411 11.7869 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  mmsa     (Intercept) 0.003069 0.0554  
##  Residual             0.989738 0.9949  
## Number of obs: 241607, groups:  mmsa, 143
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)
## (Intercept) 6.458e-03  5.256e-03 1.374e+02   1.229    0.221
library(sjstats)
## 
## Attaching package: 'sjstats'
## The following objects are masked from 'package:survey':
## 
##     cv, deff
#rand(fit1)
#re_var(fit1)
re_var(fit1, adjusted = TRUE)
## 
## Variance Components of Mixed Models
## 
## Family : gaussian (identity)
## Formula: list(conditional = sleephrsz ~ (1 | mmsa), random = ~1 | mmsa)
## 
##          fixed: 0.000
##         random: 0.003
##       residual: 0.990
##     dispersion: 0.000
##   distribution: 0.990
icc(fit1)
## 
## Intraclass Correlation Coefficient for Linear mixed model
## 
## Family : gaussian (identity)
## Formula: sleephrsz ~ (1 | mmsa)
## 
##   ICC (mmsa): 0.0031

There is a low amount of variance in our outcome variable due to mmsa’s. Per the ICC a small proportion of the variance can be explained by the group structure. MMSA very minimally explains number of hours of sleep.

mmsameans<-aggregate(cbind(sleephrs)~mmsaname, subdb, mean) 
summary(mmsameans)
##    mmsaname            sleephrs    
##  Length:143         Min.   :6.783  
##  Class :character   1st Qu.:6.996  
##  Mode  :character   Median :7.045  
##                     Mean   :7.050  
##                     3rd Qu.:7.107  
##                     Max.   :7.268
head(mmsameans)
##                                                           mmsaname
## 1       Albany-Schenectady-Troy, NY, Metropolitan Statistical Area
## 2                   Albuquerque, NM, Metropolitan Statistical Area
## 3 Allentown-Bethlehem-Easton, PA-NJ, Metropolitan Statistical Area
## 4                     Anchorage, AK, Metropolitan Statistical Area
## 5 Atlanta-Sandy Springs-Roswell, GA, Metropolitan Statistical Area
## 6    Augusta-Richmond County, GA-SC, Metropolitan Statistical Area
##   sleephrs
## 1 6.975506
## 2 7.102686
## 3 6.938066
## 4 7.081571
## 5 7.016532
## 6 6.975728
#stargazer::stargazer(mmsameans, type = "html", summary = FALSE, rownames = FALSE)

2. Multi-level model with both mmsa and individual level variables.

fit2<-lmer(sleephrsz~(1|mmsa)+college+married+laborforce+exercise, data=subdb, na.action=na.omit)
arm::display(fit2, detail=T)
## lmer(formula = sleephrsz ~ (1 | mmsa) + college + married + laborforce + 
##     exercise, data = subdb, na.action = na.omit)
##                            coef.est coef.se t value
## (Intercept)                 -0.07     0.01   -7.18 
## collegeno college            0.01     0.01    1.44 
## marriednot married          -0.05     0.00  -11.88 
## laborforcenot in lab force   0.21     0.00   51.94 
## exerciseno exercise         -0.04     0.00   -7.26 
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  mmsa     (Intercept) 0.05    
##  Residual             0.99    
## ---
## number of obs: 241607, groups: mmsa, 143
## AIC = 680689, DIC = 680587.5
## deviance = 680631.2
summary(fit2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## sleephrsz ~ (1 | mmsa) + college + married + laborforce + exercise
##    Data: subdb
## 
## REML criterion at convergence: 680674.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.4230 -0.6276  0.0318  0.6022 11.9551 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  mmsa     (Intercept) 0.002799 0.0529  
##  Residual             0.978545 0.9892  
## Number of obs: 241607, groups:  mmsa, 143
## 
## Fixed effects:
##                              Estimate Std. Error         df t value
## (Intercept)                -7.075e-02  9.856e-03  1.918e+03  -7.178
## collegeno college           1.181e-02  8.212e-03  2.411e+05   1.438
## marriednot married         -4.843e-02  4.078e-03  2.412e+05 -11.876
## laborforcenot in lab force  2.144e-01  4.128e-03  2.405e+05  51.943
## exerciseno exercise        -3.528e-02  4.856e-03  2.409e+05  -7.265
##                            Pr(>|t|)    
## (Intercept)                1.01e-12 ***
## collegeno college              0.15    
## marriednot married          < 2e-16 ***
## laborforcenot in lab force  < 2e-16 ***
## exerciseno exercise        3.74e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cllgnc mrrdnm lbrilf
## collegncllg -0.813                     
## marrdntmrrd -0.223  0.061              
## lbrfrcntilf -0.210  0.059 -0.062       
## exrcsnexrcs -0.175  0.117 -0.071 -0.119

Not married and not exercising is associated with lower number of hours of sleep, while not participating in the labor force is associated with in increase number of hours of sleep. There was a positive trend with less than college education, however this was not statistically significant.

#rand(fit2)
#re_var(fit2)
re_var(fit2, adjusted = TRUE)
## 
## Variance Components of Mixed Models
## 
## Family : gaussian (identity)
## Formula: list(conditional = sleephrsz ~ (1 | mmsa) + college + married + laborforce + exercise, random = ~1 | mmsa)
## 
##          fixed: 0.011
##         random: 0.003
##       residual: 0.979
##     dispersion: 0.000
##   distribution: 0.979
icc(fit2)
## 
## Intraclass Correlation Coefficient for Linear mixed model
## 
## Family : gaussian (identity)
## Formula: sleephrsz ~ (1 | mmsa) + college + married + laborforce + exercise
## 
##   ICC (mmsa): 0.0029
anova(fit1, fit2, test = "Chisq")
## refitting model(s) with ML (instead of REML)
## Data: subdb
## Models:
## fit1: sleephrsz ~ (1 | mmsa)
## fit2: sleephrsz ~ (1 | mmsa) + college + married + laborforce + exercise
##      Df    AIC    BIC  logLik deviance Chisq Chi Df Pr(>Chisq)    
## fit1  3 683398 683429 -341696   683392                            
## fit2  7 680645 680718 -340316   680631  2761      4  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

There is a benefit in adding individual level variables (Model 2, fit2) to increase model fit in compared to Model 1 (fit1). This can be denoted by significant findings and a lower AIC.