library(sjPlot)
## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.2.13
## Current Matrix version is 1.2.12
## Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package
library(lme4)
## Loading required package: Matrix
library(haven)
library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(broom)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.1. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(questionr)
library(pander)
#importing data
load("C:/Users/tobik/OneDrive/Documents/DEM7383_Stats/brfss16.Rdata")
#The names in the data are very ugly, so I make them less ugly
nams<-names(brfss16)
#we see some names are lower case, some are upper and some have a little _ in the first position. This is a nightmare.
newnames<-gsub(pattern = "_",replacement =  "",x =  nams)
names(brfss16)<-tolower(newnames)

Checking where to find missing values:

colSums(is.na(brfss16))
##    state   fmonth    idate   imonth     iday    iyear dispcode    seqno 
##        0        0        0        0        0        0        0        0 
##      psu ctelenm1 pvtresd1 colghous stateres cellfon4   ladult numadult 
##        0   233290   234038   486262   236709   234038   486262   234051 
##   nummen numwomen ctelnum1 cellfon5   cadult pvtresd3 cclghous  cstate1 
##   234046   234046   252265   252266   252271   252265   485175   246342 
## landline  hhadult  genhlth physhlth menthlth poorhlth hlthpln1 persdoc2 
##   251640   260001        5        3        1   235696        0        1 
##  medcost checkup1 exerany2 sleptim1 cvdinfr4 cvdcrhd4 cvdstrk3  asthma3 
##        2        3        6        2        3        2        2        2 
##  asthnow chcscncr chcocncr chccopd1 havarth3 addepev2 chckidny diabete3 
##   420713        2        2        2        3        3        3        3 
## diabage2 lastden3 rmvteth3      sex  marital    educa renthom1 numhhol2 
##   420251        6        4        0       11        6        9   234055 
## numphon2  cpdemo1 veteran3  employ1 children  income2 internet  weight2 
##   471160   234059       65       73      121     3786     4920     6178 
##  height3 pregnant     deaf    blind   decide diffwalk diffdres diffalon 
##     6809   416898    12124    12987    13703    14310    14678    15267 
## smoke100 smokday2 stopsmk2 lastsmk2  usenow3 ecigaret  ecignow  alcday5 
##    16062   282147   417415   351519    16686    17418   411720    18348 
## avedrnk2 drnk3ge5 maxdrnks flushot6 flshtmy2 pneuvac3  tetanus fall12mn 
##   249223   249639   250034    22107   281847    23076    23573   146679 
## fallinj2 seatbelt drnkdri2   hadmam  howlong  hadpap2 lastpap2  hpvtest 
##   383838    25167   247187   225296   279045   225503   244482   226142 
## hplsttst hadhyst2 pcpsaad2 pcpsadi1 pcpsare1 psatest1  psatime pcpsars1 
##   419058   271408   334583   334711   334772   334840   398978   399044 
## bldstool lstblds3 hadsigm3 hadsgco1 lastsig3  hivtst6 hivtstd3 hivrisk4 
##   181611   381928   181924   264494   264557    32982   350195    34235 
## pdiabtst prediab1  insulin bldsugar feetchk2 doctdiab chkhemo3  feetchk 
##   294693   294692   476768   476768   476768   476769   476768   476814 
##  eyeexam  diabeye  diabedu painact2 qlmentl2 qlstres2  qlhlth2 medicare 
##   476768   476768   476768   476964   476971   476979   476989   432666 
## hlthcvr1 delaymed dlyother nocov121 lstcovrg drvisits medscost carercvd 
##   432667   429001        0   432668   482616   428980   428980   428980 
## medbill1 medadvic undrstnd  written caregiv1 crgvrel1 crgvlng1 crgvhrs1 
##   428980   395078   395157   395297   409710   471117   471137   471165 
## crgvprb2 crgvpers crgvhous crgvmst2 crgvexpt cimemlos  cdhouse cdassist 
##   471192   471216   471231   471264   425077   410160   477890   477897 
##   cdhelp cdsocial cddiscus ssbsugr2 ssbfrut2 calrinfo marijana usemrjna 
##   483805   477903   477913   418152   418236   474791   379483   480901 
## asthmage asattack aservist asdrvist asrchkup asactlim asymptom asnoslep 
##   485748   485931   486129   486130   485932   485933   485936   486041 
## asthmed3 asinhalr imfvplac hpvadvc2 hpvadsht shingle2 numburn2 cncrdiff 
##   485937   485937   470779   462620   483878   468928   462908   477896 
##  cncrage cncrtyp1 csrvtrt1 csrvdoc1  csrvsum csrvrtrn csrvinst csrvinsr 
##   478004   478012   478010   480151   480158   480161   482161   480165 
## csrvdein csrvclin csrvpain csrvctl1 profexam lengexam pcpsade1 pcdmdecn 
##   480166   480167   480170   485884   479759   480304   486303        0 
## sxorient trnsgndr rcsgendr rcsrltn2 casthdx2 casthno2 emtsuprt lsatisfy 
##   280869   281030   416701   416947   422065   478587   456766   456780 
## qlactlm2 useequip   qstver  qstlang   mscode    ststr    strwt  rawrake 
##   279078   279477        0       14   240028        0        0        0 
##  wt2rake  chispnc   crace1   cprace  cllcpwt  dualuse  dualcor  llcpwt2 
##        0   109413   412360   412360   416724        0   196720        0 
##   llcpwt   rfhlth  phys14d  ment14d  hcvu651  totinda    michd  ltasth1 
##        0        0        0        0        0        0     4125        0 
##  casthm1  asthms1  drdxar1  exteth2  alteth2  denvst2   prace1   mrace1 
##        0        0     2477        0   308872        0        4        4 
##  hispanc     race  raceg21  racegr3   raceg1  ageg5yr  age65yr    age80 
##        0        0        0        0     8683        0        0        0 
##     ageg    htin4     htm4    wtkg3     bmi5  bmi5cat   rfbmi5  chldcnt 
##        0    19133    16972    33530    39616    39616        0        0 
##   educag   incomg  smoker3  rfsmok3  ecigsts  curecig drnkany5  drocdy3 
##        0        0        0        0        0        0        0        0 
##  rfbing5  drnkwek  rfdrhv5  flshot6  pneumo2  rfseat2  rfseat3  drnkdrv 
##        0        0        0   308872   308872        0        0        0 
##  rfmam2y  mam5021  rfpap33  rfpsa21  rfblds3  col10yr  hfob3yr    fs5yr 
##   263226   354260   383917   334448   251512   256162   251512   375290 
##   fobtfs   crcrec  aidtst3 
##   272505   255147    32982

Checking on Skin Cancer & recoding

#Individual Levels

#Binary outcome: known instance of skin cancer
brfss16$skincancer<-recode(brfss16$chcscncr, recodes="1='1'; else='0'", as.factor=T)

#income grouping
brfss16$inc<-recode(brfss16$incomg, recodes = "9= NA;1='1_lt15k'; 2='2_15-25k';3='3_25-35k';4='4_35-50k';5='5_50kplus'", as.factor= T)
brfss16$inc<-as.ordered(brfss16$inc)


#education level
brfss16$educ<-recode(brfss16$educa,
recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA", as.factor=T)

#race/ethnicity
brfss16$raceeth<-recode(brfss16$racegr3, recodes="1='1_White';  2='2_Black'; 5='3_Hispanic'; 3:4='4_other'; else=NA",as.factor=T)

#Higher Level

#mscode density of living arrangements
brfss16$urbanstyle<-recode(brfss16$mscode, recodes=
            "1='Inner_City';
            2='City_Skirts';
            3='City_Couty';
            5='Rural_County';
            else=NA",
            as.factor=T)
brfss16<-brfss16%>%
  select(skincancer,inc,educ,raceeth,urbanstyle)%>%
  filter(complete.cases(.))

ANOVA

anova(glm(skincancer~urbanstyle,brfss16,family=binomial),test="Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: skincancer
## 
## Terms added sequentially (first to last)
## 
## 
##            Df Deviance Resid. Df Resid. Dev Pr(>Chi)  
## NULL                      198869     149335           
## urbanstyle  3   11.002    198866     149324  0.01171 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Yay, I found significance.

2) Estimating the multilevel

brfss16.fit<-glmer(skincancer~inc+educ+raceeth+(1|urbanstyle),family=binomial, brfss16, control=glmerControl(optimizer="Nelder_Mead",optCtrl=list(maxfun=2e5)))

Refit due to failure (changed optimizer after failure ub refit)

{r} #brfss16.fit<-refit(brfss16.fit) #

Without individual level variables

brfss16.fit2<-glmer(skincancer~(1|urbanstyle),family=binomial, data=brfss16)

ANOVA comparing with and without group level

anova(brfss16.fit,brfss16.fit2, test="Chisq")
## Data: brfss16
## Models:
## brfss16.fit2: skincancer ~ (1 | urbanstyle)
## brfss16.fit: skincancer ~ inc + educ + raceeth + (1 | urbanstyle)
##              Df    AIC    BIC logLik deviance  Chisq Chi Df Pr(>Chisq)    
## brfss16.fit2  2 149336 149357 -74666   149332                             
## brfss16.fit  13 143616 143749 -71795   143590 5741.9     11  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

3) Discussion

Skin cancer is better described when using the level of urbanization as group. The ANOVA comparing the two models with and without income, race/ethnicity, and education showed that the better fit is including those components.