This week we worked on the logistic regression homework assignment. I will share helpful pieces of code I used in doing this assingment.

library(stableGR)
## Loading required package: mcmcse
## mcmcse: Monte Carlo Standard Errors for MCMC
## Version 1.4-1 created on 2020-01-29.
## copyright (c) 2012, James M. Flegal, University of California, Riverside
##                     John Hughes, University of Colorado, Denver
##                     Dootika Vats, University of Warwick
##                     Ning Dai, University of Minnesota
##  For citation information, type citation("mcmcse").
##  Type help("mcmcse-package") to get started.
library(faraway)
data(titanic.complete)
Titanic <- titanic.complete

Write down the logistic regression model with no predictors.

mod1 <- glm(Titanic$Survived~1, family = binomial)
summary(mod1)
## 
## Call:
## glm(formula = Titanic$Survived ~ 1, family = binomial)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.018  -1.018  -1.018   1.345   1.345  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.38677    0.07636  -5.065 4.08e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 960.9  on 711  degrees of freedom
## Residual deviance: 960.9  on 711  degrees of freedom
## AIC: 962.9
## 
## Number of Fisher Scoring iterations: 4

One thing to note: Some questions may ask for the odds of Survival which is equal to p/1-p. If they ask for probability you can use the ilogit function or solve for p. 

SurvivalOdds <- exp(-.38677)
SurvivalProbability <- ilogit(-.38677)

Confidence intervals can be calculated using the confint function and interpreted using exponents.

CI <- confint(mod1, level = .95)
## Waiting for profiling to be done...
exp(CI)
##     2.5 %    97.5 % 
## 0.5843707 0.7883955

We can be 95% confident that the odds of surviving the Titanic are between exp(CI)[1] and exp(CI)[2]

##Create and Interpret a Logistic Model

attach(Titanic)
Titanic$Adult <- as.numeric(Titanic$Age > 18)
AdultSurvival <- glm(Survived~Titanic$Adult, family = binomial)
summary(AdultSurvival)
## 
## Call:
## glm(formula = Survived ~ Titanic$Adult, family = binomial)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1835  -0.9785  -0.9785   1.3902   1.3902  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept)    0.01439    0.16964   0.085  0.93241   
## Titanic$Adult -0.50201    0.19022  -2.639  0.00831 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 960.90  on 711  degrees of freedom
## Residual deviance: 953.96  on 710  degrees of freedom
## AIC: 957.96
## 
## Number of Fisher Scoring iterations: 4

log(p/1-p)= .01439 - .50201(Αdult)

exp(.01439)
## [1] 1.014494
exp(.01439-.50201)
## [1] 0.6140862

The odds of a child surviving are 1.014494 The odds of an adult surviving are .6141

#Breast Cancer

library(faraway)
data(wbca)

BigMod <- glm(Class~Adhes + BNucl + Chrom + Epith + Mitos + NNucl + Thick + UShap + USize, data = wbca, family = binomial)

If you need to drop unnecessary variables use the drop1 function.

drop1(BigMod, test = "F")
## Warning in drop1.glm(BigMod, test = "F"): F test assumes 'quasibinomial' family
## Single term deletions
## 
## Model:
## Class ~ Adhes + BNucl + Chrom + Epith + Mitos + NNucl + Thick + 
##     UShap + USize
##        Df Deviance    AIC  F value    Pr(>F)    
## <none>      89.464 109.46                       
## Adhes   1   98.844 116.84  70.3502 2.927e-16 ***
## BNucl   1  109.000 127.00 146.5261 < 2.2e-16 ***
## Chrom   1   99.841 117.84  77.8278 < 2.2e-16 ***
## Epith   1   89.613 107.61   1.1154  0.291286    
## Mitos   1   93.551 111.55  30.6517 4.433e-08 ***
## NNucl   1   95.204 113.20  43.0522 1.066e-10 ***
## Thick   1  110.239 128.24 155.8122 < 2.2e-16 ***
## UShap   1   90.627 108.63   8.7212  0.003255 ** 
## USize   1   89.523 107.52   0.4438  0.505505    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
BigishMod <- glm(Class~Adhes + BNucl + Chrom + Epith + Mitos + NNucl + Thick + UShap, data = wbca, family = binomial)
drop1(BigishMod, test = "F")
## Warning in drop1.glm(BigishMod, test = "F"): F test assumes 'quasibinomial'
## family
## Single term deletions
## 
## Model:
## Class ~ Adhes + BNucl + Chrom + Epith + Mitos + NNucl + Thick + 
##     UShap
##        Df Deviance    AIC  F value    Pr(>F)    
## <none>      89.523 107.52                       
## Adhes   1   99.042 115.04  71.4499 < 2.2e-16 ***
## BNucl   1  109.064 125.06 146.6793 < 2.2e-16 ***
## Chrom   1  100.153 116.15  79.7899 < 2.2e-16 ***
## Epith   1   89.662 105.66   1.0388 0.3084664    
## Mitos   1   93.552 109.55  30.2391 5.431e-08 ***
## NNucl   1   95.231 111.23  42.8438 1.177e-10 ***
## Thick   1  110.465 126.47 157.2002 < 2.2e-16 ***
## UShap   1   91.355 107.36  13.7466 0.0002265 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
SmallerMod <- glm(Class~Adhes + BNucl + Chrom + Mitos + NNucl + Thick + UShap, data = wbca, family = binomial)

#Lack of fit

pchisq(deviance(SmallerMod),lower.tail = FALSE, df = 673)
## [1] 1

We can say that the model does not have a lack of fit because of our low p value.

#Outliers

halfnorm(residuals(SmallerMod))

We have outliers at points 419 and 244