This week we mainly focused on working through the Logistic Regression homework.

## Week 6 - Stat 413

library(stableGR)
## Warning: package 'stableGR' was built under R version 4.0.4
## Loading required package: mcmcse
## Warning: package 'mcmcse' was built under R version 4.0.4
## mcmcse: Monte Carlo Standard Errors for MCMC
## Version 1.4-1 created on 2020-01-29.
## copyright (c) 2012, James M. Flegal, University of California, Riverside
##                     John Hughes, University of Colorado, Denver
##                     Dootika Vats, University of Warwick
##                     Ning Dai, University of Minnesota
##  For citation information, type citation("mcmcse").
##  Type help("mcmcse-package") to get started.
library(faraway)
## Warning: package 'faraway' was built under R version 4.0.3
# Load the titanic.complete data set and keep a working copy under the shorter
# name `titanic`, which every later step of the analysis refers to.
# NOTE(review): presumably titanic.complete ships with one of the packages
# loaded above -- confirm which one.
data(titanic.complete)
titanic <- titanic.complete

Checking the variables of the data set we are working with:

# Open the help page for the data set to read its variable descriptions
# (interactive only; it produces no value used later).
?titanic.complete
## starting httpd help server ... done

1. Do EDA and calculate the proportion that survived in each class and for each sex

# Mean of Survived within each level of Sex. The outer parentheses print the
# result as well as assigning it.
# NOTE(review): the group mean equals the survival proportion only if Survived
# is coded 0/1 -- the values below are consistent with that, but confirm.
(meansurvivalsex <- aggregate(Survived ~ Sex, titanic, mean))
##      Sex  Survived
## 1 female 0.7528958
## 2   male 0.2052980
# Same summary, grouped by passenger class (Pclass) instead of Sex.
(meansurvivalclass <- aggregate(Survived ~ Pclass, titanic, mean))
##   Pclass  Survived
## 1      1 0.6521739
## 2      2 0.4797688
## 3      3 0.2394366

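From the output above, I can see the mean survival (i.e., the proportion that survived) by sex and by class: about 75.3% of females and 20.5% of males survived, and about 65.2% of first-class, 48.0% of second-class, and 23.9% of third-class passengers survived.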

2. Create a logistic regression for survival using no predictors

# Intercept-only (null) logistic regression for Survived. With no predictors,
# the single coefficient is the overall log odds of survival.
# NOTE(review): the formula references titanic$Survived directly instead of
# using `Survived ~ 1` with `data = titanic`; that is why the Call below echoes
# `titanic$Survived`. Consider the `data =` form so predict()/update() work
# cleanly and coefficient names stay short.
(mod2 <- glm(titanic$Survived~1, family=binomial))
## 
## Call:  glm(formula = titanic$Survived ~ 1, family = binomial)
## 
## Coefficients:
## (Intercept)  
##     -0.3868  
## 
## Degrees of Freedom: 711 Total (i.e. Null);  711 Residual
## Null Deviance:       960.9 
## Residual Deviance: 960.9     AIC: 962.9
# Exponentiating the intercept converts the log odds to the odds of survival.
exp(coef(mod2)[1])
## (Intercept) 
##   0.6792453
# Inverse logit of the intercept gives the survival probability.
# NOTE(review): ilogit() is presumably provided by the faraway package loaded
# above -- confirm.
ilogit(coef(mod2)[1])
## (Intercept) 
##   0.4044944
# 95% confidence interval for the intercept on the log-odds scale (the
# "profiling" message indicates a profile-likelihood interval).
(ci <- confint(mod2, parm=1, level = .95))
## Waiting for profiling to be done...
##      2.5 %     97.5 % 
## -0.5372197 -0.2377555
# Same interval exponentiated onto the odds scale. Note that this overwrites
# the log-odds interval stored in `ci` by the previous statement.
(ci <- exp(confint(mod2, parm=1, level = .95)))
## Waiting for profiling to be done...
##     2.5 %    97.5 % 
## 0.5843707 0.7883955
  1. log(p / (1 - p)) = -0.387
  2. The odds of surviving are 0.679
  3. The probability of survival is 0.4045
  4. We are 95% confident that the log odds of survival are between -0.5372 and -0.2378
  5. We are 95% confident that the odds of survival are between 0.5844 and 0.7884

3. Logistic regression to compare the odds of survival for children and adults: adults (1) and children (0)

# Indicator for adulthood: 1 when Age is strictly greater than 18, 0 otherwise.
# NOTE(review): because the comparison is `>`, passengers aged exactly 18 are
# coded as children (0) -- confirm that this cut-off is intended.
titanic$Adult <- as.numeric(titanic$Age > 18)
# Logistic regression of Survived on the Adult indicator. The intercept is the
# log odds for Adult = 0 (children); the Adult coefficient is the change in
# log odds for adults relative to children.
(mod3 <- glm(titanic$Survived~titanic$Adult, family=binomial))
## 
## Call:  glm(formula = titanic$Survived ~ titanic$Adult, family = binomial)
## 
## Coefficients:
##   (Intercept)  titanic$Adult  
##       0.01439       -0.50201  
## 
## Degrees of Freedom: 711 Total (i.e. Null);  710 Residual
## Null Deviance:       960.9 
## Residual Deviance: 954   AIC: 958
# Full coefficient table with standard errors and Wald z-tests.
summary(mod3)
## 
## Call:
## glm(formula = titanic$Survived ~ titanic$Adult, family = binomial)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1835  -0.9785  -0.9785   1.3902   1.3902  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept)    0.01439    0.16964   0.085  0.93241   
## titanic$Adult -0.50201    0.19022  -2.639  0.00831 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 960.90  on 711  degrees of freedom
## Residual deviance: 953.96  on 710  degrees of freedom
## AIC: 957.96
## 
## Number of Fisher Scoring iterations: 4
# Odds of survival for the reference group (Adult = 0, i.e. children).
(num1 <- exp(coef(mod3)[1]))
## (Intercept) 
##    1.014493
# Odds of survival for adults: exp(intercept + Adult coefficient).
# NOTE(review): the two numbers are typed in by hand from the summary above;
# exp(sum(coef(mod3))) would use the full-precision estimates and stay correct
# if the model is refitted.
exp(0.01439 - .50201)
## [1] 0.6140862

The logistic regression equation is log(p/(1-p)) = 0.0144 - 0.5020(Adult). The odds of a child surviving are 1.0145 and the odds of an adult surviving are 0.6141.

4. Logistic regression to compare odds of survival for women (1) and men (0)

# Logistic regression of Survived on Sex. The coefficient is labelled
# `titanic$Sexmale` in the output, so female is the reference level: the
# intercept is the log odds for females, and the Sexmale coefficient is the
# change in log odds for males relative to females.
(mod4 <- glm(titanic$Survived~titanic$Sex, family=binomial))
## 
## Call:  glm(formula = titanic$Survived ~ titanic$Sex, family = binomial)
## 
## Coefficients:
##     (Intercept)  titanic$Sexmale  
##           1.114           -2.468  
## 
## Degrees of Freedom: 711 Total (i.e. Null);  710 Residual
## Null Deviance:       960.9 
## Residual Deviance: 749.6     AIC: 753.6
# Full coefficient table with standard errors and Wald z-tests.
summary(mod4)
## 
## Call:
## glm(formula = titanic$Survived ~ titanic$Sex, family = binomial)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6721  -0.6779  -0.6779   0.7534   1.7795  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       1.1141     0.1441   7.734 1.04e-14 ***
## titanic$Sexmale  -2.4676     0.1852 -13.327  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 960.90  on 711  degrees of freedom
## Residual deviance: 749.57  on 710  degrees of freedom
## AIC: 753.57
## 
## Number of Fisher Scoring iterations: 4
# Odds of survival for the reference level (female).
(num3 <- exp(coef(mod4)[1]))
## (Intercept) 
##    3.046875
# Odds ratio of survival, male versus female.
(num4 <- exp(coef(mod4)[2]))
## titanic$Sexmale 
##      0.08478632
# Odds of survival for males: exp(intercept + Sexmale coefficient).
# NOTE(review): the coefficients are typed in by hand at three decimals;
# exp(sum(coef(mod4))) would avoid the rounding.
exp(1.114 - 2.468)
## [1] 0.2582054

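The odds of a female surviving are 3.0469 and the odds of a male surviving are 0.2582. The odds ratio (male vs. female) is 0.0848, so a male's odds of surviving are only about 8.5% of a female's. Note that R uses female as the reference level in this fit, i.e. the indicator is male = 1 and female = 0, the reverse of the coding stated in the question.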

5. Logistic regression to compare odds of survival for crew (class=0), first class passengers (1), second class passengers (2) and third class passengers (3)

# Logistic regression of Survived on passenger class. The output shows
# separate Pclass2 and Pclass3 coefficients, so Pclass enters as a categorical
# predictor with first class as the reference level: the intercept is the log
# odds for first class, and each coefficient is the change relative to it.
(mod5 <- glm(titanic$Survived~titanic$Pclass, family=binomial))
## 
## Call:  glm(formula = titanic$Survived ~ titanic$Pclass, family = binomial)
## 
## Coefficients:
##     (Intercept)  titanic$Pclass2  titanic$Pclass3  
##          0.6286          -0.7096          -1.7844  
## 
## Degrees of Freedom: 711 Total (i.e. Null);  709 Residual
## Null Deviance:       960.9 
## Residual Deviance: 868.1     AIC: 874.1
# Full coefficient table with standard errors and Wald z-tests.
summary(mod5)
## 
## Call:
## glm(formula = titanic$Survived ~ titanic$Pclass, family = binomial)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4533  -0.7399  -0.7399   0.9246   1.6908  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       0.6286     0.1548   4.061 4.88e-05 ***
## titanic$Pclass2  -0.7096     0.2171  -3.269  0.00108 ** 
## titanic$Pclass3  -1.7844     0.1986  -8.987  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 960.90  on 711  degrees of freedom
## Residual deviance: 868.11  on 709  degrees of freedom
## AIC: 874.11
## 
## Number of Fisher Scoring iterations: 4
# Odds of survival for the reference level (first class).
(num5 <- exp(coef(mod5)[1]))
## (Intercept) 
##       1.875
# Odds of survival for second class: exp(intercept + Pclass2 coefficient).
# NOTE(review): here and on the next statement the coefficients are typed in
# by hand from the summary; indexing coef(mod5) would avoid the rounding.
exp( 0.6286 - 0.7096)
## [1] 0.9221937
# Odds of survival for third class: exp(intercept + Pclass3 coefficient).
exp( 0.6286 - 1.7844)
## [1] 0.3148056

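The odds of a first-class passenger surviving are 1.875. The odds of a second-class passenger surviving are 0.9222. The odds of a third-class passenger surviving are 0.3148. (The fitted model has only Pclass levels 1, 2, and 3; there is no crew level in this data set, so crew cannot be compared.)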

6. Logistic regression with all three predictors

# Additive logistic regression with all three predictors (class, sex, adult
# indicator). Given the reference levels visible in the coefficient names, the
# intercept is the log odds for a first-class female with Adult = 0.
(mod6 <- glm(titanic$Survived~titanic$Pclass + titanic$Sex + titanic$Adult, family=binomial))
## 
## Call:  glm(formula = titanic$Survived ~ titanic$Pclass + titanic$Sex + 
##     titanic$Adult, family = binomial)
## 
## Coefficients:
##     (Intercept)  titanic$Pclass2  titanic$Pclass3  titanic$Sexmale  
##          3.0146          -0.9927          -2.1628          -2.5076  
##   titanic$Adult  
##         -0.7845  
## 
## Degrees of Freedom: 711 Total (i.e. Null);  707 Residual
## Null Deviance:       960.9 
## Residual Deviance: 662.1     AIC: 672.1
# Full coefficient table with standard errors and Wald z-tests.
summary(mod6)
## 
## Call:
## glm(formula = titanic$Survived ~ titanic$Pclass + titanic$Sex + 
##     titanic$Adult, family = binomial)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4749  -0.7035  -0.4088   0.7138   2.2467  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       3.0146     0.3349   9.001  < 2e-16 ***
## titanic$Pclass2  -0.9927     0.2597  -3.822 0.000132 ***
## titanic$Pclass3  -2.1628     0.2509  -8.621  < 2e-16 ***
## titanic$Sexmale  -2.5076     0.2048 -12.243  < 2e-16 ***
## titanic$Adult    -0.7845     0.2495  -3.144 0.001667 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 960.90  on 711  degrees of freedom
## Residual deviance: 662.13  on 707  degrees of freedom
## AIC: 672.13
## 
## Number of Fisher Scoring iterations: 4
# Odds of survival for the reference profile (first class, female, Adult = 0).
(num6 <- exp(coef(mod6)[1]))
## (Intercept) 
##    20.38168
# NOTE(review): the next two statements use 0.6286, -0.7096 and -1.7844, which
# are the coefficients of the class-only model (mod5), not of mod6. They
# reproduce the class-only odds from the previous question and do not describe
# this model; the mod6 equivalents would use 3.0146, -0.9927 and -2.1628.
exp(0.6286 - 0.7096)
## [1] 0.9221937
exp( 0.6286 - 1.7844)
## [1] 0.3148056
# Odds of survival for a first-class adult male under mod6:
# exp(intercept + Sexmale + Adult). The sum() wrapper receives a single
# already-computed number, so it has no effect on the result.
exp(sum(3.0146 - 2.5076 - 0.7845))
## [1] 0.7576756

The odds of surviving for a first-class female child are 20.3817. If you're in second class, your log odds of survival decrease by 0.993 relative to first class (holding sex and age group fixed). My odds of survival (adult, male, first class) would be: exp(sum(3.0146 - 2.5076 - 0.7845)) = 0.7577

7. Test for 2-way interaction terms. Use drop-in-deviance and forward selection.

# Main-effects model with the same formula as mod6, refitted under the name
# `basemod`.
# NOTE(review): presumably this is the baseline against which the 2-way
# interaction models are compared -- the comparison itself is not visible in
# this part of the file.
(basemod <- glm(titanic$Survived~titanic$Pclass + titanic$Sex + titanic$Adult, family=binomial))
## 
## Call:  glm(formula = titanic$Survived ~ titanic$Pclass + titanic$Sex + 
##     titanic$Adult, family = binomial)
## 
## Coefficients:
##     (Intercept)  titanic$Pclass2  titanic$Pclass3  titanic$Sexmale  
##          3.0146          -0.9927          -2.1628          -2.5076  
##   titanic$Adult  
##         -0.7845  
## 
## Degrees of Freedom: 711 Total (i.e. Null);  707 Residual
## Null Deviance:       960.9 
## Residual Deviance: 662.1     AIC: 672.1