This week we were basically focused on working through the Logistic Regression Homework.
## Week 6 - Stat 413
library(stableGR)
## Warning: package 'stableGR' was built under R version 4.0.4
## Loading required package: mcmcse
## Warning: package 'mcmcse' was built under R version 4.0.4
## mcmcse: Monte Carlo Standard Errors for MCMC
## Version 1.4-1 created on 2020-01-29.
## copyright (c) 2012, James M. Flegal, University of California, Riverside
## John Hughes, University of Colorado, Denver
## Dootika Vats, University of Warwick
## Ning Dai, University of Minnesota
## For citation information, type citation("mcmcse").
## Type help("mcmcse-package") to get started.
library(faraway)
## Warning: package 'faraway' was built under R version 4.0.3
# Load the titanic.complete data set and keep a working copy named `titanic`;
# every later step reads from (and adds columns to) this copy.
data("titanic.complete")
titanic <- titanic.complete
# Open the data set's help page for the variable descriptions.
help("titanic.complete")
## starting httpd help server ... done
# Proportion of passengers who survived within each sex. The outer parentheses
# print the result as well as storing it.
(meansurvivalsex <- aggregate(Survived ~ Sex, data = titanic, FUN = mean))
## Sex Survived
## 1 female 0.7528958
## 2 male 0.2052980
# Proportion of passengers who survived within each passenger class.
(meansurvivalclass <- aggregate(Survived ~ Pclass, data = titanic, FUN = mean))
## Pclass Survived
## 1 1 0.6521739
## 2 2 0.4797688
## 3 3 0.2394366
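From above, I can see the mean survival rate by sex and by class: females survived at a much higher rate than males (0.753 vs. 0.205), and the survival rate falls from first class (0.652) to second class (0.480) to third class (0.239).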
# Intercept-only (null) logistic regression for survival; the outer parentheses
# print the fitted model as well as storing it in `mod2`.
# NOTE(review): a `data = titanic` argument with a bare `Survived ~ 1` formula
# would be the conventional form; left as-is because the printed Call echoes
# this exact expression.
(mod2 <- glm(titanic$Survived~1, family=binomial))
##
## Call: glm(formula = titanic$Survived ~ 1, family = binomial)
##
## Coefficients:
## (Intercept)
## -0.3868
##
## Degrees of Freedom: 711 Total (i.e. Null); 711 Residual
## Null Deviance: 960.9
## Residual Deviance: 960.9 AIC: 962.9
# Overall odds of survival: exponentiate the intercept (log-odds scale).
exp(coef(mod2)[1])
## (Intercept)
## 0.6792453
# Overall probability of survival: inverse logit of the intercept.
ilogit(coef(mod2)[1])
## (Intercept)
## 0.4044944
# 95% profile-based confidence interval for the intercept (log-odds scale).
(ci <- confint(mod2, parm = 1, level = 0.95))
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## -0.5372197 -0.2377555
# Same interval on the odds scale. Exponentiate the interval already held in
# `ci` instead of profiling the model a second time; `ci` ends up holding the
# odds-scale limits, exactly as before.
(ci <- exp(ci))
## 2.5 % 97.5 %
## 0.5843707 0.7883955
# Adult indicator: 1 when Age is strictly greater than 18, otherwise 0
# (so an 18-year-old is coded as a child).
titanic$Adult <- as.numeric(titanic$Age > 18)
# Logistic regression of survival on the adult indicator; the outer parentheses
# print the fit as well as storing it in `mod3`.
# NOTE(review): `glm(Survived ~ Adult, data = titanic, ...)` would give cleaner
# coefficient names; left as-is because the printed Call and coefficient labels
# below echo this exact expression.
(mod3 <- glm(titanic$Survived~titanic$Adult, family=binomial))
##
## Call: glm(formula = titanic$Survived ~ titanic$Adult, family = binomial)
##
## Coefficients:
## (Intercept) titanic$Adult
## 0.01439 -0.50201
##
## Degrees of Freedom: 711 Total (i.e. Null); 710 Residual
## Null Deviance: 960.9
## Residual Deviance: 954 AIC: 958
# Coefficient table with standard errors, z statistics and p-values.
summary(mod3)
##
## Call:
## glm(formula = titanic$Survived ~ titanic$Adult, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.1835 -0.9785 -0.9785 1.3902 1.3902
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.01439 0.16964 0.085 0.93241
## titanic$Adult -0.50201 0.19022 -2.639 0.00831 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 960.90 on 711 degrees of freedom
## Residual deviance: 953.96 on 710 degrees of freedom
## AIC: 957.96
##
## Number of Fisher Scoring iterations: 4
# Odds of survival for a child (Adult == 0): exponentiated intercept.
(num1 <- exp(coef(mod3)[1]))
## (Intercept)
## 1.014493
# Odds of survival for an adult (Adult == 1): exp(intercept + Adult slope).
# Taken from the fitted coefficients rather than from hand-typed, rounded
# values, so the result does not carry transcription or rounding error.
exp(sum(coef(mod3)))
## [1] 0.6140845
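The logistic regression equation is $\log\left(\frac{p}{1-p}\right) = 0.0144 - 0.5020 \cdot \text{Adult}$, where $\text{Adult} = 1$ for passengers older than 18 and $0$ otherwise. Exponentiating the log-odds, the odds of a child surviving are 1.0145 and the odds of an adult surviving are 0.6141.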
# Logistic regression of survival on sex; the coefficient labelled
# `titanic$Sexmale` below is the male-vs-female difference in log-odds.
# The outer parentheses print the fit as well as storing it in `mod4`.
# NOTE(review): `glm(Survived ~ Sex, data = titanic, ...)` would give cleaner
# coefficient names; left as-is because the printed Call and coefficient labels
# below echo this exact expression.
(mod4 <- glm(titanic$Survived~titanic$Sex, family=binomial))
##
## Call: glm(formula = titanic$Survived ~ titanic$Sex, family = binomial)
##
## Coefficients:
## (Intercept) titanic$Sexmale
## 1.114 -2.468
##
## Degrees of Freedom: 711 Total (i.e. Null); 710 Residual
## Null Deviance: 960.9
## Residual Deviance: 749.6 AIC: 753.6
# Coefficient table with standard errors, z statistics and p-values.
summary(mod4)
##
## Call:
## glm(formula = titanic$Survived ~ titanic$Sex, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6721 -0.6779 -0.6779 0.7534 1.7795
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.1141 0.1441 7.734 1.04e-14 ***
## titanic$Sexmale -2.4676 0.1852 -13.327 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 960.90 on 711 degrees of freedom
## Residual deviance: 749.57 on 710 degrees of freedom
## AIC: 753.57
##
## Number of Fisher Scoring iterations: 4
# Odds of survival for a female passenger: exponentiated intercept.
(num3 <- exp(coef(mod4)[1]))
## (Intercept)
## 3.046875
# Odds ratio, male vs. female: exponentiated Sexmale coefficient.
(num4 <- exp(coef(mod4)[2]))
## titanic$Sexmale
## 0.08478632
# Odds of survival for a male passenger: exp(intercept + Sexmale), i.e.
# num3 * num4. Computed from the fitted coefficients; typing the rounded
# values 1.114 and -2.468 by hand gives 0.2582 instead, purely from rounding.
exp(sum(coef(mod4)))
## [1] 0.2583333
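The odds of a female surviving are 3.0469 and the odds of a male surviving are about 0.258; equivalently, the odds of survival for males are only about 0.085 times the odds for females.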
# Logistic regression of survival on passenger class; the coefficients labelled
# Pclass2 and Pclass3 below are differences in log-odds relative to the
# baseline class absorbed into the intercept.
# The outer parentheses print the fit as well as storing it in `mod5`.
# NOTE(review): `glm(Survived ~ Pclass, data = titanic, ...)` would give cleaner
# coefficient names; left as-is because the printed Call and coefficient labels
# below echo this exact expression.
(mod5 <- glm(titanic$Survived~titanic$Pclass, family=binomial))
##
## Call: glm(formula = titanic$Survived ~ titanic$Pclass, family = binomial)
##
## Coefficients:
## (Intercept) titanic$Pclass2 titanic$Pclass3
## 0.6286 -0.7096 -1.7844
##
## Degrees of Freedom: 711 Total (i.e. Null); 709 Residual
## Null Deviance: 960.9
## Residual Deviance: 868.1 AIC: 874.1
# Coefficient table with standard errors, z statistics and p-values.
summary(mod5)
##
## Call:
## glm(formula = titanic$Survived ~ titanic$Pclass, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4533 -0.7399 -0.7399 0.9246 1.6908
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.6286 0.1548 4.061 4.88e-05 ***
## titanic$Pclass2 -0.7096 0.2171 -3.269 0.00108 **
## titanic$Pclass3 -1.7844 0.1986 -8.987 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 960.90 on 711 degrees of freedom
## Residual deviance: 868.11 on 709 degrees of freedom
## AIC: 874.11
##
## Number of Fisher Scoring iterations: 4
# Odds of survival for a first-class passenger: exponentiated intercept.
(num5 <- exp(coef(mod5)[1]))
## (Intercept)
## 1.875
# Odds for second class: exp(intercept + Pclass2). Coefficients are selected by
# position (1 = intercept, 2 = Pclass2, 3 = Pclass3) from the fitted model
# rather than typed in by hand as rounded values.
exp(sum(coef(mod5)[c(1, 2)]))
## [1] 0.9222222
# Odds for third class: exp(intercept + Pclass3).
exp(sum(coef(mod5)[c(1, 3)]))
## [1] 0.3148148
The odds of a first class passenger surviving are 1.875. The odds of a second class passenger surviving are 0.9222. The odds of a third class passenger surviving are 0.3148.
# Additive logistic regression of survival on class, sex and the adult
# indicator. The intercept is the log-odds for the baseline combination
# (no Pclass2/Pclass3, not male, Adult == 0).
# The outer parentheses print the fit as well as storing it in `mod6`.
# NOTE(review): `glm(Survived ~ Pclass + Sex + Adult, data = titanic, ...)`
# would give cleaner coefficient names; left as-is because the printed Call and
# coefficient labels below echo this exact expression.
(mod6 <- glm(titanic$Survived~titanic$Pclass + titanic$Sex + titanic$Adult, family=binomial))
##
## Call: glm(formula = titanic$Survived ~ titanic$Pclass + titanic$Sex +
## titanic$Adult, family = binomial)
##
## Coefficients:
## (Intercept) titanic$Pclass2 titanic$Pclass3 titanic$Sexmale
## 3.0146 -0.9927 -2.1628 -2.5076
## titanic$Adult
## -0.7845
##
## Degrees of Freedom: 711 Total (i.e. Null); 707 Residual
## Null Deviance: 960.9
## Residual Deviance: 662.1 AIC: 672.1
# Coefficient table with standard errors, z statistics and p-values.
summary(mod6)
##
## Call:
## glm(formula = titanic$Survived ~ titanic$Pclass + titanic$Sex +
## titanic$Adult, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4749 -0.7035 -0.4088 0.7138 2.2467
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.0146 0.3349 9.001 < 2e-16 ***
## titanic$Pclass2 -0.9927 0.2597 -3.822 0.000132 ***
## titanic$Pclass3 -2.1628 0.2509 -8.621 < 2e-16 ***
## titanic$Sexmale -2.5076 0.2048 -12.243 < 2e-16 ***
## titanic$Adult -0.7845 0.2495 -3.144 0.001667 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 960.90 on 711 degrees of freedom
## Residual deviance: 662.13 on 707 degrees of freedom
## AIC: 672.13
##
## Number of Fisher Scoring iterations: 4
# Baseline odds under mod6 (first class, female, child): exponentiate the
# intercept, store it, then print it.
num6 <- exp(coef(mod6)["(Intercept)"])
num6
## (Intercept)
## 20.38168
# Odds under mod6, using the rounded estimates printed by summary(mod6):
# intercept 3.0146, Pclass2 -0.9927, Pclass3 -2.1628, Sexmale -2.5076,
# Adult -0.7845. (The class-only model's coefficients do not apply here.)
# Second-class female child: exp(intercept + Pclass2).
exp(3.0146 - 0.9927)
## [1] 7.552661
# Third-class female child: exp(intercept + Pclass3).
exp(3.0146 - 2.1628)
## [1] 2.343862
# First-class adult male: exp(intercept + Sexmale + Adult). sum() is applied to
# a single number here, so it does not change the value.
exp(sum(3.0146 - 2.5076 - 0.7845))
## [1] 0.7576756
The odds of surviving for a first class, female child are 20.3817. If you're in second class, your log odds of survival decrease by 0.993 relative to first class, holding sex and age group fixed. My odds of survival (Adult, M, 1st class) would be: exp(sum(3.0146 - 2.5076 - 0.7845)) = 0.75767.
# Base model for what follows: the additive class + sex + adult logistic
# regression (same formula and family as the mod6 fit), stored as `basemod`
# and printed by the outer parentheses.
(basemod <- glm(titanic$Survived~titanic$Pclass + titanic$Sex + titanic$Adult, family=binomial))
##
## Call: glm(formula = titanic$Survived ~ titanic$Pclass + titanic$Sex +
## titanic$Adult, family = binomial)
##
## Coefficients:
## (Intercept) titanic$Pclass2 titanic$Pclass3 titanic$Sexmale
## 3.0146 -0.9927 -2.1628 -2.5076
## titanic$Adult
## -0.7845
##
## Degrees of Freedom: 711 Total (i.e. Null); 707 Residual
## Null Deviance: 960.9
## Residual Deviance: 662.1 AIC: 672.1