library(stableGR)
## Loading required package: mcmcse
## mcmcse: Monte Carlo Standard Errors for MCMC
## Version 1.4-1 created on 2020-01-29.
## copyright (c) 2012, James M. Flegal, University of California, Riverside
## John Hughes, University of Colorado, Denver
## Dootika Vats, University of Warwick
## Ning Dai, University of Minnesota
## For citation information, type citation("mcmcse").
## Type help("mcmcse-package") to get started.
data(titanic.complete)
titanic <- titanic.complete
?titanic.complete
attach(titanic)
maledata <- subset(titanic, Sex == "male")
femaledata <- subset(titanic, Sex == "female")
meanmalesurvival <- aggregate(Survived ~ Sex, titanic, mean)
meanclasssurvival <-aggregate(Survived ~ Pclass, titanic, mean)
myresp<-cbind(titanic$Survived, 1-titanic$Survived)
modT1 <- glm(myresp~1, family=binomial)
## exp(-.3868) or .679 is the log odds of surviving
library(faraway)
ilogit(coef(modT1))
## (Intercept)
## 0.4044944
ilogit(-.3868)
## [1] 0.4044879
#probability of surviving is 40.45% from the ilogit function
confint(modT1,parm = 1, level = .95)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## -0.5372197 -0.2377555
confint(modT1, level = .95)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## -0.5372197 -0.2377555
## 95% confident the log odds of surviving are between -.537 and -.238
exp(-0.5372197)
## [1] 0.5843707
exp(-0.2377555)
## [1] 0.7883954
## 95% confident the odds of surviving are between .584 and .788
titanic$Adult <- as.numeric(titanic$Age > 18)
attach(titanic)
## The following objects are masked from titanic (pos = 4):
##
## Age, Embarked, Fare, Parch, Pclass, Sex, SibSp, Survived
modT2 <- glm(myresp ~ Adult, family=binomial)
modT3 <- glm(Survived ~ Adult, family=binomial)
summary(modT2)
##
## Call:
## glm(formula = myresp ~ Adult, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.1835 -0.9785 -0.9785 1.3902 1.3902
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.01439 0.16964 0.085 0.93241
## Adult -0.50201 0.19022 -2.639 0.00831 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 960.90 on 711 degrees of freedom
## Residual deviance: 953.96 on 710 degrees of freedom
## AIC: 957.96
##
## Number of Fisher Scoring iterations: 4
summary(modT3)
##
## Call:
## glm(formula = Survived ~ Adult, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.1835 -0.9785 -0.9785 1.3902 1.3902
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.01439 0.16964 0.085 0.93241
## Adult -0.50201 0.19022 -2.639 0.00831 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 960.90 on 711 degrees of freedom
## Residual deviance: 953.96 on 710 degrees of freedom
## AIC: 957.96
##
## Number of Fisher Scoring iterations: 4
#the odds of surviving are 1.015 (exp(.01439)), the odds of a adult surviving are .614 (exp(.01439 - .50201))
titanic$Women <- as.numeric(titanic$Sex == "female")
attach(titanic)
## The following objects are masked from titanic (pos = 3):
##
## Adult, Age, Embarked, Fare, Parch, Pclass, Sex, SibSp,
## Survived
## The following objects are masked from titanic (pos = 5):
##
## Age, Embarked, Fare, Parch, Pclass, Sex, SibSp, Survived
modT4 <- glm(Survived ~ Women, family = binomial)
summary(modT4)
##
## Call:
## glm(formula = Survived ~ Women, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6721 -0.6779 -0.6779 0.7534 1.7795
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.3535 0.1163 -11.64 <2e-16 ***
## Women 2.4676 0.1852 13.33 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 960.90 on 711 degrees of freedom
## Residual deviance: 749.57 on 710 degrees of freedom
## AIC: 753.57
##
## Number of Fisher Scoring iterations: 4
## log (p/1-p) = -1.3535 + I()
#the odds of a male surviving are .258 (exp(-1.3535))
#the odds of a female surviving are 3.047 (exp(2.4676 - 1.3535))
modT5 <- glm(Survived ~ Pclass, family = binomial)
summary(modT5)
##
## Call:
## glm(formula = Survived ~ Pclass, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4533 -0.7399 -0.7399 0.9246 1.6908
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.6286 0.1548 4.061 4.88e-05 ***
## Pclass2 -0.7096 0.2171 -3.269 0.00108 **
## Pclass3 -1.7844 0.1986 -8.987 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 960.90 on 711 degrees of freedom
## Residual deviance: 868.11 on 709 degrees of freedom
## AIC: 874.11
##
## Number of Fisher Scoring iterations: 4
## log (p/1-p) = .6286 - .7096I(class 2) - 1.7844(class 3)
# The odds of someone from class 1 surviving are 1.875 or exp(.6286)
# The odds of someone from class 2 surviving are .922 or exp(.6286 - .7096)
# The odds of someone from class 3 surviving are .315 or exp(.6286 - 1.7844)
modT6 <- glm(Survived ~ Adult + Women + Pclass, family = binomial)
summary(modT6)
##
## Call:
## glm(formula = Survived ~ Adult + Women + Pclass, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4749 -0.7035 -0.4088 0.7138 2.2467
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.5070 0.2998 1.691 0.090805 .
## Adult -0.7845 0.2495 -3.144 0.001667 **
## Women 2.5076 0.2048 12.243 < 2e-16 ***
## Pclass2 -0.9927 0.2597 -3.822 0.000132 ***
## Pclass3 -2.1628 0.2509 -8.621 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 960.90 on 711 degrees of freedom
## Residual deviance: 662.13 on 707 degrees of freedom
## AIC: 672.13
##
## Number of Fisher Scoring iterations: 4
### log (p/1-p) = .5070 - .7845I(Adult) + 2.5076(Woman) - .9927 (class 2) - 2.1628 (class 3)
# If youre a woman
## As a class 1 male adult, I would have odds of .7577 odds of survival which is exp(.507 + -.7845 (adult))
titanic$Class <- as.numeric(titanic$Pclass)
attach(titanic)
## The following objects are masked from titanic (pos = 3):
##
## Adult, Age, Embarked, Fare, Parch, Pclass, Sex, SibSp,
## Survived, Women
## The following objects are masked from titanic (pos = 4):
##
## Adult, Age, Embarked, Fare, Parch, Pclass, Sex, SibSp,
## Survived
## The following objects are masked from titanic (pos = 6):
##
## Age, Embarked, Fare, Parch, Pclass, Sex, SibSp, Survived
AW <- Adult * Women
AC <- Adult * Class
WC <- Women * Class
AIC(modT3)
## [1] 957.9593
AIC(modT4)
## [1] 753.5699
AIC(modT5)
## [1] 874.112
#Testing AICs, we know gender is the best predictor and will start there
modT6 <- glm(Survived ~ Adult + Women, family = binomial)
modT7 <- glm(Survived ~ Women + Pclass, family = binomial)
modT9 <- glm(Survived ~ Sex, family = binomial)
summary(modT9)
##
## Call:
## glm(formula = Survived ~ Sex, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6721 -0.6779 -0.6779 0.7534 1.7795
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.1141 0.1441 7.734 1.04e-14 ***
## Sexmale -2.4676 0.1852 -13.327 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 960.90 on 711 degrees of freedom
## Residual deviance: 749.57 on 710 degrees of freedom
## AIC: 753.57
##
## Number of Fisher Scoring iterations: 4
AIC(modT9)
## [1] 753.5699
AIC(modT4)
## [1] 753.5699
AIC(modT6)
## [1] 754.6399
AIC(modT7)
## [1] 680.0589
modT8 <- glm(Survived ~ Adult + Women + Pclass , family = binomial)
AIC(modT6)
## [1] 754.6399
modT10 <- glm(Survived ~ Adult + Women + AW, family = binomial)
modT11 <- glm(Survived ~ Adult + Women + AC, family = binomial)
modT12 <- glm(Survived ~ Adult + Women + WC, family = binomial)
modT13 <- glm(Survived ~ Sex + Class , family = binomial)
modT14 <- glm(Survived ~ Sex + Class +WC, family = binomial)
AIC(modT13)
## [1] 678.1466
AIC(modT14)
## [1] 657.556
AIC(modT6)
## [1] 754.6399
AIC(modT10)
## [1] 746.5519
AIC(modT11)
## [1] 693.8633
AIC(modT12)
## [1] 671.0785
AIC(modT8)
## [1] 672.1257
anova(modT4, modT6, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: Survived ~ Women
## Model 2: Survived ~ Adult + Women
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 710 749.57
## 2 709 748.64 1 0.93007 0.3348
anova(modT4, modT7, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: Survived ~ Women
## Model 2: Survived ~ Women + Pclass
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 710 749.57
## 2 708 672.06 2 77.511 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Pclass is significant so we will add that to the model
modT8 <- glm(Survived ~ Women + Pclass + Adult, family = binomial)
anova(modT7, modT8, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: Survived ~ Women + Pclass
## Model 2: Survived ~ Women + Pclass + Adult
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 708 672.06
## 2 707 662.13 1 9.9332 0.001623 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
modT6 <- glm(Survived ~ Adult + Women + Pclass + AW, family = binomial)
modT7 <- glm(Survived ~ Adult + Women + Pclass + AC, family = binomial)
modT8 <- glm(Survived ~ Adult + Women + Pclass + WC, family = binomial)