library(stableGR)
## Loading required package: mcmcse
## mcmcse: Monte Carlo Standard Errors for MCMC
## Version 1.4-1 created on 2020-01-29.
## copyright (c) 2012, James M. Flegal, University of California, Riverside
##                     John Hughes, University of Colorado, Denver
##                     Dootika Vats, University of Warwick
##                     Ning Dai, University of Minnesota
##  For citation information, type citation("mcmcse").
##  Type help("mcmcse-package") to get started.
data(titanic.complete)
titanic <- titanic.complete

?titanic.complete
attach(titanic)
maledata <- subset(titanic, Sex == "male")
femaledata <- subset(titanic, Sex == "female")



meanmalesurvival <- aggregate(Survived ~ Sex, titanic, mean)
meanclasssurvival <-aggregate(Survived ~ Pclass, titanic, mean)


myresp<-cbind(titanic$Survived, 1-titanic$Survived)
modT1 <- glm(myresp~1, family=binomial)

## exp(-.3868) or .679 is the log odds of surviving
library(faraway)
ilogit(coef(modT1))
## (Intercept) 
##   0.4044944
ilogit(-.3868)
## [1] 0.4044879
#probability of surviving is 40.45% from the ilogit function

confint(modT1,parm = 1, level = .95)
## Waiting for profiling to be done...
##      2.5 %     97.5 % 
## -0.5372197 -0.2377555
confint(modT1, level = .95)
## Waiting for profiling to be done...
##      2.5 %     97.5 % 
## -0.5372197 -0.2377555
## 95% confident the log odds of surviving are between -.537 and -.238

exp(-0.5372197)
## [1] 0.5843707
exp(-0.2377555)
## [1] 0.7883954
## 95% confident the odds of surviving are between .584 and .788



titanic$Adult <- as.numeric(titanic$Age > 18)
attach(titanic)
## The following objects are masked from titanic (pos = 4):
## 
##     Age, Embarked, Fare, Parch, Pclass, Sex, SibSp, Survived
modT2 <- glm(myresp ~ Adult, family=binomial)
modT3 <- glm(Survived ~ Adult, family=binomial)
summary(modT2)
## 
## Call:
## glm(formula = myresp ~ Adult, family = binomial)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1835  -0.9785  -0.9785   1.3902   1.3902  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  0.01439    0.16964   0.085  0.93241   
## Adult       -0.50201    0.19022  -2.639  0.00831 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 960.90  on 711  degrees of freedom
## Residual deviance: 953.96  on 710  degrees of freedom
## AIC: 957.96
## 
## Number of Fisher Scoring iterations: 4
summary(modT3)
## 
## Call:
## glm(formula = Survived ~ Adult, family = binomial)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1835  -0.9785  -0.9785   1.3902   1.3902  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  0.01439    0.16964   0.085  0.93241   
## Adult       -0.50201    0.19022  -2.639  0.00831 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 960.90  on 711  degrees of freedom
## Residual deviance: 953.96  on 710  degrees of freedom
## AIC: 957.96
## 
## Number of Fisher Scoring iterations: 4
#the odds of surviving are 1.015 (exp(.01439)), the odds of a adult surviving are .614 (exp(.01439 - .50201))

titanic$Women <- as.numeric(titanic$Sex == "female")
attach(titanic)
## The following objects are masked from titanic (pos = 3):
## 
##     Adult, Age, Embarked, Fare, Parch, Pclass, Sex, SibSp,
##     Survived
## The following objects are masked from titanic (pos = 5):
## 
##     Age, Embarked, Fare, Parch, Pclass, Sex, SibSp, Survived
modT4 <- glm(Survived ~ Women, family = binomial)

summary(modT4)
## 
## Call:
## glm(formula = Survived ~ Women, family = binomial)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6721  -0.6779  -0.6779   0.7534   1.7795  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.3535     0.1163  -11.64   <2e-16 ***
## Women         2.4676     0.1852   13.33   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 960.90  on 711  degrees of freedom
## Residual deviance: 749.57  on 710  degrees of freedom
## AIC: 753.57
## 
## Number of Fisher Scoring iterations: 4
## log (p/1-p) = -1.3535 + I()
#the odds of a male surviving are .258 (exp(-1.3535))
#the odds of a female surviving are 3.047 (exp(2.4676 - 1.3535))

modT5 <- glm(Survived ~ Pclass, family = binomial)

summary(modT5)
## 
## Call:
## glm(formula = Survived ~ Pclass, family = binomial)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4533  -0.7399  -0.7399   0.9246   1.6908  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.6286     0.1548   4.061 4.88e-05 ***
## Pclass2      -0.7096     0.2171  -3.269  0.00108 ** 
## Pclass3      -1.7844     0.1986  -8.987  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 960.90  on 711  degrees of freedom
## Residual deviance: 868.11  on 709  degrees of freedom
## AIC: 874.11
## 
## Number of Fisher Scoring iterations: 4
## log (p/1-p) = .6286 - .7096I(class 2) - 1.7844(class 3)
# The odds of someone from class 1 surviving are 1.875 or exp(.6286)
# The odds of someone from class 2 surviving are .922 or exp(.6286 - .7096)
# The odds of someone from class 3 surviving are .315 or exp(.6286 - 1.7844)

modT6 <- glm(Survived ~ Adult + Women + Pclass, family = binomial)
summary(modT6)
## 
## Call:
## glm(formula = Survived ~ Adult + Women + Pclass, family = binomial)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4749  -0.7035  -0.4088   0.7138   2.2467  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.5070     0.2998   1.691 0.090805 .  
## Adult        -0.7845     0.2495  -3.144 0.001667 ** 
## Women         2.5076     0.2048  12.243  < 2e-16 ***
## Pclass2      -0.9927     0.2597  -3.822 0.000132 ***
## Pclass3      -2.1628     0.2509  -8.621  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 960.90  on 711  degrees of freedom
## Residual deviance: 662.13  on 707  degrees of freedom
## AIC: 672.13
## 
## Number of Fisher Scoring iterations: 4
### log (p/1-p) = .5070 - .7845I(Adult) + 2.5076(Woman) - .9927 (class 2) - 2.1628 (class 3)
# If youre a woman 
## As a class 1 male adult, I would have odds of .7577 odds of survival which is exp(.507 + -.7845 (adult))
titanic$Class <- as.numeric(titanic$Pclass)
attach(titanic)
## The following objects are masked from titanic (pos = 3):
## 
##     Adult, Age, Embarked, Fare, Parch, Pclass, Sex, SibSp,
##     Survived, Women
## The following objects are masked from titanic (pos = 4):
## 
##     Adult, Age, Embarked, Fare, Parch, Pclass, Sex, SibSp,
##     Survived
## The following objects are masked from titanic (pos = 6):
## 
##     Age, Embarked, Fare, Parch, Pclass, Sex, SibSp, Survived
AW <- Adult * Women
AC <- Adult * Class
WC <- Women * Class

AIC(modT3)
## [1] 957.9593
AIC(modT4)
## [1] 753.5699
AIC(modT5)
## [1] 874.112
#Testing AICs, we know gender is the best predictor and will start there

modT6 <- glm(Survived ~ Adult + Women, family = binomial)
modT7 <- glm(Survived ~ Women + Pclass, family = binomial)

modT9 <- glm(Survived ~ Sex, family = binomial)
summary(modT9)
## 
## Call:
## glm(formula = Survived ~ Sex, family = binomial)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6721  -0.6779  -0.6779   0.7534   1.7795  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   1.1141     0.1441   7.734 1.04e-14 ***
## Sexmale      -2.4676     0.1852 -13.327  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 960.90  on 711  degrees of freedom
## Residual deviance: 749.57  on 710  degrees of freedom
## AIC: 753.57
## 
## Number of Fisher Scoring iterations: 4
AIC(modT9)
## [1] 753.5699
AIC(modT4)
## [1] 753.5699
AIC(modT6)
## [1] 754.6399
AIC(modT7)
## [1] 680.0589
modT8 <- glm(Survived ~ Adult + Women + Pclass , family = binomial)


AIC(modT6)
## [1] 754.6399
modT10 <- glm(Survived ~ Adult + Women + AW, family = binomial)
modT11 <- glm(Survived ~ Adult + Women + AC, family = binomial)
modT12 <- glm(Survived ~ Adult + Women + WC, family = binomial)
modT13 <- glm(Survived ~ Sex + Class , family = binomial)
modT14 <- glm(Survived ~ Sex + Class +WC, family = binomial)
AIC(modT13)
## [1] 678.1466
AIC(modT14)
## [1] 657.556
AIC(modT6)
## [1] 754.6399
AIC(modT10)
## [1] 746.5519
AIC(modT11)
## [1] 693.8633
AIC(modT12)
## [1] 671.0785
AIC(modT8)
## [1] 672.1257
anova(modT4, modT6, test = "Chisq")
## Analysis of Deviance Table
## 
## Model 1: Survived ~ Women
## Model 2: Survived ~ Adult + Women
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1       710     749.57                     
## 2       709     748.64  1  0.93007   0.3348
anova(modT4, modT7, test = "Chisq")
## Analysis of Deviance Table
## 
## Model 1: Survived ~ Women
## Model 2: Survived ~ Women + Pclass
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1       710     749.57                          
## 2       708     672.06  2   77.511 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Pclass is significant so we will add that to the model

modT8 <- glm(Survived ~ Women + Pclass + Adult, family = binomial)
anova(modT7, modT8, test = "Chisq")
## Analysis of Deviance Table
## 
## Model 1: Survived ~ Women + Pclass
## Model 2: Survived ~ Women + Pclass + Adult
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)   
## 1       708     672.06                        
## 2       707     662.13  1   9.9332 0.001623 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
modT6 <- glm(Survived ~ Adult + Women + Pclass + AW, family = binomial)
modT7 <- glm(Survived ~ Adult + Women + Pclass + AC, family = binomial)
modT8 <- glm(Survived ~ Adult + Women + Pclass + WC, family = binomial)