Homework Assignment part 1

Data packages & Creating Variable Survival

library(Zelig)
## Loading required package: survival
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(radiant.data)
## Loading required package: magrittr
## Loading required package: ggplot2
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
## Loading required package: tidyr
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
## 
##     extract
## 
## Attaching package: 'radiant.data'
## The following objects are masked from 'package:lubridate':
## 
##     month, wday
## The following object is masked from 'package:ggplot2':
## 
##     diamonds
## The following object is masked from 'package:dplyr':
## 
##     mutate_each
library(tidyr)
data(titanic)
titanic <- titanic %>% 
  mutate(age = as.numeric(age),
         surv= as.integer(survived),
         survival = ifelse(surv == 1,1,0))%>%
select(pclass, survived, age ,survival, surv, sex, fare)
head(titanic)
z.out1 <- zlogit$new()
z.out1$zelig(survival ~ age + sex*pclass + fare,  data = titanic)
summary(z.out1)
## Model: 
## 
## Call:
## z.out1$zelig(formula = survival ~ age + sex * pclass + fare, 
##     data = titanic)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0634  -0.6641  -0.4943   0.4336   2.4941  
## 
## Coefficients:
##                     Estimate Std. Error z value            Pr(>|z|)
## (Intercept)        4.8978215  0.6131092   7.988 0.00000000000000137
## age               -0.0387245  0.0068044  -5.691 0.00000001262563680
## sexmale           -3.8996177  0.5015659  -7.775 0.00000000000000755
## pclass2nd         -1.5923247  0.6024844  -2.643             0.00822
## pclass3rd         -4.1382715  0.5601819  -7.387 0.00000000000014976
## fare              -0.0009058  0.0020509  -0.442             0.65874
## sexmale:pclass2nd -0.0600076  0.6372949  -0.094             0.92498
## sexmale:pclass3rd  2.5019110  0.5479901   4.566 0.00000498035051247
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1409.99  on 1042  degrees of freedom
## Residual deviance:  931.45  on 1035  degrees of freedom
## AIC: 947.45
## 
## Number of Fisher Scoring iterations: 5
## 
## Next step: Use 'setx' method

Age

z.out1$setrange(age = min(titanic$age):max(titanic$age))
z.out1$sim()
z.out1$graph()

Fare

z.outfare <- zlogit$new()
z.outfare$zelig(survival ~ age + sex*pclass + fare,  data = titanic)
z.outfare$setrange(fare = min(titanic$fare):max(titanic$fare))
z.outfare$sim()
z.outfare$graph()

Sex

z.outsex <- zlogit$new()
z.outsex$zelig(survival ~ age + sex*pclass + fare,  data = titanic)
z.outsex$setx(sex = "male")
z.outsex$setx1(sex = "female")
z.outsex$sim()
summary(z.outsex)
## 
##  sim x :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.1394837 0.01906977 0.1389486 0.1057736 0.1789058
## pv
##          0     1
## [1,] 0.856 0.144
## 
##  sim x1 :
##  -----
## ev
##           mean         sd      50%      2.5%     97.5%
## [1,] 0.3939871 0.04411036 0.392269 0.3094375 0.4822637
## pv
##          0     1
## [1,] 0.596 0.404
## fd
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.2545034 0.04418557 0.2537848 0.1714272 0.3454157

Difference between men and women surving (25%)

Difference Number One

fd <- z.outsex$get_qi(xvalue="x1", qi="fd")
summary(fd)
##        V1        
##  Min.   :0.1339  
##  1st Qu.:0.2253  
##  Median :0.2538  
##  Mean   :0.2545  
##  3rd Qu.:0.2841  
##  Max.   :0.4036
plot(z.outsex)

First class

z.outsc <- zlogit$new()
z.outsc$zelig(survival ~ age + sex*pclass + fare,  data = titanic)
z.outsc$setx(sex = "male", pclass = "1st")
z.outsc$setx1(sex = "female", pclass ="1st")
z.outsc$sim()
summary(z.outsc)
## 
##  sim x :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.4523948 0.05167907 0.4528206 0.3535123 0.5521282
## pv
##          0     1
## [1,] 0.557 0.443
## 
##  sim x1 :
##  -----
## ev
##           mean        sd       50%      2.5%     97.5%
## [1,] 0.9732143 0.0136162 0.9761032 0.9372802 0.9907316
## pv
##         0    1
## [1,] 0.03 0.97
## fd
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.5208195 0.05072025 0.5206454 0.4212704 0.6212012

Second Class

z.outsc2 <- zlogit$new()
z.outsc2$zelig(survival ~ age + sex*pclass + fare,  data = titanic)
z.outsc2$setx(sex = "male", pclass = "2nd")
z.outsc2$setx1(sex = "female", pclass = "2nd")
z.outsc2$sim()
summary(z.outsc2)
## 
##  sim x :
##  -----
## ev
##           mean         sd    50%       2.5%     97.5%
## [1,] 0.1386786 0.02852177 0.1347 0.09061366 0.2040956
## pv
##          0     1
## [1,] 0.878 0.122
## 
##  sim x1 :
##  -----
## ev
##           mean         sd       50%     2.5%     97.5%
## [1,] 0.8880386 0.03194495 0.8912547 0.814016 0.9384093
## pv
##          0     1
## [1,] 0.103 0.897
## fd
##         mean         sd       50%      2.5%     97.5%
## [1,] 0.74936 0.04345855 0.7515359 0.6560352 0.8251198

Third Class

z.outsc3 <- zlogit$new()
z.outsc3$zelig(survival ~ age + sex*pclass + fare,  data = titanic)
z.outsc3$setx(sex = "male", pclass = "3rd")
z.outsc3$setx1(sex = "female", pclass = "3rd")
z.outsc3$sim()
summary(z.outsc3)
## 
##  sim x :
##  -----
## ev
##          mean         sd       50%      2.5%     97.5%
## [1,] 0.139671 0.01929874 0.1392893 0.1046588 0.1790789
## pv
##          0     1
## [1,] 0.857 0.143
## 
##  sim x1 :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.3948181 0.04358677 0.3939354 0.3129223 0.4796949
## pv
##          0     1
## [1,] 0.618 0.382
## fd
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.2551471 0.04412811 0.2544979 0.1720169 0.3388358

Plots

plot(z.outsc)

plot(z.outsc3)

d1 <- z.outsc$get_qi(xvalue="x1", qi="fd")
d2 <- z.outsc2$get_qi(xvalue="x1", qi="fd")
d3 <- z.outsc3$get_qi(xvalue="x1", qi="fd")
dfd <- as.data.frame(cbind(d1, d2, d3))
head(dfd)
library(tidyr)
tidd <- dfd %>% 
  gather(class, simv, 1:3)
head(tidd)
library(dplyr)
tidd %>% 
  group_by(class) %>% 
  summarise(mean = mean(simv), sd = sd(simv))

GG Plot

library(ggplot2)
ggplot(tidd, aes(simv)) + geom_histogram() + facet_grid(~class)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.