#install.packages("sjmisc")
library(magrittr)
library(tidyverse)
library(readr)
library(knitr)
library(Zelig)
library(radiant.data)
data("titanic")
head(titanic)
## # A tibble: 6 x 10
## pclass survived sex age sibsp parch fare name cabin embarked
## <fct> <fct> <fct> <dbl> <int> <int> <dbl> <chr> <chr> <fct>
## 1 1st Yes female 29 0 0 211. Allen, Mi… B5 Southam…
## 2 1st Yes male 0.917 1 2 152. Allison, … C22 … Southam…
## 3 1st No female 2 1 2 152. Allison, … C22 … Southam…
## 4 1st No male 30 1 2 152. Allison, … C22 … Southam…
## 5 1st No female 25 1 2 152. Allison, … C22 … Southam…
## 6 1st Yes male 48 0 0 26.5 Anderson,… E12 Southam…
MUTATION OF VARIABLE SURV INTO SURVIVAL
titanic <- titanic %>%
mutate(surv = as.integer(survived)) %>%
mutate(survival = sjmisc::rec(surv, rec = "2=0; 1=1")) %>%
select(pclass, survived, survival, everything())
head(titanic)
## # A tibble: 6 x 12
## pclass survived survival sex age sibsp parch fare name cabin
## <fct> <fct> <dbl> <fct> <dbl> <int> <int> <dbl> <chr> <chr>
## 1 1st Yes 1 fema… 29 0 0 211. Alle… B5
## 2 1st Yes 1 male 0.917 1 2 152. Alli… C22 …
## 3 1st No 0 fema… 2 1 2 152. Alli… C22 …
## 4 1st No 0 male 30 1 2 152. Alli… C22 …
## 5 1st No 0 fema… 25 1 2 152. Alli… C22 …
## 6 1st Yes 1 male 48 0 0 26.5 Ande… E12
## # … with 2 more variables: embarked <fct>, surv <int>
IMPLEMENTATION OF ZELIG 5 SYNTAX TO HIGHLIGHT NEWLY CREATED DICHOTOMOUS VARIABLE SURVIVAL
z5 <- zlogit$new()
z5$zelig(survival ~ age + sex*pclass + fare, data = titanic)
summary(z5)
## Model:
##
## Call:
## z5$zelig(formula = survival ~ age + sex * pclass + fare, data = titanic)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.0634 -0.6641 -0.4943 0.4336 2.4941
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.8978215 0.6131092 7.988 1.37e-15
## age -0.0387245 0.0068044 -5.691 1.26e-08
## sexmale -3.8996177 0.5015659 -7.775 7.55e-15
## pclass2nd -1.5923247 0.6024844 -2.643 0.00822
## pclass3rd -4.1382715 0.5601819 -7.387 1.50e-13
## fare -0.0009058 0.0020509 -0.442 0.65874
## sexmale:pclass2nd -0.0600076 0.6372949 -0.094 0.92498
## sexmale:pclass3rd 2.5019110 0.5479901 4.566 4.98e-06
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1409.99 on 1042 degrees of freedom
## Residual deviance: 931.45 on 1035 degrees of freedom
## AIC: 947.45
##
## Number of Fisher Scoring iterations: 5
##
## Next step: Use 'setx' method
EFFECT OF AGE
z5$setrange(age=min(titanic$age):max(titanic$age))
z5$sim()
z5$graph()
EFFECT OF FARE
z5$setrange(fare=min(titanic$fare):max(titanic$fare))
z5$sim()
z5$graph()
SEX DIFFERENCE
z.sex <- zlogit$new()
z.sex$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z.sex$setx(sex = "male")
z.sex$setx1(sex = "female")
z.sex$sim()
summary(z.sex)
##
## sim x :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## [1,] 0.1406593 0.01970743 0.1397259 0.1064106 0.1817742
## pv
## 0 1
## [1,] 0.851 0.149
##
## sim x1 :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## [1,] 0.3968774 0.04676838 0.3964406 0.307353 0.4928045
## pv
## 0 1
## [1,] 0.597 0.403
## fd
## mean sd 50% 2.5% 97.5%
## [1,] 0.2562181 0.04644506 0.2562792 0.163559 0.3515011
FIRST DIFFERENCE BETWEEN THE SEXES
fd <- z.sex$get_qi(xvalue="x1", qi="fd")
summary(fd)
## V1
## Min. :0.09978
## 1st Qu.:0.22687
## Median :0.25628
## Mean :0.25622
## 3rd Qu.:0.28798
## Max. :0.42362
SEX DIFFERENCES BY CLASS
First Class passengers
z5cl <- zlogit$new()
z5cl$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5cl$setx(sex = "male", pclass = "1st")
z5cl$setx1(sex = "female", pclass = "1st")
z5cl$sim()
summary(z5cl)
##
## sim x :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## [1,] 0.4521426 0.04947199 0.4517859 0.3567711 0.5489564
## pv
## 0 1
## [1,] 0.554 0.446
##
## sim x1 :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## [1,] 0.9734146 0.0134011 0.9759893 0.9393136 0.9908709
## pv
## 0 1
## [1,] 0.027 0.973
## fd
## mean sd 50% 2.5% 97.5%
## [1,] 0.521272 0.04854888 0.5224354 0.4256914 0.6168008
plot(z5cl)
Second Class passengers
z5cl2 <- zlogit$new()
z5cl2$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5cl2$setx(sex = "male", pclass = "2nd")
z5cl2$setx1(sex = "female", pclass = "2nd")
z5cl2$sim()
summary(z5cl2)
##
## sim x :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## [1,] 0.1388147 0.02808807 0.1374725 0.09066829 0.2018672
## pv
## 0 1
## [1,] 0.861 0.139
##
## sim x1 :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## [1,] 0.8881749 0.03249202 0.8911112 0.8148349 0.9409201
## pv
## 0 1
## [1,] 0.115 0.885
## fd
## mean sd 50% 2.5% 97.5%
## [1,] 0.7493602 0.04324667 0.7527703 0.6595137 0.8273213
plot(z5cl2)
Third class passengers
z5cl3 <- zlogit$new()
z5cl3$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5cl3$setx(sex = "male", pclass = "3rd")
z5cl3$setx1(sex = "female", pclass = "3rd")
z5cl3$sim()
summary(z5cl3)
##
## sim x :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## [1,] 0.1406689 0.02033217 0.1393531 0.1058836 0.1838825
## pv
## 0 1
## [1,] 0.85 0.15
##
## sim x1 :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## [1,] 0.3947143 0.04409182 0.3959683 0.3086232 0.4808728
## pv
## 0 1
## [1,] 0.587 0.413
## fd
## mean sd 50% 2.5% 97.5%
## [1,] 0.2540453 0.04361054 0.2561075 0.1662824 0.3379693
plot(z5cl3)
PUTTING THE DATA IN ONE FRAME
d1<-z5cl$get_qi(xvalue="x1", qi="fd")
d2<-z5cl2$get_qi(xvalue="x1", qi="fd")
d3<-z5cl3$get_qi(xvalue="x1", qi="fd")
dfd <- as.data.frame(cbind(d1, d2, d3))
head(dfd)
## V1 V2 V3
## 1 0.5372015 0.7770912 0.2068092
## 2 0.5164485 0.7382971 0.2154375
## 3 0.5948568 0.6769464 0.2196744
## 4 0.4577496 0.8144584 0.2197770
## 5 0.5903832 0.7186659 0.2406991
## 6 0.5280015 0.7123962 0.2946334
__
library(tidyr)
tidd <- dfd %>%
gather(class, simv, 1:3)
head(tidd)
## class simv
## 1 V1 0.5372015
## 2 V1 0.5164485
## 3 V1 0.5948568
## 4 V1 0.4577496
## 5 V1 0.5903832
## 6 V1 0.5280015
HOW CLOSE TO THE MEAN IS THE SAMPLING DISTRIBUTION
library(dplyr)
tidd %>%
group_by(class) %>%
summarise(mean = mean(simv), sd = sd(simv))
## # A tibble: 3 x 3
## class mean sd
## <chr> <dbl> <dbl>
## 1 V1 0.521 0.0485
## 2 V2 0.749 0.0432
## 3 V3 0.254 0.0436
GG PLOT ANALYSIS
library(ggplot2)
ggplot(tidd, aes(simv)) + geom_histogram() + facet_grid(~class)