#Assignment#6.1: Re-implementation of Slide 5.2 to 5.18 using Zelig 5 syntax
library(Zelig)
## Loading required package: survival
library(DAAG)
## Loading required package: lattice
##
## Attaching package: 'DAAG'
## The following object is masked from 'package:survival':
##
## lung
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1 ✔ purrr 0.2.4
## ✔ tibble 1.4.2 ✔ dplyr 0.7.4
## ✔ tidyr 0.8.0 ✔ stringr 1.2.0
## ✔ readr 1.1.1 ✔ forcats 0.2.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::reduce() masks Zelig::reduce()
library(dplyr)
library(tidyr)
library(ggplot2)
library(survival)
library(radiant.data)
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
## Loading required package: lubridate
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
##
## Attaching package: 'radiant.data'
## The following objects are masked from 'package:lubridate':
##
## month, wday
## The following object is masked from 'package:forcats':
##
## as_factor
## The following object is masked from 'package:dplyr':
##
## mutate_each
## The following object is masked from 'package:purrr':
##
## is_empty
## The following object is masked from 'package:ggplot2':
##
## diamonds
data(titanic)
head(titanic)
## pclass survived sex age sibsp parch fare name cabin embarked
## 1 1st Yes female 29.0000 0 0 211.3375 Allen, Miss. Elisabeth Walton B5 Southampton
## 2 1st Yes male 0.9167 1 2 151.5500 Allison, Master. Hudson Trevor C22 C26 Southampton
## 3 1st No female 2.0000 1 2 151.5500 Allison, Miss. Helen Loraine C22 C26 Southampton
## 4 1st No male 30.0000 1 2 151.5500 Allison, Mr. Hudson Joshua Crei C22 C26 Southampton
## 5 1st No female 25.0000 1 2 151.5500 Allison, Mrs. Hudson J C (Bessi C22 C26 Southampton
## 6 1st Yes male 48.0000 0 0 26.5500 Anderson, Mr. Harry E12 Southampton
titanic <- titanic %>%
mutate(survived1 = as.integer(survived)) %>%
mutate (age = as.integer(age)) %>%
mutate(survival = sjmisc::rec(survived1, rec = "2=0; 1=1")) %>%
select(survived, survival, age, sex, pclass, fare)
head(titanic)
## survived survival age sex pclass fare
## 1 Yes 1 29 female 1st 211.3375
## 2 Yes 1 0 male 1st 151.5500
## 3 No 0 2 female 1st 151.5500
## 4 No 0 30 male 1st 151.5500
## 5 No 0 25 female 1st 151.5500
## 6 Yes 1 48 male 1st 26.5500
max(titanic$age)
## [1] 80
z5 <- zlogit$new()
z5$zelig(survival ~ age + sex*pclass + fare,data = titanic)
summary(z5)
## Model:
##
## Call:
## z5$zelig(formula = survival ~ age + sex * pclass + fare, data = titanic)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.0628 -0.6636 -0.4941 0.4337 2.4940
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.8959649 0.6128145 7.989 0.00000000000000136
## age -0.0386781 0.0067926 -5.694 0.00000001239977237
## sexmale -3.9001038 0.5015680 -7.776 0.00000000000000750
## pclass2nd -1.5922712 0.6024689 -2.643 0.00822
## pclass3rd -4.1381922 0.5601346 -7.388 0.00000000000014922
## fare -0.0009074 0.0020510 -0.442 0.65820
## sexmale:pclass2nd -0.0603255 0.6373047 -0.095 0.92459
## sexmale:pclass3rd 2.5016703 0.5479814 4.565 0.00000498908018340
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1409.99 on 1042 degrees of freedom
## Residual deviance: 931.42 on 1035 degrees of freedom
## AIC: 947.42
##
## Number of Fisher Scoring iterations: 5
##
## Next step: Use 'setx' method
summary(titanic$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 21.00 28.00 29.79 39.00 80.00
z5$setrange(age = 0:80)
z5$sim()
z5$graph()
#Fare effect
summary(titanic$fare)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 8.05 15.75 36.60 35.08 512.33
z5$setrange(fare = 0:512.33)
z5$sim()
z5$graph()
z5.sex <- zlogit$new()
z5.sex$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5.sex$setx(sex = "male")
z5.sex$setx1(sex = "female")
z5.sex$sim()
summary(z5.sex)
##
## sim x :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## [1,] 0.1393829 0.01928101 0.1388332 0.1056682 0.1789379
## pv
## 0 1
## [1,] 0.866 0.134
##
## sim x1 :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## [1,] 0.3951089 0.04185373 0.3941491 0.3132035 0.4800687
## pv
## 0 1
## [1,] 0.596 0.404
## fd
## mean sd 50% 2.5% 97.5%
## [1,] 0.255726 0.04353896 0.2547804 0.1734585 0.3402523
fd <- z5.sex$get_qi (xvalue = "x1", qi = "fd")
summary(fd)
## V1
## Min. :0.1100
## 1st Qu.:0.2269
## Median :0.2548
## Mean :0.2557
## 3rd Qu.:0.2849
## Max. :0.3863
plot(z5.sex)
z5.c1s <- zlogit$new()
z5.c1s$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5.c1s$setx(sex = "male", pclass = "1st")
z5.c1s$setx1(sex = "female", pclass = "1st")
z5.c1s$sim()
z5.c2s <- zlogit$new()
z5.c2s$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5.c2s$setx(sex = "male", pclass = "2nd")
z5.c2s$setx1(sex = "female", pclass = "2nd")
z5.c2s$sim()
z5.c3s <- zlogit$new()
z5.c3s$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5.c3s$setx(sex = "male", pclass = "3rd")
z5.c3s$setx1(sex = "female", pclass = "3rd")
z5.c3s$sim()
plot(z5.c1s)
plot(z5.c2s)
plot(z5.c3s)
d1 <- z5.c1s$get_qi(xvalue="x1", qi="fd")
d2 <- z5.c2s$get_qi(xvalue="x1", qi="fd")
d3 <- z5.c3s$get_qi(xvalue="x1", qi="fd")
dfd <- as.data.frame(cbind(d1, d2, d3))
head(dfd)
## V1 V2 V3
## 1 0.6268752 0.7379985 0.2538173
## 2 0.6119724 0.8209339 0.2662584
## 3 0.5880165 0.7573147 0.2862220
## 4 0.4676590 0.7817792 0.2528908
## 5 0.4940628 0.7512419 0.2061413
## 6 0.4770414 0.8014236 0.3001763
tidd <- dfd %>%
gather(class, simv, 1:3)
head(tidd)
## class simv
## 1 V1 0.6268752
## 2 V1 0.6119724
## 3 V1 0.5880165
## 4 V1 0.4676590
## 5 V1 0.4940628
## 6 V1 0.4770414
library(dplyr)
tidd %>%
group_by(class) %>%
summarise(mean = mean(simv), sd = sd(simv))
## # A tibble: 3 x 3
## class mean sd
## <chr> <dbl> <dbl>
## 1 V1 0.517 0.0500
## 2 V2 0.750 0.0435
## 3 V3 0.255 0.0420
library(ggplot2)
ggplot(tidd, aes(simv)) + geom_histogram() + facet_grid(~class)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.