library(dplyr)
library(Zelig)
library(texreg)
library(visreg)
library(ggplot2)
library(sjmisc)
library(tidyr)
library(radiant.data)
data(titanic)
titanic <- titanic %>%
mutate(survival = as.integer(survived)) %>%
mutate(survival = sjmisc::rec(survival, rec = "2=0; 1=1")) %>%
select(survival,survived, everything())
alpha <- zlogit$new()
alpha$zelig(survival ~ age + sex*pclass + fare, data = titanic)
summary(alpha)
## Model:
##
## Call:
## alpha$zelig(formula = survival ~ age + sex * pclass + fare, data = titanic)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.0634 -0.6641 -0.4943 0.4336 2.4941
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.8978215 0.6131092 7.988 1.37e-15
## age -0.0387245 0.0068044 -5.691 1.26e-08
## sexmale -3.8996177 0.5015659 -7.775 7.55e-15
## pclass2nd -1.5923247 0.6024844 -2.643 0.00822
## pclass3rd -4.1382715 0.5601819 -7.387 1.50e-13
## fare -0.0009058 0.0020509 -0.442 0.65874
## sexmale:pclass2nd -0.0600076 0.6372949 -0.094 0.92498
## sexmale:pclass3rd 2.5019110 0.5479901 4.566 4.98e-06
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1409.99 on 1042 degrees of freedom
## Residual deviance: 931.45 on 1035 degrees of freedom
## AIC: 947.45
##
## Number of Fisher Scoring iterations: 5
##
## Next step: Use 'setx' method
alpha$setrange(age=min(titanic$age):max(titanic$age))
alpha$sim()
alpha$graph()
alpha$setrange(fare=min(titanic$fare):max(titanic$fare))
alpha$sim()
alpha$graph()
alpha$setx(sex="male")
alpha$setx1(sex="female")
alpha$sim()
fd <- alpha$get_qi(xvalue="x1", qi="fd")
summary(fd)
## V1
## Min. :0.1292
## 1st Qu.:0.2264
## Median :0.2570
## Mean :0.2574
## 3rd Qu.:0.2886
## Max. :0.4025
par(“mar”)
par(mar= c(1,1,1,1))
alpha$graph()
bravo <- zlogit$new()
bravo$zelig(survival ~ age +sex*pclass + fare, data=titanic)
bravo$setx(sex="male", pclass="1st")
bravo$setx1(sex="female", pclass="1st")
par(mar= c(1,1,1,1))
bravo$sim()
bravo$graph()
charlie <- zlogit$new()
charlie$zelig(survival ~ age +sex*pclass + fare, data=titanic)
charlie$setx(sex="male", pclass="2nd")
charlie$setx1(sex="female", pclass="2nd")
par(mar= c(1,1,1,1))
charlie$sim()
charlie$graph()
delta <- zlogit$new()
delta$zelig(survival ~ age +sex*pclass + fare, data=titanic)
delta$setx(sex="male", pclass="3rd")
delta$setx1(sex="female", pclass="3rd")
par(mar= c(1,1,1,1))
delta$sim()
delta$graph()
d1 <- bravo$get_qi(xvalue="x1", qi="fd")
d2 <- charlie$get_qi(xvalue="x1", qi="fd")
d3 <- delta$get_qi(xvalue="x1", qi="fd")
dfd <- as.data.frame(cbind(d1, d2, d3))
head(dfd)
## V1 V2 V3
## 1 0.5967198 0.8029676 0.2556934
## 2 0.5167552 0.6980133 0.2694845
## 3 0.5174264 0.7471248 0.3146801
## 4 0.4993967 0.6766970 0.2583324
## 5 0.4736819 0.7334844 0.2473503
## 6 0.4905807 0.7487350 0.2716825
tidd <- dfd %>%
gather(class, simv, 1:3)
head(tidd)
## class simv
## 1 V1 0.5967198
## 2 V1 0.5167552
## 3 V1 0.5174264
## 4 V1 0.4993967
## 5 V1 0.4736819
## 6 V1 0.4905807
tidd %>%
group_by(class) %>%
summarise(mean=mean(simv), sd=sd(simv))
## # A tibble: 3 x 3
## class mean sd
## <chr> <dbl> <dbl>
## 1 V1 0.519 0.0506
## 2 V2 0.747 0.0422
## 3 V3 0.258 0.0441
par(mar= c(1,1,1,1))
ggplot(tidd, aes(simv)) + geom_histogram(fill = "black", color="blue") + facet_grid(~class)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
The dataset being used for this weeks assignment is a continuation of last assignment Game of Thrones dataset. Just like before, I’m interedted in looking at the popularity of the nobles and commoners of Game of Thrones.
got$Noble <- as.character (got$isNoble)
got$Noble[got$isNoble == "1"] <- "Noble"
got$Noble[got$isNoble == "0"] <- "Commoner"
got$Alive <- as.character (got$isAlive)
got$Alive[got$isAlive == "1"] <- "Alive"
got$Alive[got$isAlive == "0"] <- "Dead"
got$Popular <- as.character (got$isPopular)
got$Popular[got$isPopular == "1"] <- "Popular"
got$Popular[got$isPopular == "0"] <- "Non-Popular"
got$Sex <- as.character (got$male)
got$Sex[got$male == "1"] <- "male"
got$Sex[got$male == "0"] <- "female"
got$male <- as.factor(got$male)
got$isNoble <- as.factor(got$isNoble)
got$isPopular <- as.factor(got$isPopular)
z.got <- zelig(isAlive ~ male + isPopular*isNoble, model = "logit", data = got)
## How to cite this model in Zelig:
## R Core Team. 2007.
## logit: Logistic Regression for Dichotomous Dependent Variables
## in Christine Choirat, Christopher Gandrud, James Honaker, Kosuke Imai, Gary King, and Olivia Lau,
## "Zelig: Everyone's Statistical Software," http://zeligproject.org/
summary(z.got)
## Model:
##
## Call:
## z5$zelig(formula = isAlive ~ male + isPopular * isNoble, data = got)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9081 -0.7435 0.6303 0.7930 1.6858
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.6438 0.1066 15.419 < 2e-16
## male1 -0.6482 0.1183 -5.478 4.29e-08
## isPopular1 -2.1401 0.3971 -5.390 7.06e-08
## isNoble1 -0.1286 0.1120 -1.149 0.2507
## isPopular1:isNoble1 1.4917 0.4639 3.215 0.0013
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2207.1 on 1945 degrees of freedom
## Residual deviance: 2121.4 on 1941 degrees of freedom
## AIC: 2131.4
##
## Number of Fisher Scoring iterations: 4
##
## Next step: Use 'setx' method
The data so far does not indicate any level of significance. This is unexpected as when performing a similar analysis the previous week, there were a few levels of significant using similar variables. However, based on these results, it can be stated that the log odds of a popular male character being alive is -2.14 less that that of a female character.
x <- setx(z.got, isNoble = "0")
x1 <- setx(z.got, isNoble = "1")
a <- sim(z.got, x = x, x1 = x1)
fd <- a$get_qi(xvalue="x1", qi="fd")
summary(fd)
## V1
## Min. :-0.09625
## 1st Qu.:-0.04110
## Median :-0.02464
## Mean :-0.02578
## 3rd Qu.:-0.01119
## Max. : 0.03946
Based on the first results, we can state on average Nobles are -.026 less likely of being alive by the end of Game of Thrones
plot(a)
z <- setx(z.got, isPopular = "0")
z1 <- setx(z.got, isPopular = "1")
b <- sim(z.got, x = z, x1 = z1)
fd <- b$get_qi(xvalue="x1", qi="fd")
summary(fd)
## V1
## Min. :-0.6672
## 1st Qu.:-0.5340
## Median :-0.4914
## Mean :-0.4835
## 3rd Qu.:-0.4412
## Max. :-0.1866
Based on the second results, we can state on average that popular characters are -.48 less likely of being alive by the end of Game of Thrones
plot(b)
Pop1 <- setx(z.got, isNoble="0", isPopular="0")
Pop1a <- setx(z.got, isNoble="1", isPopular="0")
Pops <- sim(z.got, x=Pop1, x1=Pop1a)
Pop2 <- setx(z.got, isNoble="0", isPopular="1")
Pop2a <- setx(z.got, isNoble="1", isPopular="1")
Popsa <- sim(z.got, x=Pop2, x1=Pop2a)
par(mar= c(1,1,1,1))
plot(Pops)
par(mar= c(1,1,1,1))
plot(Popsa)
a1 <- Pops$get_qi(xvalue="x1", qi="fd")
a2 <- Popsa$get_qi(xvalue="x1", qi="fd")
afd <- as.data.frame(cbind(a1, a2))
Popular <- afd %>%
gather(isAlive, simv, 1:2)
head(Popular)
## isAlive simv
## 1 V1 -0.02699210
## 2 V1 -0.05142281
## 3 V1 -0.02679997
## 4 V1 -0.03815652
## 5 V1 -0.02111528
## 6 V1 -0.03472899
Popular %>%
group_by(isAlive) %>%
summarise(mean=mean(simv), sd=sd(simv))
## # A tibble: 2 x 3
## isAlive mean sd
## <chr> <dbl> <dbl>
## 1 V1 -0.0255 0.0234
## 2 V2 0.301 0.0906
Between nobles and popular characters, it can be states that both nobles and popular characters are less likely to be alive at the end of Game of Thrones.
ggplot(Popular, aes(simv)) + geom_histogram(fill="red", color="blue") + facet_grid(~isAlive) + geom_vline(xintercept=mean(Popular))
## Warning in mean.default(Popular): argument is not numeric or logical:
## returning NA
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_vline).