In the second part of this assignment we will conduct
library(tidyverse)
library(Zelig)
library(pander)
library(texreg)
library(visreg)
library(lmtest)
library(sjmisc)
library(radiant.data)
Recoding Titanic Data Set using Zelig5
data("titanic")
titanic1 <- titanic%>%
mutate(survival1 = as.integer(survived))%>%
mutate(survival = factor(ifelse(survival1 == 1,1,0)),
age = as.integer(age))%>%
select(pclass, survived, age,survival, sex, pclass, fare)
head(titanic1)
z5tit <- zlogit$new()
z5tit$zelig(survival ~ age + sex*pclass + fare, data = titanic1)
summary(z5tit)
Model:
Call:
z5tit$zelig(formula = survival ~ age + sex * pclass + fare, data = titanic1)
Deviance Residuals:
Min 1Q Median 3Q Max
-3.0628 -0.6636 -0.4941 0.4337 2.4940
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 4.8959649 0.6128145 7.989 0.00000000000000136
age -0.0386781 0.0067926 -5.694 0.00000001239977237
sexmale -3.9001038 0.5015680 -7.776 0.00000000000000750
pclass2nd -1.5922712 0.6024689 -2.643 0.00822
pclass3rd -4.1381922 0.5601346 -7.388 0.00000000000014922
fare -0.0009074 0.0020510 -0.442 0.65820
sexmale:pclass2nd -0.0603255 0.6373047 -0.095 0.92459
sexmale:pclass3rd 2.5016703 0.5479814 4.565 0.00000498908018340
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1409.99 on 1042 degrees of freedom
Residual deviance: 931.42 on 1035 degrees of freedom
AIC: 947.42
Number of Fisher Scoring iterations: 5
Next step: Use 'setx' method
Age Effect Using Zelig5
z5tit$setrange(age = min(titanic1$age):max(titanic1$age))
z5tit$sim()
ci.plot(z5tit)
Fare Effect Using Zelig5
z5fare <- zlogit$new()
z5fare$zelig(survival ~ age + sex*pclass + fare, data = titanic1)
z5fare$setrange(fare = min(titanic1$fare):max(titanic1$fare))
z5fare$sim()
ci.plot(z5fare)
Difference in Sex Using Zelig5
z5sex <- zlogit$new()
z5sex$zelig(survival ~ age + sex*pclass + fare, data = titanic1)
z5sex$setx(sex = "male")
z5sex$setx1(sex = "female")
z5sex$sim()
summary(z5sex)
sim x :
-----
ev
mean sd 50% 2.5% 97.5%
[1,] 0.138858 0.01877611 0.1379595 0.1052475 0.1762027
pv
0 1
[1,] 0.885 0.115
sim x1 :
-----
ev
mean sd 50% 2.5% 97.5%
[1,] 0.396046 0.04263319 0.3947367 0.3124353 0.4832082
pv
0 1
[1,] 0.619 0.381
fd
mean sd 50% 2.5% 97.5%
[1,] 0.257188 0.04284021 0.2571369 0.1723735 0.3377271
plot(z5sex)
fd <- z5sex$get_qi(xvalue="x1", qi="fd")
summary(fd)
V1
Min. :0.1354
1st Qu.:0.2285
Median :0.2571
Mean :0.2572
3rd Qu.:0.2862
Max. :0.3903
Difference in Class Using Zelig5
z5sex1 <- zlogit$new()
z5sex1$zelig(survival ~ age + sex*pclass + fare, data = titanic1)
z5sex1$setx(sex = "male", pclass = "1st")
z5sex1$setx1(sex = "female", pclass = "1st")
z5sex1$sim()
plot(z5sex1)
z5sex2 <- zlogit$new()
z5sex2$zelig(survival ~ age + sex*pclass + fare, data = titanic1)
z5sex2$setx(sex = "male", pclass = "2nd")
z5sex2$setx1(sex = "female", pclass = "2nd")
z5sex2$sim()
plot(z5sex2)
z5sex3 <- zlogit$new()
z5sex3$zelig(survival ~ age + sex*pclass + fare, data = titanic1)
z5sex3$setx(sex = "male", pclass = "1st")
z5sex3$setx1(sex = "female", pclass = "1st")
z5sex3$sim()
plot(z5sex3)
d1 <- z5sex1$get_qi(xvalue="x1", qi="fd")
d2 <- z5sex2$get_qi(xvalue="x1", qi="fd")
d3 <- z5sex3$get_qi(xvalue="x1", qi="fd")
dfd <- as.data.frame(cbind(d1, d2, d3))
head(dfd)
tidd <- dfd %>%
gather(class, simv, 1:3)
head(tidd)
tidd %>%
group_by(class) %>%
summarise(mean = mean(simv), sd = sd(simv))
ggplot(tidd, aes(simv)) + geom_histogram() + facet_grid(~class)
The purpose of this study is to examine the variables that influence those who have had an affair. More specifically, this study will seek to understand whether gender, yearsmarried, rating, and children have an effect on whether and if so how many affairs a participant had based on these independent variables.
library(Zelig)
data(Affairs)
data set ‘Affairs’ not found
head(Affairs)
Affairs1 <- Affairs%>%
select(affairs, children, gender, rating)%>%
mutate(children = as.factor(ifelse(children == 2, "yes", "no")),
gender = as.factor(ifelse(gender == 1, "female", "male")))
head(Affairs1)
Those with children are more likely to have an affair than those who have no children.
A3 <- Affairs%>%
group_by(children)%>%
summarize(avg = mean(affairs))
ggplot(A3)+
geom_col(aes(x = children, y = avg, fill = avg))
Affairs based on gender we can see that gender has almost no effect on whether or not a person will chose to have an affair.
A2 <- Affairs%>%
group_by(gender)%>%
summarize(avg = mean(affairs))
ggplot(A2)+
geom_col(aes(x = gender, y = avg, fill = avg))
Respondents who were unhappy in their marriage were more than double likely to have an affair than those who were reporting being happy in their marriage.
A4 <- Affairs%>%
group_by(rating)%>%
summarize(avg = mean(affairs))
ggplot(A4)+
geom_col(aes(x = rating, y = avg, fill = avg))
Based on the respondents the longer a person is married the more affairs that person is likely to have.
A5 <- Affairs%>%
group_by(yearsmarried)%>%
summarize(avg = mean(affairs))
ggplot(A5)+
geom_col(aes(x = yearsmarried, y = avg, fill = avg))
model1 <- zelig(affairs ~ yearsmarried + rating + gender + children, model = "ls", data = Affairs, cite = F)
summary(model1)
Model:
Call:
z5$zelig(formula = affairs ~ yearsmarried + rating + gender +
children, data = Affairs)
Residuals:
Min 1Q Median 3Q Max
-4.3830 -1.7110 -0.8957 -0.1511 12.0412
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.83211 0.59825 6.405 0.000000000304
yearsmarried 0.08338 0.02855 2.920 0.00363
rating -0.74871 0.12053 -6.212 0.000000000982
gendermale 0.04890 0.25800 0.190 0.84973
childrenyes -0.19225 0.34916 -0.551 0.58210
Residual standard error: 3.151 on 596 degrees of freedom
Multiple R-squared: 0.09365, Adjusted R-squared: 0.08756
F-statistic: 15.4 on 4 and 596 DF, p-value: 0.000000000005441
Next step: Use 'setx' method
model2 <- zelig(affairs ~ yearsmarried + rating*gender + children, model = "ls", data = Affairs, cite = F)
summary(model2)
Model:
Call:
z5$zelig(formula = affairs ~ yearsmarried + rating * gender +
children, data = Affairs)
Residuals:
Min 1Q Median 3Q Max
-4.5554 -1.7438 -0.8820 -0.1875 11.9889
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.63161 0.73921 4.913 0.00000116
yearsmarried 0.08369 0.02858 2.929 0.00354
rating -0.70017 0.15990 -4.379 0.00001410
gendermale 0.47792 0.96316 0.496 0.61994
childrenyes -0.18241 0.35004 -0.521 0.60247
rating:gendermale -0.10933 0.23648 -0.462 0.64401
Residual standard error: 3.153 on 595 degrees of freedom
Multiple R-squared: 0.09397, Adjusted R-squared: 0.08636
F-statistic: 12.34 on 5 and 595 DF, p-value: 0.00000000002086
Next step: Use 'setx' method
The simulation illustration below shows that those who had children were more likely to have an affair
C <- setx(model1, children = "yes")
C1 <- setx1(model1, children = "no")
CH <- sim(model1, x = C, x1 = C1)
summary(CH)
sim x :
-----
ev
mean sd 50% 2.5% 97.5%
1 1.374363 0.2119047 1.379075 0.9509012 1.755278
pv
mean sd 50% 2.5% 97.5%
[1,] 1.292174 3.021392 1.306768 -4.642168 7.188486
sim x1 :
-----
ev
mean sd 50% 2.5% 97.5%
1 1.571395 0.3022732 1.577708 0.9818904 2.188667
pv
mean sd 50% 2.5% 97.5%
[1,] 1.650209 3.170288 1.618075 -4.404932 8.01561
fd
mean sd 50% 2.5% 97.5%
1 0.197032 0.3466742 0.1959629 -0.4550392 0.8748012
plot(CH)
The gender difference of those who have or have not had an affair is almost identical.
library(tidyr)
tidd <- dfd %>%
gather(gender, simv, 1:1)
head(tidd)
library(ggplot2)
ggplot(tidd, aes(simv)) + geom_histogram() + facet_grid(~gender)