Titanic
library(tidyverse)
library(Zelig)
library(pander)
library(texreg)
library(visreg)
library(lmtest)
library(sjmisc)
library(radiant.data)
data("titanic")
titanic1 <- titanic%>%
mutate(survival1 = as.integer(survived))%>%
mutate(survival = factor(ifelse(survival1 == 1,1,0)),
age = as.integer(age))%>%
select(pclass, survived, age,survival, sex, pclass, fare)
head(titanic1)
summary(Titanic)
Number of cases in table: 2201
Number of factors: 4
Test for independence of all factors:
Chisq = 1637.4, df = 25, p-value = 0
Chi-squared approximation may be incorrect
z5tit <- zlogit$new()
z5tit$zelig(survival ~ age + sex*pclass + fare, data = titanic1)
summary(z5tit)
Model:
Call:
z5tit$zelig(formula = survival ~ age + sex * pclass + fare, data = titanic1)
Deviance Residuals:
Min 1Q Median 3Q Max
-3.0628 -0.6636 -0.4941 0.4337 2.4940
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 4.8959649 0.6128145 7.989 0.00000000000000136
age -0.0386781 0.0067926 -5.694 0.00000001239977237
sexmale -3.9001038 0.5015680 -7.776 0.00000000000000750
pclass2nd -1.5922712 0.6024689 -2.643 0.00822
pclass3rd -4.1381922 0.5601346 -7.388 0.00000000000014922
fare -0.0009074 0.0020510 -0.442 0.65820
sexmale:pclass2nd -0.0603255 0.6373047 -0.095 0.92459
sexmale:pclass3rd 2.5016703 0.5479814 4.565 0.00000498908018340
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1409.99 on 1042 degrees of freedom
Residual deviance: 931.42 on 1035 degrees of freedom
AIC: 947.42
Number of Fisher Scoring iterations: 5
Next step: Use 'setx' method
z5tit$setrange(age = min(titanic1$age):max(titanic1$age))
z5tit$sim()
ci.plot(z5tit)
z5fare <- zlogit$new()
z5fare$zelig(survival ~ age + sex*pclass + fare, data = titanic1)
z5fare$setrange(fare = min(titanic1$fare):max(titanic1$fare))
z5fare$sim()
ci.plot(z5fare)
z5sex <- zlogit$new()
z5sex$zelig(survival ~ age + sex*pclass + fare, data = titanic1)
z5sex$setx(sex = "male")
z5sex$setx1(sex = "female")
z5sex$sim()
summary(z5sex)
sim x :
-----
ev
mean sd 50% 2.5% 97.5%
[1,] 0.1398616 0.01926466 0.1395581 0.1034941 0.1792588
pv
0 1
[1,] 0.874 0.126
sim x1 :
-----
ev
mean sd 50% 2.5% 97.5%
[1,] 0.3944493 0.04475939 0.3936429 0.3148842 0.485208
pv
0 1
[1,] 0.595 0.405
fd
mean sd 50% 2.5% 97.5%
[1,] 0.2545877 0.04546418 0.2543691 0.1729286 0.3512824
fd <- z5sex$get_qi(xvalue="x1", qi="fd")
summary(fd)
V1
Min. :0.1220
1st Qu.:0.2229
Median :0.2544
Mean :0.2546
3rd Qu.:0.2829
Max. :0.3943
plot(z5sex)
fd <- z5sex$get_qi(xvalue="x1", qi="fd")
summary(fd)
V1
Min. :0.1220
1st Qu.:0.2229
Median :0.2544
Mean :0.2546
3rd Qu.:0.2829
Max. :0.3943
z5sex1 <- zlogit$new()
z5sex1$zelig(survival ~ age + sex*pclass + fare, data = titanic1)
z5sex1$setx(sex = "male", pclass = "1st")
z5sex1$setx1(sex = "female", pclass = "1st")
z5sex1$sim()
plot(z5sex1)
z5sex2 <- zlogit$new()
z5sex2$zelig(survival ~ age + sex*pclass + fare, data = titanic1)
z5sex2$setx(sex = "male", pclass = "2nd")
z5sex2$setx1(sex = "female", pclass = "2nd")
z5sex2$sim()
plot(z5sex2)
z5sex3 <- zlogit$new()
z5sex3$zelig(survival ~ age + sex*pclass + fare, data = titanic1)
z5sex3$setx(sex = "male", pclass = "3rd")
z5sex3$setx1(sex = "female", pclass = "3rd")
z5sex3$sim()
plot(z5sex3)
d1 <- z5sex1$get_qi(xvalue="x1", qi="fd")
d2 <- z5sex2$get_qi(xvalue="x1", qi="fd")
d3 <- z5sex3$get_qi(xvalue="x1", qi="fd")
dfd <- as.data.frame(cbind(d1, d2, d3))
head(dfd)
tidd <- dfd %>%
gather(class, simv, 1:3)
head(tidd)
tidd %>%
group_by(class) %>%
summarise(mean = mean(simv), sd = sd(simv))
ggplot(tidd, aes(simv)) + geom_histogram() + facet_grid(~class)
The purpose of this study is to examine the variables that influence those who have had an affair. More specifically, this study will seek to understand whether gender, yearsmarried, self rating of marriage, and children influence have an influence on the amount of affairs a participant had based on these independent variables.
Variables selected: affairs- numeric. How often engaged in extramarital sexual intercourse during the past year?
gender- factor indicating gender.
yearsmarried- numeric variable coding number of years married: 0.125 = 3 months or less, 0.417 = 4–6 months, 0.75 = 6 months–1 year, 1.5 = 1–2 years, 4 = 3–5 years, 7 = 6–8 years, 10 = 9–11 years, 15 = 12 or more years.
children factor. Are there children in the marriage?
rating numeric variable coding self rating of marriage: 1 = very unhappy, 2 = somewhat unhappy, 3 = average, 4 = happier than average, 5 = very happy.
library(Zelig)
library(AER)
data(Affairs)
head(Affairs)
summary(Affairs)
affairs gender age yearsmarried children religiousness education
Min. : 0.000 female:315 Min. :17.50 Min. : 0.125 no :171 Min. :1.000 Min. : 9.00
1st Qu.: 0.000 male :286 1st Qu.:27.00 1st Qu.: 4.000 yes:430 1st Qu.:2.000 1st Qu.:14.00
Median : 0.000 Median :32.00 Median : 7.000 Median :3.000 Median :16.00
Mean : 1.456 Mean :32.49 Mean : 8.178 Mean :3.116 Mean :16.17
3rd Qu.: 0.000 3rd Qu.:37.00 3rd Qu.:15.000 3rd Qu.:4.000 3rd Qu.:18.00
Max. :12.000 Max. :57.00 Max. :15.000 Max. :5.000 Max. :20.00
occupation rating
Min. :1.000 Min. :1.000
1st Qu.:3.000 1st Qu.:3.000
Median :5.000 Median :4.000
Mean :4.195 Mean :3.932
3rd Qu.:6.000 3rd Qu.:5.000
Max. :7.000 Max. :5.000
Affairs1 <- Affairs%>%
select(affairs, children, gender, rating)%>%
mutate(children = as.factor(ifelse(children == 2, "yes", "no")),
gender = as.factor(ifelse(gender == 1, "female", "male")))
head(Affairs1)
Those with children are more likely to have an affair than those who have no children.
A3 <- Affairs%>%
group_by(children)%>%
summarize(avgaffairs = mean(affairs))
ggplot(A3)+
geom_col(aes(x = children, y = avgaffairs, fill = avgaffairs))
Affairs based on gender we can see that gender has almost no effect on whether or not a person will chose to have an affair.
A2 <- Affairs%>%
group_by(gender)%>%
summarize(avg = mean(affairs))
ggplot(A2)+
geom_col(aes(x = gender, y = avg, fill = avg))
Respondents who were unhappy in their marriage were more than double likely to have an affair than those who were reporting being happy in their marriage.
A4 <- Affairs%>%
group_by(rating)%>%
summarize(avg = mean(affairs))
ggplot(A4)+
geom_col(aes(x = rating, y = avg, fill = avg))
Based on the respondents the longer a person is married the more affairs that person is likely to have.
A5 <- Affairs%>%
group_by(yearsmarried)%>%
summarize(avg = mean(affairs))
ggplot(A5)+
geom_col(aes(x = yearsmarried, y = avg, fill = avg))
model1 <- zelig(affairs ~ yearsmarried + rating + gender + children, model = "ls", data = Affairs, cite = F)
summary(model1)
Model:
Call:
z5$zelig(formula = affairs ~ yearsmarried + rating + gender +
children, data = Affairs)
Residuals:
Min 1Q Median 3Q Max
-4.3830 -1.7110 -0.8957 -0.1511 12.0412
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.83211 0.59825 6.405 0.000000000304
yearsmarried 0.08338 0.02855 2.920 0.00363
rating -0.74871 0.12053 -6.212 0.000000000982
gendermale 0.04890 0.25800 0.190 0.84973
childrenyes -0.19225 0.34916 -0.551 0.58210
Residual standard error: 3.151 on 596 degrees of freedom
Multiple R-squared: 0.09365, Adjusted R-squared: 0.08756
F-statistic: 15.4 on 4 and 596 DF, p-value: 0.000000000005441
Next step: Use 'setx' method
The results show that self rating and yearsmarried affect the amount of extramarital sexual intercourse an individual has had during the past year; specifically, as the number of years married increases, the number of Affairs had in the past year also increases by .08338. Additionally, as a persons self rating of their marriage increases, the amount of affairs actually decrease by .74871. Gender and amount of children do not appear to affect amount of affairs an individual has in the time of a year.
model2 <- zelig(affairs ~ yearsmarried + rating + gender*children, model = "ls", data = Affairs, cite = F)
summary(model2)
Model:
Call:
z5$zelig(formula = affairs ~ yearsmarried + rating + gender *
children, data = Affairs)
Residuals:
Min 1Q Median 3Q Max
-4.2903 -1.7437 -0.8492 -0.1353 12.0984
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.93763 0.63410 6.210 0.000000000997
yearsmarried 0.08452 0.02866 2.949 0.00331
rating -0.75349 0.12097 -6.229 0.000000000890
gendermale -0.16163 0.49066 -0.329 0.74195
childrenyes -0.33200 0.44584 -0.745 0.45677
gendermale:childrenyes 0.29243 0.57955 0.505 0.61404
Residual standard error: 3.153 on 595 degrees of freedom
Multiple R-squared: 0.09403, Adjusted R-squared: 0.08642
F-statistic: 12.35 on 5 and 595 DF, p-value: 0.00000000002046
Next step: Use 'setx' method
It appears that the interaction of gender and children positively impacts number of affairs an individual had per year (as mens number of children increase, the number of affairs an individual has decreases), but these results are not statistically significant. However, years married and gender still remain statistically significant factors that influence the amount of affairs an individual has.
C <- setx(model1, children = "yes")
C1 <- setx1(model1, children = "no")
CH <- sim(model1, x = C, x1 = C1)
summary(CH)
sim x :
-----
ev
mean sd 50% 2.5% 97.5%
1 1.382409 0.2174073 1.376922 0.9665806 1.828432
pv
mean sd 50% 2.5% 97.5%
[1,] 1.371616 3.123736 1.449703 -4.941555 7.391678
sim x1 :
-----
ev
mean sd 50% 2.5% 97.5%
1 1.583537 0.2929335 1.583272 1.018614 2.150701
pv
mean sd 50% 2.5% 97.5%
[1,] 1.599709 3.181166 1.411258 -4.467644 8.022331
fd
mean sd 50% 2.5% 97.5%
1 0.2011276 0.3375873 0.2154004 -0.4926196 0.8401015
The simulation illustration above shows that those who had children were more likely to have an affair.
model1$setrange(yearsmarried=.125:15)
model1$sim()
model1$graph()
Here, the relationship between years married and affairs are illustrated. We can see that the longer an individual has been married the more affairs they have.
model1$setrange(rating=1:5)
model1$sim()
model1$graph()
Here the relationship between affairs and self rating of their marriage is illustrated. The higher that an individual rate their own marriage the less amount of affairs that individual has.
plot(CH)
The gender difference of those who have or have not had an affair is almost identical.
library(tidyr)
tidd <- dfd %>%
gather(gender, simv, 1:1)
head(tidd)
tidd %>%
group_by(gender) %>%
summarise(mean = mean(simv), sd = sd(simv))
library(ggplot2)
ggplot(tidd, aes(simv)) + geom_histogram() + facet_grid(~gender)