require(pacman)
## Loading required package: pacman
pacman::p_load(tidyverse, lme4, nlme, magrittr)
#theme set
theme_set(theme_bw())
Q5
#data
dta5<-read.table("http://data.princeton.edu/wws509/datasets/ceb.dat", h=T)
head(dta5)
dur res educ mean var n y
1 0-4 Suva none 0.50 1.14 8 4.00
2 0-4 Suva lower 1.14 0.73 21 23.94
3 0-4 Suva upper 0.90 0.67 42 37.80
4 0-4 Suva sec+ 0.73 0.48 51 37.23
5 0-4 urban none 1.17 1.06 12 14.04
6 0-4 urban lower 0.85 1.59 27 22.95
str(dta5)
'data.frame': 70 obs. of 7 variables:
$ dur : Factor w/ 6 levels "0-4","10-14",..: 1 1 1 1 1 1 1 1 1 1 ...
$ res : Factor w/ 3 levels "rural","Suva",..: 2 2 2 2 3 3 3 3 1 1 ...
$ educ: Factor w/ 4 levels "lower","none",..: 2 1 4 3 2 1 4 3 2 1 ...
$ mean: num 0.5 1.14 0.9 0.73 1.17 0.85 1.05 0.69 0.97 0.96 ...
$ var : num 1.14 0.73 0.67 0.48 1.06 1.59 0.73 0.54 0.88 0.81 ...
$ n : int 8 21 42 51 12 27 39 51 62 102 ...
$ y : num 4 23.9 37.8 37.2 14 ...
dta5$os<- log(dta5$n)
dta5$kids<- round(dta5$y)
dta5$educ <- factor(dta5$educ, levels = c("none", "lower", "upper", "sec+"))
dta5$dur <- factor(dta5$dur, levels = c("0-4", "5-9", "10-14", "15-19", "20-24", "25-29"))
dta5$res <- factor(dta5$res, levels = c("rural", "urban", "Suva"))
#plot
ggplot(dta5, aes(x = res, y = mean, color = educ, group = educ))+
stat_summary(fun.y = mean, geom = "point")+
stat_summary(fun.y = mean, geom = "line")+
stat_summary(fun.data = mean_se, geom = "errorbar", width = .1)+
labs(x = "Residence", y = "Mean")
#model
summary(m0 <- glm(kids~offset(os), family = "poisson", data = dta5))
Call:
glm(formula = kids ~ offset(os), family = "poisson", data = dta5)
Deviance Residuals:
Min 1Q Median 3Q Max
-18.6368 -4.4774 -0.8548 2.5292 21.9744
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.376346 0.009712 141.7 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 3731.9 on 69 degrees of freedom
Residual deviance: 3731.9 on 69 degrees of freedom
AIC: 4163.3
Number of Fisher Scoring iterations: 5
summary(m1 <- glm(kids~dur+res+educ, family = "poisson", data = dta5))
Call:
glm(formula = kids ~ dur + res + educ, family = "poisson", data = dta5)
Deviance Residuals:
Min 1Q Median 3Q Max
-18.2023 -4.1368 -0.3074 3.7858 16.2193
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 4.93649 0.04614 107.00 < 2e-16 ***
dur5-9 0.89541 0.05240 17.09 < 2e-16 ***
dur10-14 1.22136 0.05024 24.31 < 2e-16 ***
dur15-19 1.31091 0.04975 26.35 < 2e-16 ***
dur20-24 1.32965 0.04965 26.78 < 2e-16 ***
dur25-29 1.89210 0.04756 39.78 < 2e-16 ***
resurban -1.04901 0.02405 -43.62 < 2e-16 ***
resSuva -1.44312 0.02785 -51.83 < 2e-16 ***
educlower -0.16257 0.02168 -7.50 6.4e-14 ***
educupper -1.04995 0.02886 -36.38 < 2e-16 ***
educsec+ -2.10594 0.05171 -40.73 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 13735.6 on 69 degrees of freedom
Residual deviance: 2504.1 on 59 degrees of freedom
AIC: 2955.6
Number of Fisher Scoring iterations: 5
exp(coef(m1))
(Intercept) dur5-9 dur10-14 dur15-19 dur20-24 dur25-29
139.2799255 2.4483431 3.3918129 3.7095517 3.7797271 6.6332554
resurban resSuva educlower educupper educsec+
0.3502850 0.2361892 0.8499568 0.3499568 0.1217314
Q1
#data
dta1<-read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/domesticViolence.txt", h=T)
head(dta1)
X1 X0 X0.1
1 1 0 0
2 1 0 0
3 1 0 0
4 1 0 0
5 1 0 0
6 1 0 0
str(dta1)
'data.frame': 312 obs. of 3 variables:
$ X1 : int 1 1 1 1 1 1 1 1 1 1 ...
$ X0 : int 0 0 0 0 0 0 0 0 0 0 ...
$ X0.1: int 0 0 0 0 0 0 0 0 0 0 ...
colnames(dta1) = c("ftrviol", "advise", "arrest ")
dta1$treatment<- dta1$advise+dta1$`arrest `+dta1$`arrest `
dta1$treatments<-factor(dta1$treatment, levels = c(0,1,2), labels = c("seperate", "advise", "arrest"))
#plot
ggplot(dta1, aes(x = treatment, y = ftrviol))+
stat_summary(fun.data = mean_se)+
labs(x = "0=sep, 1=adv, 2=arr", y = "Proportion")
#model
summary(m0 <- glm(ftrviol~treatments, data = dta1, family = "binomial"))
Call:
glm(formula = ftrviol ~ treatments, family = "binomial", data = dta1)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.7108 -0.7108 -0.6576 -0.4797 2.1067
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.2470 0.2269 -5.495 3.9e-08 ***
treatmentsadvise -0.1744 0.3326 -0.524 0.6001
treatmentsarrest -0.8571 0.4046 -2.119 0.0341 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 293.66 on 311 degrees of freedom
Residual deviance: 288.59 on 309 degrees of freedom
AIC: 294.59
Number of Fisher Scoring iterations: 4
exp(-1.247 - 0.8571)/(1+exp(-1.2470 - 0.8571))
[1] 0.108699
#This is the probability of future violence for the arrest treatment.
#The observed probability is 0.108699