library(pacman)
pacman::p_load(tidyverse,nlme,lme4,MASS,HH,MPDiR,nnet)
Question 1
dta1 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/domesticViolence.txt",header = T)
#
str(dta1)
## 'data.frame': 312 obs. of 3 variables:
## $ X1 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ X0 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ X0.1: int 0 0 0 0 0 0 0 0 0 0 ...
#
dta1t <- matrix(0,312,3)
dta1t <- as.data.frame(dta1t)
for (i in 1:length(dta1[,1])){
if (dta1[i,2]== 0 & dta1[i,3] == 0){
dta1t[i,1] = "Separate"
dta1t[i,2] = dta1[i,1]
} else if (dta1[i,2] == 1 &dta1[i,3] == 0){
dta1t[i,1] = "Advise"
dta1t[i,2] = dta1[i,1]
} else{
dta1t[i,1] = "Arrest"
dta1t[i,2] = dta1[i,1]
}
}
for (i in 1:length(dta1t[,1])){
if (dta1t[i,2] == 1){
dta1t[i,2] = 1
}else{
dta1t[i,2] = 0
dta1t[i,3] = 1
}
}
colnames(dta1t) <- c("Treatment","Future_violence","No_Future_violence")
str(dta1t)
## 'data.frame': 312 obs. of 3 variables:
## $ Treatment : chr "Separate" "Separate" "Separate" "Separate" ...
## $ Future_violence : num 1 1 1 1 1 1 1 1 1 1 ...
## $ No_Future_violence: num 0 0 0 0 0 0 0 0 0 0 ...
dta1t$Treatment <- as.factor(dta1t$Treatment)
dta1t$Treatment <- factor(dta1t$Treatment, levels(dta1t$Treatment)[c(3,2,1)])
#table
aggregate(cbind(Future_violence,No_Future_violence) ~ Treatment, data = dta1t, sum)
## Treatment Future_violence No_Future_violence
## 1 Separate 25 87
## 2 Arrest 10 82
## 3 Advise 21 87
##ggplot
ggplot(data = dta1t , aes(x = Treatment , y = Future_violence , group = Treatment))+
stat_summary(fun.y = mean,geom = "point")+
stat_summary(fun.data = mean_se, geom = "errorbar",width = 0)+
labs( x = "" , y = "Propotion",title = "Future Violence Against Spouse By Treatment")+
theme_bw()

#
summary(r1 <- glm(Future_violence ~ Treatment, data = dta1t,
family = "binomial"))
##
## Call:
## glm(formula = Future_violence ~ Treatment, family = "binomial",
## data = dta1t)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.7108 -0.7108 -0.6576 -0.4797 2.1067
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.2470 0.2269 -5.495 3.9e-08 ***
## TreatmentArrest -0.8571 0.4046 -2.119 0.0341 *
## TreatmentAdvise -0.1744 0.3326 -0.524 0.6001
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 293.66 on 311 degrees of freedom
## Residual deviance: 288.59 on 309 degrees of freedom
## AIC: 294.59
##
## Number of Fisher Scoring iterations: 4
Question 2
dta2 <- HSP
str(dta2)
## 'data.frame': 30 obs. of 5 variables:
## $ Q : num 46.9 73.1 113.8 177.4 276.1 ...
## $ p : num 0 9.4 33.3 73.5 100 100 0 7.5 40 80 ...
## $ N : int 35 35 35 35 35 35 40 40 40 40 ...
## $ Obs: Factor w/ 3 levels "SH","SS","MHP": 1 1 1 1 1 1 1 1 1 1 ...
## $ Run: Factor w/ 2 levels "R1","R2": 1 1 1 1 1 1 2 2 2 2 ...
##ggplot
ggplot(dta2, aes(Q, p/100, color = Run)) +
geom_jitter() +
stat_smooth(method = "glm", formula = y ~ x,
method.args = list(family = binomial),
se = FALSE) +
labs(x = "Q", y = "prob") +
theme(legend.position = c(.1, .9))+
facet_wrap(~ Obs)+
theme_bw()

##
summary(r2 <- glm(cbind(p,(100-p))~ Q, data = dta2,family = "binomial"(link = "probit")))
##
## Call:
## glm(formula = cbind(p, (100 - p)) ~ Q, family = binomial(link = "probit"),
## data = dta2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -5.5981 -2.2669 -0.1871 1.1863 6.0055
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.4570295 0.0772710 -31.80 <2e-16 ***
## Q 0.0210146 0.0007056 29.78 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2569.32 on 29 degrees of freedom
## Residual deviance: 231.45 on 28 degrees of freedom
## AIC: 313.55
##
## Number of Fisher Scoring iterations: 6
Question 3
dta3 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/mentalSES.txt",header = T)
str(dta3)
## 'data.frame': 1660 obs. of 8 variables:
## $ Id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ ses : int 6 6 6 6 6 6 6 6 6 6 ...
## $ A : int 1 1 1 1 1 1 1 1 1 1 ...
## $ B : int 0 0 0 0 0 0 0 0 0 0 ...
## $ C : int 0 0 0 0 0 0 0 0 0 0 ...
## $ D : int 0 0 0 0 0 0 0 0 0 0 ...
## $ E : int 0 0 0 0 0 0 0 0 0 0 ...
## $ mental: int 1 1 1 1 1 1 1 1 1 1 ...
dta3$ses <- as.factor(dta3$ses)
dta3$mental <- as.factor(dta3$mental)
levels(dta3$ses) <- c("F","E","D","C","B","A")
dta3$ses <- factor(dta3$ses, levels = c("A", "B", "C", "D", "E", "F"))
#ggplot
ggplot(dta3, aes(x = mental, fill = mental))+
geom_bar(position = "dodge")+
facet_wrap(~ses)+
labs(x = "Mental impairement", y = "Count")+
scale_fill_discrete(name = "", lab = c("well", "mild", "moderate", "impaired"))+
coord_flip()+
theme_bw()

#### model
dta3$ses = relevel(dta3$ses, ref = 6)
summary(r3 <- polr(mental ~ ses, data = dta3,
method = "logistic", Hess = TRUE))
## Call:
## polr(formula = mental ~ ses, data = dta3, Hess = TRUE, method = "logistic")
##
## Coefficients:
## Value Std. Error t value
## sesA -0.8238 0.1662 -4.956
## sesB -0.8408 0.1681 -5.001
## sesC -0.6157 0.1620 -3.802
## sesD -0.5248 0.1532 -3.425
## sesE -0.2571 0.1647 -1.561
##
## Intercepts:
## Value Std. Error t value
## 1|2 -2.0278 0.1334 -15.2044
## 2|3 -0.3285 0.1236 -2.6587
## 3|4 0.6802 0.1249 5.4455
##
## Residual Deviance: 4449.382
## AIC: 4465.382
Question 4
dta4 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/victims.txt",header = T)
str(dta4)
## 'data.frame': 1308 obs. of 2 variables:
## $ nvic: int 0 0 0 0 0 0 0 0 0 0 ...
## $ race: int 0 0 0 0 0 0 0 0 0 0 ...
#table
with(dta4,table(nvic,race))
## race
## nvic 0 1
## 0 1070 119
## 1 60 16
## 2 14 12
## 3 4 7
## 4 0 3
## 5 0 2
## 6 1 0
#
ggplot(data = dta4 , aes(x = race , y =nvic , group = race))+
stat_summary(fun.y = mean,geom = "point")+
stat_summary(fun.data = mean_se, geom = "errorbar",width = 0)+
labs(x = "Race", y = "Mean victims personally known")+
theme_bw()

# poisson regression for count data - full model
summary(r4 <- glm(nvic ~ race, data=dta4, family = poisson))
##
## Call:
## glm(formula = nvic ~ race, family = poisson, data = dta4)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.0218 -0.4295 -0.4295 -0.4295 6.1874
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.38321 0.09713 -24.54 <2e-16 ***
## race 1.73314 0.14657 11.82 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 962.80 on 1307 degrees of freedom
## Residual deviance: 844.71 on 1306 degrees of freedom
## AIC: 1122
##
## Number of Fisher Scoring iterations: 6
# parameter estimates and CIs
show(r4_est <- cbind(Estimate = coef(r4), confint(r4)))
## Waiting for profiling to be done...
## Estimate 2.5 % 97.5 %
## (Intercept) -2.383208 -2.579819 -2.198699
## race 1.733145 1.443698 2.019231
# rates and CIs
exp(r4_est)
## Estimate 2.5 % 97.5 %
## (Intercept) 0.09225413 0.0757877 0.1109474
## race 5.65841937 4.2363330 7.5325339
# check overdispersion
summary(r4_1 <- glm(nvic ~ race, data=dta4, family = quasipoisson))
##
## Call:
## glm(formula = nvic ~ race, family = quasipoisson, data = dta4)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.0218 -0.4295 -0.4295 -0.4295 6.1874
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.3832 0.1283 -18.57 <2e-16 ***
## race 1.7331 0.1937 8.95 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 1.745694)
##
## Null deviance: 962.80 on 1307 degrees of freedom
## Residual deviance: 844.71 on 1306 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 6
# parameter estimates
show(r4_1_est <- cbind(Estimate = coef(r4_1), confint(r4_1)))
## Waiting for profiling to be done...
## Estimate 2.5 % 97.5 %
## (Intercept) -2.383208 -2.645738 -2.141805
## race 1.733145 1.349505 2.110911
# rate estimates
exp(r4_1_est)
## Estimate 2.5 % 97.5 %
## (Intercept) 0.09225413 0.07095298 0.1174427
## race 5.65841937 3.85551586 8.2557556
#
summary(r4_2 <- glm.nb(nvic~race, data = dta4, contrasts = "contr.sum"))
##
## Call:
## glm.nb(formula = nvic ~ race, data = dta4, contrasts = "contr.sum",
## init.theta = 0.2023119205, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.7184 -0.3899 -0.3899 -0.3899 3.5072
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.3832 0.1172 -20.335 < 2e-16 ***
## race 1.7331 0.2385 7.268 3.66e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.2023) family taken to be 1)
##
## Null deviance: 471.57 on 1307 degrees of freedom
## Residual deviance: 412.60 on 1306 degrees of freedom
## AIC: 1001.8
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.2023
## Std. Err.: 0.0409
##
## 2 x log-likelihood: -995.7980
# model comparison
anova(r4_1, r4_2)
## Analysis of Deviance Table
##
## Model 1: nvic ~ race
## Model 2: nvic ~ race
## Resid. Df Resid. Dev Df Deviance
## 1 1306 844.71
## 2 1306 412.60 0 432.11
# fortify data with fitted values and residuals
dta_m <- data.frame(dta4, fit0 = predict(r4, type = "response"),
fit2 = predict(r4_2, type = "response"),
r0 = resid(r4, type="pearson"),
r2 = resid(r4_2, type="pearson"))
Question 5
dta5 <- read.table("http://data.princeton.edu/wws509/datasets/ceb.dat",header = T)
str(dta5)
## 'data.frame': 70 obs. of 7 variables:
## $ dur : Factor w/ 6 levels "0-4","10-14",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ res : Factor w/ 3 levels "rural","Suva",..: 2 2 2 2 3 3 3 3 1 1 ...
## $ educ: Factor w/ 4 levels "lower","none",..: 2 1 4 3 2 1 4 3 2 1 ...
## $ mean: num 0.5 1.14 0.9 0.73 1.17 0.85 1.05 0.69 0.97 0.96 ...
## $ var : num 1.14 0.73 0.67 0.48 1.06 1.59 0.73 0.54 0.88 0.81 ...
## $ n : int 8 21 42 51 12 27 39 51 62 102 ...
## $ y : num 4 23.9 37.8 37.2 14 ...
dta5 <- dta5 %>%mutate(kids = round(y),os = log(n))
levels(dta5$educ) <- c("none","loewr","upper","sec+")
levels(dta5$dur) <- c("0-4","5-9","10-14","15-19","20-24","25-29")
# set dodge amount
pd <- position_dodge(width=.2)
#ggplot
ggplot(dta5,aes(x = res,y = mean, group = educ, color = educ))+
stat_summary(fun.y = mean,geom = "line")+
stat_summary(fun.y = mean,geom = "point",position = pd)+
stat_summary(fun.data = mean_se, geom = "errorbar", position = pd, linetype = "solid", width = 0.5) +
facet_wrap(~dur)+
labs(x = "Residence", y = "Mean number of children") +
theme_bw()

#
summary(r5 <- glm(kids ~ offset(os), family = "poisson",data = dta5))
##
## Call:
## glm(formula = kids ~ offset(os), family = "poisson", data = dta5)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -18.6368 -4.4774 -0.8548 2.5292 21.9744
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.376346 0.009712 141.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 3731.9 on 69 degrees of freedom
## Residual deviance: 3731.9 on 69 degrees of freedom
## AIC: 4163.3
##
## Number of Fisher Scoring iterations: 5
Question 6
dta6 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/grayScale.txt",header = T)
str(dta6)
## 'data.frame': 60 obs. of 4 variables:
## $ oid : int 1 2 3 4 5 6 7 8 9 10 ...
## $ count : int 19 14 3 2 0 0 0 0 0 0 ...
## $ resp : int 1 2 3 4 5 6 7 8 9 10 ...
## $ contrast: int -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 ...
dta6$contrast <- as.factor(dta6$contrast)
dta6$resp <- as.factor(dta6$resp)
levels(dta6$resp) <- c("1","2","3","4","5","6","7","8","9","10","11","12")
dta6$contrast <- as.numeric(dta6$contrast)
## likert scale plotting
m <- as.data.frame(matrix(dta6$count, 12, 5))
names(m) <- c("-2", "-1","0","1","2")
rownames(m) <- c("Dark6", "Dark5", "Dark4", "Dark3","Dark2","Dark1",
"Bright1","Bright2","Bright3","Bright4","Bright5","Bright6")
#
print(m)
## -2 -1 0 1 2
## Dark6 19 3 0 0 0
## Dark5 14 4 0 0 0
## Dark4 3 11 0 0 0
## Dark3 2 5 3 1 0
## Dark2 0 13 15 4 0
## Dark1 0 0 14 1 0
## Bright1 0 2 4 8 0
## Bright2 0 0 3 8 1
## Bright3 0 0 0 11 4
## Bright4 0 0 1 7 12
## Bright5 0 0 0 0 8
## Bright6 0 0 0 0 13
#
likert(t(m), as.percent = FALSE, main = "", ylab = "Stimulus")

likert(t(m), as.percent = TRUE, main = "", ylab = "Stimulus")

#
summary(r6 <- polr( resp ~ contrast, data = dta6, weights = count, Hess = T))
## Call:
## polr(formula = resp ~ contrast, data = dta6, weights = count,
## Hess = T)
##
## Coefficients:
## Value Std. Error t value
## contrast 3.09 0.2354 13.13
##
## Intercepts:
## Value Std. Error t value
## 1|2 3.1486 0.4171 7.5482
## 2|3 4.7247 0.4799 9.8458
## 3|4 5.9731 0.5441 10.9779
## 4|5 6.8676 0.5914 11.6130
## 5|6 9.2785 0.7556 12.2792
## 6|7 10.3400 0.8195 12.6167
## 7|8 11.4883 0.9093 12.6338
## 8|9 12.4468 0.9672 12.8686
## 9|10 13.6508 1.0408 13.1157
## 10|11 15.4324 1.1461 13.4655
## 11|12 16.2394 1.1736 13.8369
##
## Residual Deviance: 594.6589
## AIC: 618.6589