Q1
# package management
require(pacman)
## Loading required package: pacman
# load packages
pacman::p_load(tidyverse, MASS, HH)
dat = read.table("http://140.116.183.121/~sheu/lmm/Data/mentalSES.txt", h = T)
str(dat)
## 'data.frame': 1660 obs. of 8 variables:
## $ Id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ ses : int 6 6 6 6 6 6 6 6 6 6 ...
## $ A : int 1 1 1 1 1 1 1 1 1 1 ...
## $ B : int 0 0 0 0 0 0 0 0 0 0 ...
## $ C : int 0 0 0 0 0 0 0 0 0 0 ...
## $ D : int 0 0 0 0 0 0 0 0 0 0 ...
## $ E : int 0 0 0 0 0 0 0 0 0 0 ...
## $ mental: int 1 1 1 1 1 1 1 1 1 1 ...
t<-with(dat, addmargins(table(ses, mental)))
row.names(t)<-c("F","E","D","C","B","A","total")
colnames(t)<-c("well","mild","moderate","impaired","total")
t
## mental
## ses well mild moderate impaired total
## F 21 71 54 71 217
## E 36 97 54 78 265
## D 72 141 77 94 384
## C 57 105 65 60 287
## B 57 94 54 40 245
## A 64 94 58 46 262
## total 307 602 362 389 1660
likert(t(t), as.percent = TRUE, main = "", ylab = "mental impairment")

dat$mental<-dat$mental %>% as.factor
summary(m0 <- polr(mental ~ ses, data = dat,
method = "probit", Hess = TRUE))
## Call:
## polr(formula = mental ~ ses, data = dat, Hess = TRUE, method = "probit")
##
## Coefficients:
## Value Std. Error t value
## ses -0.1021 0.01644 -6.213
##
## Intercepts:
## Value Std. Error t value
## 1|2 -1.2679 0.0698 -18.1603
## 2|3 -0.2387 0.0654 -3.6490
## 3|4 0.3741 0.0659 5.6761
##
## Residual Deviance: 4450.272
## AIC: 4458.272
Q2
##Within the past 12 months, how many people have you known personally that were victims of homicide?
#Let Yij be the response for subject j of race i.
#It is assumed that Yij follows a Negative Binomial random variable with a parameter μij (expected value).
require(pacman)
pacman::p_load(tidyverse, MASS, magrittr)
dat <- read.table("http://140.116.183.121/~sheu/lmm/Data/victims.txt", h = T)
dat$race %<>% as.factor
head(dat)
## nvic race
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
str(dat)
## 'data.frame': 1308 obs. of 2 variables:
## $ nvic: int 0 0 0 0 0 0 0 0 0 0 ...
## $ race: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
aggregate(nvic ~ race, data = dat, mean)
## race nvic
## 1 0 0.09225413
## 2 1 0.52201258
aggregate(nvic ~ race, data = dat, sd)
## race nvic
## 1 0 0.3940112
## 2 1 1.0723007
#Plot
ggplot(dat, aes(x = race, y = nvic))+
stat_summary(fun.data = mean_se)+
labs(x = "Race", y = "Mean victims personally known")+
theme_bw()

require(qcc)
## Loading required package: qcc
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
qcc.overdispersion.test(dat$nvic,type='poisson')
##
## Overdispersion test Obs.Var/Theor.Var Statistic p-value
## poisson data 2.042251 2669.222 0
#Negative Binomial
summary(m <- glm.nb(nvic~race, data = dat))
##
## Call:
## glm.nb(formula = nvic ~ race, data = dat, init.theta = 0.2023119205,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.7184 -0.3899 -0.3899 -0.3899 3.5072
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.3832 0.1172 -20.335 < 2e-16 ***
## race1 1.7331 0.2385 7.268 3.66e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.2023) family taken to be 1)
##
## Null deviance: 471.57 on 1307 degrees of freedom
## Residual deviance: 412.60 on 1306 degrees of freedom
## AIC: 1001.8
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.2023
## Std. Err.: 0.0409
##
## 2 x log-likelihood: -995.7980
Q3
##Interpret the effect of having above secondary education compared to none,
#holding other terms in the model constant, on the expected number of children born.
#log(E[Yjkl]) = log(njkl) + log(E[Yijkl]),
#where Yijkl follows a Poisson distribution with mean μijkl (for each woman i);
#log(μijkl) = β0 + β1×durationij+ β2×residenceik+ β3×educationil.
#The offset is the number of women in a cell njkl on the logarithmic unit.
dat = read.table("http://data.princeton.edu/wws509/datasets/ceb.dat", h = T)
str(dat)
## 'data.frame': 70 obs. of 7 variables:
## $ dur : Factor w/ 6 levels "0-4","10-14",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ res : Factor w/ 3 levels "rural","Suva",..: 2 2 2 2 3 3 3 3 1 1 ...
## $ educ: Factor w/ 4 levels "lower","none",..: 2 1 4 3 2 1 4 3 2 1 ...
## $ mean: num 0.5 1.14 0.9 0.73 1.17 0.85 1.05 0.69 0.97 0.96 ...
## $ var : num 1.14 0.73 0.67 0.48 1.06 1.59 0.73 0.54 0.88 0.81 ...
## $ n : int 8 21 42 51 12 27 39 51 62 102 ...
## $ y : num 4 23.9 37.8 37.2 14 ...
dat %<>% mutate(ln = log(n))
dat %<>% mutate(ry = round(y))
# conditional histogram
ggplot(dat, aes(mean,n, fill=educ)) +
geom_bar(position="dodge", stat="identity")+
labs(x = "Mean number of children ever born", y = "Number of women in the cell")+
theme_bw()

summary(m0 <- glm(ry~offset(ln), data = dat, family = "poisson"))
##
## Call:
## glm(formula = ry ~ offset(ln), family = "poisson", data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -18.6368 -4.4774 -0.8548 2.5292 21.9744
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.376346 0.009712 141.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 3731.9 on 69 degrees of freedom
## Residual deviance: 3731.9 on 69 degrees of freedom
## AIC: 4163.3
##
## Number of Fisher Scoring iterations: 5
summary(m1 <- glm(ry~dur+res+educ, data = dat, family = "poisson"))
##
## Call:
## glm(formula = ry ~ dur + res + educ, family = "poisson", data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -18.2023 -4.1368 -0.3074 3.7858 16.2193
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.77392 0.04655 102.56 < 2e-16 ***
## dur10-14 1.22136 0.05024 24.31 < 2e-16 ***
## dur15-19 1.31091 0.04975 26.35 < 2e-16 ***
## dur20-24 1.32965 0.04965 26.78 < 2e-16 ***
## dur25-29 1.89210 0.04756 39.78 < 2e-16 ***
## dur5-9 0.89541 0.05240 17.09 < 2e-16 ***
## resSuva -1.44312 0.02785 -51.83 < 2e-16 ***
## resurban -1.04901 0.02405 -43.62 < 2e-16 ***
## educnone 0.16257 0.02168 7.50 6.4e-14 ***
## educsec+ -1.94337 0.05208 -37.32 < 2e-16 ***
## educupper -0.88738 0.02951 -30.07 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 13735.6 on 69 degrees of freedom
## Residual deviance: 2504.1 on 59 degrees of freedom
## AIC: 2955.6
##
## Number of Fisher Scoring iterations: 5
show(m1_est <- cbind(Estimate = coef(m1), confint(m1)))
## Waiting for profiling to be done...
## Estimate 2.5 % 97.5 %
## (Intercept) 4.7739160 4.6815191 4.8640221
## dur10-14 1.2213645 1.1237563 1.3207362
## dur15-19 1.3109110 1.2142997 1.4093489
## dur20-24 1.3296518 1.2332392 1.4279035
## dur25-29 1.8920957 1.7999103 1.9863830
## dur5-9 0.8954115 0.7934300 0.9988723
## resSuva -1.4431223 -1.4980066 -1.3888461
## resurban -1.0490081 -1.0963210 -1.0020520
## educnone 0.1625697 0.1201065 0.2050818
## educsec+ -1.9433683 -2.0468417 -1.8426479
## educupper -0.8873758 -0.9454534 -0.8297636
exp(m1_est)
## Estimate 2.5 % 97.5 %
## (Intercept) 118.3819229 107.9339086 129.5441994
## dur10-14 3.3918129 3.0763883 3.7461784
## dur15-19 3.7095517 3.3679346 4.0932894
## dur20-24 3.7797271 3.4323295 4.1699476
## dur25-29 6.6332554 6.0491049 7.2891210
## dur5-9 2.4483431 2.2109671 2.7152182
## resSuva 0.2361892 0.2235754 0.2493629
## resurban 0.3502850 0.3340980 0.3671253
## educnone 1.1765304 1.1276170 1.2276255
## educsec+ 0.1432207 0.1291421 0.1583975
## educupper 0.4117348 0.3885034 0.4361524
#控制其他變項下,above secondary education者多0.14倍的出生小孩數量
Q4
#
# cumulative probit model : ordinal regression
#
dat = read.table("http://140.116.183.121/~sheu/lmm/Data/grayScale.txt", h = T)
#
require(pacman)
pacman::p_load(tidyverse, MASS, HH)
head(dat)
## oid count resp contrast
## 1 1 19 1 -2
## 2 2 14 2 -2
## 3 3 3 3 -2
## 4 4 2 4 -2
## 5 5 0 5 -2
## 6 6 0 6 -2
str(dat)
## 'data.frame': 60 obs. of 4 variables:
## $ oid : int 1 2 3 4 5 6 7 8 9 10 ...
## $ count : int 19 14 3 2 0 0 0 0 0 0 ...
## $ resp : int 1 2 3 4 5 6 7 8 9 10 ...
## $ contrast: int -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 ...
## likert scale plotting
dat_g = as.data.frame(matrix(dat$count, 12, 5)) #12Response category*5Stimulus level
names(dat_g) = -2:2
rownames(dat_g) <- c('6d', '5d', '4d', '3d','2d', '1d','1b', '2b', '3b', '4b','5b', '6b')
likert(t(dat_g), as.percent = T, main = "", ylab = "Stimulus level")

#
dat$resp %<>% as.factor
summary(m0 <- polr(resp ~ contrast, data = dat, weight = count,
method = "probit", Hess = TRUE))
## Call:
## polr(formula = resp ~ contrast, data = dat, weights = count,
## Hess = TRUE, method = "probit")
##
## Coefficients:
## Value Std. Error t value
## contrast 1.707 0.1118 15.27
##
## Intercepts:
## Value Std. Error t value
## 1|2 -3.3680 0.2448 -13.7585
## 2|3 -2.4946 0.2157 -11.5656
## 3|4 -1.7943 0.1886 -9.5129
## 4|5 -1.2721 0.1689 -7.5337
## 5|6 0.0259 0.1433 0.1804
## 6|7 0.5944 0.1485 4.0014
## 7|8 1.2060 0.1654 7.2927
## 8|9 1.7557 0.1836 9.5635
## 9|10 2.4285 0.2066 11.7558
## 10|11 3.4400 0.2525 13.6242
## 11|12 3.9295 0.2738 14.3505
##
## Residual Deviance: 597.6344
## AIC: 621.6344
confint(m0)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## 1.492628 1.930716
Q6
##log(p/(1-p)) = β0 + β1 Advise + β2 Arrest,
#p = probability of future violence.
dat = read.table("http://140.116.183.121/~sheu/lmm/Data/domesticViolence.txt",header = T)
str(dat)
## 'data.frame': 312 obs. of 3 variables:
## $ X1 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ X0 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ X0.1: int 0 0 0 0 0 0 0 0 0 0 ...
dat %<>% mutate(treatment = ifelse(X0.1==1, "arrest",
ifelse(X0==1, "advise", "separate")))
names(dat) <- c("v", "adv", "ar","treatment")
#Plot
ggplot(dat, aes(x = treatment, y = v))+
stat_summary(fun.data = mean_se)+
labs(x = "Treatment", y = "Proportion of future violence")+
theme_bw()

summary(m <- glm(v~treatment, data = dat, family = "binomial", contrasts = "contr.sum"))
##
## Call:
## glm(formula = v ~ treatment, family = "binomial", data = dat,
## contrasts = "contr.sum")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.7108 -0.7108 -0.6576 -0.4797 2.1067
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.4214 0.2431 -5.846 5.03e-09 ***
## treatmentarrest -0.6827 0.4139 -1.650 0.099 .
## treatmentseparate 0.1744 0.3326 0.524 0.600
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 293.66 on 311 degrees of freedom
## Residual deviance: 288.59 on 309 degrees of freedom
## AIC: 294.59
##
## Number of Fisher Scoring iterations: 4