library(pacman)
pacman::p_load(tidyverse,nlme,lme4,MASS,HH,MPDiR,nnet)

Question 1

dta1 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/domesticViolence.txt",header = T)

#
str(dta1)
## 'data.frame':    312 obs. of  3 variables:
##  $ X1  : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ X0  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ X0.1: int  0 0 0 0 0 0 0 0 0 0 ...
#
dta1t <- matrix(0,312,3)
dta1t <- as.data.frame(dta1t)
for (i in 1:length(dta1[,1])){
  if (dta1[i,2]== 0 & dta1[i,3] == 0){
    dta1t[i,1] = "Separate"
    dta1t[i,2] = dta1[i,1]
  } else if (dta1[i,2] == 1 &dta1[i,3] == 0){
    dta1t[i,1] = "Advise"
    dta1t[i,2] = dta1[i,1]
  } else{
    dta1t[i,1] = "Arrest"
    dta1t[i,2] = dta1[i,1]
  }
}

for (i in 1:length(dta1t[,1])){
  if (dta1t[i,2] == 1){
    dta1t[i,2] = 1
  }else{
    dta1t[i,2] = 0
    dta1t[i,3] = 1
  }
}



colnames(dta1t) <- c("Treatment","Future_violence","No_Future_violence") 
str(dta1t)
## 'data.frame':    312 obs. of  3 variables:
##  $ Treatment         : chr  "Separate" "Separate" "Separate" "Separate" ...
##  $ Future_violence   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ No_Future_violence: num  0 0 0 0 0 0 0 0 0 0 ...
dta1t$Treatment <- as.factor(dta1t$Treatment)
dta1t$Treatment <- factor(dta1t$Treatment, levels(dta1t$Treatment)[c(3,2,1)])




#table
aggregate(cbind(Future_violence,No_Future_violence) ~ Treatment, data = dta1t, sum)
##   Treatment Future_violence No_Future_violence
## 1  Separate              25                 87
## 2    Arrest              10                 82
## 3    Advise              21                 87
##ggplot
ggplot(data = dta1t , aes(x = Treatment , y = Future_violence , group = Treatment))+
  stat_summary(fun.y = mean,geom = "point")+
  stat_summary(fun.data = mean_se, geom = "errorbar",width = 0)+
  labs( x = "" , y = "Propotion",title = "Future Violence Against Spouse By Treatment")+
  theme_bw()

#
summary(r1 <- glm(Future_violence ~ Treatment, data = dta1t,
                  family = "binomial"))
## 
## Call:
## glm(formula = Future_violence ~ Treatment, family = "binomial", 
##     data = dta1t)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.7108  -0.7108  -0.6576  -0.4797   2.1067  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -1.2470     0.2269  -5.495  3.9e-08 ***
## TreatmentArrest  -0.8571     0.4046  -2.119   0.0341 *  
## TreatmentAdvise  -0.1744     0.3326  -0.524   0.6001    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 293.66  on 311  degrees of freedom
## Residual deviance: 288.59  on 309  degrees of freedom
## AIC: 294.59
## 
## Number of Fisher Scoring iterations: 4

Question 2

dta2 <- HSP
str(dta2)
## 'data.frame':    30 obs. of  5 variables:
##  $ Q  : num  46.9 73.1 113.8 177.4 276.1 ...
##  $ p  : num  0 9.4 33.3 73.5 100 100 0 7.5 40 80 ...
##  $ N  : int  35 35 35 35 35 35 40 40 40 40 ...
##  $ Obs: Factor w/ 3 levels "SH","SS","MHP": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Run: Factor w/ 2 levels "R1","R2": 1 1 1 1 1 1 2 2 2 2 ...
##ggplot
ggplot(dta2, aes(Q, p/100, color = Run)) + 
  geom_jitter() + 
  stat_smooth(method = "glm", formula = y ~ x, 
              method.args = list(family = binomial),
               se = FALSE) +
  labs(x = "Q", y = "prob") +
  theme(legend.position = c(.1, .9))+
  facet_wrap(~ Obs)+
  theme_bw()

##
summary(r2 <- glm(cbind(p,(100-p))~ Q, data = dta2,family = "binomial"(link = "probit")))
## 
## Call:
## glm(formula = cbind(p, (100 - p)) ~ Q, family = binomial(link = "probit"), 
##     data = dta2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -5.5981  -2.2669  -0.1871   1.1863   6.0055  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.4570295  0.0772710  -31.80   <2e-16 ***
## Q            0.0210146  0.0007056   29.78   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2569.32  on 29  degrees of freedom
## Residual deviance:  231.45  on 28  degrees of freedom
## AIC: 313.55
## 
## Number of Fisher Scoring iterations: 6

Question 3

dta3 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/mentalSES.txt",header = T)
str(dta3)
## 'data.frame':    1660 obs. of  8 variables:
##  $ Id    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ ses   : int  6 6 6 6 6 6 6 6 6 6 ...
##  $ A     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ B     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ C     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ D     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ E     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ mental: int  1 1 1 1 1 1 1 1 1 1 ...
dta3$ses <- as.factor(dta3$ses)
dta3$mental <- as.factor(dta3$mental)
levels(dta3$ses) <- c("F","E","D","C","B","A")
dta3$ses <- factor(dta3$ses, levels = c("A", "B", "C", "D", "E", "F"))



#ggplot
ggplot(dta3, aes(x = mental, fill = mental))+
  geom_bar(position = "dodge")+
  facet_wrap(~ses)+
  labs(x = "Mental impairement", y = "Count")+
  scale_fill_discrete(name = "", lab = c("well", "mild", "moderate", "impaired"))+
  coord_flip()+
  theme_bw()

#### model
dta3$ses = relevel(dta3$ses, ref = 6)
summary(r3 <- polr(mental ~ ses, data = dta3, 
                   method = "logistic", Hess = TRUE))
## Call:
## polr(formula = mental ~ ses, data = dta3, Hess = TRUE, method = "logistic")
## 
## Coefficients:
##        Value Std. Error t value
## sesA -0.8238     0.1662  -4.956
## sesB -0.8408     0.1681  -5.001
## sesC -0.6157     0.1620  -3.802
## sesD -0.5248     0.1532  -3.425
## sesE -0.2571     0.1647  -1.561
## 
## Intercepts:
##     Value    Std. Error t value 
## 1|2  -2.0278   0.1334   -15.2044
## 2|3  -0.3285   0.1236    -2.6587
## 3|4   0.6802   0.1249     5.4455
## 
## Residual Deviance: 4449.382 
## AIC: 4465.382

Question 4

dta4 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/victims.txt",header = T)
str(dta4)
## 'data.frame':    1308 obs. of  2 variables:
##  $ nvic: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ race: int  0 0 0 0 0 0 0 0 0 0 ...
#table
with(dta4,table(nvic,race))
##     race
## nvic    0    1
##    0 1070  119
##    1   60   16
##    2   14   12
##    3    4    7
##    4    0    3
##    5    0    2
##    6    1    0
#
ggplot(data = dta4 , aes(x = race , y =nvic , group = race))+
  stat_summary(fun.y = mean,geom = "point")+
  stat_summary(fun.data = mean_se, geom = "errorbar",width = 0)+
  labs(x = "Race", y = "Mean victims personally known")+
  theme_bw()

# poisson regression for count data - full model
summary(r4 <- glm(nvic ~ race, data=dta4, family = poisson))
## 
## Call:
## glm(formula = nvic ~ race, family = poisson, data = dta4)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0218  -0.4295  -0.4295  -0.4295   6.1874  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.38321    0.09713  -24.54   <2e-16 ***
## race         1.73314    0.14657   11.82   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 962.80  on 1307  degrees of freedom
## Residual deviance: 844.71  on 1306  degrees of freedom
## AIC: 1122
## 
## Number of Fisher Scoring iterations: 6
# parameter estimates and CIs
show(r4_est <- cbind(Estimate = coef(r4), confint(r4)))                    
## Waiting for profiling to be done...
##              Estimate     2.5 %    97.5 %
## (Intercept) -2.383208 -2.579819 -2.198699
## race         1.733145  1.443698  2.019231
# rates and CIs
exp(r4_est)
##               Estimate     2.5 %    97.5 %
## (Intercept) 0.09225413 0.0757877 0.1109474
## race        5.65841937 4.2363330 7.5325339
# check overdispersion
summary(r4_1 <- glm(nvic ~ race, data=dta4, family = quasipoisson))
## 
## Call:
## glm(formula = nvic ~ race, family = quasipoisson, data = dta4)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0218  -0.4295  -0.4295  -0.4295   6.1874  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -2.3832     0.1283  -18.57   <2e-16 ***
## race          1.7331     0.1937    8.95   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 1.745694)
## 
##     Null deviance: 962.80  on 1307  degrees of freedom
## Residual deviance: 844.71  on 1306  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 6
# parameter estimates
show(r4_1_est <- cbind(Estimate = coef(r4_1), confint(r4_1)))
## Waiting for profiling to be done...
##              Estimate     2.5 %    97.5 %
## (Intercept) -2.383208 -2.645738 -2.141805
## race         1.733145  1.349505  2.110911
# rate estimates
exp(r4_1_est)                   
##               Estimate      2.5 %    97.5 %
## (Intercept) 0.09225413 0.07095298 0.1174427
## race        5.65841937 3.85551586 8.2557556
#
summary(r4_2 <- glm.nb(nvic~race, data = dta4, contrasts = "contr.sum"))
## 
## Call:
## glm.nb(formula = nvic ~ race, data = dta4, contrasts = "contr.sum", 
##     init.theta = 0.2023119205, link = log)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.7184  -0.3899  -0.3899  -0.3899   3.5072  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -2.3832     0.1172 -20.335  < 2e-16 ***
## race          1.7331     0.2385   7.268 3.66e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(0.2023) family taken to be 1)
## 
##     Null deviance: 471.57  on 1307  degrees of freedom
## Residual deviance: 412.60  on 1306  degrees of freedom
## AIC: 1001.8
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  0.2023 
##           Std. Err.:  0.0409 
## 
##  2 x log-likelihood:  -995.7980
# model comparison
anova(r4_1, r4_2)
## Analysis of Deviance Table
## 
## Model 1: nvic ~ race
## Model 2: nvic ~ race
##   Resid. Df Resid. Dev Df Deviance
## 1      1306     844.71            
## 2      1306     412.60  0   432.11
# fortify data with fitted values and residuals
dta_m <- data.frame(dta4, fit0 = predict(r4, type = "response"), 
                    fit2 = predict(r4_2, type = "response"), 
                    r0 = resid(r4, type="pearson"),
                    r2 = resid(r4_2, type="pearson"))

Question 5

dta5 <- read.table("http://data.princeton.edu/wws509/datasets/ceb.dat",header = T)
str(dta5)
## 'data.frame':    70 obs. of  7 variables:
##  $ dur : Factor w/ 6 levels "0-4","10-14",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ res : Factor w/ 3 levels "rural","Suva",..: 2 2 2 2 3 3 3 3 1 1 ...
##  $ educ: Factor w/ 4 levels "lower","none",..: 2 1 4 3 2 1 4 3 2 1 ...
##  $ mean: num  0.5 1.14 0.9 0.73 1.17 0.85 1.05 0.69 0.97 0.96 ...
##  $ var : num  1.14 0.73 0.67 0.48 1.06 1.59 0.73 0.54 0.88 0.81 ...
##  $ n   : int  8 21 42 51 12 27 39 51 62 102 ...
##  $ y   : num  4 23.9 37.8 37.2 14 ...
dta5 <- dta5 %>%mutate(kids = round(y),os = log(n))
levels(dta5$educ) <- c("none","loewr","upper","sec+")
levels(dta5$dur) <- c("0-4","5-9","10-14","15-19","20-24","25-29")

# set dodge amount
pd <- position_dodge(width=.2)

#ggplot
ggplot(dta5,aes(x = res,y = mean, group = educ, color = educ))+
  stat_summary(fun.y = mean,geom = "line")+
  stat_summary(fun.y = mean,geom = "point",position = pd)+
  stat_summary(fun.data = mean_se, geom = "errorbar", position = pd, linetype = "solid", width = 0.5) +
  facet_wrap(~dur)+
  labs(x = "Residence", y = "Mean number of children") +
  theme_bw()

#
summary(r5 <- glm(kids ~ offset(os), family = "poisson",data = dta5))
## 
## Call:
## glm(formula = kids ~ offset(os), family = "poisson", data = dta5)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -18.6368   -4.4774   -0.8548    2.5292   21.9744  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) 1.376346   0.009712   141.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 3731.9  on 69  degrees of freedom
## Residual deviance: 3731.9  on 69  degrees of freedom
## AIC: 4163.3
## 
## Number of Fisher Scoring iterations: 5

Question 6

dta6 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/grayScale.txt",header = T)
str(dta6)
## 'data.frame':    60 obs. of  4 variables:
##  $ oid     : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ count   : int  19 14 3 2 0 0 0 0 0 0 ...
##  $ resp    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ contrast: int  -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 ...
dta6$contrast <- as.factor(dta6$contrast)
dta6$resp <- as.factor(dta6$resp)
levels(dta6$resp) <- c("1","2","3","4","5","6","7","8","9","10","11","12")
dta6$contrast <- as.numeric(dta6$contrast)

## likert scale plotting
m <- as.data.frame(matrix(dta6$count, 12, 5))

names(m) <- c("-2", "-1","0","1","2")

rownames(m) <- c("Dark6", "Dark5", "Dark4", "Dark3","Dark2","Dark1",
                 "Bright1","Bright2","Bright3","Bright4","Bright5","Bright6")

#
print(m)
##         -2 -1  0  1  2
## Dark6   19  3  0  0  0
## Dark5   14  4  0  0  0
## Dark4    3 11  0  0  0
## Dark3    2  5  3  1  0
## Dark2    0 13 15  4  0
## Dark1    0  0 14  1  0
## Bright1  0  2  4  8  0
## Bright2  0  0  3  8  1
## Bright3  0  0  0 11  4
## Bright4  0  0  1  7 12
## Bright5  0  0  0  0  8
## Bright6  0  0  0  0 13
#
likert(t(m), as.percent = FALSE, main = "", ylab = "Stimulus")

likert(t(m), as.percent = TRUE, main = "", ylab = "Stimulus")

#
summary(r6 <- polr( resp ~ contrast, data = dta6, weights = count, Hess = T))
## Call:
## polr(formula = resp ~ contrast, data = dta6, weights = count, 
##     Hess = T)
## 
## Coefficients:
##          Value Std. Error t value
## contrast  3.09     0.2354   13.13
## 
## Intercepts:
##       Value   Std. Error t value
## 1|2    3.1486  0.4171     7.5482
## 2|3    4.7247  0.4799     9.8458
## 3|4    5.9731  0.5441    10.9779
## 4|5    6.8676  0.5914    11.6130
## 5|6    9.2785  0.7556    12.2792
## 6|7   10.3400  0.8195    12.6167
## 7|8   11.4883  0.9093    12.6338
## 8|9   12.4468  0.9672    12.8686
## 9|10  13.6508  1.0408    13.1157
## 10|11 15.4324  1.1461    13.4655
## 11|12 16.2394  1.1736    13.8369
## 
## Residual Deviance: 594.6589 
## AIC: 618.6589