Chapter 2

  1. P(Y = 1 | X = 1) = 0.75

6/8
## [1] 0.75

  2. P(Y = 0 | X = 0) = 0.571

4/7
## [1] 0.5714286
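
Both conditional probabilities can also be read off the 2 x 2 table in one call. A small sketch, reusing the counts that appear in the matrix built for 4.a below; the row/column orientation is an assumption based on these answers:

tab <- matrix(c(6, 4, 2, 3), nrow = 2) # same counts and layout as dpr in 4.a
prop.table(tab, margin = 1) # row proportions: 0.75 and 0.571 both appear here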

3.a. \(\theta = 2.25\)

odd_dpr <- (6/8) / (1 - (6/8))   # odds corresponding to the proportion 6/8
odd_nodpr <- (4/7) / (1 - (4/7)) # odds corresponding to the proportion 4/7
odd_dpr / odd_nodpr              # theta, the sample odds ratio
## [1] 2.25
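
The same value falls out of the cross-product ratio of the table's counts, a quick sanity check:

(6 * 3) / (2 * 4) # cross-product ratio = 18/8 = 2.25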

3.b. False

4.a. p-value = 0.608

dpr <- matrix(c(6, 4, 2, 3), nrow = 2) # 2 x 2 table of counts
dpr
##      [,1] [,2]
## [1,]    6    2
## [2,]    4    3
fisher.test(dpr, alternative = "two.sided")
## 
##  Fisher's Exact Test for Count Data
## 
## data:  dpr
## p-value = 0.6084
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   0.1603161 37.2032417
## sample estimates:
## odds ratio 
##   2.128487

4.b. Yes

Chapter 3 & 4

  1. a

  2. c

  3. c

  4. a

  5. b

  6. True

  7. c

setwd("/Users/artisan/Desktop/DataSets")
data1 <- read.table(file = "THA2_data1.txt", header = TRUE)
data1
##   X G yes no
## 1 1 1   8  4
## 2 2 1  20  6
## 3 3 0  22  4
## 4 4 1  28 12
## 5 5 0  10 15
## 6 6 0   9 15
## 7 7 0   6  5

  8. The data are grouped: each row contains binomial counts (yes, no) for one covariate pattern

n <- with(data1, yes + no) # group size for each row
fit.1 <- glm(yes/n ~ X + G, data = data1, family = binomial(link = logit), weights = n)
summary(fit.1)
## 
## Call:
## glm(formula = yes/n ~ X + G, family = binomial(link = logit), 
##     data = data1, weights = n)
## 
## Deviance Residuals: 
##       1        2        3        4        5        6        7  
## -1.3641  -0.1061   1.7712   0.7291  -1.4413  -0.8632   1.0921  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  1.81050    0.69045   2.622  0.00874 **
## X           -0.32698    0.13053  -2.505  0.01225 * 
## G            0.09704    0.42157   0.230  0.81795   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 21.3507  on 6  degrees of freedom
## Residual deviance:  9.5559  on 4  degrees of freedom
## AIC: 39.027
## 
## Number of Fisher Scoring iterations: 4

9a. The estimated effect of G on the logit is 0.097
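
The coefficient can be pulled straight from the fit rather than retyped from the summary:

coef(fit.1)["G"]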

9b. For G, \(z^2 = 0.053\)

0.23^2 # squared Wald z statistic for G, from the summary table
## [1] 0.0529
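
To avoid rounding the z value by hand, the same statistic can be computed from the stored summary table:

summary(fit.1)$coefficients["G", "z value"]^2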

9c. No

9d. 1.102

exp(0.097) # multiplicative effect of a one-unit increase in G on the odds
## [1] 1.10186

9e. 0.721

exp(-0.327) # multiplicative effect of a one-unit increase in X on the odds
## [1] 0.7210837
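
Both odds ratios come out in one call without retyping rounded coefficients, and the reciprocal asked for in 9g below is then a one-liner as well:

exp(coef(fit.1))       # odds ratios for all terms at once
exp(-coef(fit.1)["G"]) # reciprocal for G, used in 9g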

9f. b

9g. \(0.907\)

1 / 1.102
## [1] 0.907441

9h. The predicted probability is 0.872

dd <- data.frame(X = 1, G = 4.5)
predict(fit.1, newdata = dd, type ="response")
##         1 
## 0.8721583
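A rough 95% interval for this prediction can be obtained by working on the link scale and back-transforming; a sketch using the usual Wald approximation:

pr <- predict(fit.1, newdata = dd, type = "link", se.fit = TRUE)
plogis(pr$fit + c(-1.96, 1.96) * pr$se.fit) # approximate 95% CI for the probability
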
fit.2 <- glm(yes/n ~ X*G, data = data1, family = binomial(link = logit), weights = n) # adds the X:G interaction
summary(fit.2)
## 
## Call:
## glm(formula = yes/n ~ X * G, family = binomial(link = logit), 
##     data = data1, weights = n)
## 
## Deviance Residuals: 
##       1        2        3        4        5        6        7  
## -0.4953   0.5216   1.0148  -0.1351  -1.4564  -0.4565   1.6482  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   2.6887     0.9008   2.985  0.00284 **
## X            -0.5014     0.1721  -2.914  0.00357 **
## G            -1.6504     1.1154  -1.480  0.13897   
## X:G           0.4654     0.2715   1.714  0.08650 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 21.3507  on 6  degrees of freedom
## Residual deviance:  6.6114  on 3  degrees of freedom
## AIC: 38.083
## 
## Number of Fisher Scoring iterations: 4

10a. \(\hat{\beta}_3 = 0.465\)
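
As in 9a, the interaction coefficient can be read directly from the fit:

coef(fit.2)["X:G"]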

10b. \(-0.501\)

10c. \(-0.501\)

10d. \(c\)

10e. Yes

10fa. The p-value is 0.086

anova(fit.1, fit.2, test = "LRT") # likelihood ratio test for the interaction term
## Analysis of Deviance Table
## 
## Model 1: yes/n ~ X + G
## Model 2: yes/n ~ X * G
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1         4     9.5559                       
## 2         3     6.6114  1   2.9445  0.08617 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

10fb. Null model: fit.1 (additive); alternative model: fit.2 (with interaction)

10ga. \(df = 3\)

pchisq(6.6114, df = 3, lower.tail = FALSE) # deviance goodness-of-fit test for fit.2
## [1] 0.08537119
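
The hard-coded deviance and df can instead be pulled from the fitted model, which stores both:

with(fit.2, pchisq(deviance, df.residual, lower.tail = FALSE))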

10gb. Yes

  11. \(-0.069\)

library(mfx)
## Loading required package: sandwich
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: MASS
## Loading required package: betareg
logitmfx(fit.1, atmean = FALSE, data = data1) # average marginal effects over the data, not at the mean
## Call:
## logitmfx(formula = fit.1, data = data1, atmean = FALSE)
## 
## Marginal Effects:
##       dF/dx Std. Err.       z   P>|z|  
## X -0.069483  0.031139 -2.2314 0.02566 *
## G  0.020761  0.090885  0.2284 0.81931  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## dF/dx is for discrete change for the following variables:
## 
## [1] "G"

  12. The overall proportion of correct classifications is 0.659

# Expand the grouped data into one Bernoulli observation per subject
dat1 <- data1
ungrp.dat1 <- data.frame()
for (j in 1:nrow(dat1)) {
  X <- dat1$X[j] * rep(1, dat1$yes[j] + dat1$no[j])
  G <- dat1$G[j] * rep(1, dat1$yes[j] + dat1$no[j])
  yes <- rep(1, dat1$yes[j]) # one 1 per "yes"
  no <- rep(0, dat1$no[j])   # one 0 per "no"
  Y <- c(yes, no)
  ungrp.dat1 <- rbind(ungrp.dat1, cbind(X, G, Y))
}
data2 <- ungrp.dat1
m2 <- glm(Y ~ X + G, data = data2, family = binomial(link = logit)) # same model, ungrouped data
predicted.50 <- as.numeric(fitted(m2) > 0.5) # classify at the 0.5 cutoff
head(predicted.50)
## [1] 1 1 1 1 1 1
table(data2$Y, predicted.50)
##    predicted.50
##      0  1
##   0 20 41
##   1 15 88
(20+88)/(20+88+41+15) # correct classifications over the total
## [1] 0.6585366
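
The same proportion comes out directly, without transcribing the confusion-matrix cells:

mean(predicted.50 == data2$Y)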

  13. False