6/8
## [1] 0.75
4/7
## [1] 0.5714286
3.a. \(\theta = 2.25\)
odd_dpr <- (6/8) / (1- (6/8))
odd_nodpr <- (4/7) / (1 - (4/7))
odd_dpr / odd_nodpr # theta
## [1] 2.25
3.b. False
4.a. p-value = 0.608
dpr <- matrix(c(6, 4, 2, 3), nrow = 2)
dpr
## [,1] [,2]
## [1,] 6 2
## [2,] 4 3
fisher.test(dpr, alternative = "two.sided")
##
## Fisher's Exact Test for Count Data
##
## data: dpr
## p-value = 0.6084
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.1603161 37.2032417
## sample estimates:
## odds ratio
## 2.128487
4.b. Yes
a
c
c
a
b
True
c
setwd("/Users/artisan/Desktop/DataSets")
data1 <- read.table(file = "THA2_data1.txt", header = T)
data1
## X G yes no
## 1 1 1 8 4
## 2 2 1 20 6
## 3 3 0 22 4
## 4 4 1 28 12
## 5 5 0 10 15
## 6 6 0 9 15
## 7 7 0 6 5
The data is grouped data
n <- with(data1, yes + no)
fit.1 <- glm(yes/n ~ X + G, data = data1,family = binomial(link = logit), weights = n)
summary(fit.1)
##
## Call:
## glm(formula = yes/n ~ X + G, family = binomial(link = logit),
## data = data1, weights = n)
##
## Deviance Residuals:
## 1 2 3 4 5 6 7
## -1.3641 -0.1061 1.7712 0.7291 -1.4413 -0.8632 1.0921
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.81050 0.69045 2.622 0.00874 **
## X -0.32698 0.13053 -2.505 0.01225 *
## G 0.09704 0.42157 0.230 0.81795
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 21.3507 on 6 degrees of freedom
## Residual deviance: 9.5559 on 4 degrees of freedom
## AIC: 39.027
##
## Number of Fisher Scoring iterations: 4
9a. Effect of G on the logit is 0.097
9b. For G, \(z^2 =0.053\)
0.23^2
## [1] 0.0529
9c. No
9d. 1.102
exp(0.097)
## [1] 1.10186
9e. 0.721
exp(-0.327)
## [1] 0.7210837
9f. b
9g. \(0.907\)
1 / 1.102
## [1] 0.907441
9h. predicted probability is 0.872
dd <- data.frame(X = 1, G = 4.5)
predict(fit.1, newdata = dd, type ="response")
## 1
## 0.8721583
fit.2 <- glm(yes/n ~ X*G, data = data1,family = binomial(link = logit), weights = n)
summary(fit.2)
##
## Call:
## glm(formula = yes/n ~ X * G, family = binomial(link = logit),
## data = data1, weights = n)
##
## Deviance Residuals:
## 1 2 3 4 5 6 7
## -0.4953 0.5216 1.0148 -0.1351 -1.4564 -0.4565 1.6482
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.6887 0.9008 2.985 0.00284 **
## X -0.5014 0.1721 -2.914 0.00357 **
## G -1.6504 1.1154 -1.480 0.13897
## X:G 0.4654 0.2715 1.714 0.08650 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 21.3507 on 6 degrees of freedom
## Residual deviance: 6.6114 on 3 degrees of freedom
## AIC: 38.083
##
## Number of Fisher Scoring iterations: 4
10a. \(\hat{\beta}_3 = 0.465\)
10b. \(-0.501\)
10c. \(-0.501\)
10d. \(c\)
10e. Yes
10fa. the p-value is 0.086
anova(fit.1, fit.2, test ="LRT")
## Analysis of Deviance Table
##
## Model 1: yes/n ~ X + G
## Model 2: yes/n ~ X * G
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 4 9.5559
## 2 3 6.6114 1 2.9445 0.08617 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
10fb. null: fit.1, alternative: fit.2
10ga. \(df = 3\)
pchisq(6.6114, df = 3, lower.tail = F)
## [1] 0.08537119
10gb. Yes
library(mfx)
## Loading required package: sandwich
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: MASS
## Loading required package: betareg
logitmfx(fit.1, atmean = FALSE, data = data1)
## Call:
## logitmfx(formula = fit.1, data = data1, atmean = FALSE)
##
## Marginal Effects:
## dF/dx Std. Err. z P>|z|
## X -0.069483 0.031139 -2.2314 0.02566 *
## G 0.020761 0.090885 0.2284 0.81931
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## dF/dx is for discrete change for the following variables:
##
## [1] "G"
dat1 <- data1
ungrp.dat1<- data.frame()
for(j in 1:dim(dat1)[1]){
X<- dat1$X[j]*rep(1, dat1$yes[j]+dat1$no[j])
G<- dat1$G[j]*rep(1, dat1$yes[j]+dat1$no[j])
yes<- rep(1, dat1$yes[j])
no<- rep(0, dat1$no[j])
Y<- c(yes, no)
ungrp.dat1 <- rbind(ungrp.dat1, cbind(X, G, Y) )
}
data2 <- ungrp.dat1
m2 <- glm(Y ~ X + G, data = data2, family = binomial(link = logit))
predicted.50 <- as.numeric(fitted(m2) > 0.5)
head(predicted.50)
## [1] 1 1 1 1 1 1
table(data2$Y, predicted.50)
## predicted.50
## 0 1
## 0 20 41
## 1 15 88
(20+88)/(20+88+41+15)
## [1] 0.6585366