Question 1: Simulation about n

LR Test

GLM set choosen for this question is a binomial response variable with a single independent variable (X). We use the typical logit link function for modeling.

#Examining Deviance test
set.seed(1366)
NREP = 500
sample_n = 100  #This variable shows number of steps for sample size. each steps will increase by 5
beta_A = c(0.1,0.2,0) #null hypothesis is true
beta_B = c(0.1, 0.2,-0.07)  #null hypothesis is false

Wrong_Dec_matrix = matrix(0,sample_n,5)
colnames(Wrong_Dec_matrix) <- c("Sample Size","Wrong_Rejections", "Wrong_acceptance", "Wrong_Decision", "Wrong_Decisions_Ratio")

for (k in 1:sample_n){
  sample_size = k * 5
  Null_Rej_DevianceT = matrix(0, NREP, 2)
  for (i in 1:NREP){
    x1 <- runif(sample_size,min = -2, max=2)
    x2 <- rnorm(sample_size, 0, 2)
    y_A <- rbinom(sample_size, size = 20, prob = 1/(1+exp(-(cbind(1,x1,x2) %*% beta_A))))
    y_B <- rbinom(sample_size, size = 20, prob = 1/(1+exp(-(cbind(1,x1,x2) %*% beta_B))))
  
    Full_model_A <- glm(cbind(y_A , 20-y_A) ~ cbind(x1,x2), family = binomial())
    Red_model_A <- glm(cbind(y_A , 20-y_A) ~ x1, family = binomial())
    
    Full_model_B <- glm(cbind(y_B , 20-y_B) ~ cbind(x1,x2), family = binomial())
    Red_model_B <- glm(cbind(y_B , 20-y_B) ~ x1, family = binomial())
    
    Null_Rej_DevianceT[i,1] <- as.numeric(deviance(Red_model_A) - deviance(Full_model_A) > qchisq(0.95, 1))
    
    Null_Rej_DevianceT[i,2] <- as.numeric((deviance(Red_model_B) - deviance(Full_model_B)) > qchisq(0.95, 1))
  }
  Wrong_Dec_matrix[k,1] = sample_size
  Wrong_Dec_matrix[k,2] = sum(Null_Rej_DevianceT[,1])
  Wrong_Dec_matrix[k,3] = NREP - sum(Null_Rej_DevianceT[,2])
  Wrong_Dec_matrix[k,4] = Wrong_Dec_matrix[k,2] + Wrong_Dec_matrix[k,3]
  Wrong_Dec_matrix[k,5] = Wrong_Dec_matrix[k,4] / NREP
}

print(head(Wrong_Dec_matrix,10))
##       Sample Size Wrong_Rejections Wrong_acceptance Wrong_Decision
##  [1,]           5               34              445            479
##  [2,]          10               26              429            455
##  [3,]          15               25              414            439
##  [4,]          20               26              375            401
##  [5,]          25               24              368            392
##  [6,]          30               23              304            327
##  [7,]          35               35              295            330
##  [8,]          40               19              268            287
##  [9,]          45               29              233            262
## [10,]          50               16              207            223
##       Wrong_Decisions_Ratio
##  [1,]                 0.958
##  [2,]                 0.910
##  [3,]                 0.878
##  [4,]                 0.802
##  [5,]                 0.784
##  [6,]                 0.654
##  [7,]                 0.660
##  [8,]                 0.574
##  [9,]                 0.524
## [10,]                 0.446
print(tail(Wrong_Dec_matrix,10))
##        Sample Size Wrong_Rejections Wrong_acceptance Wrong_Decision
##  [91,]         455               18                0             18
##  [92,]         460               21                0             21
##  [93,]         465               20                0             20
##  [94,]         470               26                0             26
##  [95,]         475               20                0             20
##  [96,]         480               29                0             29
##  [97,]         485               19                0             19
##  [98,]         490               21                0             21
##  [99,]         495               27                0             27
## [100,]         500               20                0             20
##        Wrong_Decisions_Ratio
##  [91,]                 0.036
##  [92,]                 0.042
##  [93,]                 0.040
##  [94,]                 0.052
##  [95,]                 0.040
##  [96,]                 0.058
##  [97,]                 0.038
##  [98,]                 0.042
##  [99,]                 0.054
## [100,]                 0.040

In the following plot, we can see how accuracy of LR test increases with regard to sample size:

plot(Wrong_Dec_matrix[,1], Wrong_Dec_matrix[,5], type = "l", lty=1, lwd=2, xlab = "Sample Size", ylab = "Wrong decision ratio", main = "Accuracy of LR test in different sample sizes")
lines(Wrong_Dec_matrix[,1], Wrong_Dec_matrix[,2]/NREP, col="red",lty=2)
lines(Wrong_Dec_matrix[,1], Wrong_Dec_matrix[,3]/NREP, col="blue",lty=3)
legend("topright",c("Wrong decision ratio","Wrong rejection ratio","Wrong acceptance ratio"),lty=1:3,lwd=c(2,1,1),col = c("black","red","blue"))

As we can see in the plot above when sample size increases, accuracy for LR test also increases and converges to its best amount (i.e. about %5 of wrong decision). We can see that for sample sizes greater than 200 (approximately) we do not see considerable improvement in accuracy. Also we should note that the factor that improves the accuracy of decisions is decrease in the rate of wrong acceptance ratio when the reduced model is not adequate but we do not reject it in favor of full model. Wrong rejection of reduced model does not get significantly better by increaseing sample size.

Wald and Score Test

As we have shown in the second question of this assignment, wald and score statistic are the same for binomial model. So in the following chunk, I am implementing these two tests at the same time.

Wrong_Dec_matrix2 = matrix(0,sample_n,5)
colnames(Wrong_Dec_matrix2) <- c("Sample Size","Wrong_Rejections", "Wrong_acceptance", "Wrong_Decision", "Wrong_Decisions_Ratio")

for (k in 1:sample_n){
  sample_size = k * 5
  Null_Rej_Wald = matrix(0, NREP, 2)
  for (i in 1:NREP){
    x1 <- runif(sample_size,min = -2, max=2)
    x2 <- rnorm(sample_size, 0, 2)
    y_A <- rbinom(sample_size, size = 20, prob = 1/(1+exp(-(cbind(1,x1,x2) %*% beta_A))))
    y_B <- rbinom(sample_size, size = 20, prob = 1/(1+exp(-(cbind(1,x1,x2) %*% beta_B))))
  
    Full_model_A <- glm(cbind(y_A , 20-y_A) ~ cbind(x1,x2), family = binomial())
    Red_model_A <- glm(cbind(y_A , 20-y_A) ~ x1, family = binomial())
    
    Full_model_B <- glm(cbind(y_B , 20-y_B) ~ cbind(x1,x2), family = binomial())
    Red_model_B <- glm(cbind(y_B , 20-y_B) ~ x1, family = binomial())
    
    Null_Rej_Wald[i,1] <- as.numeric(abs(Full_model_A$coefficients[3]/sqrt(summary(Full_model_A)$cov.unscaled[3,3])) > qnorm(0.975))
    
    Null_Rej_Wald[i,2] <- as.numeric(abs(Full_model_B$coefficients[3]/sqrt(summary(Full_model_B)$cov.unscaled[3,3])) > qnorm(0.975))
  }
  Wrong_Dec_matrix2[k,1] = sample_size
  Wrong_Dec_matrix2[k,2] = sum(Null_Rej_Wald[,1])
  Wrong_Dec_matrix2[k,3] = NREP - sum(Null_Rej_Wald[,2])
  Wrong_Dec_matrix2[k,4] = Wrong_Dec_matrix2[k,2] + Wrong_Dec_matrix2[k,3]
  Wrong_Dec_matrix2[k,5] = Wrong_Dec_matrix2[k,4] / NREP
}
print(head(Wrong_Dec_matrix2,10))
##       Sample Size Wrong_Rejections Wrong_acceptance Wrong_Decision
##  [1,]           5               21              458            479
##  [2,]          10               25              451            476
##  [3,]          15               22              393            415
##  [4,]          20               27              365            392
##  [5,]          25               19              350            369
##  [6,]          30               25              307            332
##  [7,]          35               26              289            315
##  [8,]          40               26              262            288
##  [9,]          45               25              243            268
## [10,]          50               21              214            235
##       Wrong_Decisions_Ratio
##  [1,]                 0.958
##  [2,]                 0.952
##  [3,]                 0.830
##  [4,]                 0.784
##  [5,]                 0.738
##  [6,]                 0.664
##  [7,]                 0.630
##  [8,]                 0.576
##  [9,]                 0.536
## [10,]                 0.470
print(tail(Wrong_Dec_matrix2,10))
##        Sample Size Wrong_Rejections Wrong_acceptance Wrong_Decision
##  [91,]         455               14                0             14
##  [92,]         460               25                0             25
##  [93,]         465               30                0             30
##  [94,]         470               20                0             20
##  [95,]         475               26                0             26
##  [96,]         480               18                0             18
##  [97,]         485               23                0             23
##  [98,]         490               19                0             19
##  [99,]         495               20                0             20
## [100,]         500               31                0             31
##        Wrong_Decisions_Ratio
##  [91,]                 0.028
##  [92,]                 0.050
##  [93,]                 0.060
##  [94,]                 0.040
##  [95,]                 0.052
##  [96,]                 0.036
##  [97,]                 0.046
##  [98,]                 0.038
##  [99,]                 0.040
## [100,]                 0.062
plot(Wrong_Dec_matrix2[,1], Wrong_Dec_matrix2[,5], type = "l", lty=1, lwd=2, xlab = "Sample Size", ylab = "Wrong decision ratio", main = "Accuracy of Wald test in different sample sizes")
lines(Wrong_Dec_matrix2[,1], Wrong_Dec_matrix2[,2]/NREP, col="red",lty=2)
lines(Wrong_Dec_matrix2[,1], Wrong_Dec_matrix2[,3]/NREP, col="blue",lty=3)
legend("topright",c("Wrong decision ratio","Wrong rejection ratio","Wrong acceptance ratio"),lty=1:3,lwd=c(2,1,1),col = c("black","red","blue"))

In the plot below, I compare LR and Wald test and we can see that there is no significant differnce between them in terms of convergence to best accuracy by sample size.

plot(Wrong_Dec_matrix[,1], Wrong_Dec_matrix[,5], type = "l", lty=1, col="blue", xlab = "Sample Size", ylab = "Wrong decision ratio", main = "Comparison of LR and Wald test")
lines(Wrong_Dec_matrix2[,1], Wrong_Dec_matrix2[,5], col="red",lty=2)
legend("topright", c("LR Test", "Wald test"), lty = c(1,2), col=c("blue","red"))

##Question 2- Exercise 5.1 (a) \[ f(y_i; n, \pi)=\binom{n}{y_i}\pi^{y_i}(1-\pi)^{n-y_i} \,\,\, for \,\,\,i=1,...,N \\ l(\pi;y)=\sum_{i=1}^N y_ilog\,\pi + (n-y_i)\,log\,(1-\pi)+log\,\binom{n}{y_i}\\ U=\frac{\partial l}{\partial \pi} = \sum_{i=1}^N \frac{y_i}\pi - (n-y_i)\frac 1{1-\pi} \\ U=0 \Rightarrow \hat \pi = \frac{\overline y}{n} \]

And for information matrix we have: \[ I=-E(U')\\ U'= \sum_{i=1}^N -\frac{y_i}{\pi^2}-(n-y_i)\frac 1{(1-\pi)^2} \\E(U') = -\frac{nN\pi}{\pi^2}-\frac{nN-nN\pi}{(1-\pi)^2} = -nN\bigg(\frac1\pi+\frac1{1-\pi}\bigg) \\ \Rightarrow I=nN\bigg(\frac1{\pi(1-\pi)}\bigg) \]

Then Wald statitic equals to: \[ (\hat \pi - \pi)^2(nN\frac1{\pi(1-\pi)})\\ = (\frac{\overline y}n - \pi)^2(nN\frac1{\pi(1-\pi)})\\ = (\overline y - n\pi)^2(N\frac1{n\pi(1-\pi)})\\ =\frac{(\overline y - n\pi)^2}{\frac{n\pi(1-\pi)}{N}} \] (b) \[ U=\frac{N\overline y}{\pi}-\frac{nN-N\overline y}{1-\pi}=N\bigg(\frac{\overline y -n\pi}{\pi(1-\pi)}\bigg)\\ \Rightarrow U^TI^{-1}U = N^2 \times\frac{(\overline y - n\pi)^2}{\pi^2(1-\pi)^2}\times\frac{\pi(1-\pi)}{nN}\\ = \frac{(\overline y - n\pi)^2}{\frac{n\pi(1-\pi)}{N}} \] (c) \[ D=2[l(\hat \pi;y)-l(\pi;y)]=2\bigg[\sum_{i=1}^N y_ilog\frac{\hat \pi}{\pi}+(n-y_i)log\frac{1-\hat \pi}{1-\pi}\bigg]=2\bigg [nN\hat\pi log\frac{\hat \pi}{\pi}+nN(1-\hat\pi) log\frac{1-\hat \pi}{1-\pi}\bigg]=\\ 2nN\bigg [\hat\pi log\frac{\hat \pi}{\pi}+(1-\hat\pi) log\frac{1-\hat \pi}{1-\pi}\bigg] \] (d)
(i) \(\pi=0.1\)
Wald Statistic:

rej1_W <- ((3-10*0.1)^2) / (10*0.1*0.9) > qchisq(0.95,1)
print(rej1_W)
## [1] TRUE
  1. is rejected by Wald test.

Deviance Statistic:

rej1_D <- 2*10*(0.3*log(0.3/0.1)+0.7*log(0.7/0.9)) > qchisq(0.95,1)
print(rej1_D)
## [1] FALSE
  1. is not rejected by Deviance test.

(ii) \(\pi=0.3\)
Wald Statistic:

rej2_W <- ((3-10*0.3)^2) / (10*0.3*0.7) > qchisq(0.95,1)
print(rej2_W)
## [1] FALSE
  1. is not rejected by Wald test.

Deviance Statistic:

rej2_D <- 2*10*(0.3*log(0.3/0.3)+0.7*log(0.7/0.7)) > qchisq(0.95,1)
print(rej2_D)
## [1] FALSE
  1. is not rejected by Deviance test.

(iii) \(\pi=0.5\)
Wald Statistic:

rej3_W <- ((3-10*0.5)^2) / (10*0.5*0.5) > qchisq(0.95,1)
print(rej3_W)
## [1] FALSE
  1. is not rejected by Wald test.

Deviance Statistic:

rej3_D <- 2*10*(0.3*log(0.3/0.5)+0.7*log(0.7/0.5)) > qchisq(0.95,1)
print(rej3_D)
## [1] FALSE
  1. is not rejected by Deviance test.

Question 3: Beetle mortality example

###First

dose <- c(1.6907, 1.7242, 1.7552, 1.7842, 1.8113, 1.8369, 1.8610, 1.8839)
n <- c(59, 60, 62, 56, 63, 59, 62, 60)
y <- c(6, 13, 18, 28, 52, 53, 61, 60)
p <- y / n

fit1 <- glm(cbind(y,n-y) ~ dose, family = binomial(link = "logit"))
fit2 <- glm(cbind(y,n-y) ~ dose, family = binomial(link = "probit"))
fit3 <- glm(cbind(y,n-y) ~ dose, family = binomial(link = "cloglog"))

plot(dose,p, pch=19)
points(dose, fit1$fitted.values, pch=22)
points(dose, fit2$fitted.values, pch=23)
points(dose, fit3$fitted.values, pch=24)
legend("topleft", c("True values", "Logit model", "Probit Model", "Cloglog model"), pch = c(19, 22, 23, 24))

Examining the plot, cloglog models seems a little bit better. Although, it is not evident and in my opinion we cannot graphically infere in this regard

###Second

Logit Link For the second part, we build a linear model which relates Doses (as the dependant variable) to the logit of odds (as the independant variable). Thus, the model will be like this: \[ Dose = \beta_0+\beta_1\,log\bigg(\frac{\pi}{1-\pi}\bigg) \] At first, I am going to visually examine the linear relationship between these two:

plot(log(p/(1-p)),dose)

As expected, the plot confirms the linear relationship.

x_new <- log(p/(1-p))
dose_model <- lm(dose ~ x_new, subset = c(1:7))
summary(dose_model)
## 
## Call:
## lm(formula = dose ~ x_new, subset = c(1:7))
## 
## Residuals:
##         1         2         3         4         5         6         7 
## -0.017971 -0.008446  0.012050  0.017062  0.002474  0.011295 -0.016465 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.767138   0.006113 289.067  9.4e-12 ***
## x_new       0.026838   0.002893   9.276 0.000245 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01572 on 5 degrees of freedom
## Multiple R-squared:  0.9451, Adjusted R-squared:  0.9341 
## F-statistic: 86.05 on 1 and 5 DF,  p-value: 0.0002448

Since Residuals standard error is the estimate of standard deviation of the dependant variable (dose), we will have point estimate and confidence interval for %50 and %5 as follows:

dose_50 = as.numeric(dose_model$coefficients[1] + dose_model$coefficients[2] * log(1))
dose_50_CI <- c(dose_50-1.96*summary(dose_model)$sigma, dose_50+1.96*summary(dose_model)$sigma)
print("point estimation for %50 mortality dose is:")
## [1] "point estimation for %50 mortality dose is:"
dose_50
## [1] 1.767138
print("%95 confidence interval for %50 mortality dose is:")
## [1] "%95 confidence interval for %50 mortality dose is:"
dose_50_CI
## [1] 1.736329 1.797947
dose_5 = as.numeric(dose_model$coefficients[1] + dose_model$coefficients[2] * log(0.05/0.95))
dose_5_CI <- c(dose_5-1.96*summary(dose_model)$sigma, dose_5+1.96*summary(dose_model)$sigma)
print("point estimation for %5 mortality dose is:")
## [1] "point estimation for %5 mortality dose is:"
dose_5
## [1] 1.688116
print("%95 confidence interval for %5 mortality dose is:")
## [1] "%95 confidence interval for %5 mortality dose is:"
dose_5_CI
## [1] 1.657307 1.718924

Probit Link We repeat all the above steps for probit model. \[ Dose = \beta_0+\beta_1\Phi^{-1}(\pi) \]

dose_model2 <- lm(dose ~ qnorm(p), subset = c(1:7))
summary(dose_model2)
## 
## Call:
## lm(formula = dose ~ qnorm(p), subset = c(1:7))
## 
## Residuals:
##         1         2         3         4         5         6         7 
## -0.016064 -0.006248  0.013549  0.015762 -0.002529  0.006788 -0.011260 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.768438   0.005213   339.2 4.22e-12 ***
## qnorm(p)    0.048488   0.004448    10.9 0.000113 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01348 on 5 degrees of freedom
## Multiple R-squared:  0.9596, Adjusted R-squared:  0.9515 
## F-statistic: 118.8 on 1 and 5 DF,  p-value: 0.0001129
dose_50 = as.numeric(dose_model2$coefficients[1] + dose_model2$coefficients[2] * qnorm(0.5))
dose_50_CI <- c(dose_50-1.96*summary(dose_model2)$sigma, dose_50+1.96*summary(dose_model2)$sigma)
print("point estimation for %50 mortality dose is:")
## [1] "point estimation for %50 mortality dose is:"
dose_50
## [1] 1.768438
print("%95 confidence interval for %50 mortality dose is:")
## [1] "%95 confidence interval for %50 mortality dose is:"
dose_50_CI
## [1] 1.742020 1.794856
dose_5 = as.numeric(dose_model2$coefficients[1] + dose_model2$coefficients[2] * qnorm(0.05))
dose_5_CI <- c(dose_5-1.96*summary(dose_model2)$sigma, dose_5+1.96*summary(dose_model2)$sigma)
print("point estimation for %5 mortality dose is:")
## [1] "point estimation for %5 mortality dose is:"
dose_5
## [1] 1.688682
print("%95 confidence interval for %5 mortality dose is:")
## [1] "%95 confidence interval for %5 mortality dose is:"
dose_5_CI
## [1] 1.662265 1.715100

Cloglog Link We repeat all the above steps for cloglog model. \[ Dose = \beta_0+\beta_1\,log[-log(1-\pi)] \]

dose_model3 <- lm(dose ~ log(-log(1-p)), subset = c(1:7))
summary(dose_model3)
## 
## Call:
## lm(formula = dose ~ log(-log(1 - p)), subset = c(1:7))
## 
## Residuals:
##          1          2          3          4          5          6          7 
## -0.0020954 -0.0064349  0.0089486  0.0055897 -0.0097736  0.0034188  0.0003469 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.795465   0.002844   631.3 1.89e-13 ***
## log(-log(1 - p)) 0.045986   0.002243    20.5 5.11e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.007273 on 5 degrees of freedom
## Multiple R-squared:  0.9882, Adjusted R-squared:  0.9859 
## F-statistic: 420.4 on 1 and 5 DF,  p-value: 5.108e-06
dose_50 = as.numeric(dose_model3$coefficients[1] + dose_model3$coefficients[2] * log(-log(0.5)))
dose_50_CI <- c(dose_50-1.96*summary(dose_model3)$sigma, dose_50+1.96*summary(dose_model3)$sigma)
print("point estimation for %50 mortality dose is:")
## [1] "point estimation for %50 mortality dose is:"
dose_50
## [1] 1.77861
print("%95 confidence interval for %50 mortality dose is:")
## [1] "%95 confidence interval for %50 mortality dose is:"
dose_50_CI
## [1] 1.764356 1.792865
dose_5 = as.numeric(dose_model3$coefficients[1] + dose_model3$coefficients[2] * log(-log(0.95)))
dose_5_CI <- c(dose_5-1.96*summary(dose_model3)$sigma, dose_5+1.96*summary(dose_model3)$sigma)
print("point estimation for %5 mortality dose is:")
## [1] "point estimation for %5 mortality dose is:"
dose_5
## [1] 1.658878
print("%95 confidence interval for %5 mortality dose is:")
## [1] "%95 confidence interval for %5 mortality dose is:"
dose_5_CI
## [1] 1.644624 1.673133

*Comparing three models Length of confidence intervals for three models are as follows:

## [1] "logit:"
## [1] 0.03143749
## [1] "probit:"
## [1] 0.02695694
## [1] "cloglog:"
## [1] 0.01454521

Regarding length of CI we can see that cloglog model is more accurate. If we consider \(R^2\), we see that cloglog model has highest amounts which shows that this model explains highest amount of variance in values of dose.

Question 4

library(generalhoslem)
## Loading required package: reshape
## Loading required package: MASS
set.seed(50)
NREP = 500
n_obs = 100

statistic_values <- array(0,NREP)
for (i in 1:NREP){
  x <- rnorm(n_obs, 1, 1)
  beta <- c(0.1,0.2)
  y <- rbinom(n_obs, size = 1, prob = (1/(1+exp(-(beta[1]+beta[2]*x)))))
  fit4 <- glm(y ~ x, family = binomial())
  hoslem <- logitgof(y,fit4$fitted.values)
  statistic_values[i] = hoslem$statistic
}
mean(statistic_values)
## [1] 8.168053
var(statistic_values)
## [1] 13.93862
qqplot(statistic_values, rchisq(500,8))
abline(0,1,lty=2)

This qqpolt compares values of the statistic with chi-square(df=8) and we can confirm that this statistic has distribution of chi-square with (g-2) degrees of freedom.

##Question 5 To implement EM algorithm for probit regression, at first I am going to prepare simulated data for modeling.

x <- rnorm(2000,0,2)
betas <- c(0.1 , 0.2)
z <- rnorm(2000, mean = cbind(1,x) %*% betas, sd=1)
y <- z
y[y>=0] <- 1
y[y<0] <- 0
t <- glm(y~x, family = binomial(link = "probit"))
summary(t)
## 
## Call:
## glm(formula = y ~ x, family = binomial(link = "probit"))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0109  -1.1485   0.7254   1.0386   1.8799  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.1564     0.0290   5.394 6.89e-08 ***
## x             0.2047     0.0155  13.208  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2749.2  on 1999  degrees of freedom
## Residual deviance: 2563.9  on 1998  degrees of freedom
## AIC: 2567.9
## 
## Number of Fisher Scoring iterations: 4

As the next step, we should calculate \(\gamma_i\): \[ \gamma_i=E(z_i|y=y_i) \] \(z_i\)s conditioned on \(y_i\) have truncated normal distribution. So the expected value can be calculated by the equations below. If \(y_i=1\), \[ E(z_i|z_i \geq 0)=x^T\beta+\frac{\phi(x^T\beta)}{\Phi(x^T\beta)} \]

And If \(y_i=0\), \[ E(z_i|z_i<0) =x^T\beta-\frac{\phi(x^T\beta)}{\Phi(-x^T\beta)} \]

b <- c(10,2) #initial values for betas
ex <- cbind(1,x)
mu <- ex %*% b
it=1
converged = FALSE
maxits = 100000
tol=0.0001
gammas=array(0,20)

while ((!converged) & (it < maxits)) {
  b_old = b
  gammas = ifelse(y==1,mu+dnorm(mu)/pnorm(mu), mu-dnorm(mu)/pnorm(-mu)) #E-step
  b = solve(t(ex)%*%ex)%*%t(ex)%*%gammas   #Mstep
  mu = ex %*% b
  it = it + 1
  converged = max(abs(b_old - b)) <= tol
}
b
##        [,1]
##   0.1564594
## x 0.2047532
it
## [1] 19

We can see that the estimations are acceptable and it is not at all sensitive to initial values.

Question 6: Exercise

library(readr)
housedata <- read_csv("/Users/hamed/Documents/Academics/Generalized Linear Models/housedata.csv")
## Parsed with column specification:
## cols(
##   Sat_L = col_double(),
##   Sat_M = col_double(),
##   Sat_H = col_double(),
##   Is_apartment = col_double(),
##   Is_house = col_double(),
##   Contact = col_double()
## )

(a)

table_SC <-data.frame(
  Low_Satisfaction = rep(0,2),
  Medium_Satisfaction = rep(0,2),
  High_Satisfaction = rep(0,2)
)
row.names(table_SC) <- c("Low Contact","High Contact")

temp <- subset(housedata, Contact==0)
table_SC$Low_Satisfaction[1] = sum(temp$Sat_L)/(sum(temp$Sat_L)+sum(temp$Sat_M)+sum(temp$Sat_H))
table_SC$Medium_Satisfaction[1] = sum(temp$Sat_M)/(sum(temp$Sat_L)+sum(temp$Sat_M)+sum(temp$Sat_H))
table_SC$High_Satisfaction[1] = sum(temp$Sat_H)/(sum(temp$Sat_L)+sum(temp$Sat_M)+sum(temp$Sat_H))

temp <- subset(housedata, Contact==1)
table_SC$Low_Satisfaction[2] = sum(temp$Sat_L)/(sum(temp$Sat_L)+sum(temp$Sat_M)+sum(temp$Sat_H))
table_SC$Medium_Satisfaction[2] = sum(temp$Sat_M)/(sum(temp$Sat_L)+sum(temp$Sat_M)+sum(temp$Sat_H))
table_SC$High_Satisfaction[2] = sum(temp$Sat_H)/(sum(temp$Sat_L)+sum(temp$Sat_M)+sum(temp$Sat_H))

table_SC
##              Low_Satisfaction Medium_Satisfaction High_Satisfaction
## Low Contact         0.3674614           0.2496494         0.3828892
## High Contact        0.3150826           0.2768595         0.4080579
table_TC <-data.frame(
  Tower_block = rep(0,2),
  Apartment = rep(0,2),
  House = rep(0,2)
)
row.names(table_TC) <- c("Low Contact","High Contact")

temp <- subset(housedata, Is_apartment==0 & Is_house==0)
table_TC$Tower_block[1] = sum(subset(temp,Contact==0,select = c("Sat_L","Sat_M", "Sat_H"))) /sum(subset(temp,select = c("Sat_L","Sat_M", "Sat_H")))
table_TC$Tower_block[2] = sum(subset(temp,Contact==1,select = c("Sat_L","Sat_M", "Sat_H"))) /sum(subset(temp,select = c("Sat_L","Sat_M", "Sat_H")))

temp <- subset(housedata, Is_apartment==1)
table_TC$Apartment[1] = sum(subset(temp,Contact==0,select = c("Sat_L","Sat_M", "Sat_H"))) /sum(subset(temp,select = c("Sat_L","Sat_M", "Sat_H")))
table_TC$Apartment[2] = sum(subset(temp,Contact==1,select = c("Sat_L","Sat_M", "Sat_H"))) /sum(subset(temp,select = c("Sat_L","Sat_M", "Sat_H")))

temp <- subset(housedata, Is_house==1)
table_TC$House[1] = sum(subset(temp,Contact==0,select = c("Sat_L","Sat_M", "Sat_H"))) /sum(subset(temp,select = c("Sat_L","Sat_M", "Sat_H")))
table_TC$House[2] = sum(subset(temp,Contact==1,select = c("Sat_L","Sat_M", "Sat_H"))) /sum(subset(temp,select = c("Sat_L","Sat_M", "Sat_H")))

table_TC
##              Tower_block Apartment     House
## Low Contact       0.5475 0.4143791 0.3430233
## High Contact      0.4525 0.5856209 0.6569767
table_TS <-data.frame(
  Low_Satisfaction = rep(0,3),
  Medium_Satisfaction = rep(0,3),
  High_Satisfaction = rep(0,3)
)
row.names(table_TS) <- c("Tower_block","Apartment","House")

temp <- subset(housedata, Is_apartment==0 & Is_house==0)
table_TS$Low_Satisfaction[1]=sum(subset(temp,select = "Sat_L"))/sum(subset(temp,select = c("Sat_L","Sat_M", "Sat_H")))
table_TS$Medium_Satisfaction[1]=sum(subset(temp,select = "Sat_M"))/sum(subset(temp,select = c("Sat_L","Sat_M", "Sat_H")))
table_TS$High_Satisfaction[1]=sum(subset(temp,select = "Sat_H"))/sum(subset(temp,select = c("Sat_L","Sat_M", "Sat_H")))

temp <- subset(housedata, Is_apartment==1)
table_TS$Low_Satisfaction[2]=sum(subset(temp,select = "Sat_L"))/sum(subset(temp,select = c("Sat_L","Sat_M", "Sat_H")))
table_TS$Medium_Satisfaction[2]=sum(subset(temp,select = "Sat_M"))/sum(subset(temp,select = c("Sat_L","Sat_M", "Sat_H")))
table_TS$High_Satisfaction[2]=sum(subset(temp,select = "Sat_H"))/sum(subset(temp,select = c("Sat_L","Sat_M", "Sat_H")))

temp <- subset(housedata, Is_house==1)
table_TS$Low_Satisfaction[3]=sum(subset(temp,select = "Sat_L"))/sum(subset(temp,select = c("Sat_L","Sat_M", "Sat_H")))
table_TS$Medium_Satisfaction[3]=sum(subset(temp,select = "Sat_M"))/sum(subset(temp,select = c("Sat_L","Sat_M", "Sat_H")))
table_TS$High_Satisfaction[3]=sum(subset(temp,select = "Sat_H"))/sum(subset(temp,select = c("Sat_L","Sat_M", "Sat_H")))

table_TS
##             Low_Satisfaction Medium_Satisfaction High_Satisfaction
## Tower_block        0.2475000           0.2525000         0.5000000
## Apartment          0.3542484           0.2509804         0.3947712
## House              0.3817829           0.2965116         0.3217054

(b)

library(nnet)
library(readr)
newhousedata <- read_delim("/Users/hamed/Documents/Academics/Generalized Linear Models/newhousedata.csv", 
    "\t", escape_double = FALSE, trim_ws = TRUE)
## Parsed with column specification:
## cols(
##   n = col_double(),
##   Satisfaction = col_character(),
##   Is_apartment = col_double(),
##   Is_house = col_double(),
##   housetype = col_character(),
##   Contact = col_character(),
##   N = col_double()
## )
newhousedata$Satisfaction <- relevel(as.factor(newhousedata$Satisfaction), ref = "low")
newhousedata$housetype <- relevel(as.factor(newhousedata$housetype), ref = "Towerblock")
newhousedata$Contact <- relevel(as.factor(newhousedata$Contact), ref = "low")

nom_model <- multinom(Satisfaction~housetype+Contact, data = newhousedata, weights = n, Hess = TRUE)
## # weights:  15 (8 variable)
## initial  value 1846.767257 
## iter  10 value 1803.278543
## final  value 1802.740161 
## converged
summary(nom_model)
## Call:
## multinom(formula = Satisfaction ~ housetype + Contact, data = newhousedata, 
##     weights = n, Hess = TRUE)
## 
## Coefficients:
##        (Intercept) housetypeApartment housetypeHouse Contacthigh
## high     0.5607737         -0.6415967     -0.9456177   0.3282263
## medium  -0.1072644         -0.4067537     -0.3370771   0.2959803
## 
## Std. Errors:
##        (Intercept) housetypeApartment housetypeHouse Contacthigh
## high     0.1329301          0.1500773      0.1644850   0.1181870
## medium   0.1524077          0.1713011      0.1803577   0.1301046
## 
## Residual Deviance: 3605.48 
## AIC: 3621.48

(c) Yes, Ordinal Model might be more appropriate, because satisfaction has an ordinal nature.

library(MASS)
newhousedata <- read_delim("newhousedata.csv", 
    "\t", escape_double = FALSE, trim_ws = TRUE)
## Parsed with column specification:
## cols(
##   n = col_double(),
##   Satisfaction = col_character(),
##   Is_apartment = col_double(),
##   Is_house = col_double(),
##   housetype = col_character(),
##   Contact = col_character(),
##   N = col_double()
## )
newhousedata$Satisfaction <- as.ordered(newhousedata$Satisfaction)


ord_model <- polr(Satisfaction ~ housetype + Contact, data = newhousedata, weights = n)
summary(ord_model)
## 
## Re-fitting to get Hessian
## Call:
## polr(formula = Satisfaction ~ housetype + Contact, data = newhousedata, 
##     weights = n)
## 
## Coefficients:
##                        Value Std. Error t value
## housetypeHouse       0.27590    0.10434  2.6442
## housetypeTowerblock -0.29241    0.11786 -2.4810
## Contactlow           0.07453    0.09235  0.8071
## 
## Intercepts:
##            Value   Std. Error t value
## high|low   -0.3671  0.0802    -4.5796
## low|medium  1.0817  0.0843    12.8294
## 
## Residual Deviance: 3628.55 
## AIC: 3638.55

In the lines below, I am going to calculate p-values for coefficients and intercepts:

ctable <- coef(summary(ord_model))
## 
## Re-fitting to get Hessian
p <- pnorm(abs(ctable[,"t value"]), lower.tail = FALSE) * 2
ctable <- cbind(ctable, "p-value" = p)
ctable
##                           Value Std. Error   t value      p-value
## housetypeHouse       0.27590184 0.10434090  2.644235 8.187583e-03
## housetypeTowerblock -0.29240512 0.11785638 -2.481029 1.310037e-02
## Contactlow           0.07453312 0.09235173  0.807057 4.196336e-01
## high|low            -0.36711750 0.08016366 -4.579600 4.658661e-06
## low|medium           1.08168216 0.08431290 12.829379 1.122608e-37

According to the results, all the coefficients and intercepts are significant (at %95 level) except for intercept for medium to high which can be accepted to equal to zero at this level of significance. This code calculates standardized residuals for this model:

obs <- newhousedata$n / newhousedata$N
e <- predict(ord_model, data = newhousedata$Satisfaction, type = "prob")
est <- array(0,18)
for (i in 1:nrow(newhousedata)){
  est[i] = e[i,as.character(newhousedata$Satisfaction[i])]
}
resids <- (obs - est) / sqrt(est)
resids
##  [1] -0.046097209  0.069859635 -0.009021106 -0.228981667  0.128411728
##  [6]  0.102562205  0.118107280 -0.053728395 -0.065843122 -0.039314745
## [11]  0.011409002  0.026733023  0.053266795 -0.094268267  0.039029369
## [16]  0.062564040  0.001698972 -0.064361035
plot(obs,resids)
abline(0,0,lty=2)

We can see in the plot that residuals do not show specific pattern against observed probability in lower probabilities. For probabilities higher than 0.4 we can see some kind of increasing pattern.