Set up

rm(list=ls(all=TRUE))
setwd("/Users/imyenjh/Desktop/R/Econometrics")
library(dplyr)
library(stargazer)

Q5-5 and Q6

Data Cleaning

data <- read.csv("/Users/imyenjh/Desktop/我最喜歡上課/108-1/Econometric Theory/PS/Problem_set_2/collegehookup.csv")

data$others_hook_m <- 0
data$others_hook_max <- 0
data$others_hook_h_m <- 0
data$others_hook_h_max <- 0

for(i in 1:nrow(data)){
  rmate <- data[-i,]
  rmate <- rmate %>% filter(greek_group==data$greek_group[i])
  if(nrow(rmate)!=0){
    data$others_hook_m[i] <- mean(rmate$hookup_sum)
    data$others_hook_max[i] <- max(rmate$hookup_sum)
    data$others_hook_h_m[i] <- mean(rmate$hookup_highschool)
    data$others_hook_h_max[i] <- max(rmate$hookup_highschool)
  }else{
    rmate <- data %>% filter(greek_group==data$greek_group[i])
    data$others_hook_m[i] <- mean(rmate$hookup_sum)
    data$others_hook_max[i] <- max(rmate$hookup_sum)
    data$others_hook_h_m[i] <- mean(rmate$hookup_highschool)
    data$others_hook_h_max[i] <- max(rmate$hookup_highschool)
  }
}

Poisson Regression

m1 <- glm(hookup_sum ~ Gender+hookup_highschool+BMI+BMI2,family = poisson(link = "log"),data)
m2 <- glm(hookup_sum ~ Gender+hookup_highschool+BMI+BMI2+others_hook_m,family = poisson(link = "log"),data)
m3 <- glm(hookup_sum ~ Gender+hookup_highschool+BMI+BMI2+others_hook_m+Gender*others_hook_m,family = poisson(link = "log"),data)

stargazer(m1,m2,m3,type = "html")
Dependent variable:
hookup_sum
(1) (2) (3)
Gender -0.101 -0.101 -0.635***
(0.074) (0.074) (0.184)
hookup_highschool 0.039*** 0.039*** 0.040***
(0.003) (0.003) (0.003)
BMI 0.548*** 0.548*** 0.578***
(0.126) (0.127) (0.127)
BMI2 -0.010*** -0.010*** -0.011***
(0.003) (0.003) (0.003)
others_hook_m 0.001 -0.054**
(0.018) (0.026)
Gender:others_hook_m 0.120***
(0.037)
Constant -5.781*** -5.778*** -5.908***
(1.466) (1.468) (1.470)
Observations 233 233 233
Log Likelihood -944.403 -944.402 -939.083
Akaike Inf. Crit. 1,898.805 1,900.804 1,892.167
Note: p<0.1; p<0.05; p<0.01

Practice of MLE step by step (but failed to replicate the results)

fn1 <- function(theta) {
  loglikelihood <- 0
  for (i in 1:length(y)) {
    randa <- exp(X1[,i]%*%theta)
    loglikelihood <- loglikelihood + y[i]%*%log(randa) - log(factorial(y[i])) - randa
  }
  return(-loglikelihood)
}

y <- data$hookup_sum

X1 <- select(data,others_hook_m,Gender,BMI,hookup_highschool)
X1 <- bind_cols(inter=rep(1,length(y)),X1)
X1 <- t(as.matrix(X1))

res <- nlm(fn1,c(rep(1,5)),hessian = T)

vartheta <- solve(res$hessian)
sigma <- sqrt(diag(vartheta))
cat(res$estimate)
## 0.9592937 0.8371357 0.9593852 0.08911956 -1.034813