Set up

setwd("~/Desktop/R/Econometrics")
library(ggplot2)
library(dplyr)
library(reshape)
library(stargazer)

Q3-1

set.seed(123)
X <- rchisq(100, 5)
Y <- rchisq(10000, 5)

mean(X)
## [1] 4.455893
mean(Y)
## [1] 4.950174
which.min(abs(c(mean(X),mean(Y))-5))
## [1] 2

Q3-2

set.seed(1234)
CLT <- matrix(replicate(1000,rchisq(1000, 5)),nrow=1000)

Z <- colMeans(CLT)
set.seed(1)
chosen <- CLT[,sample(1:1000,1)]

ggplot(data.frame(X1=chosen),aes(x=X1))+
  geom_histogram(aes(y= ..density..),bins=50,color="grey",alpha=.7,fill="grey10")+
  stat_function(fun = dchisq,linetype="dashed", args = list(df = 5))+
  labs(title="Density histogram and theoretical distribution in dashed line",
       x="value")

ggplot(data.frame(X1=Z),aes(x=X1))+
  geom_histogram(aes(y= ..density..),color="grey",alpha=.7,fill="grey10")+
  stat_function(fun = dnorm,linetype="dashed",args = list(mean = mean(Z, na.rm = TRUE),sd = sd(Z, na.rm = TRUE)))+
  labs(title="Density plot of 1000 mean and normal distribution in dashed line",
       x="value")

mean(Z)
## [1] 5.004214
mean(Z^2)-(mean(Z)^2)
## [1] 0.00993916

Q4-1

set.seed(12)

x1 <- rnorm(100, 2, 1)
x2 <- rpois(100,4)
et <- rnorm(100,0,1)

y <- 1+x1-2*x2+et

Q4-2

model <- lm(y~x1+x2)
round(coef(model),4)
## (Intercept)          x1          x2 
##      1.2044      0.9021     -2.0079
res.4.1 <- round(vcov(model)[-1,-1],4)

Q4-3

x1.2 <- rnorm(100, 2, 36)
y.2 <- 1+x1.2-2*x2+et
model.2 <- lm(y.2~x1.2+x2)
round(coef(model.2),4)
## (Intercept)        x1.2          x2 
##      1.0139      1.0036     -2.0085
res.4.2 <- round(vcov(model.2)[-1,-1],7)

Q4-4

res.df <- data.frame(alpha=rep(0,1000),beta1=rep(0,1000),beta2=rep(0,1000))

set.seed(13)

for(i in 1:1000){
  x1.f <- rnorm(100, 2, 1)
  x2.f <- rpois(100,4)
  et.f <- rnorm(100,0,1)
  y.f <- 1+x1.f-2*x2.f+et.f
  model.f <- lm(y.f~x1.f+x2.f)
  res.df[i,] <- model.f$coefficients
}

res.df.m <- melt(res.df)
colnames(res.df.m) <- c("Estimators","value" )

ggplot(res.df.m,aes(x=value,color=Estimators,fill=Estimators))+
  geom_density(alpha=0.5)+
  scale_color_brewer(palette="Dark2")+
  scale_fill_brewer(palette="Dark2")+
  geom_hline(yintercept=0, colour="white", size=1)+
  geom_vline(xintercept = c(-2,1),size=.5,linetype="dashed")+
  labs(title="Density plot of estimators")

Q5

smoke <- read.csv("/Users/imyenjh/Desktop/我最喜歡上課/108-1/Econometric Theory/PS/problem_set_1/smoke.csv")
smoke <- as.tbl(smoke)

res.1 <- glm(smoke_dum~smoke_parent, data=smoke, family=binomial(link="logit"), na.action=na.exclude)
res.2 <- glm(smoke_dum~smoke_parent+parent_control, data=smoke, family=binomial(link="logit"), na.action=na.exclude)
res.3 <- glm(smoke_dum~smoke_parent+parent_control+male+conscientiousness, data=smoke, family=binomial(link="logit"), na.action=na.exclude)

stargazer(res.1,res.2,res.3,type="html",title="Regression Results")
Regression Results
Dependent variable:
smoke_dum
(1) (2) (3)
smoke_parent 0.643*** 0.634*** 0.628***
(0.055) (0.055) (0.055)
parent_control 1.220*** 1.242***
(0.122) (0.123)
male 0.053
(0.049)
conscientiousness -0.187***
(0.029)
Constant -1.679*** -2.598*** -2.647***
(0.047) (0.105) (0.108)
Observations 9,728 9,728 9,728
Log Likelihood -5,106.304 -5,053.600 -5,032.976
Akaike Inf. Crit. 10,216.610 10,113.200 10,075.950
Note: p<0.1; p<0.05; p<0.01
exp(res.3$coefficients)
##       (Intercept)      smoke_parent    parent_control              male 
##        0.07085245        1.87304091        3.46268017        1.05410996 
## conscientiousness 
##        0.82936704