# Libraries
library("stats")
library("stats4")
library("foreign")
library("MASS")
library("mlogit")
library("tidyverse")
library("lmtest")

Question 1

a

optain the difference of probability between some one who spends 5 hour sper wek and some one who spends 10 hours per week. SAT fix at 3.0, SAT 1,200.

\[\hat{P}(grad = 1|hsGPA, SAT, study) = \Lambda(−1.17+0.24hsGPA+0.00058SAT+0.073sTudy)\]

study10 = (0.24*3.0) + (0.00058*1200) + (0.073*10) - 1.17
study5 = -1.17 + 0.24*3.0 + 0.00058*1200 + 0.073*5

study10-study5
## [1] 0.365

b

The marginal of effect of studying one more hour per week … studying one more hour per week would have a 36.5% chance of incrasing

# NOT SURE IF I GOT IT
# hsGPA 3.1
# SAT 1250
# 10 hours per week
study10mar = (0.24*3.1) + (0.00058*1250) + (0.073*10) - 1.17

study10mar
## [1] 1.029

Question 2

Likelihood function to find the maximum likelyhood. input is the outcome output is the probability.

This function tells you what is the probability of y_i = 1

p=pr(y=1) \[f(y_i;p) = p^{y_i}(1-p)^{1-y_t}\]
\(y_t\) is the probability

output is the probability input is the outcome which are known.

# This is how to create a function. Different types of function can be created.  
likelihood = function(N,y,theta)
{return(theta^y*(1-theta)^(N-y))}  

N = 1000
y = rbinom(N,1,10)
# theta = 


theta = seq(from = -2, to = 4, by = 0.01)

plot(theta, likelihood(N, 4, theta))

a

plot
### b
plot
### c
plot
### d
What can you conclude from the above three plots?

Question 3

library(readxl)
pntsprd <- read_excel("pntsprd.xls")
head(pntsprd)
## # A tibble: 6 x 12
##   favscr undscr spread favhome neutral fav25 und25 fregion uregion scrdiff
##    <dbl>  <dbl>  <dbl>   <dbl>   <dbl> <dbl> <dbl>   <dbl>   <dbl>   <dbl>
## 1     72     61    7         0       0     1     0       3       4      11
## 2     82     74    7         1       0     0     0       3       1       8
## 3     87     57   17         1       0     0     0       3       3      30
## 4     69     70    9         1       0     0     0       3       3      -1
## 5     77     79    2.5       0       0     0     0       2       3      -2
## 6     91     65    9         0       1     1     0       3       4      26
## # ... with 2 more variables: sprdcvr <dbl>, favwin <dbl>

a

\[P(favwin = 1|spread) = \beta_0 + \beta_1spread\]

# Linear Probability Model
lpm <- lm(pntsprd$favwin~pntsprd$spread)

summary(lpm)
## 
## Call:
## lm(formula = pntsprd$favwin ~ pntsprd$spread)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9836 -0.1192  0.1519  0.3069  0.4037 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.576949   0.028235  20.434  < 2e-16 ***
## pntsprd$spread 0.019366   0.002339   8.281 9.32e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4017 on 551 degrees of freedom
## Multiple R-squared:  0.1107, Adjusted R-squared:  0.1091 
## F-statistic: 68.57 on 1 and 551 DF,  p-value: 9.324e-16

Use the rubost standard error

b

Is spread statistically significant with 5% significance level? What is the estimated probability that the favored team wins when spread = 10?

c

estimate a probit

# Probit Model
Probitm <-glm(inlf~nwifeinc+educ+exper+expersq+age+kidslt6+kidsge6,family = binomial(link="probit"))

summary(Probitm)

d

Add the variables f avhome, f av25, and und25 to the probit model and test joint significance of these variables using the likelihood ratio test with 5% significance level.

Question 4

library(readxl)
loanapp <- read_excel("loanapp.xls")
head(loanapp)
## # A tibble: 6 x 59
##     occ loanamt action   msa suffolk appinc typur unit  married dep     emp
##   <dbl>   <dbl>  <dbl> <dbl>   <dbl>  <dbl> <dbl> <chr> <chr>   <chr> <dbl>
## 1     1      89      1  1120       0     72     0 1     0       0         0
## 2     1     128      3  1120       0     74     0 1     1       1         0
## 3     1     128      1  1120       0     84     3 1     0       0         1
## 4     1      66      1  1120       0     36     0 1     1       0         0
## 5     1     120      1  1120       0     59     8 1     1       0         0
## 6     1     111      1  1120       0     63     9 1     0       0         0
## # ... with 48 more variables: yjob <dbl>, self <dbl>, atotinc <dbl>,
## #   cototinc <dbl>, hexp <dbl>, price <dbl>, other <dbl>, liq <dbl>, rep <chr>,
## #   gdlin <dbl>, lines <dbl>, mortg <dbl>, cons <dbl>, pubrec <dbl>,
## #   hrat <dbl>, obrat <dbl>, fixadj <dbl>, term <dbl>, apr <dbl>, prop <dbl>,
## #   inss <dbl>, inson <dbl>, gift <dbl>, cosign <dbl>, unver <dbl>,
## #   review <dbl>, netw <dbl>, unem <dbl>, min30 <chr>, bd <dbl>, mi <dbl>,
## #   old <dbl>, vr <dbl>, sch <dbl>, black <dbl>, hispan <dbl>, male <chr>,
## #   reject <dbl>, approve <dbl>, mortno <dbl>, mortperf <dbl>, mortlat1 <dbl>,
## #   mortlat2 <dbl>, chist <dbl>, multi <chr>, loanprc <dbl>, thick <chr>,
## #   white <dbl>

a

estimate a probit modlel

# Probit Model
proapp = glm(loanapp$approve ~ loanapp$white,
             family = binomial(link = "probit"))
summary(proapp)
## 
## Call:
## glm(formula = loanapp$approve ~ loanapp$white, family = binomial(link = "probit"))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1864   0.4384   0.4384   0.4384   0.8314  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    0.54695    0.07544   7.251 4.15e-13 ***
## loanapp$white  0.78395    0.08671   9.041  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1480.7  on 1988  degrees of freedom
## Residual deviance: 1401.8  on 1987  degrees of freedom
## AIC: 1405.8
## 
## Number of Fisher Scoring iterations: 4

b

# Probit Model
proapp2 = glm(loanapp$approve ~ loanapp$white + loanapp$hrat + loanapp$obrat + loanapp$loanprc + loanapp$unem + loanapp$male + loanapp$married + loanapp$dep + loanapp$sch + loanapp$cosign + loanapp$chist + loanapp$pubrec + loanapp$mortlat1 + loanapp$mortlat2 + loanapp$vr,
             family = binomial(link = "probit"))
summary(proapp2)
## 
## Call:
## glm(formula = loanapp$approve ~ loanapp$white + loanapp$hrat + 
##     loanapp$obrat + loanapp$loanprc + loanapp$unem + loanapp$male + 
##     loanapp$married + loanapp$dep + loanapp$sch + loanapp$cosign + 
##     loanapp$chist + loanapp$pubrec + loanapp$mortlat1 + loanapp$mortlat2 + 
##     loanapp$vr, family = binomial(link = "probit"))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0950   0.2331   0.3389   0.4850   1.9979  
## 
## Coefficients: (1 not defined because of singularities)
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       11.395347 204.586080   0.056   0.9556    
## loanapp$white      0.532479   0.097452   5.464 4.66e-08 ***
## loanapp$hrat       0.008364   0.007061   1.185   0.2362    
## loanapp$obrat     -0.028319   0.006192  -4.574 4.79e-06 ***
## loanapp$loanprc   -1.004248   0.239688  -4.190 2.79e-05 ***
## loanapp$unem      -0.035933   0.017828  -2.016   0.0438 *  
## loanapp$male0     -3.971989  94.258449  -0.042   0.9664    
## loanapp$male1     -4.010678  94.258422  -0.043   0.9661    
## loanapp$married0  -2.096414 418.227224  -0.005   0.9960    
## loanapp$married1  -1.829983 418.227210  -0.004   0.9965    
## loanapp$dep0      -3.276150 376.753578  -0.009   0.9931    
## loanapp$dep1      -3.291261 376.753585  -0.009   0.9930    
## loanapp$dep2      -3.401128 376.753582  -0.009   0.9928    
## loanapp$dep3      -3.388711 376.753604  -0.009   0.9928    
## loanapp$dep4      -3.816284 376.753667  -0.010   0.9919    
## loanapp$dep5       1.146623 406.202496   0.003   0.9977    
## loanapp$dep6       0.915706 430.929803   0.002   0.9983    
## loanapp$dep7      -0.096264 532.810001   0.000   0.9999    
## loanapp$dep8             NA         NA      NA       NA    
## loanapp$sch        0.011054   0.095678   0.116   0.9080    
## loanapp$cosign     0.056668   0.242111   0.234   0.8149    
## loanapp$chist      0.584222   0.095920   6.091 1.12e-09 ***
## loanapp$pubrec    -0.773119   0.127788  -6.050 1.45e-09 ***
## loanapp$mortlat1  -0.165458   0.258328  -0.640   0.5219    
## loanapp$mortlat2  -0.492990   0.338468  -1.457   0.1452    
## loanapp$vr        -0.190553   0.081869  -2.328   0.0199 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1480.7  on 1988  degrees of freedom
## Residual deviance: 1194.9  on 1964  degrees of freedom
## AIC: 1244.9
## 
## Number of Fisher Scoring iterations: 14

c

Question 5

library(readxl)
apple <- read_excel("apple.xls")

head(apple)
## # A tibble: 6 x 17
##      id  educ date  state regprc ecoprc inseason hhsize  male faminc   age
##   <dbl> <dbl> <chr> <chr>  <dbl>  <dbl>    <dbl>  <dbl> <dbl>  <dbl> <dbl>
## 1 10002    16 "\"1~ "\"S~   1.19   1.19        1      4     0     45    43
## 2 10004    16 "\"1~ "\"K~   0.59   0.79        0      1     0     65    37
## 3 10034    18 "\"1~ "\"M~   0.59   0.99        1      3     0     65    44
## 4 10035    12 "\"1~ "\"T~   0.89   1.09        1      2     1     55    55
## 5 10039    15 "\"1~ "\"N~   0.89   1.09        0      1     1     25    22
## 6 10041    12 "\"1~ "\"W~   0.59   0.79        1      4     0     15    34
## # ... with 6 more variables: reglbs <dbl>, ecolbs <dbl>, numlt5 <dbl>,
## #   num5_17 <dbl>, num18_64 <dbl>, num_gt64 <dbl>

a

sum(apple$ecolbs == 0.00000)
## [1] 248

b

Tobit model

c

d

e

f

Question 6

library(readxl)
smoke <- read_excel("smoke.xls")
head(smoke)
## # A tibble: 6 x 10
##    educ cigpric white   age income  cigs restaurn lincome agesq lcigpric
##   <dbl>   <dbl> <dbl> <dbl>  <dbl> <dbl>    <dbl>   <dbl> <dbl>    <dbl>
## 1  16      60.5     1    46  20000     0        0    9.90  2116     4.10
## 2  16      57.9     1    40  30000     0        0   10.3   1600     4.06
## 3  12      57.7     1    58  30000     3        0   10.3   3364     4.05
## 4  13.5    57.9     1    30  20000     0        0    9.90   900     4.06
## 5  10      58.3     1    17  20000     0        0    9.90   289     4.07
## 6   6      59.3     1    86   6500     0        0    8.78  7396     4.08

a

b

Poisson Regression

c

d

e

f

g