Libraries
rm(list=ls())
library(openxlsx) # load package; to read xlsx file
library(sandwich) # load package; to compute heteroskedasticity robust standard errors
library(lmtest) # load package; to conduct hypothesis test using robust SE
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(car) # load package; to conduct hypothesis for multiple coefficients
## Loading required package: carData
library(stargazer) # load package; to put regression results into a single stargazer table
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(plm) # load package; panel-data estimators (not used below)
id <- "1eVyfH-AhYRm4oMtZUuQos8zfS9YLvN8t"
smoke <- read.xlsx(sprintf("https://docs.google.com/uc?id=%s&export=download",id),
sheet=1,startRow=1,colNames=TRUE,rowNames=FALSE)
attach(smoke) # attach so the columns (smoker, smkban, ...) can be referenced by name below
Description of the dataset: https://drive.google.com/file/d/1_xaHYpIKO7A0CPhFyU_OzMnMkI7FP8uz/view?usp=sharing
Question 11.2: Estimate the probability of smoking for (i) all workers, (ii) workers affected by workplace smoking bans, and (iii) workers not affected by workplace smoking bans.
Question A
n <- dim(smoke)[1] # number of workers in the sample
prob.smoke <- sum(smoker)/n # (i) unconditional probability of smoking
prob.smoke
## [1] 0.2423
sub.ii <- subset(smkban, smoker==1) # smkban status of the smokers; its sum counts smokers who face a ban
prob.asmoke <- sum(sub.ii)/sum(smkban) # (ii) P(smoker = 1 | smkban = 1)
prob.asmoke
## [1] 0.2120367
sub.iii <- subset(smoker, smkban==0) # smoking status of workers not facing a ban
prob.nsmoke <- sum(sub.iii)/(n - sum(smkban)) # (iii) P(smoker = 1 | smkban = 0)
prob.nsmoke
## [1] 0.2895951
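As a quick cross-check, the same three probabilities can be computed directly with mean() and tapply(); a minimal sketch using the attached smoke variables:
mean(smoker)                  # (i) unconditional probability of smoking
tapply(smoker, smkban, mean)  # (iii) P(smoker = 1 | smkban = 0) and (ii) P(smoker = 1 | smkban = 1)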
Question B
What is the difference in the probability of smoking between workers affected by a workplace smoking ban and workers not affected by a workplace smoking ban? Use a linear probability model to determine whether this difference is statistically significant.
dif.prob.smoke <- prob.asmoke - prob.nsmoke #difference
dif.prob.smoke # Difference in Prob
## [1] -0.07755835
model1 <- smoker~smkban #regression
fit.smoker <- lm(model1) #OLS
coeftest(fit.smoker, vcov=vcovHC, type="HC1") #Heteroskedasticity Robust Error
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2895951 0.0072619 39.8789 < 2.2e-16 ***
## smkban -0.0775583 0.0089520 -8.6638 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Answer: Workers subject to a workplace smoking ban are about 7.8 percentage points less likely to smoke (the coefficient on smkban is -0.078), and the difference is statistically significant at the 1% level (t = -8.66).
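Because smkban is binary, the LPM slope is exactly the difference in conditional smoking rates computed in (a); a short sketch confirming that the two numbers coincide:
coef(fit.smoker)["smkban"]   # LPM slope on smkban
prob.asmoke - prob.nsmoke    # difference in conditional means from (a)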
Question C
Estimate a linear probability model with smoker as the dependent variable and the following regressors: smkban, female, age, age2, hsdrop, hsgrad, colsome, colgrad, black, and Hispanic. Compare the estimated effect of a smoking ban from this regression with your answer from (b). Suggest an explanation, based on the substance of this regression, for the change in the estimated effect of a smoking ban between (b) and (c).
model2 <- (smoker~smkban + female+ age+ I(age^2)+ hsdrop+ hsgrad+ colsome+ colgrad+ black+ hispanic) #regression
fit.smoker2 <- lm(model2) #OLS
coeftest(fit.smoker2, vcov=vcovHC, type="HC1") #Heteroskedasticity Robust Error
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.4110e-02 4.1423e-02 -0.3406 0.7333890
## smkban -4.7240e-02 8.9661e-03 -5.2687 1.402e-07 ***
## female -3.3257e-02 8.5683e-03 -3.8814 0.0001045 ***
## age 9.6744e-03 1.8954e-03 5.1040 3.386e-07 ***
## I(age^2) -1.3180e-04 2.1905e-05 -6.0169 1.841e-09 ***
## hsdrop 3.2271e-01 1.9489e-02 16.5592 < 2.2e-16 ***
## hsgrad 2.3270e-01 1.2590e-02 18.4826 < 2.2e-16 ***
## colsome 1.6430e-01 1.2625e-02 13.0138 < 2.2e-16 ***
## colgrad 4.4798e-02 1.2044e-02 3.7196 0.0002006 ***
## black -2.7566e-02 1.6078e-02 -1.7144 0.0864772 .
## hispanic -1.0482e-01 1.3975e-02 -7.5004 6.905e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Answer: The bivariate model in (b) puts the effect of a smoking ban at about -7.8 percentage points, while the model with controls in (c) puts it at about -4.7 percentage points. The likely explanation is omitted-variable bias in (b): characteristics such as education and age are correlated both with smoking and with being subject to a workplace ban (for example, more-educated workers are less likely to smoke and more likely to work where bans are in place), so part of the raw difference in (b) reflects who works under a ban rather than the effect of the ban itself.
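One way to see the confounding is to compare worker characteristics across ban status; a small illustrative sketch using the attached variables:
tapply(colgrad, smkban, mean)  # share of college graduates, by ban status
tapply(age, smkban, mean)      # average age, by ban status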
Question D
Test the hypothesis that the coefficient on smkban is 0 in the population version of the regression in (c) against the alternative that it is nonzero, at the 5% significance level.
Answer: The heteroskedasticity-robust t-statistic is already reported in the table for (c): t = -5.27. Its absolute value is well above the 5% two-sided critical value of 1.96 (p-value 1.4e-07), so we reject the null hypothesis that the coefficient on smkban is zero.
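The same test can be reproduced by hand from the robust covariance matrix; a minimal sketch (se.rob and t.stat are illustrative names):
se.rob <- sqrt(diag(vcovHC(fit.smoker2, type = "HC1")))["smkban"]  # robust SE of the smkban coefficient
t.stat <- coef(fit.smoker2)["smkban"] / se.rob                     # robust t-statistic
2 * pt(-abs(t.stat), df = fit.smoker2$df.residual)                 # two-sided p-value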
Question E
Test the hypothesis that the probability of smoking does not depend on the level of education in the regression in (c). Does the probability of smoking increase or decrease with the level of education?
linearHypothesis(fit.smoker2, c("hsdrop=0","hsgrad=0","colsome=0","colgrad=0"),white.adjust=c("hc1"))
## Linear hypothesis test
##
## Hypothesis:
## hsdrop = 0
## hsgrad = 0
## colsome = 0
## colgrad = 0
##
## Model 1: restricted model
## Model 2: smoker ~ smkban + female + age + I(age^2) + hsdrop + hsgrad +
## colsome + colgrad + black + hispanic
##
## Note: Coefficient covariance matrix supplied.
##
## Res.Df Df F Pr(>F)
## 1 9993
## 2 9989 4 140.09 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Answer: The F-test strongly rejects the null that the four education coefficients are jointly zero (F = 140.09, p < 2.2e-16), so the probability of smoking does depend on education. The omitted education category is the highest level (advanced degree), and the included indicators, which represent progressively lower levels of education, all have positive coefficients that shrink as education rises (hsdrop 0.32 > hsgrad 0.23 > colsome 0.16 > colgrad 0.04). The probability of smoking therefore decreases with the level of education.
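The sign pattern can be read directly off the fitted LPM; a one-line sketch:
coef(fit.smoker2)[c("hsdrop", "hsgrad", "colsome", "colgrad")]  # coefficients fall as education rises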
Question F
Repeat (c)-(e) using a probit model.
model.prob1 <- smoker~smkban + female+ age+ I(age^2)+ hsdrop+ hsgrad+ colsome+ colgrad+ black+ hispanic #regression
fit.smoker.p1 <- glm(model.prob1, data = smoke, family= binomial(link="probit")) #Probit
coeftest(fit.smoker.p1, vcov=vcovHC, type="HC1") #Heteroskedasticity Robust Error
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.7349e+00 1.5168e-01 -11.4384 < 2.2e-16 ***
## smkban -1.5863e-01 2.9176e-02 -5.4370 5.419e-08 ***
## female -1.1173e-01 2.8866e-02 -3.8707 0.0001085 ***
## age 3.4511e-02 6.8549e-03 5.0346 4.789e-07 ***
## I(age^2) -4.6754e-04 8.2487e-05 -5.6681 1.444e-08 ***
## hsdrop 1.1416e+00 7.3449e-02 15.5428 < 2.2e-16 ***
## hsgrad 8.8267e-01 6.0664e-02 14.5502 < 2.2e-16 ***
## colsome 6.7712e-01 6.1681e-02 10.9777 < 2.2e-16 ***
## colgrad 2.3468e-01 6.5578e-02 3.5787 0.0003453 ***
## black -8.4279e-02 5.3828e-02 -1.5657 0.1174193
## hispanic -3.3827e-01 5.0155e-02 -6.7446 1.534e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
linearHypothesis(fit.smoker.p1, c("hsdrop=0","hsgrad=0","colsome=0","colgrad=0"),
white.adjust=c("hc1"))
## Linear hypothesis test
##
## Hypothesis:
## hsdrop = 0
## hsgrad = 0
## colsome = 0
## colgrad = 0
##
## Model 1: restricted model
## Model 2: smoker ~ smkban + female + age + I(age^2) + hsdrop + hsgrad +
## colsome + colgrad + black + hispanic
##
## Res.Df Df Chisq Pr(>Chisq)
## 1 9993
## 2 9989 4 461.12 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
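The probit coefficient on smkban (-0.159) is not directly comparable to the LPM estimate in (c). One common way to compare them is the average partial effect of the ban, obtained by predicting each worker's smoking probability with smkban set to 1 and to 0 and averaging the difference; a sketch of that approach (smoke.ban and smoke.noban are illustrative names):
smoke.ban   <- transform(smoke, smkban = 1)   # everyone coded as covered by a ban
smoke.noban <- transform(smoke, smkban = 0)   # everyone coded as not covered
mean(predict(fit.smoker.p1, newdata = smoke.ban,   type = "response") -
     predict(fit.smoker.p1, newdata = smoke.noban, type = "response"))  # average partial effect of the ban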
Question G
Repeat (c)-(e) using a logit model.
model.logi1 <- smoker~smkban + female+ age+ I(age^2)+ hsdrop+ hsgrad+ colsome+ colgrad+ black+ hispanic #regression
fit.smoker.g1 <- glm(model.logi1, data = smoke, family= binomial(link="probit")) # NOTE: link="probit" was used here, so this reproduces the probit estimates from (f); the logit fit (link="logit") is estimated in part H(iv) below
coeftest(fit.smoker.g1, vcov=vcovHC, type="HC1") # heteroskedasticity-robust standard errors
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.7349e+00 1.5168e-01 -11.4384 < 2.2e-16 ***
## smkban -1.5863e-01 2.9176e-02 -5.4370 5.419e-08 ***
## female -1.1173e-01 2.8866e-02 -3.8707 0.0001085 ***
## age 3.4511e-02 6.8549e-03 5.0346 4.789e-07 ***
## I(age^2) -4.6754e-04 8.2487e-05 -5.6681 1.444e-08 ***
## hsdrop 1.1416e+00 7.3449e-02 15.5428 < 2.2e-16 ***
## hsgrad 8.8267e-01 6.0664e-02 14.5502 < 2.2e-16 ***
## colsome 6.7712e-01 6.1681e-02 10.9777 < 2.2e-16 ***
## colgrad 2.3468e-01 6.5578e-02 3.5787 0.0003453 ***
## black -8.4279e-02 5.3828e-02 -1.5657 0.1174193
## hispanic -3.3827e-01 5.0155e-02 -6.7446 1.534e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
linearHypothesis(fit.smoker.g1, c("hsdrop=0","hsgrad=0","colsome=0","colgrad=0"),
white.adjust=c("hc1"))
## Linear hypothesis test
##
## Hypothesis:
## hsdrop = 0
## hsgrad = 0
## colsome = 0
## colgrad = 0
##
## Model 1: restricted model
## Model 2: smoker ~ smkban + female + age + I(age^2) + hsdrop + hsgrad +
## colsome + colgrad + black + hispanic
##
## Res.Df Df Chisq Pr(>Chisq)
## 1 9993
## 2 9989 4 461.12 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
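As an alternative to the Wald test above, the education restriction in a GLM can also be checked with a likelihood-ratio test; a sketch comparing the fit above to a restricted fit without the education dummies (fit.noeduc is an illustrative name):
fit.noeduc <- glm(smoker ~ smkban + female + age + I(age^2) + black + hispanic,
                  data = smoke, family = binomial(link = "probit"))  # restricted model
lrtest(fit.noeduc, fit.smoker.g1)  # lmtest likelihood-ratio test of the four education coefficients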
Question H
Predict the probability of smoking, with and without a workplace smoking ban, for the two hypothetical workers used in the calculations below: Mr. A (male, 20 years old, high-school dropout, neither black nor Hispanic) and Ms. B (female, black, 40 years old, college graduate). Compare the implied effect of the ban across the estimated models.
Answer: Using the probit estimates, Mr. A's predicted probability of smoking is 46.41% without a ban and 40.17% with a ban, so the ban lowers his probability by about 6.2 percentage points.
# Mr. A without a workplace ban: only these coefficients enter (fit.smoker.g1 holds the probit estimates)
b0 = coefficients(fit.smoker.g1)[1]
b1 = coefficients(fit.smoker.g1)[2]*0
b4 = coefficients(fit.smoker.g1)[4]*20
b5 = coefficients(fit.smoker.g1)[5]*(20^2)
b6 = coefficients(fit.smoker.g1)[6]
prob.smoke.A = b0+b1+b4+b5+b6
pnorm(prob.smoke.A)
## (Intercept)
## 0.464102
# Mr. A with a workplace ban (smkban = 1)
b0 = coefficients(fit.smoker.g1)[1]
b1 = coefficients(fit.smoker.g1)[2]
b4 = coefficients(fit.smoker.g1)[4]*20
b5 = coefficients(fit.smoker.g1)[5]*(20^2)
b6 = coefficients(fit.smoker.g1)[6]
prob.smoke.A = b0+b1+b4+b5+b6
pnorm(prob.smoke.A)
## (Intercept)
## 0.4017831
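Instead of adding up coefficients by hand, the same two probabilities can be obtained with predict() on a one-row-per-scenario data frame; a sketch for Mr. A's profile (newA is an illustrative name):
newA <- data.frame(smkban = c(0, 1), female = 0, age = 20,
                   hsdrop = 1, hsgrad = 0, colsome = 0, colgrad = 0,
                   black = 0, hispanic = 0)  # Mr. A without and with a ban
predict(fit.smoker.g1, newdata = newA, type = "response")  # probit probabilities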
Answer: Using the probit estimates, Ms. B's predicted probability of smoking is 14.37% without a ban and 11.08% with a ban, a reduction of about 3.3 percentage points.
# Ms. B without a workplace ban: female, age 40, college graduate, black
b0 = coefficients(fit.smoker.g1)[1]
b1 = coefficients(fit.smoker.g1)[2]*0
b2 = coefficients(fit.smoker.g1)[3]
b4 = coefficients(fit.smoker.g1)[4]*40
b5 = coefficients(fit.smoker.g1)[5]*(40^2)
b8 = coefficients(fit.smoker.g1)[9]
b9 = coefficients(fit.smoker.g1)[10]
prob.smoke.B = b0+b1+b2+b4+b5+b8+b9
pnorm(prob.smoke.B)
## (Intercept)
## 0.1436957
# Ms. B with a workplace ban
b0 = coefficients(fit.smoker.g1)[1]
b1 = coefficients(fit.smoker.g1)[2]
b2 = coefficients(fit.smoker.g1)[3]
b4 = coefficients(fit.smoker.g1)[4]*40
b5 = coefficients(fit.smoker.g1)[5]*(40^2)
b8 = coefficients(fit.smoker.g1)[9]
b9 = coefficients(fit.smoker.g1)[10]
prob.smoke.B = b0+b1+b2+b4+b5+b8+b9
pnorm(prob.smoke.B)
## (Intercept)
## 0.1107609
# Mr. A without a ban, linear probability model (fit.smoker2)
b0 = coefficients(fit.smoker2)[1]
b1 = coefficients(fit.smoker2)[2]*0
b4 = coefficients(fit.smoker2)[4]*20
b5 = coefficients(fit.smoker2)[5]*(20^2)
b6 = coefficients(fit.smoker2)[6]
prob.smoke.A = b0+b1+b4+b5+b6
prob.smoke.A
## (Intercept)
## 0.4493721
# Mr. A with a ban, linear probability model
b0 = coefficients(fit.smoker2)[1]
b1 = coefficients(fit.smoker2)[2]
b4 = coefficients(fit.smoker2)[4]*20
b5 = coefficients(fit.smoker2)[5]*(20^2)
b6 = coefficients(fit.smoker2)[6]
prob.smoke.A = b0+b1+b4+b5+b6
prob.smoke.A
## (Intercept)
## 0.4021323
Answer: For Mr. A the linear probability model gives 44.94% without a ban and 40.21% with a ban, very close to the probit predictions.
# Ms. B without a ban, linear probability model
b0 = coefficients(fit.smoker2)[1]
b1 = coefficients(fit.smoker2)[2]*0
b2 = coefficients(fit.smoker2)[3]
b4 = coefficients(fit.smoker2)[4]*40
b5 = coefficients(fit.smoker2)[5]*(40^2)
b8 = coefficients(fit.smoker2)[9]
b9 = coefficients(fit.smoker2)[10]
prob.smoke.B = b0+b1+b2+b4+b5+b8+b9
prob.smoke.B
## (Intercept)
## 0.145961
# Ms. B with a ban, linear probability model
b0 = coefficients(fit.smoker2)[1]
b1 = coefficients(fit.smoker2)[2]
b2 = coefficients(fit.smoker2)[3]
b4 = coefficients(fit.smoker2)[4]*40
b5 = coefficients(fit.smoker2)[5]*(40^2)
b8 = coefficients(fit.smoker2)[9]
b9 = coefficients(fit.smoker2)[10]
prob.smoke.B = b0+b1+b2+b4+b5+b8+b9
prob.smoke.B
## (Intercept)
## 0.09872116
For Ms. B the linear probability model gives 14.60% without a ban and 9.87% with a ban, again similar to the probit results.
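The LPM fitted values for both profiles can be checked the same way with predict(), reusing newA from the sketch above and defining a similar frame for Ms. B (newB is an illustrative name):
newB <- data.frame(smkban = c(0, 1), female = 1, age = 40,
                   hsdrop = 0, hsgrad = 0, colsome = 0, colgrad = 1,
                   black = 1, hispanic = 0)  # Ms. B without and with a ban
predict(fit.smoker2, newdata = newA)  # LPM probabilities for Mr. A
predict(fit.smoker2, newdata = newB)  # LPM probabilities for Ms. B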
(iv) Repeat (i) and (ii) using the logit model.
fit.smoker.p1 <- glm(model.logi1, data = smoke, family= binomial(link="logit")) # Logit (note: this overwrites the probit fit previously stored in fit.smoker.p1)
coeftest(fit.smoker.p1, vcov=vcovHC, type="HC1") # heteroskedasticity-robust standard errors
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.99918220 0.26537659 -11.3016 < 2.2e-16 ***
## smkban -0.26202867 0.04950196 -5.2933 1.201e-07 ***
## female -0.19077250 0.04920099 -3.8774 0.0001056 ***
## age 0.05993658 0.01182862 5.0671 4.040e-07 ***
## I(age^2) -0.00081819 0.00014321 -5.7131 1.109e-08 ***
## hsdrop 2.01685337 0.13409500 15.0405 < 2.2e-16 ***
## hsgrad 1.57850359 0.11582668 13.6282 < 2.2e-16 ***
## colsome 1.22997749 0.11770914 10.4493 < 2.2e-16 ***
## colgrad 0.44658309 0.12643554 3.5321 0.0004123 ***
## black -0.15603412 0.09129560 -1.7091 0.0874308 .
## hispanic -0.59717314 0.08623010 -6.9253 4.349e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Mr. A without a workplace ban: only these coefficients enter (logit estimates)
b0 = coefficients(fit.smoker.p1)[1]
b1 = coefficients(fit.smoker.p1)[2]*0
b4 = coefficients(fit.smoker.p1)[4]*20
b5 = coefficients(fit.smoker.p1)[5]*(20^2)
b6 = coefficients(fit.smoker.p1)[6]
prob.smoke.A = b0+b1+b4+b5+b6
plogis(prob.smoke.A)
## (Intercept)
## 0.4723103
# Mr. A with a workplace ban (logit)
b0 = coefficients(fit.smoker.p1)[1]
b1 = coefficients(fit.smoker.p1)[2]
b4 = coefficients(fit.smoker.p1)[4]*20
b5 = coefficients(fit.smoker.p1)[5]*(20^2)
b6 = coefficients(fit.smoker.p1)[6]
prob.smoke.A = b0+b1+b4+b5+b6
plogis(prob.smoke.A)
## (Intercept)
## 0.4078402
For Mr. A the logit model gives 47.23% without a ban and 40.78% with a ban, an effect of about 6.4 percentage points.
# Ms. B without a ban: these calculations must use the logit fit (fit.smoker.p1), not fit.smoker.g1, which holds the probit estimates
b0 = coefficients(fit.smoker.p1)[1]
b1 = coefficients(fit.smoker.p1)[2]*0
b2 = coefficients(fit.smoker.p1)[3]
b4 = coefficients(fit.smoker.p1)[4]*40
b5 = coefficients(fit.smoker.p1)[5]*(40^2)
b8 = coefficients(fit.smoker.p1)[9]
b9 = coefficients(fit.smoker.p1)[10]
prob.smoke.B = b0+b1+b2+b4+b5+b8+b9
plogis(prob.smoke.B)
# Ms. B with a ban
b0 = coefficients(fit.smoker.p1)[1]
b1 = coefficients(fit.smoker.p1)[2]
b2 = coefficients(fit.smoker.p1)[3]
b4 = coefficients(fit.smoker.p1)[4]*40
b5 = coefficients(fit.smoker.p1)[5]*(40^2)
b8 = coefficients(fit.smoker.p1)[9]
b9 = coefficients(fit.smoker.p1)[10]
prob.smoke.B = b0+b1+b2+b4+b5+b8+b9
plogis(prob.smoke.B)
For Ms. B the logit model gives about 14.05% without a ban and 11.17% with a ban, a reduction of roughly 2.9 percentage points, again in line with the probit and linear probability models.
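As a cross-check on the hand calculations, the logit probabilities for both profiles can be obtained with predict(), reusing the illustrative newA and newB frames sketched above:
predict(fit.smoker.p1, newdata = newA, type = "response")  # logit probabilities for Mr. A
predict(fit.smoker.p1, newdata = newB, type = "response")  # logit probabilities for Ms. B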
Showing all models in one table
library(jtools)
library(broom.mixed)
## Registered S3 methods overwritten by 'broom':
## method from
## tidy.glht jtools
## tidy.summary.glht jtools
export_summs(fit.smoker, fit.smoker.g1, fit.smoker.p1, fit.smoker2) # columns: LPM from (b), probit, logit, LPM from (c)
| | Model 1 | Model 2 | Model 3 | Model 4 |
|---|---|---|---|---|
| (Intercept) | 0.29 *** | -1.73 *** | -3.00 *** | -0.01 |
| | (0.01) | (0.15) | (0.27) | (0.04) |
| smkban | -0.08 *** | -0.16 *** | -0.26 *** | -0.05 *** |
| | (0.01) | (0.03) | (0.05) | (0.01) |
| female | | -0.11 *** | -0.19 *** | -0.03 *** |
| | | (0.03) | (0.05) | (0.01) |
| age | | 0.03 *** | 0.06 *** | 0.01 *** |
| | | (0.01) | (0.01) | (0.00) |
| I(age^2) | | -0.00 *** | -0.00 *** | -0.00 *** |
| | | (0.00) | (0.00) | (0.00) |
| hsdrop | | 1.14 *** | 2.02 *** | 0.32 *** |
| | | (0.07) | (0.13) | (0.02) |
| hsgrad | | 0.88 *** | 1.58 *** | 0.23 *** |
| | | (0.06) | (0.11) | (0.02) |
| colsome | | 0.68 *** | 1.23 *** | 0.16 *** |
| | | (0.06) | (0.12) | (0.02) |
| colgrad | | 0.23 *** | 0.45 *** | 0.04 ** |
| | | (0.07) | (0.13) | (0.02) |
| black | | -0.08 | -0.16 | -0.03 |
| | | (0.05) | (0.09) | (0.02) |
| hispanic | | -0.34 *** | -0.60 *** | -0.10 *** |
| | | (0.05) | (0.08) | (0.01) |
| N | 10000 | 10000 | 10000 | 10000 |
| R2 | 0.01 | | | 0.06 |
| AIC | 11356.04 | 10493.74 | 10490.00 | 10865.49 |
| BIC | 11377.67 | 10573.05 | 10569.31 | 10952.01 |
| Pseudo R2 | | 0.09 | 0.09 | |
| *** p < 0.001; ** p < 0.01; * p < 0.05. | | | | |
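stargazer was loaded at the top for exactly this purpose; a minimal sketch of an equivalent text table (same column order as the export_summs call above):
stargazer(fit.smoker, fit.smoker.g1, fit.smoker.p1, fit.smoker2, type = "text",
          column.labels = c("LPM (b)", "probit", "logit", "LPM (c)"))  # text version of the comparison table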
summary(fit.smoker)
##
## Call:
## lm(formula = model1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2896 -0.2896 -0.2120 -0.2120 0.7880
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.289595 0.006833 42.380 <2e-16 ***
## smkban -0.077558 0.008750 -8.863 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4268 on 9998 degrees of freedom
## Multiple R-squared: 0.007796, Adjusted R-squared: 0.007697
## F-statistic: 78.56 on 1 and 9998 DF, p-value: < 2.2e-16
Now we have the same information as a coefficient plot (how cool is that?)
library("ggstance")
plot_coefs(fit.smoker,fit.smoker.g1,fit.smoker.p1,fit.smoker2, robust = "HC1")
Let's go further and also plot the coefficient distributions for each model
plot_summs(fit.smoker, robust = "HC1", scale = TRUE, plot.distributions = TRUE)
plot_summs(fit.smoker.g1, robust = "HC1", scale = TRUE, plot.distributions = TRUE)
plot_summs(fit.smoker.p1, robust = "HC1", scale = TRUE, plot.distributions = TRUE)
plot_summs(fit.smoker2, robust = "HC1", scale = TRUE, plot.distributions = TRUE)
Maybe we can go further and visualize the data together with the model's predictions at the same time; let's try age
effect_plot(fit.smoker.g1, pred = age, interval = TRUE, plot.points = TRUE)
What about marginal effects?
library(margins)
logit.margins <- margins(fit.smoker.p1) # average marginal effects from the logit fit (fit.smoker.p1 now holds the logit estimates)
logit.margins
## Average marginal effects
## glm(formula = model.logi1, family = binomial(link = "logit"), data = smoke)
## smkban female age hsdrop hsgrad colsome colgrad black hispanic
## -0.04531 -0.03299 -0.0004218 0.3487 0.2729 0.2127 0.07722 -0.02698 -0.1033
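The reported average marginal effect of smkban (about -0.045) can be approximated by hand as the average change in each worker's predicted probability when smkban switches from 0 to 1; a sketch (d0 and d1 are illustrative names, and this discrete-change version need not match the derivative-based margins figure exactly):
d1 <- transform(smoke, smkban = 1)  # everyone covered by a ban
d0 <- transform(smoke, smkban = 0)  # no one covered
mean(predict(fit.smoker.p1, newdata = d1, type = "response") -
     predict(fit.smoker.p1, newdata = d0, type = "response"))  # average discrete-change effect of the ban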