Data are downloaded, loaded, and names of fields are read from the file:
setInternet2(TRUE) # solution for https files
download.file("https://sites.google.com/site/econometriks/docs/R_smoking.rda", "R_smoking.rda")
load("R_smoking.rda")
names(smoking)
## [1] "educ" "age" "income" "pcigs79" "ageeduc"
## [6] "smoker" "educincome" "qrt.age" "inc.10k" "fcast.reg"
## [11] "fcast.logit" "fcast.prob" "qrt.edu" "qrt.inc" "qrt.price"
Loading required packages and attaching the data set.
library(xtable)
library(psych)
attach(smoking)
## The following object is masked from package:psych:
##
## income
library(htmlTable)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2015). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2. http://CRAN.R-project.org/package=stargazer
Table 8.4
t1<-table(smoker)
tab <- xtable(cbind(t1,'t1(%)'=t1/sum(t1)*100))
htmlTable(txtRound(tab,2))
| t1 | t1(%) | |
|---|---|---|
| no | 695.00 | 61.94 |
| yes | 427.00 | 38.06 |
Table 8.5
smoking$smoker2<-factor(smoking$smoker)
f1 <- function(x) c( mean=mean(x), sd= sd(x), obs=length(x))
tab<-rbind(education=f1(educ),'age in years'=f1(age),
'household income'=f1(income),
'cigarette price in cents'=f1(pcigs79),
'smoker' =f1(as.numeric(smoking$smoker2)-1))
htmlTable(txtRound(tab,2))
| mean | sd | obs | |
|---|---|---|---|
| education | 12.19 | 3.30 | 1122.00 |
| age in years | 41.88 | 17.08 | 1122.00 |
| household income | 19359.18 | 9052.52 | 1122.00 |
| cigarette price in cents | 60.97 | 4.85 | 1122.00 |
| smoker | 0.38 | 0.49 | 1122.00 |
… …
Descriptive statistics by smoker status
Table 8.6
tab <- xtable(describeBy(cbind(educ, age, income, pcigs79), smoker, mat=T, skew=F, ranges=F))
names(tab)[2] <-c( "smoker")
print(tab, type="html")
| item | smoker | vars | n | mean | sd | se | |
|---|---|---|---|---|---|---|---|
| educ1 | 1 | no | 1.00 | 695.00 | 12.43 | 3.52 | 0.13 |
| educ2 | 2 | yes | 1.00 | 427.00 | 11.81 | 2.86 | 0.14 |
| age1 | 3 | no | 2.00 | 695.00 | 43.72 | 18.15 | 0.69 |
| age2 | 4 | yes | 2.00 | 427.00 | 38.89 | 14.73 | 0.71 |
| income1 | 5 | no | 3.00 | 695.00 | 19492.81 | 9192.07 | 348.68 |
| income2 | 6 | yes | 3.00 | 427.00 | 19141.69 | 8827.00 | 427.17 |
| pcigs791 | 7 | no | 4.00 | 695.00 | 61.21 | 4.80 | 0.18 |
| pcigs792 | 8 | yes | 4.00 | 427.00 | 60.58 | 4.92 | 0.24 |
t1<-(aggregate(cbind(educ, age, income, pcigs79) ~ smoker, data=smoking,f1))
htmlTable(txtRound(t(t1),2))
| smoker | no | yes |
| educ.mean | 12.43 | 11.81 |
| educ.sd | 3.52 | 2.86 |
| educ.obs | 695.00 | 427.00 |
| age.mean | 43.72 | 38.89 |
| age.sd | 18.15 | 14.73 |
| age.obs | 695.00 | 427.00 |
| income.mean | 19492.81 | 19141.69 |
| income.sd | 9192.07 | 8827.00 |
| income.obs | 695.00 | 427.00 |
| pcigs79.mean | 61.21 | 60.58 |
| pcigs79.sd | 4.80 | 4.92 |
| pcigs79.obs | 695.00 | 427.00 |
t1<-aggregate(cbind(income,age, educ, pcigs79) ~ smoker, data=smoking, f1)
tab<-xtable( as.matrix(t(t1)) )
print(tab, type="html")
| 1 | 2 | |
|---|---|---|
| smoker | no | yes |
| income.mean | 19492.806 | 19141.686 |
| income.sd | 9192.072 | 8826.999 |
| income.obs | 695.000 | 427.000 |
| age.mean | 43.72230 | 38.89227 |
| age.sd | 18.14572 | 14.72686 |
| age.obs | 695.00000 | 427.00000 |
| educ.mean | 12.427338 | 11.806792 |
| educ.sd | 3.517470 | 2.861969 |
| educ.obs | 695.000000 | 427.000000 |
| pcigs79.mean | 61.214532 | 60.577049 |
| pcigs79.sd | 4.800400 | 4.915248 |
| pcigs79.obs | 695.000000 | 427.000000 |
t1<-aggregate(cbind(income,age, educ, pcigs79) ~ smoker+1, data=smoking, f1)
t2<-aggregate(cbind(income,age, educ, pcigs79) ~ 1, data=smoking, f1);
t3<-as.matrix(t(t1))
t4<-as.matrix(rbind("total",t(t2)))
t5<-cbind(t3,t4)
tab<-xtable(t5)
print(tab, type="html")
| 1 | 2 | 3 | |
|---|---|---|---|
| smoker | no | yes | total |
| income.mean | 19492.806 | 19141.686 | 19359.1800356506 |
| income.sd | 9192.072 | 8826.999 | 9052.51555447872 |
| income.obs | 695.000 | 427.000 | 1122 |
| age.mean | 43.72230 | 38.89227 | 41.8841354723708 |
| age.sd | 18.14572 | 14.72686 | 17.0812441184657 |
| age.obs | 695.00000 | 427.00000 | 1122 |
| educ.mean | 12.427338 | 11.806792 | 12.1911764705882 |
| educ.sd | 3.517470 | 2.861969 | 3.29594980916978 |
| educ.obs | 695.000000 | 427.000000 | 1122 |
| pcigs79.mean | 61.214532 | 60.577049 | 60.9719250384924 |
| pcigs79.sd | 4.800400 | 4.915248 | 4.85213378896449 |
| pcigs79.obs | 695.000000 | 427.000000 | 1122 |
htmlTable(txtRound(t5,0,excl.rows=c(1:3,5,6,8,9,11,12)))
| smoker | no | yes | total |
| income.mean | 19492.806 | 19141.686 | 19359.1800356506 |
| income.sd | 9192.072 | 8826.999 | 9052.51555447872 |
| income.obs | 695 | 427 | 1122 |
| age.mean | 43.72230 | 38.89227 | 41.8841354723708 |
| age.sd | 18.14572 | 14.72686 | 17.0812441184657 |
| age.obs | 695 | 427 | 1122 |
| educ.mean | 12.427338 | 11.806792 | 12.1911764705882 |
| educ.sd | 3.517470 | 2.861969 | 3.29594980916978 |
| educ.obs | 695 | 427 | 1122 |
| pcigs79.mean | 61.214532 | 60.577049 | 60.9719250384924 |
| pcigs79.sd | 4.800400 | 4.915248 | 4.85213378896449 |
| pcigs79.obs | 695 | 427 | 1122 |
Table 8.7
ols.1<- lm((as.numeric(smoker)-1) ~ age +educ + pcigs79 +income,
data = smoking)
ols.1a <- xtable(summary(ols.1))
print(ols.1a, type="html")
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 1.1811 | 0.1943 | 6.08 | 0.0000 |
| age | -0.0046 | 0.0009 | -5.36 | 0.0000 |
| educ | -0.0197 | 0.0047 | -4.14 | 0.0000 |
| pcigs79 | -0.0063 | 0.0029 | -2.14 | 0.0323 |
| income | 0.0000 | 0.0000 | 0.47 | 0.6400 |
ols.2<- lm((as.numeric(smoker)-1) ~ age +educ + pcigs79 +inc.10k)
ols.2a <- xtable(summary(ols.2))
print(ols.2a, type="html")
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 1.1811 | 0.1943 | 6.08 | 0.0000 |
| age | -0.0046 | 0.0009 | -5.36 | 0.0000 |
| educ | -0.0197 | 0.0047 | -4.14 | 0.0000 |
| pcigs79 | -0.0063 | 0.0029 | -2.14 | 0.0323 |
| inc.10k | 0.0079 | 0.0170 | 0.47 | 0.6400 |
## Alternate version
stargazer(ols.1,ols.2, type="html", align=TRUE, no.space=TRUE,
digits=4, dep.var.labels=c("smoker"),
column.labels=c("income", "income/10k"),
covariate.labels=c("age in years",
"education in years",
"price of a pack", "household income",
"income in 10k"))
| Dependent variable: | ||
| smoker | ||
| income | income/10k | |
| (1) | (2) | |
| age in years | -0.0046*** | -0.0046*** |
| (0.0009) | (0.0009) | |
| education in years | -0.0197*** | -0.0197*** |
| (0.0047) | (0.0047) | |
| price of a pack | -0.0063** | -0.0063** |
| (0.0029) | (0.0029) | |
| household income | 0.000001 | |
| (0.000002) | ||
| income in 10k | 0.0079 | |
| (0.0170) | ||
| Constant | 1.1811*** | 1.1811*** |
| (0.1943) | (0.1943) | |
| Observations | 1,122 | 1,122 |
| R2 | 0.0380 | 0.0380 |
| Adjusted R2 | 0.0346 | 0.0346 |
| Residual Std. Error (df = 1117) | 0.4773 | 0.4773 |
| F Statistic (df = 4; 1117) | 11.0438*** | 11.0438*** |
| Note: | p<0.1; p<0.05; p<0.01 | |
Table 8.9
GLM.1 <- glm(smoker ~ age +educ + pcigs79 + income, family=binomial(logit))
coef.vector1 <-exp(GLM.1$coef)
stargazer(GLM.1, type="html", align=TRUE, no.space=TRUE,
digits=4, dep.var.labels=c("smoker"),
column.labels=c("income", "income/10k"),
covariate.labels=c("age in years", "education in years",
"price of a pack", "household income"))
| Dependent variable: | |
| smoker | |
| income | |
| age in years | -0.0202*** |
| (0.0038) | |
| education in years | -0.0867*** |
| (0.0212) | |
| price of a pack | -0.0274** |
| (0.0129) | |
| household income | 0.000004 |
| (0.00001) | |
| Constant | 2.9926*** |
| (0.8553) | |
| Observations | 1,122 |
| Log Likelihood | -723.6548 |
| Akaike Inf. Crit. | 1,457.3100 |
| Note: | p<0.1; p<0.05; p<0.01 |
…
Exponentiated coefficients Table 8.10
GLM.1 <- glm(smoker ~ age +educ + pcigs79 + income, family=binomial(logit))
coef.vector1 <-exp(GLM.1$coef)
GLM.2 <- glm(smoker ~ age +educ + pcigs79 + inc.10k, family=binomial(logit))
coef.vector2 <-exp(GLM.2$coef)
stargazer(GLM.1, GLM.2, coef= list(coef.vector1, coef.vector2),
digits=4, dep.var.labels=c("smoker"),
column.labels=c("income", "income/10k"),
type="html",
covariate.labels=c("age in years", "education in years",
"price of a pack", "household income",
"income in 10k"))
| Dependent variable: | ||
| smoker | ||
| income | income/10k | |
| (1) | (2) | |
| age in years | 0.9800*** | 0.9800*** |
| (0.0038) | (0.0038) | |
| education in years | 0.9169*** | 0.9169*** |
| (0.0212) | (0.0212) | |
| price of a pack | 0.9730*** | 0.9730*** |
| (0.0129) | (0.0129) | |
| household income | 1.0000*** | |
| (0.00001) | ||
| income in 10k | 1.0379*** | |
| (0.0745) | ||
| Constant | 19.9367*** | 19.9367*** |
| (0.8553) | (0.8553) | |
| Observations | 1,122 | 1,122 |
| Log Likelihood | -723.6548 | -723.6548 |
| Akaike Inf. Crit. | 1,457.3100 | 1,457.3100 |
| Note: | p<0.1; p<0.05; p<0.01 | |
Figure 8.9
GLM.p<-(glm(smoker ~ age +educ + pcigs79 + inc.10k, data=smoking, family=binomial(probit)))
summary(GLM.p)
##
## Call:
## glm(formula = smoker ~ age + educ + pcigs79 + inc.10k, family = binomial(probit),
## data = smoking)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4392 -0.9872 -0.8035 1.2806 1.8318
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.851707 0.525606 3.523 0.000427 ***
## age -0.012539 0.002341 -5.357 8.47e-08 ***
## educ -0.053713 0.012957 -4.145 3.39e-05 ***
## pcigs79 -0.016871 0.007924 -2.129 0.033242 *
## inc.10k 0.021489 0.045853 0.469 0.639312
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1490.8 on 1121 degrees of freedom
## Residual deviance: 1446.9 on 1117 degrees of freedom
## AIC: 1456.9
##
## Number of Fisher Scoring iterations: 4
Table 8.12
stargazer(ols.2, GLM.2,GLM.p, digits=4, dep.var.labels=c("smoker"),
type="html",
covariate.labels=c("age in years",
"education in years",
"price of a pack", "income in 10k"))
| Dependent variable: | |||
| smoker | smoker | ||
| OLS | logistic | probit | |
| (1) | (2) | (3) | |
| age in years | -0.0046*** | -0.0202*** | -0.0125*** |
| (0.0009) | (0.0038) | (0.0023) | |
| education in years | -0.0197*** | -0.0867*** | -0.0537*** |
| (0.0047) | (0.0212) | (0.0130) | |
| price of a pack | -0.0063** | -0.0274** | -0.0169** |
| (0.0029) | (0.0129) | (0.0079) | |
| income in 10k | 0.0079 | 0.0372 | 0.0215 |
| (0.0170) | (0.0745) | (0.0459) | |
| Constant | 1.1811*** | 2.9926*** | 1.8517*** |
| (0.1943) | (0.8553) | (0.5256) | |
| Observations | 1,122 | 1,122 | 1,122 |
| R2 | 0.0380 | ||
| Adjusted R2 | 0.0346 | ||
| Log Likelihood | -723.6548 | -723.4255 | |
| Akaike Inf. Crit. | 1,457.3100 | 1,456.8510 | |
| Residual Std. Error | 0.4773 (df = 1117) | ||
| F Statistic | 11.0438*** (df = 4; 1117) | ||
| Note: | p<0.1; p<0.05; p<0.01 | ||
z.out1 <- zelig(smoker ~ age + educ + pcigs79 + income,
model = "logit", data = smoking)
## How to cite this model in Zelig:
## R Core Team. 2007.
## logit: Logistic Regression for Dichotomous Dependent Variables
## in Christine Choirat, James Honaker, Kosuke Imai, Gary King, and Olivia Lau,
## "Zelig: Everyone's Statistical Software," http://zeligproject.org/
x.out1 <- setx(z.out1, age = 41.88414, educ=12.19118,
income = 19359.18, pcigs79 = 60.97193)
## Warning in model.response(mf, "numeric"): using type = "numeric" with a
## factor response will be ignored
## Warning in Ops.factor(y, z$residuals): '-' not meaningful for factors
s.out1 <- sim(z.out1, x = x.out1, num=100000)
summary(s.out1)
##
## sim x :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## [1,] 0.3759722 0.01480765 0.3759038 0.3473249 0.4052036
## pv
## 0 1
## [1,] 0.62702 0.37298
x.out2 <- setx(z.out1, age = 41.88414, educ=12.19118,
income = 19359.18, pcigs79 = 76.2149125)
## Warning in model.response(mf, "numeric"): using type = "numeric" with a
## factor response will be ignored
## Warning in model.response(mf, "numeric"): '-' not meaningful for factors
s.out2 <- sim(z.out1, x = x.out2, x1 = x.out1, num=100000)
summary(s.out2)
##
## sim x :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## [1,] 0.2858559 0.04218039 0.2840144 0.2086999 0.373392
## pv
## 0 1
## [1,] 0.71444 0.28556
##
## sim x1 :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## [1,] 0.3759802 0.01480383 0.3758579 0.3470313 0.4052447
## pv
## 0 1
## [1,] 0.62507 0.37493
## fd
## mean sd 50% 2.5% 97.5%
## [1,] 0.09012424 0.03986612 0.09181075 0.007302585 0.1633069
plot(s.out1)
plot(s.out2)
library (erer)
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
ma <- glm(smoker ~ age + educ + inc.10k + pcigs79, x = TRUE,
data = smoking, family = binomial(link = "probit"))
ea <- maBina(w = ma, x.mean = TRUE, rev.dum = TRUE)
summary(ma)
##
## Call:
## glm(formula = smoker ~ age + educ + inc.10k + pcigs79, family = binomial(link = "probit"),
## data = smoking, x = TRUE)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4392 -0.9872 -0.8035 1.2806 1.8318
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.851707 0.525606 3.523 0.000427 ***
## age -0.012539 0.002341 -5.357 8.47e-08 ***
## educ -0.053713 0.012957 -4.145 3.39e-05 ***
## inc.10k 0.021489 0.045853 0.469 0.639312
## pcigs79 -0.016871 0.007924 -2.129 0.033242 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1490.8 on 1121 degrees of freedom
## Residual deviance: 1446.9 on 1117 degrees of freedom
## AIC: 1456.9
##
## Number of Fisher Scoring iterations: 4
ea
## effect error t.value p.value
## (Intercept) 0.703 0.200 3.514 0.000
## age -0.005 0.001 -5.363 0.000
## educ -0.020 0.005 -4.148 0.000
## inc.10k 0.008 0.017 0.469 0.639
## pcigs79 -0.006 0.003 -2.129 0.033
edu.1 <- maTrend(q = ea, nam.c="educ")
plot(edu.1)
download.file("https://sites.google.com/site/econometriks/docs/ny_grouped.rda", "ny_grouped.rda")
load("ny_grouped.rda")
attach(ny_grouped)
Table 8.15
f1 <- function(x) c( mean=mean(x), sd= sd(x), obs=length(x))
tab<-rbind("average autos/hhld"=f1(hh_autos),
'average persons/hhld'=f1(hh_size),
'distance from CBD'=f1(d_cbd),
'total number of trips'=f1(n_trips),
'total transit trips' =f1(n_transit))
htmlTable(txtRound(tab,2))
| mean | sd | obs | |
|---|---|---|---|
| average autos/hhld | 0.78 | 0.44 | 2707.00 |
| average persons/hhld | 2.75 | 0.51 | 2707.00 |
| distance from CBD | 13.40 | 5.87 | 2707.00 |
| total number of trips | 1518.03 | 1073.37 | 2707.00 |
| total transit trips | 698.10 | 674.33 | 2707.00 |
…
Figure 8.20 and Table 8.17
ny_grouped$non_transit <- with(ny_grouped, n_trips-n_transit)
detach(ny_grouped)
attach(ny_grouped)
mod1 <- glm(cbind( n_transit, non_transit) ~ hh_autos + hh_size +
d_cbd, data = ny_grouped,
family = binomial(link="logit"),trace=T)
Deviance = 249880.9 Iterations - 1 Deviance = 246322.9 Iterations - 2 Deviance = 246320 Iterations - 3 Deviance = 246320 Iterations - 4
stargazer(mod1, type="html",
covariate.labels=c("avg. autos/hhld", "avg. persons/ hhld",
"distance from cbd"))
| Dependent variable: | |
| cbind(n_transit, non_transit) | |
| avg. autos/hhld | -1.693*** |
| (0.003) | |
| avg. persons/ hhld | 0.090*** |
| (0.002) | |
| distance from cbd | -0.006*** |
| (0.0002) | |
| Constant | 0.974*** |
| (0.005) | |
| Observations | 2,707 |
| Log Likelihood | -133,013.300 |
| Akaike Inf. Crit. | 266,034.600 |
| Note: | p<0.1; p<0.05; p<0.01 |
Figure 8.19
res<-(data.frame("observed transit trips"=(ny_grouped$n_transit), "forecasted transit trips"=fitted(mod1)*ny_grouped$n_trips))
head(res)
## observed.transit.trips forecasted.transit.trips
## 1 182 194.4994
## 2 1467 1251.3738
## 3 718 674.3818
## 4 883 838.6868
## 5 1803 1705.3699
## 6 251 286.3279
plot(res)