Research question: How do changes in real cigarette prices, income, and nearby-state minimum prices affect cigarette consumption across states and over time?
Loading in the libraries
library(plm)
library(dplyr)
library(ggplot2)
library(broom)
library(lmtest)
library(sandwich)
Loading the Cigar Dataset
data(Cigar)
head(Cigar)
Cleaning and preparing the data
Cigar_df <- Cigar %>%
mutate(
year = year + 1900,
state_factor = factor(state),
year_factor = factor(year),
real_price = price / cpi,
real_ndi = ndi / cpi,
real_pimin = pimin / cpi
)
head(Cigar_df)
Summary Statistics for Key Variables
Cigar_df %>%
summarise(
mean_sales = mean(sales),
sd_sales = sd(sales),
mean_real_price = mean(real_price),
mean_real_ndi = mean(real_ndi),
mean_real_pimin = mean(real_pimin)
)
Histograms
ggplot(Cigar_df, aes(sales)) + geom_histogram(bins=30, fill="gray") + theme_minimal()
ggplot(Cigar_df, aes(real_price)) + geom_histogram(bins=30, fill="gray") + theme_minimal()
ggplot(Cigar_df, aes(real_ndi)) + geom_histogram(bins=30, fill="gray") + theme_minimal()
ggplot(Cigar_df, aes(real_pimin)) + geom_histogram(bins=30, fill="gray") + theme_minimal()
ggplot(Cigar_df, aes(pop16)) + geom_histogram(bins=30, fill="gray") + theme_minimal()
Scatterplots of Sales vs Predictors
ggplot(Cigar_df, aes(real_price, sales)) +
geom_point(alpha=.25) + geom_smooth(method="lm") + theme_minimal()
ggplot(Cigar_df, aes(real_ndi, sales)) +
geom_point(alpha=.25) + geom_smooth(method="lm") + theme_minimal()
ggplot(Cigar_df, aes(real_pimin, sales)) +
geom_point(alpha=.25) + geom_smooth(method="lm") + theme_minimal()
Creating yearly mean sales variable
year_means <- Cigar_df %>%
group_by(year) %>%
summarise(mean_sales = mean(sales))
Graphing the mean cigarette sales per year
ggplot(year_means, aes(x = year, y = mean_sales)) +
geom_line(size = 1.1, color = "black") +
theme_minimal() +
labs(
title = "Mean Cigarette Sales per Capita by Year",
x = "Year",
y = "Mean Packs per Capita"
)
Converting the data to a panel format
panel_df <- pdata.frame(Cigar_df, index = c("state", "year"))
Fixed effects regression
fe_model <- plm(
sales ~ real_price + real_ndi + real_pimin + pop16,
data = panel_df,
model = "within"
)
summary(fe_model)
Oneway (individual) effect Within Model
Call:
plm(formula = sales ~ real_price + real_ndi + real_pimin + pop16,
data = panel_df, model = "within")
Balanced Panel: n = 46, T = 30, N = 1380
Residuals:
Min. 1st Qu. Median 3rd Qu. Max.
-66.05662 -5.58883 0.25892 5.50535 116.82257
Coefficients:
Estimate Std. Error t-value Pr(>|t|)
real_price -1.0307e+02 6.5803e+00 -15.6636 < 2.2e-16 ***
real_ndi -7.6352e-02 3.1540e-02 -2.4208 0.015618 *
real_pimin 1.9225e+01 7.4363e+00 2.5852 0.009837 **
pop16 -1.0123e-03 5.4891e-04 -1.8443 0.065366 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Total Sum of Squares: 412530
Residual Sum of Squares: 227480
R-Squared: 0.44858
Adj. R-Squared: 0.42826
F-statistic: 270.484 on 4 and 1330 DF, p-value: < 2.22e-16
Random effects regression
re_model <- plm(
sales ~ real_price + real_ndi + real_pimin + pop16,
data = panel_df,
model = "random"
)
summary(re_model)
Oneway (individual) effect Random Effect Model
(Swamy-Arora's transformation)
Call:
plm(formula = sales ~ real_price + real_ndi + real_pimin + pop16,
data = panel_df, model = "random")
Balanced Panel: n = 46, T = 30, N = 1380
Effects:
var std.dev share
idiosyncratic 171.04 13.08 0.28
individual 438.84 20.95 0.72
theta: 0.8868
Residuals:
Min. 1st Qu. Median 3rd Qu. Max.
-62.04795 -6.56952 -0.11468 5.26028 121.10478
Coefficients:
Estimate Std. Error z-value Pr(>|z|)
(Intercept) 2.1165e+02 4.4274e+00 47.8052 < 2.2e-16 ***
real_price -1.0424e+02 6.5748e+00 -15.8549 < 2.2e-16 ***
real_ndi -6.6200e-02 3.0607e-02 -2.1629 0.030549 *
real_pimin 1.9834e+01 7.4229e+00 2.6721 0.007538 **
pop16 -9.0291e-04 4.7276e-04 -1.9098 0.056153 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Total Sum of Squares: 424220
Residual Sum of Squares: 238160
R-Squared: 0.43859
Adj. R-Squared: 0.43696
Chisq: 1074.2 on 4 DF, p-value: < 2.22e-16
Hausman Test to compare FE and RE
phtest(fe_model, re_model)
Hausman Test
data: sales ~ real_price + real_ndi + real_pimin + pop16
chisq = 359.94, df = 4, p-value < 2.2e-16
alternative hypothesis: one model is inconsistent
Breusch–Pagan test for heteroskedasticity
bptest(fe_model)
studentized Breusch-Pagan test
data: fe_model
BP = 122.49, df = 4, p-value < 2.2e-16
Wooldridge test for serial correlation
pbgtest(fe_model)
Breusch-Godfrey/Wooldridge test for serial correlation in panel models
data: sales ~ real_price + real_ndi + real_pimin + pop16
chisq = 967.84, df = 30, p-value < 2.2e-16
alternative hypothesis: serial correlation in idiosyncratic errors
Robust fixed effects model, corrected based on diagnostics
fe_robust <- coeftest(fe_model, vcov = vcovHC(fe_model, type = "HC1"))
fe_robust
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
real_price -1.0307e+02 2.6459e+01 -3.8955 0.0001029 ***
real_ndi -7.6352e-02 1.6574e-01 -0.4607 0.6451052
real_pimin 1.9225e+01 3.0185e+01 0.6369 0.5243095
pop16 -1.0123e-03 1.9184e-03 -0.5277 0.5978001
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1