Research question: How do changes in real cigarette prices, income, and nearby-state minimum prices affect cigarette consumption across states and over time?

Loading in the libraries

library(plm)
library(dplyr)
library(ggplot2)
library(broom)
library(lmtest)
library(sandwich)

Loading the Cigar Dataset

data(Cigar)
head(Cigar)

Cleaning and preparing the data

Cigar_df <- Cigar %>%
  mutate(
    year = year + 1900,
    state_factor = factor(state),
    year_factor  = factor(year),
    real_price = price / cpi,
    real_ndi   = ndi / cpi,
    real_pimin = pimin / cpi
  )

head(Cigar_df)

Summary Statistics for Key Variables

Cigar_df %>%
  summarise(
    mean_sales = mean(sales),
    sd_sales = sd(sales),
    mean_real_price = mean(real_price),
    mean_real_ndi = mean(real_ndi),
    mean_real_pimin = mean(real_pimin)
  )

Histograms

ggplot(Cigar_df, aes(sales)) + geom_histogram(bins=30, fill="gray") + theme_minimal()

ggplot(Cigar_df, aes(real_price)) + geom_histogram(bins=30, fill="gray") + theme_minimal()

ggplot(Cigar_df, aes(real_ndi)) + geom_histogram(bins=30, fill="gray") + theme_minimal()

ggplot(Cigar_df, aes(real_pimin)) + geom_histogram(bins=30, fill="gray") + theme_minimal()

ggplot(Cigar_df, aes(pop16)) + geom_histogram(bins=30, fill="gray") + theme_minimal()

Scatterplots of Sales vs Predictors

ggplot(Cigar_df, aes(real_price, sales)) +
  geom_point(alpha=.25) + geom_smooth(method="lm") + theme_minimal()


ggplot(Cigar_df, aes(real_ndi, sales)) +
  geom_point(alpha=.25) + geom_smooth(method="lm") + theme_minimal()


ggplot(Cigar_df, aes(real_pimin, sales)) +
  geom_point(alpha=.25) + geom_smooth(method="lm") + theme_minimal()

Creating yearly mean sales variable

year_means <- Cigar_df %>%
  group_by(year) %>%
  summarise(mean_sales = mean(sales))

Graphing the mean cigarette sales per year

ggplot(year_means, aes(x = year, y = mean_sales)) +
  geom_line(size = 1.1, color = "black") +
  theme_minimal() +
  labs(
    title = "Mean Cigarette Sales per Capita by Year",
    x = "Year",
    y = "Mean Packs per Capita"
  )

Converting the data to a panel format

panel_df <- pdata.frame(Cigar_df, index = c("state", "year"))

Fixed effects regression

fe_model <- plm(
  sales ~ real_price + real_ndi + real_pimin + pop16,
  data = panel_df,
  model = "within"
)

summary(fe_model)
Oneway (individual) effect Within Model

Call:
plm(formula = sales ~ real_price + real_ndi + real_pimin + pop16, 
    data = panel_df, model = "within")

Balanced Panel: n = 46, T = 30, N = 1380

Residuals:
     Min.   1st Qu.    Median   3rd Qu.      Max. 
-66.05662  -5.58883   0.25892   5.50535 116.82257 

Coefficients:
              Estimate  Std. Error  t-value  Pr(>|t|)    
real_price -1.0307e+02  6.5803e+00 -15.6636 < 2.2e-16 ***
real_ndi   -7.6352e-02  3.1540e-02  -2.4208  0.015618 *  
real_pimin  1.9225e+01  7.4363e+00   2.5852  0.009837 ** 
pop16      -1.0123e-03  5.4891e-04  -1.8443  0.065366 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Total Sum of Squares:    412530
Residual Sum of Squares: 227480
R-Squared:      0.44858
Adj. R-Squared: 0.42826
F-statistic: 270.484 on 4 and 1330 DF, p-value: < 2.22e-16

Random effects regression

re_model <- plm(
  sales ~ real_price + real_ndi + real_pimin + pop16,
  data = panel_df,
  model = "random"
)

summary(re_model)
Oneway (individual) effect Random Effect Model 
   (Swamy-Arora's transformation)

Call:
plm(formula = sales ~ real_price + real_ndi + real_pimin + pop16, 
    data = panel_df, model = "random")

Balanced Panel: n = 46, T = 30, N = 1380

Effects:
                 var std.dev share
idiosyncratic 171.04   13.08  0.28
individual    438.84   20.95  0.72
theta: 0.8868

Residuals:
     Min.   1st Qu.    Median   3rd Qu.      Max. 
-62.04795  -6.56952  -0.11468   5.26028 121.10478 

Coefficients:
               Estimate  Std. Error  z-value  Pr(>|z|)    
(Intercept)  2.1165e+02  4.4274e+00  47.8052 < 2.2e-16 ***
real_price  -1.0424e+02  6.5748e+00 -15.8549 < 2.2e-16 ***
real_ndi    -6.6200e-02  3.0607e-02  -2.1629  0.030549 *  
real_pimin   1.9834e+01  7.4229e+00   2.6721  0.007538 ** 
pop16       -9.0291e-04  4.7276e-04  -1.9098  0.056153 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Total Sum of Squares:    424220
Residual Sum of Squares: 238160
R-Squared:      0.43859
Adj. R-Squared: 0.43696
Chisq: 1074.2 on 4 DF, p-value: < 2.22e-16

Hausman Test to compare FE and RE

phtest(fe_model, re_model)

    Hausman Test

data:  sales ~ real_price + real_ndi + real_pimin + pop16
chisq = 359.94, df = 4, p-value < 2.2e-16
alternative hypothesis: one model is inconsistent

Breusch–Pagan test for heteroskedasticity

bptest(fe_model)

    studentized Breusch-Pagan test

data:  fe_model
BP = 122.49, df = 4, p-value < 2.2e-16

Wooldridge test for serial correlation

pbgtest(fe_model)

    Breusch-Godfrey/Wooldridge test for serial correlation in panel models

data:  sales ~ real_price + real_ndi + real_pimin + pop16
chisq = 967.84, df = 30, p-value < 2.2e-16
alternative hypothesis: serial correlation in idiosyncratic errors

Robust fixed effects model, corrected based on diagnostics

fe_robust <- coeftest(fe_model, vcov = vcovHC(fe_model, type = "HC1"))
fe_robust

t test of coefficients:

              Estimate  Std. Error t value  Pr(>|t|)    
real_price -1.0307e+02  2.6459e+01 -3.8955 0.0001029 ***
real_ndi   -7.6352e-02  1.6574e-01 -0.4607 0.6451052    
real_pimin  1.9225e+01  3.0185e+01  0.6369 0.5243095    
pop16      -1.0123e-03  1.9184e-03 -0.5277 0.5978001    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpSZXNlYXJjaCBxdWVzdGlvbjogSG93IGRvIGNoYW5nZXMgaW4gcmVhbCBjaWdhcmV0dGUgcHJpY2VzLCBpbmNvbWUsIGFuZCBuZWFyYnktc3RhdGUgbWluaW11bSBwcmljZXMgYWZmZWN0IGNpZ2FyZXR0ZSBjb25zdW1wdGlvbiBhY3Jvc3Mgc3RhdGVzIGFuZCBvdmVyIHRpbWU/DQoNCg0KDQpMb2FkaW5nIGluIHRoZSBsaWJyYXJpZXMNCg0KYGBge3J9DQpsaWJyYXJ5KHBsbSkNCmxpYnJhcnkoZHBseXIpDQpsaWJyYXJ5KGdncGxvdDIpDQpsaWJyYXJ5KGJyb29tKQ0KbGlicmFyeShsbXRlc3QpDQpsaWJyYXJ5KHNhbmR3aWNoKQ0KYGBgDQoNCkxvYWRpbmcgdGhlIENpZ2FyIERhdGFzZXQNCg0KYGBge3J9DQpkYXRhKENpZ2FyKQ0KaGVhZChDaWdhcikNCmBgYA0KDQoNCg0KQ2xlYW5pbmcgYW5kIHByZXBhcmluZyB0aGUgZGF0YSAgDQoNCmBgYHtyfQ0KQ2lnYXJfZGYgPC0gQ2lnYXIgJT4lDQogIG11dGF0ZSgNCiAgICB5ZWFyID0geWVhciArIDE5MDAsDQogICAgc3RhdGVfZmFjdG9yID0gZmFjdG9yKHN0YXRlKSwNCiAgICB5ZWFyX2ZhY3RvciAgPSBmYWN0b3IoeWVhciksDQogICAgcmVhbF9wcmljZSA9IHByaWNlIC8gY3BpLA0KICAgIHJlYWxfbmRpICAgPSBuZGkgLyBjcGksDQogICAgcmVhbF9waW1pbiA9IHBpbWluIC8gY3BpDQogICkNCg0KaGVhZChDaWdhcl9kZikNCmBgYA0KDQoNCg0KU3VtbWFyeSBTdGF0aXN0aWNzIGZvciBLZXkgVmFyaWFibGVzDQoNCmBgYHtyfQ0KQ2lnYXJfZGYgJT4lDQogIHN1bW1hcmlzZSgNCiAgICBtZWFuX3NhbGVzID0gbWVhbihzYWxlcyksDQogICAgc2Rfc2FsZXMgPSBzZChzYWxlcyksDQogICAgbWVhbl9yZWFsX3ByaWNlID0gbWVhbihyZWFsX3ByaWNlKSwNCiAgICBtZWFuX3JlYWxfbmRpID0gbWVhbihyZWFsX25kaSksDQogICAgbWVhbl9yZWFsX3BpbWluID0gbWVhbihyZWFsX3BpbWluKQ0KICApDQpgYGANCg0KDQoNCkhpc3RvZ3JhbXMNCg0KYGBge3J9DQpnZ3Bsb3QoQ2lnYXJfZGYsIGFlcyhzYWxlcykpICsgZ2VvbV9oaXN0b2dyYW0oYmlucz0zMCwgZmlsbD0iZ3JheSIpICsgdGhlbWVfbWluaW1hbCgpDQpnZ3Bsb3QoQ2lnYXJfZGYsIGFlcyhyZWFsX3ByaWNlKSkgKyBnZW9tX2hpc3RvZ3JhbShiaW5zPTMwLCBmaWxsPSJncmF5IikgKyB0aGVtZV9taW5pbWFsKCkNCmdncGxvdChDaWdhcl9kZiwgYWVzKHJlYWxfbmRpKSkgKyBnZW9tX2hpc3RvZ3JhbShiaW5zPTMwLCBmaWxsPSJncmF5IikgKyB0aGVtZV9taW5pbWFsKCkNCmdncGxvdChDaWdhcl9kZiwgYWVzKHJlYWxfcGltaW4pKSArIGdlb21faGlzdG9ncmFtKGJpbnM9MzAsIGZpbGw9ImdyYXkiKSArIHRoZW1lX21pbmltYWwoKQ0KZ2dwbG90KENpZ2FyX2RmLCBhZXMocG9wMTYpKSArIGdlb21faGlzdG9ncmFtKGJpbnM9MzAsIGZpbGw9ImdyYXkiKSArIHRoZW1lX21pbmltYWwoKQ0KYGBgDQoNCg0KU2NhdHRlcnBsb3RzIG9mIFNhbGVzIHZzIFByZWRpY3RvcnMgIA0KDQpgYGB7cn0NCmdncGxvdChDaWdhcl9kZiwgYWVzKHJlYWxfcHJpY2UsIHNhbGVzKSkgKw0KICBnZW9tX3BvaW50KGFscGhhPS4yNSkgKyBnZW9tX3Ntb290aChtZXRob2Q9ImxtIikgKyB0aGVtZV9taW5pbWFsKCkNCg0KZ2dwbG90KENpZ2FyX2RmLCBhZXMocmVhbF9uZGksIHNhbGVzKSkgKw0KICBnZW9tX3BvaW50KGFscGhhPS4yNSkgKyBnZW9tX3Ntb290aChtZXRob2Q9ImxtIikgKyB0aGVtZV9taW5pbWFsKCkNCg0KZ2dwbG90KENpZ2FyX2RmLCBhZXMocmVhbF9waW1pbiwgc2FsZXMpKSArDQogIGdlb21fcG9pbnQoYWxwaGE9LjI1KSArIGdlb21fc21vb3RoKG1ldGhvZD0ibG0iKSArIHRoZW1lX21pbmltYWwoKQ0KYGBgDQoNCg0KQ3JlYXRpbmcgeWVhcmx5IG1lYW4gc2FsZXMgdmFyaWFibGUNCg0KYGBge3J9DQp5ZWFyX21lYW5zIDwtIENpZ2FyX2RmICU+JQ0KICBncm91cF9ieSh5ZWFyKSAlPiUNCiAgc3VtbWFyaXNlKG1lYW5fc2FsZXMgPSBtZWFuKHNhbGVzKSkNCmBgYA0KDQoNCkdyYXBoaW5nIHRoZSBtZWFuIGNpZ2FyZXR0ZSBzYWxlcyBwZXIgeWVhcg0KDQpgYGB7cn0NCmdncGxvdCh5ZWFyX21lYW5zLCBhZXMoeCA9IHllYXIsIHkgPSBtZWFuX3NhbGVzKSkgKw0KICBnZW9tX2xpbmUoc2l6ZSA9IDEuMSwgY29sb3IgPSAiYmxhY2siKSArDQogIHRoZW1lX21pbmltYWwoKSArDQogIGxhYnMoDQogICAgdGl0bGUgPSAiTWVhbiBDaWdhcmV0dGUgU2FsZXMgcGVyIENhcGl0YSBieSBZZWFyIiwNCiAgICB4ID0gIlllYXIiLA0KICAgIHkgPSAiTWVhbiBQYWNrcyBwZXIgQ2FwaXRhIg0KICApDQpgYGANCg0KQ29udmVydGluZyB0aGUgZGF0YSB0byBhIHBhbmVsIGZvcm1hdA0KDQpgYGB7cn0NCnBhbmVsX2RmIDwtIHBkYXRhLmZyYW1lKENpZ2FyX2RmLCBpbmRleCA9IGMoInN0YXRlIiwgInllYXIiKSkNCmBgYA0KDQoNCkZpeGVkIGVmZmVjdHMgcmVncmVzc2lvbiAgDQoNCmBgYHtyfQ0KZmVfbW9kZWwgPC0gcGxtKA0KICBzYWxlcyB+IHJlYWxfcHJpY2UgKyByZWFsX25kaSArIHJlYWxfcGltaW4gKyBwb3AxNiwNCiAgZGF0YSA9IHBhbmVsX2RmLA0KICBtb2RlbCA9ICJ3aXRoaW4iDQopDQoNCnN1bW1hcnkoZmVfbW9kZWwpDQpgYGANCg0KUmFuZG9tIGVmZmVjdHMgcmVncmVzc2lvbiAgDQoNCmBgYHtyfQ0KcmVfbW9kZWwgPC0gcGxtKA0KICBzYWxlcyB+IHJlYWxfcHJpY2UgKyByZWFsX25kaSArIHJlYWxfcGltaW4gKyBwb3AxNiwNCiAgZGF0YSA9IHBhbmVsX2RmLA0KICBtb2RlbCA9ICJyYW5kb20iDQopDQoNCnN1bW1hcnkocmVfbW9kZWwpDQpgYGANCg0KSGF1c21hbiBUZXN0IHRvIGNvbXBhcmUgRkUgYW5kIFJFDQoNCmBgYHtyfQ0KcGh0ZXN0KGZlX21vZGVsLCByZV9tb2RlbCkNCmBgYA0KDQpCcmV1c2No4oCTUGFnYW4gdGVzdCBmb3IgaGV0ZXJvc2tlZGFzdGljaXR5DQoNCmBgYHtyfQ0KYnB0ZXN0KGZlX21vZGVsKQ0KYGBgDQpXb29sZHJpZGdlIHRlc3QgZm9yIHNlcmlhbCBjb3JyZWxhdGlvbg0KDQpgYGB7cn0NCnBiZ3Rlc3QoZmVfbW9kZWwpDQpgYGANCg0KUm9idXN0IGZpeGVkIGVmZmVjdHMgbW9kZWwsIGNvcnJlY3RlZCBiYXNlZCBvbiBkaWFnbm9zdGljcyAgDQoNCmBgYHtyfQ0KZmVfcm9idXN0IDwtIGNvZWZ0ZXN0KGZlX21vZGVsLCB2Y292ID0gdmNvdkhDKGZlX21vZGVsLCB0eXBlID0gIkhDMSIpKQ0KZmVfcm9idXN0DQpgYGANCg0K