R Markdown

library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plm)
## Warning: package 'plm' was built under R version 4.4.3
## 
## Adjuntando el paquete: 'plm'
## The following objects are masked from 'package:dplyr':
## 
##     between, lag, lead
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.1     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ plm::between()  masks dplyr::between()
## ✖ dplyr::filter() masks stats::filter()
## ✖ plm::lag()      masks dplyr::lag(), stats::lag()
## ✖ plm::lead()     masks dplyr::lead()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(RColorBrewer)
data("Cigar", package = "plm")

cigar_pd <- pdata.frame(Cigar, index = c("state", "year"))

cigar_pd$price_real <- cigar_pd$price / cigar_pd$cpi
cigar_pd$sales_real <- cigar_pd$sales / cigar_pd$cpi
cigar_pd <- cigar_pd[cigar_pd$price_real > 0 & cigar_pd$sales_real > 0, ]

cigar_pd$l_price <- log(cigar_pd$price_real)
cigar_pd$l_sales <- log(cigar_pd$sales_real)

cigar_df <- as.data.frame(cigar_pd)
ggplot(cigar_df, aes(l_price)) +
  geom_histogram(
    bins = 30,
    fill = "blue2",
    color = "white"
  ) +
  labs(
    title = "Distribution of Log Real Cigarette Prices",
    x = "Log Real Price",
    y = "Frequency"
  ) +
  theme_minimal()

elastic_lm <- lm(l_sales ~ l_price, data = cigar_df)
elasticity <- round(coef(elastic_lm)[2], 2)

ggplot(cigar_df, aes(l_price, l_sales)) +
  geom_point(alpha = 0.15) +
  geom_smooth(method = "lm", se = FALSE) +
  annotate(
    "text",
    x = min(cigar_df$l_price),
    y = max(cigar_df$l_sales),
    label = paste("Elasticity =", elasticity),
    hjust = 0
  ) +
  labs(
    title = "Price Elasticity of Cigarette Demand",
    x = "Log Real Price",
    y = "Log Real Demand"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

ggplot(cigar_df, aes(x = l_price, y = l_sales)) +

  geom_bin2d(bins = 35) +

  geom_smooth(
    method = "lm",
    se = FALSE,
    color = "white",
    linewidth = 1
  ) +

  scale_fill_distiller(
    palette = "YlGnBu",
    name = "Density"
  ) +

  labs(
    title = "Price Elasticity of Cigarette Demand (Real Values)",
    subtitle = "Real prices vs real cigarette consumption",
    x = "Log Real Price",
    y = "Log Real Cigarette Sales"
  ) +

  theme_minimal(base_size = 13)
## `geom_smooth()` using formula = 'y ~ x'

cigar_df <- cigar_df %>%
  mutate(price_q = ntile(l_price, 4))

ggplot(cigar_df, aes(factor(price_q), l_sales)) +
  geom_boxplot() +
  labs(
    title = "Cigarette Demand by Price Quartiles",
    x = "Price Quartile (Low to High)",
    y = "Log Real Demand"
  ) +
  theme_minimal()

elastic_state <- cigar_df %>%
  group_by(state) %>%
  summarise(
    elasticity = coef(lm(l_sales ~ l_price))[2]
  )
ggplot(elastic_state, aes(elasticity)) +
  geom_histogram(
    bins = 20,
    fill = "blue3",
    color = "white"
  ) +
  geom_vline(
    xintercept = mean(elastic_state$elasticity),
    linetype = "dashed"
  ) +
  labs(
    title = "Distribution of Price Elasticity Across States",
    x = "Price Elasticity",
    y = "Number of States"
  ) +
  theme_minimal()

fe_model <- plm(
  l_sales ~ l_price + pimin + ndi,
  data = cigar_pd,
  model = "within"
)

summary(fe_model)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = l_sales ~ l_price + pimin + ndi, data = cigar_pd, 
##     model = "within")
## 
## Balanced Panel: n = 46, T = 30, N = 1380
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -0.4085788 -0.0781452 -0.0066931  0.0782425  0.6179235 
## 
## Coefficients:
##            Estimate  Std. Error  t-value  Pr(>|t|)    
## l_price  4.0321e-01  3.6220e-02  11.1322 < 2.2e-16 ***
## pimin   -3.3045e-03  4.5337e-04  -7.2886  5.35e-13 ***
## ndi     -1.0043e-04  3.4157e-06 -29.4018 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    458.38
## Residual Sum of Squares: 20.038
## R-Squared:      0.95628
## Adj. R-Squared: 0.95471
## F-statistic: 9705.37 on 3 and 1331 DF, p-value: < 2.22e-16
fe_modelprueba <- plm(
  sales ~ price + pimin + ndi,
  data = cigar_pd,
  model = "within"
)

summary(fe_modelprueba)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = sales ~ price + pimin + ndi, data = cigar_pd, model = "within")
## 
## Balanced Panel: n = 46, T = 30, N = 1380
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -78.27770  -6.75695   0.54401   7.04075 122.11238 
## 
## Coefficients:
##          Estimate  Std. Error t-value  Pr(>|t|)    
## price -0.78962069  0.08425336 -9.3720 < 2.2e-16 ***
## pimin  0.46635055  0.09357589  4.9837 7.060e-07 ***
## ndi    0.00147026  0.00034686  4.2388 2.402e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    412530
## Residual Sum of Squares: 294400
## R-Squared:      0.28634
## Adj. R-Squared: 0.26061
## F-statistic: 178.013 on 3 and 1331 DF, p-value: < 2.22e-16
re_model <- plm(
  l_sales ~ l_price + pimin + ndi,
  data = cigar_pd,
  model = "random"
)

summary(re_model)
## Oneway (individual) effect Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = l_sales ~ l_price + pimin + ndi, data = cigar_pd, 
##     model = "random")
## 
## Balanced Panel: n = 46, T = 30, N = 1380
## 
## Effects:
##                   var std.dev share
## idiosyncratic 0.01505 0.12270 0.394
## individual    0.02319 0.15227 0.606
## theta: 0.8544
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -0.372798 -0.082195 -0.010389  0.072030  0.698022 
## 
## Coefficients:
##                Estimate  Std. Error  z-value  Pr(>|z|)    
## (Intercept)  1.6364e+00  2.5327e-02  64.6119 < 2.2e-16 ***
## l_price      4.1789e-01  3.6781e-02  11.3617 < 2.2e-16 ***
## pimin       -3.7513e-03  4.5753e-04  -8.1989 2.426e-16 ***
## ndi         -9.6866e-05  3.4471e-06 -28.1005 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    459.38
## Residual Sum of Squares: 21.852
## R-Squared:      0.95243
## Adj. R-Squared: 0.95233
## Chisq: 27550.9 on 3 DF, p-value: < 2.22e-16
re_modelprueba <- plm(
  sales ~ price + pimin + ndi,
  data = cigar_pd,
  model = "random"
)

summary(re_modelprueba)
## Oneway (individual) effect Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = sales ~ price + pimin + ndi, data = cigar_pd, model = "random")
## 
## Balanced Panel: n = 46, T = 30, N = 1380
## 
## Effects:
##                  var std.dev share
## idiosyncratic 221.19   14.87 0.335
## individual    439.37   20.96 0.665
## theta: 0.8715
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -70.02030  -7.47154   0.61024   6.71294 127.27161 
## 
## Coefficients:
##                Estimate  Std. Error z-value  Pr(>|z|)    
## (Intercept)  1.3779e+02  3.2041e+00 43.0056 < 2.2e-16 ***
## price       -8.0883e-01  8.4152e-02 -9.6115 < 2.2e-16 ***
## pimin        4.7133e-01  9.3276e-02  5.0531 4.348e-07 ***
## ndi          1.6050e-03  3.4637e-04  4.6339 3.588e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    427580
## Residual Sum of Squares: 307760
## R-Squared:      0.28023
## Adj. R-Squared: 0.27866
## Chisq: 535.717 on 3 DF, p-value: < 2.22e-16
phtest(fe_model, re_model)
## 
##  Hausman Test
## 
## data:  l_sales ~ l_price + pimin + ndi
## chisq = 59.358, df = 3, p-value = 8.061e-13
## alternative hypothesis: one model is inconsistent
phtest(fe_modelprueba, re_modelprueba)
## 
##  Hausman Test
## 
## data:  sales ~ price + pimin + ndi
## chisq = 127.35, df = 3, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
# OLS
ols_model <- lm(l_sales ~ l_price, data = cigar_df)

# Fixed Effects
fe_model <- plm(
  l_sales ~ l_price,
  data = cigar_pd,
  model = "within"
)

# Random Effects
re_model <- plm(
  l_sales ~ l_price,
  data = cigar_pd,
  model = "random"
)
beta_ols <- coef(ols_model)["l_price"]
beta_fe  <- coef(fe_model)["l_price"]
beta_re  <- coef(re_model)["l_price"]
price_seq <- seq(
  min(cigar_df$l_price),
  max(cigar_df$l_price),
  length.out = 100
)

plot_df <- data.frame(
  l_price = price_seq,
  ols_fit = coef(ols_model)[1] + beta_ols * price_seq,
  fe_fit  = mean(cigar_df$l_sales) + beta_fe * (price_seq - mean(cigar_df$l_price)),
  re_fit  = coef(re_model)[1] + beta_re * price_seq
)
library(ggplot2)

ggplot(cigar_df, aes(l_price, l_sales)) +
  geom_point(alpha = 0.15, color = "gray50") +

  geom_line(
    data = plot_df,
    aes(l_price, ols_fit, color = "OLS"),
    linewidth = 1
  ) +
  geom_line(
    data = plot_df,
    aes(l_price, re_fit, color = "Random Effects"),
    linewidth = 1
  ) +
  geom_line(
    data = plot_df,
    aes(l_price, fe_fit, color = "Fixed Effects"),
    linewidth = 1
  ) +

  scale_color_manual(
    values = c(
      "OLS" = "blue",
      "Random Effects" = "darkgreen",
      "Fixed Effects" = "red"
    )
  ) +

  labs(
    title = "Comparison of OLS, Fixed Effects, and Random Effects Models",
    subtitle = "Estimated price elasticity of cigarette demand",
    x = "Log Real Price",
    y = "Log Real Demand",
    color = "Model"
  ) +

  theme_minimal()