R Markdown
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plm)
## Warning: package 'plm' was built under R version 4.4.3
##
## Adjuntando el paquete: 'plm'
## The following objects are masked from 'package:dplyr':
##
## between, lag, lead
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ plm::between() masks dplyr::between()
## ✖ dplyr::filter() masks stats::filter()
## ✖ plm::lag() masks dplyr::lag(), stats::lag()
## ✖ plm::lead() masks dplyr::lead()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(RColorBrewer)
data("Cigar", package = "plm")
cigar_pd <- pdata.frame(Cigar, index = c("state", "year"))
cigar_pd$price_real <- cigar_pd$price / cigar_pd$cpi
cigar_pd$sales_real <- cigar_pd$sales / cigar_pd$cpi
cigar_pd <- cigar_pd[cigar_pd$price_real > 0 & cigar_pd$sales_real > 0, ]
cigar_pd$l_price <- log(cigar_pd$price_real)
cigar_pd$l_sales <- log(cigar_pd$sales_real)
cigar_df <- as.data.frame(cigar_pd)
ggplot(cigar_df, aes(l_price)) +
geom_histogram(
bins = 30,
fill = "blue2",
color = "white"
) +
labs(
title = "Distribution of Log Real Cigarette Prices",
x = "Log Real Price",
y = "Frequency"
) +
theme_minimal()

elastic_lm <- lm(l_sales ~ l_price, data = cigar_df)
elasticity <- round(coef(elastic_lm)[2], 2)
ggplot(cigar_df, aes(l_price, l_sales)) +
geom_point(alpha = 0.15) +
geom_smooth(method = "lm", se = FALSE) +
annotate(
"text",
x = min(cigar_df$l_price),
y = max(cigar_df$l_sales),
label = paste("Elasticity =", elasticity),
hjust = 0
) +
labs(
title = "Price Elasticity of Cigarette Demand",
x = "Log Real Price",
y = "Log Real Demand"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

ggplot(cigar_df, aes(x = l_price, y = l_sales)) +
geom_bin2d(bins = 35) +
geom_smooth(
method = "lm",
se = FALSE,
color = "white",
linewidth = 1
) +
scale_fill_distiller(
palette = "YlGnBu",
name = "Density"
) +
labs(
title = "Price Elasticity of Cigarette Demand (Real Values)",
subtitle = "Real prices vs real cigarette consumption",
x = "Log Real Price",
y = "Log Real Cigarette Sales"
) +
theme_minimal(base_size = 13)
## `geom_smooth()` using formula = 'y ~ x'

cigar_df <- cigar_df %>%
mutate(price_q = ntile(l_price, 4))
ggplot(cigar_df, aes(factor(price_q), l_sales)) +
geom_boxplot() +
labs(
title = "Cigarette Demand by Price Quartiles",
x = "Price Quartile (Low to High)",
y = "Log Real Demand"
) +
theme_minimal()

elastic_state <- cigar_df %>%
group_by(state) %>%
summarise(
elasticity = coef(lm(l_sales ~ l_price))[2]
)
ggplot(elastic_state, aes(elasticity)) +
geom_histogram(
bins = 20,
fill = "blue3",
color = "white"
) +
geom_vline(
xintercept = mean(elastic_state$elasticity),
linetype = "dashed"
) +
labs(
title = "Distribution of Price Elasticity Across States",
x = "Price Elasticity",
y = "Number of States"
) +
theme_minimal()

fe_model <- plm(
l_sales ~ l_price + pimin + ndi,
data = cigar_pd,
model = "within"
)
summary(fe_model)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = l_sales ~ l_price + pimin + ndi, data = cigar_pd,
## model = "within")
##
## Balanced Panel: n = 46, T = 30, N = 1380
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.4085788 -0.0781452 -0.0066931 0.0782425 0.6179235
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## l_price 4.0321e-01 3.6220e-02 11.1322 < 2.2e-16 ***
## pimin -3.3045e-03 4.5337e-04 -7.2886 5.35e-13 ***
## ndi -1.0043e-04 3.4157e-06 -29.4018 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 458.38
## Residual Sum of Squares: 20.038
## R-Squared: 0.95628
## Adj. R-Squared: 0.95471
## F-statistic: 9705.37 on 3 and 1331 DF, p-value: < 2.22e-16
fe_modelprueba <- plm(
sales ~ price + pimin + ndi,
data = cigar_pd,
model = "within"
)
summary(fe_modelprueba)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = sales ~ price + pimin + ndi, data = cigar_pd, model = "within")
##
## Balanced Panel: n = 46, T = 30, N = 1380
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -78.27770 -6.75695 0.54401 7.04075 122.11238
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## price -0.78962069 0.08425336 -9.3720 < 2.2e-16 ***
## pimin 0.46635055 0.09357589 4.9837 7.060e-07 ***
## ndi 0.00147026 0.00034686 4.2388 2.402e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 412530
## Residual Sum of Squares: 294400
## R-Squared: 0.28634
## Adj. R-Squared: 0.26061
## F-statistic: 178.013 on 3 and 1331 DF, p-value: < 2.22e-16
re_model <- plm(
l_sales ~ l_price + pimin + ndi,
data = cigar_pd,
model = "random"
)
summary(re_model)
## Oneway (individual) effect Random Effect Model
## (Swamy-Arora's transformation)
##
## Call:
## plm(formula = l_sales ~ l_price + pimin + ndi, data = cigar_pd,
## model = "random")
##
## Balanced Panel: n = 46, T = 30, N = 1380
##
## Effects:
## var std.dev share
## idiosyncratic 0.01505 0.12270 0.394
## individual 0.02319 0.15227 0.606
## theta: 0.8544
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.372798 -0.082195 -0.010389 0.072030 0.698022
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.6364e+00 2.5327e-02 64.6119 < 2.2e-16 ***
## l_price 4.1789e-01 3.6781e-02 11.3617 < 2.2e-16 ***
## pimin -3.7513e-03 4.5753e-04 -8.1989 2.426e-16 ***
## ndi -9.6866e-05 3.4471e-06 -28.1005 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 459.38
## Residual Sum of Squares: 21.852
## R-Squared: 0.95243
## Adj. R-Squared: 0.95233
## Chisq: 27550.9 on 3 DF, p-value: < 2.22e-16
re_modelprueba <- plm(
sales ~ price + pimin + ndi,
data = cigar_pd,
model = "random"
)
summary(re_modelprueba)
## Oneway (individual) effect Random Effect Model
## (Swamy-Arora's transformation)
##
## Call:
## plm(formula = sales ~ price + pimin + ndi, data = cigar_pd, model = "random")
##
## Balanced Panel: n = 46, T = 30, N = 1380
##
## Effects:
## var std.dev share
## idiosyncratic 221.19 14.87 0.335
## individual 439.37 20.96 0.665
## theta: 0.8715
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -70.02030 -7.47154 0.61024 6.71294 127.27161
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.3779e+02 3.2041e+00 43.0056 < 2.2e-16 ***
## price -8.0883e-01 8.4152e-02 -9.6115 < 2.2e-16 ***
## pimin 4.7133e-01 9.3276e-02 5.0531 4.348e-07 ***
## ndi 1.6050e-03 3.4637e-04 4.6339 3.588e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 427580
## Residual Sum of Squares: 307760
## R-Squared: 0.28023
## Adj. R-Squared: 0.27866
## Chisq: 535.717 on 3 DF, p-value: < 2.22e-16
phtest(fe_model, re_model)
##
## Hausman Test
##
## data: l_sales ~ l_price + pimin + ndi
## chisq = 59.358, df = 3, p-value = 8.061e-13
## alternative hypothesis: one model is inconsistent
phtest(fe_modelprueba, re_modelprueba)
##
## Hausman Test
##
## data: sales ~ price + pimin + ndi
## chisq = 127.35, df = 3, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
# OLS
ols_model <- lm(l_sales ~ l_price, data = cigar_df)
# Fixed Effects
fe_model <- plm(
l_sales ~ l_price,
data = cigar_pd,
model = "within"
)
# Random Effects
re_model <- plm(
l_sales ~ l_price,
data = cigar_pd,
model = "random"
)
beta_ols <- coef(ols_model)["l_price"]
beta_fe <- coef(fe_model)["l_price"]
beta_re <- coef(re_model)["l_price"]
price_seq <- seq(
min(cigar_df$l_price),
max(cigar_df$l_price),
length.out = 100
)
plot_df <- data.frame(
l_price = price_seq,
ols_fit = coef(ols_model)[1] + beta_ols * price_seq,
fe_fit = mean(cigar_df$l_sales) + beta_fe * (price_seq - mean(cigar_df$l_price)),
re_fit = coef(re_model)[1] + beta_re * price_seq
)
library(ggplot2)
ggplot(cigar_df, aes(l_price, l_sales)) +
geom_point(alpha = 0.15, color = "gray50") +
geom_line(
data = plot_df,
aes(l_price, ols_fit, color = "OLS"),
linewidth = 1
) +
geom_line(
data = plot_df,
aes(l_price, re_fit, color = "Random Effects"),
linewidth = 1
) +
geom_line(
data = plot_df,
aes(l_price, fe_fit, color = "Fixed Effects"),
linewidth = 1
) +
scale_color_manual(
values = c(
"OLS" = "blue",
"Random Effects" = "darkgreen",
"Fixed Effects" = "red"
)
) +
labs(
title = "Comparison of OLS, Fixed Effects, and Random Effects Models",
subtitle = "Estimated price elasticity of cigarette demand",
x = "Log Real Price",
y = "Log Real Demand",
color = "Model"
) +
theme_minimal()
