remove(list=ls())
# install.packages("AER")
library(AER)
## Warning: package 'AER' was built under R version 4.2.3
## Loading required package: car
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.2.3
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Warning: package 'sandwich' was built under R version 4.2.3
## Loading required package: survival
data("CigarettesSW")
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.1 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ purrr::some() masks car::some()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
glimpse(CigarettesSW)
## Rows: 96
## Columns: 9
## $ state <fct> AL, AR, AZ, CA, CO, CT, DE, FL, GA, IA, ID, IL, IN, KS, KY,…
## $ year <fct> 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985,…
## $ cpi <dbl> 1.076, 1.076, 1.076, 1.076, 1.076, 1.076, 1.076, 1.076, 1.0…
## $ population <dbl> 3973000, 2327000, 3184000, 26444000, 3209000, 3201000, 6180…
## $ packs <dbl> 116.4863, 128.5346, 104.5226, 100.3630, 112.9635, 109.2784,…
## $ income <dbl> 46014968, 26210736, 43956936, 447102816, 49466672, 60063368…
## $ tax <dbl> 32.50000, 37.00000, 31.00000, 26.00000, 31.00000, 42.00000,…
## $ price <dbl> 102.18167, 101.47500, 108.57875, 107.83734, 94.26666, 128.0…
## $ taxs <dbl> 33.34834, 37.00000, 36.17042, 32.10400, 31.00000, 51.48333,…
library(psych)
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
## The following object is masked from 'package:car':
##
## logit
describe(CigarettesSW)
## vars n mean sd median trimmed mad
## state* 1 96 24.50 13.93 24.50 24.50 17.79
## year* 2 96 1.50 0.50 1.50 1.50 0.74
## cpi 3 96 1.30 0.23 1.30 1.30 0.33
## population 4 96 5168866.32 5442344.66 3697471.50 4151471.85 3175852.26
## packs 5 96 109.18 25.87 110.16 108.33 25.30
## income 6 96 99878735.74 120541138.18 61661644.00 75672114.50 62901336.16
## tax 7 96 42.68 16.14 37.00 40.96 11.86
## price 8 96 143.45 43.89 137.72 140.34 53.47
## taxs 9 96 48.33 19.33 41.05 46.12 13.92
## min max range skew kurtosis se
## state* 1.00 48.00 47.00 0.00 -1.24 1.42
## year* 1.00 2.00 1.00 0.00 -2.02 0.05
## cpi 1.08 1.52 0.45 0.00 -2.02 0.02
## population 478447.00 31493524.00 31015077.00 2.35 6.68 555456.98
## packs 49.27 197.99 148.72 0.49 1.10 2.64
## income 6887097.00 771470144.00 764583047.00 2.77 10.00 12302678.40
## tax 18.00 99.00 81.00 1.03 0.62 1.65
## price 84.97 240.85 155.88 0.40 -1.21 4.48
## taxs 21.27 112.63 91.37 1.04 0.38 1.97
library(ggplot2)
III
Estimate the linear regression model
model <- lm(packs ~ price, data = CigarettesSW)
summary(model)
##
## Call:
## lm(formula = packs ~ price, data = CigarettesSW)
##
## Residuals:
## Min 1Q Median 3Q Max
## -53.926 -9.621 -0.967 7.596 70.369
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 164.35649 6.90885 23.789 < 2e-16 ***
## price -0.38463 0.04608 -8.348 5.92e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.71 on 94 degrees of freedom
## Multiple R-squared: 0.4257, Adjusted R-squared: 0.4196
## F-statistic: 69.68 on 1 and 94 DF, p-value: 5.923e-13
ggplot(CigarettesSW, aes(x = price, y = packs)) +
geom_point() +
xlab("Cigarette Price") +
ylab("Packs Sold per Capita") +
ggtitle("Relationship between Cigarette Prices and Consumption")

IV
Create a matrix of independent variables (including a column of ones
for the intercept)
X <- cbind(1, CigarettesSW$price)
# Create a vector of the dependent variable
y <- CigarettesSW$packs
# Compute the beta coefficients using the matrix algebra formula
betas <- solve(t(X) %*% X) %*% t(X) %*% y
betas
## [,1]
## [1,] 164.3564934
## [2,] -0.3846278
# Compute the residual vector
CigarettesSW$residuals <- y - (X %*% betas)
CigarettesSW$fitted_values<-predict(object = model, newdata = CigarettesSW)
CigarettesSW$residuals_AS <-resid(object = model, newdata = CigarettesSW)
summary(object = CigarettesSW[,11:12])
## fitted_values residuals_AS
## Min. : 71.72 Min. :-53.9258
## 1st Qu.: 96.60 1st Qu.: -9.6210
## Median :111.39 Median : -0.9668
## Mean :109.18 Mean : 0.0000
## 3rd Qu.:124.85 3rd Qu.: 7.5960
## Max. :131.68 Max. : 70.3695
# Calculate the variance-covariance matrix of the beta coefficients
sigma2 <- as.numeric((t(CigarettesSW$residuals) %*% CigarettesSW$residuals) / ( nrow(X) - ncol(X) ))
#sigma2 <- as.numeric( sum(residuals^2) / ( nrow(X) - ncol(X) ) )
sigma2
## [1] 388.4679
vcov <- sigma2 * solve(t(X) %*% X)
# Calculate the standard error of the beta coefficients
se <- sqrt(diag(vcov))
# View the standard error of the beta coefficients
se
## [1] 6.90885462 0.04607608
summary(model)
##
## Call:
## lm(formula = packs ~ price, data = CigarettesSW)
##
## Residuals:
## Min 1Q Median 3Q Max
## -53.926 -9.621 -0.967 7.596 70.369
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 164.35649 6.90885 23.789 < 2e-16 ***
## price -0.38463 0.04608 -8.348 5.92e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.71 on 94 degrees of freedom
## Multiple R-squared: 0.4257, Adjusted R-squared: 0.4196
## F-statistic: 69.68 on 1 and 94 DF, p-value: 5.923e-13
typeof(sigma2)
## [1] "double"
class(sigma2)
## [1] "numeric"