remove(list=ls())
# install.packages("AER")
library(AER)
## Warning: package 'AER' was built under R version 4.2.3
## Loading required package: car
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.2.3
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
## Warning: package 'sandwich' was built under R version 4.2.3
## Loading required package: survival
data("CigarettesSW")

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.1     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ purrr::some()   masks car::some()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
glimpse(CigarettesSW)
## Rows: 96
## Columns: 9
## $ state      <fct> AL, AR, AZ, CA, CO, CT, DE, FL, GA, IA, ID, IL, IN, KS, KY,…
## $ year       <fct> 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985,…
## $ cpi        <dbl> 1.076, 1.076, 1.076, 1.076, 1.076, 1.076, 1.076, 1.076, 1.0…
## $ population <dbl> 3973000, 2327000, 3184000, 26444000, 3209000, 3201000, 6180…
## $ packs      <dbl> 116.4863, 128.5346, 104.5226, 100.3630, 112.9635, 109.2784,…
## $ income     <dbl> 46014968, 26210736, 43956936, 447102816, 49466672, 60063368…
## $ tax        <dbl> 32.50000, 37.00000, 31.00000, 26.00000, 31.00000, 42.00000,…
## $ price      <dbl> 102.18167, 101.47500, 108.57875, 107.83734, 94.26666, 128.0…
## $ taxs       <dbl> 33.34834, 37.00000, 36.17042, 32.10400, 31.00000, 51.48333,…
library(psych)
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## The following object is masked from 'package:car':
## 
##     logit
describe(CigarettesSW)
##            vars  n        mean           sd      median     trimmed         mad
## state*        1 96       24.50        13.93       24.50       24.50       17.79
## year*         2 96        1.50         0.50        1.50        1.50        0.74
## cpi           3 96        1.30         0.23        1.30        1.30        0.33
## population    4 96  5168866.32   5442344.66  3697471.50  4151471.85  3175852.26
## packs         5 96      109.18        25.87      110.16      108.33       25.30
## income        6 96 99878735.74 120541138.18 61661644.00 75672114.50 62901336.16
## tax           7 96       42.68        16.14       37.00       40.96       11.86
## price         8 96      143.45        43.89      137.72      140.34       53.47
## taxs          9 96       48.33        19.33       41.05       46.12       13.92
##                   min          max        range skew kurtosis          se
## state*           1.00        48.00        47.00 0.00    -1.24        1.42
## year*            1.00         2.00         1.00 0.00    -2.02        0.05
## cpi              1.08         1.52         0.45 0.00    -2.02        0.02
## population  478447.00  31493524.00  31015077.00 2.35     6.68   555456.98
## packs           49.27       197.99       148.72 0.49     1.10        2.64
## income     6887097.00 771470144.00 764583047.00 2.77    10.00 12302678.40
## tax             18.00        99.00        81.00 1.03     0.62        1.65
## price           84.97       240.85       155.88 0.40    -1.21        4.48
## taxs            21.27       112.63        91.37 1.04     0.38        1.97
library(ggplot2)

III

Estimate the linear regression model

model <- lm(packs ~ price, data = CigarettesSW)

summary(model)
## 
## Call:
## lm(formula = packs ~ price, data = CigarettesSW)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -53.926  -9.621  -0.967   7.596  70.369 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 164.35649    6.90885  23.789  < 2e-16 ***
## price        -0.38463    0.04608  -8.348 5.92e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.71 on 94 degrees of freedom
## Multiple R-squared:  0.4257, Adjusted R-squared:  0.4196 
## F-statistic: 69.68 on 1 and 94 DF,  p-value: 5.923e-13
ggplot(CigarettesSW, aes(x = price, y = packs)) +
  geom_point() +
  xlab("Cigarette Price") +
  ylab("Packs Sold per Capita") +
  ggtitle("Relationship between Cigarette Prices and Consumption")

IV

Create a matrix of independent variables (including a column of ones for the intercept)

X <- cbind(1, CigarettesSW$price)

# Create a vector of the dependent variable
y <- CigarettesSW$packs

# Compute the beta coefficients using the matrix algebra formula
betas <- solve(t(X) %*% X) %*% t(X) %*% y

betas
##             [,1]
## [1,] 164.3564934
## [2,]  -0.3846278
# Compute the residual vector
CigarettesSW$residuals <- y - (X %*% betas)

CigarettesSW$fitted_values<-predict(object = model, newdata = CigarettesSW)
CigarettesSW$residuals_AS <-resid(object = model, newdata = CigarettesSW)

summary(object = CigarettesSW[,11:12])
##  fitted_values     residuals_AS     
##  Min.   : 71.72   Min.   :-53.9258  
##  1st Qu.: 96.60   1st Qu.: -9.6210  
##  Median :111.39   Median : -0.9668  
##  Mean   :109.18   Mean   :  0.0000  
##  3rd Qu.:124.85   3rd Qu.:  7.5960  
##  Max.   :131.68   Max.   : 70.3695
# Calculate the variance-covariance matrix of the beta coefficients
sigma2 <- as.numeric((t(CigarettesSW$residuals) %*% CigarettesSW$residuals)  / ( nrow(X) - ncol(X)  ))
                     
#sigma2 <- as.numeric(  sum(residuals^2) / ( nrow(X) - ncol(X) ) )
sigma2
## [1] 388.4679
vcov <-  sigma2 * solve(t(X) %*% X)


# Calculate the standard error of the beta coefficients
se <- sqrt(diag(vcov))

# View the standard error of the beta coefficients
se
## [1] 6.90885462 0.04607608
summary(model)
## 
## Call:
## lm(formula = packs ~ price, data = CigarettesSW)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -53.926  -9.621  -0.967   7.596  70.369 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 164.35649    6.90885  23.789  < 2e-16 ***
## price        -0.38463    0.04608  -8.348 5.92e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.71 on 94 degrees of freedom
## Multiple R-squared:  0.4257, Adjusted R-squared:  0.4196 
## F-statistic: 69.68 on 1 and 94 DF,  p-value: 5.923e-13
typeof(sigma2)
## [1] "double"
class(sigma2)
## [1] "numeric"