options(repos = c(CRAN = "https://cran.r-project.org"))
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Cargando paquete requerido: carData
## Warning: package 'carData' was built under R version 4.4.3
library(wooldridge)
## Warning: package 'wooldridge' was built under R version 4.4.3
library(tibble)
## Warning: package 'tibble' was built under R version 4.4.3
modelo1 <- lm(cigs ~ lcigpric , data=smoke)
summary(modelo1)
## 
## Call:
## lm(formula = cigs ~ lcigpric, data = smoke)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.440 -8.668 -8.546 11.022 71.344 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   18.592     23.892   0.778    0.437
## lcigpric      -2.418      5.832  -0.415    0.678
## 
## Residual standard error: 13.73 on 805 degrees of freedom
## Multiple R-squared:  0.0002136,  Adjusted R-squared:  -0.001028 
## F-statistic: 0.172 on 1 and 805 DF,  p-value: 0.6785
smoke$log_cigs1 <- log(smoke$cigs + 1)

#Modelos auxiliares

modelo1.1 <- lm(log_cigs1 ~ lcigpric, data = smoke)
summary(modelo1.1)
## 
## Call:
## lm(formula = log_cigs1 ~ lcigpric, data = smoke)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -1.264 -1.138 -1.118  1.857  3.259 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   2.7581     2.6261   1.050    0.294
## lcigpric     -0.3948     0.6410  -0.616    0.538
## 
## Residual standard error: 1.509 on 805 degrees of freedom
## Multiple R-squared:  0.0004711,  Adjusted R-squared:  -0.0007705 
## F-statistic: 0.3794 on 1 and 805 DF,  p-value: 0.5381
modelo1.2 <- lm(log_cigs1 ~ lincome, data = smoke)
summary(modelo1.2)
## 
## Call:
## lm(formula = log_cigs1 ~ lincome, data = smoke)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -1.182 -1.155 -1.099  1.863  3.213 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.50292    0.72422   0.694    0.488
## lincome      0.06585    0.07456   0.883    0.377
## 
## Residual standard error: 1.509 on 805 degrees of freedom
## Multiple R-squared:  0.0009681,  Adjusted R-squared:  -0.000273 
## F-statistic:  0.78 on 1 and 805 DF,  p-value: 0.3774
modelo2 <- lm(cigs ~  lcigpric + educ + lincome  + restaurn+age , data=smoke)
summary(modelo2)
## 
## Call:
## lm(formula = cigs ~ lcigpric + educ + lincome + restaurn + age, 
##     data = smoke)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.927  -9.046  -6.872   8.756  71.585 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  0.60882   24.44910   0.025  0.98014   
## lcigpric    -0.72917    5.86557  -0.124  0.90110   
## educ        -0.37688    0.16797  -2.244  0.02513 * 
## lincome      1.89438    0.71212   2.660  0.00797 **
## restaurn    -2.93272    1.12936  -2.597  0.00958 **
## age         -0.04520    0.02868  -1.576  0.11537   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.62 on 801 degrees of freedom
## Multiple R-squared:  0.02101,    Adjusted R-squared:  0.0149 
## F-statistic: 3.438 on 5 and 801 DF,  p-value: 0.004427
modelo3 <- lm(cigs ~  lcigpric + educ + lincome  + restaurn+age + agesq + (lcigpric*restaurn) , data=smoke)
summary(modelo3)
## 
## Call:
## lm(formula = cigs ~ lcigpric + educ + lincome + restaurn + age + 
##     agesq + (lcigpric * restaurn), data = smoke)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.772  -9.403  -5.868   7.739  70.212 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        5.869e+00  2.472e+01   0.237  0.81241    
## lcigpric          -3.046e+00  5.930e+00  -0.514  0.60764    
## educ              -4.878e-01  1.671e-01  -2.919  0.00361 ** 
## lincome            8.606e-01  7.271e-01   1.184  0.23691    
## restaurn          -1.761e+02  1.043e+02  -1.689  0.09165 .  
## age                7.669e-01  1.600e-01   4.794 1.95e-06 ***
## agesq             -8.996e-03  1.741e-03  -5.167 3.01e-07 ***
## lcigpric:restaurn  4.211e+01  2.534e+01   1.662  0.09695 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.39 on 799 degrees of freedom
## Multiple R-squared:  0.056,  Adjusted R-squared:  0.04773 
## F-statistic: 6.771 on 7 and 799 DF,  p-value: 8.034e-08
#el fiv sólo se puede utilizar en modelos múltiples
vif(modelo2)
## lcigpric     educ  lincome restaurn      age 
## 1.027975 1.145973 1.119335 1.031002 1.036033
vif(modelo3)
## there are higher-order terms (interactions) in this model
## consider setting type = 'predictor'; see ?vif
##          lcigpric              educ           lincome          restaurn 
##          1.086933          1.173116          1.207098       9097.803576 
##               age             agesq lcigpric:restaurn 
##         33.350162         33.901408       9104.194730

Supuesto 4: Media condicional 0 de los errores

#Modelo 1
residuos1 <- resid(modelo1)
Esperanza_condicional1 <-mean(residuos1)
correlacion_errores_lcigpric_1 <- cor(smoke$lcigpric, residuos1)
correlacion_errores_lcigpric_1
## [1] -8.705012e-16
#Modelo auxiliar 1.1
residuos1.1 <- resid(modelo1.1)
Esperanza_condicional1.1 <-mean(residuos1.1)
correlacion_errores_lcigpric_1.1 <- cor(smoke$lcigpric, residuos1.1)
correlacion_errores_lcigpric_1.1
## [1] -1.075747e-15
#Modelo auxiliar 1.2
residuos1.2 <- resid(modelo1.2)
Esperanza_condicional1.2 <-mean(residuos1.2)
correlacion_errores_lcigpric_1.2 <- cor(smoke$lincome, residuos1.2)
correlacion_errores_lcigpric_1.2
## [1] 1.609086e-15
#Modelo 2
residuos2 <- resid(modelo2)
Esperanza_condicional2 <-mean(residuos2)

correlacion_errores_lcigpric_2 <- cor(smoke$lcigpric, residuos2)
correlacion_errores_lcigpric_2
## [1] -8.366745e-16
correlacion_errores_educ_2 <- cor(smoke$educ, residuos2)
correlacion_errores_educ_2
## [1] 2.226924e-16
correlacion_errores_lincome2 <- cor(smoke$lincome, residuos2)
correlacion_errores_lincome2
## [1] 1.38617e-15
restaurn_age2 <- smoke$restaurn * smoke$age

correlacion_errores_restaurn_age_2 <- cor(restaurn_age2, residuos2)
correlacion_errores_restaurn_age_2
## [1] 0.004642778
#Modelo 3
residuos3 <- resid(modelo3)
Esperanza_condicional3 <-mean(residuos3)


correlacion_errores_lcigpric3 <- cor(smoke$lcigpric, residuos3)
correlacion_errores_lcigpric3
## [1] -1.083137e-15
correlacion_errores_educ3 <- cor(smoke$educ, residuos3)
correlacion_errores_educ3
## [1] 2.873638e-16
correlacion_errores_lincome3 <- cor(smoke$lincome, residuos3)
correlacion_errores_lincome3
## [1] 1.763446e-15
correlacion_errores_restaurn3 <- cor(smoke$restaurn, residuos3)
correlacion_errores_restaurn3
## [1] -5.515054e-17
correlacion_errores_age3 <- cor(smoke$age, residuos3)
correlacion_errores_age3
## [1] 2.563934e-17
correlacion_errores_agesq3 <- cor(smoke$agesq, residuos3)
correlacion_errores_agesq3
## [1] -2.547483e-17
lcigpric_restaurn3 <- smoke$lcigpric * smoke$restaurn

correlacion_errores_lcigpric_restaurn3 <- cor(lcigpric_restaurn3, residuos3)
correlacion_errores_lcigpric_restaurn3
## [1] 7.800254e-18
install.packages("lmtest")
## package 'lmtest' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\salabiblioteca\AppData\Local\Temp\RtmpIV2fmm\downloaded_packages
install.packages("sandwich")
## package 'sandwich' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\salabiblioteca\AppData\Local\Temp\RtmpIV2fmm\downloaded_packages
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.4.3
## Cargando paquete requerido: zoo
## Warning: package 'zoo' was built under R version 4.4.3
## 
## Adjuntando el paquete: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(sandwich)
## Warning: package 'sandwich' was built under R version 4.4.3
# Heterocedasticidad

# Heterocedasticidad modelo 1
bptest(modelo1, ~ fitted(modelo1) + I(fitted(modelo1)^2), data = smoke)
## 
##  studentized Breusch-Pagan test
## 
## data:  modelo1
## BP = 1.2233, df = 2, p-value = 0.5425
coeftest(modelo1, vcov = vcovHC(modelo1, type = "HC"))
## 
## t test of coefficients:
## 
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  18.5918    25.1101  0.7404   0.4593
## lcigpric     -2.4183     6.1311 -0.3944   0.6934
# Heterocedasticidad modelo 1.1

bptest(modelo1.1, ~ fitted(modelo1.1) + I(fitted(modelo1.1)^2), data = smoke)
## 
##  studentized Breusch-Pagan test
## 
## data:  modelo1.1
## BP = 2.3948, df = 2, p-value = 0.302
coeftest(modelo1.1, vcov = vcovHC(modelo1.1, type = "HC"))  
## 
## t test of coefficients:
## 
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  2.75810    2.72444  1.0124   0.3117
## lcigpric    -0.39484    0.66494 -0.5938   0.5528
# Heterocedasticidad modelo 1.2
bptest(modelo1.2, ~ fitted(modelo1.2) + I(fitted(modelo1.2)^2), data = smoke)
## 
##  studentized Breusch-Pagan test
## 
## data:  modelo1.2
## BP = 6.3702, df = 2, p-value = 0.04137
coeftest(modelo1.2, vcov = vcovHC(modelo1.2, type = "HC"))  
## 
## t test of coefficients:
## 
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.502916   0.664880  0.7564   0.4496
## lincome     0.065850   0.068815  0.9569   0.3389
# Heterocedasticidad modelo 2
bptest(modelo2, ~ fitted(modelo2) + I(fitted(modelo2)^2), data = smoke)
## 
##  studentized Breusch-Pagan test
## 
## data:  modelo2
## BP = 6.5256, df = 2, p-value = 0.03828
coeftest(modelo2, vcov = vcovHC(modelo2, type = "HC"))
## 
## t test of coefficients:
## 
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  0.608824  25.866976  0.0235 0.981228   
## lcigpric    -0.729165   6.102572 -0.1195 0.904921   
## educ        -0.376878   0.162504 -2.3192 0.020636 * 
## lincome      1.894383   0.586965  3.2274 0.001300 **
## restaurn    -2.932716   1.032918 -2.8393 0.004636 **
## age         -0.045200   0.023429 -1.9293 0.054052 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Heterocedasticidad modelo 3
bptest(modelo3, ~ fitted(modelo3) + I(fitted(modelo3)^2), data = smoke)
## 
##  studentized Breusch-Pagan test
## 
## data:  modelo3
## BP = 29.023, df = 2, p-value = 4.986e-07
coeftest(modelo3, vcov = vcovHC(modelo3, type = "HC"))
## 
## t test of coefficients:
## 
##                      Estimate  Std. Error t value  Pr(>|t|)    
## (Intercept)        5.8692e+00  2.6379e+01  0.2225  0.823986    
## lcigpric          -3.0460e+00  6.2108e+00 -0.4904  0.623967    
## educ              -4.8782e-01  1.6037e-01 -3.0419  0.002428 ** 
## lincome            8.6060e-01  5.9226e-01  1.4531  0.146592    
## restaurn          -1.7615e+02  8.8665e+01 -1.9867  0.047300 *  
## age                7.6691e-01  1.3744e-01  5.5798 3.298e-08 ***
## agesq             -8.9962e-03  1.4537e-03 -6.1886 9.691e-10 ***
## lcigpric:restaurn  4.2109e+01  2.1609e+01  1.9487  0.051680 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Autocorrelación

# Autocorrelacion modelo 1
dwtest(modelo1)
## 
##  Durbin-Watson test
## 
## data:  modelo1
## DW = 1.99, p-value = 0.4308
## alternative hypothesis: true autocorrelation is greater than 0
# Autocorrelacion modelo 1.1
dwtest(modelo1.1)
## 
##  Durbin-Watson test
## 
## data:  modelo1.1
## DW = 2.0573, p-value = 0.7829
## alternative hypothesis: true autocorrelation is greater than 0
# Autocorrelacion modelo 1.2
dwtest(modelo1.2)
## 
##  Durbin-Watson test
## 
## data:  modelo1.2
## DW = 2.0528, p-value = 0.7732
## alternative hypothesis: true autocorrelation is greater than 0
# Autocorrelacion modelo 2
dwtest(modelo2)
## 
##  Durbin-Watson test
## 
## data:  modelo2
## DW = 2.0115, p-value = 0.5371
## alternative hypothesis: true autocorrelation is greater than 0
coeftest(modelo2, vcov = NeweyWest(modelo2))
## 
## t test of coefficients:
## 
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  0.608824  24.617780  0.0247 0.980276   
## lcigpric    -0.729165   5.744532 -0.1269 0.899026   
## educ        -0.376878   0.169309 -2.2260 0.026293 * 
## lincome      1.894383   0.647785  2.9244 0.003549 **
## restaurn    -2.932716   1.062760 -2.7595 0.005921 **
## age         -0.045200   0.022994 -1.9658 0.049672 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Autocorrelacion modelo 3
dwtest(modelo3)
## 
##  Durbin-Watson test
## 
## data:  modelo3
## DW = 2.0173, p-value = 0.5578
## alternative hypothesis: true autocorrelation is greater than 0
coeftest(modelo3, vcov = NeweyWest(modelo3))
## 
## t test of coefficients:
## 
##                      Estimate  Std. Error t value  Pr(>|t|)    
## (Intercept)        5.8692e+00  2.4309e+01  0.2414  0.809271    
## lcigpric          -3.0460e+00  5.6916e+00 -0.5352  0.592682    
## educ              -4.8782e-01  1.6604e-01 -2.9380  0.003399 ** 
## lincome            8.6060e-01  6.4246e-01  1.3395  0.180778    
## restaurn          -1.7615e+02  1.0267e+02 -1.7156  0.086622 .  
## age                7.6691e-01  1.3792e-01  5.5607 3.665e-08 ***
## agesq             -8.9962e-03  1.4658e-03 -6.1374 1.320e-09 ***
## lcigpric:restaurn  4.2109e+01  2.5038e+01  1.6818  0.092995 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Supuesto 6: Normalidad

#Normalidad1

qqnorm(residuos1)

qqline(residuos1, col="red")

hist(residuos1, breaks=20, main="Histograma de residuos modelo 1")

shapiro.test(residuos1)
## 
##  Shapiro-Wilk normality test
## 
## data:  residuos1
## W = 0.69719, p-value < 2.2e-16
#Normalidad1.1

qqnorm(residuos1.1)

qqline(residuos1.1, col="pink")

hist(residuos1.1, breaks=20, main="Histograma de residuos modelo 1.1")

shapiro.test(residuos1.1)
## 
##  Shapiro-Wilk normality test
## 
## data:  residuos1.1
## W = 0.71018, p-value < 2.2e-16
#Normalidad1.2

qqnorm(residuos1.2)

qqline(residuos1.2, col="purple")

hist(residuos1.2, breaks=20, main="Histograma de residuos modelo 1.2")

shapiro.test(residuos1.2)
## 
##  Shapiro-Wilk normality test
## 
## data:  residuos1.2
## W = 0.71178, p-value < 2.2e-16
#Normalidad2

qqnorm(residuos2)

qqline(residuos2, col="green")

hist(residuos2, breaks=20, main="Histograma de residuos modelo 2")

shapiro.test(residuos2)
## 
##  Shapiro-Wilk normality test
## 
## data:  residuos2
## W = 0.77422, p-value < 2.2e-16
#Normalidad3

qqnorm(residuos3)

qqline(residuos3, col="blue")

hist(residuos3, breaks=20, main="Histograma de residuos modelo 3")

shapiro.test(residuos3)
## 
##  Shapiro-Wilk normality test
## 
## data:  residuos3
## W = 0.82668, p-value < 2.2e-16