Paquetes Utilizados

library("foreign")
library("olsrr")
library("mctest")
library("GGally")
library("ggplot2")

Base de datos

# Load the Stata data set into a data frame and preview its first rows.
auto <- read.dta(file = "auto.dta")
head(auto)
##            make price mpg rep78 headroom trunk weight length turn
## 1   AMC Concord  4099  22     3      2.5    11   2930    186   40
## 2     AMC Pacer  4749  17     3      3.0    11   3350    173   40
## 3    AMC Spirit  3799  22    NA      3.0    12   2640    168   35
## 4 Buick Century  4816  20     3      4.5    16   3250    196   40
## 5 Buick Electra  7827  15     4      4.0    20   4080    222   43
## 6 Buick LeSabre  5788  18     3      4.0    21   3670    218   43
##   displacement gear_ratio  foreign
## 1          121       3.58 Domestic
## 2          258       2.53 Domestic
## 3          121       3.08 Domestic
## 4          196       2.93 Domestic
## 5          350       2.41 Domestic
## 6          231       2.73 Domestic

Regresión Simple: mpg vs weight

Diagrama de Dispersión

# Scatter plot of mpg against weight, drawn with the light theme.
ggplot(data = auto, mapping = aes(x = weight, y = mpg)) +
  geom_point() +
  theme_light()

Estimación del Modelo

# Simple linear regression of mpg on weight over the full data set.
modelo1 <- lm(formula = mpg ~ weight, data = auto)
# Print the residual summary, coefficient table and fit statistics.
summary(modelo1)
## 
## Call:
## lm(formula = mpg ~ weight, data = auto)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9593 -1.9325 -0.3713  0.8885 13.8174 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 39.4402835  1.6140031   24.44   <2e-16 ***
## weight      -0.0060087  0.0005179  -11.60   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.439 on 72 degrees of freedom
## Multiple R-squared:  0.6515, Adjusted R-squared:  0.6467 
## F-statistic: 134.6 on 1 and 72 DF,  p-value: < 2.2e-16

Modelo

\[ mpg = 39.4403 - 0.0060*weight \]

Evaluación de los residuales

Normalidad

Gráfico Q-Q

# Q-Q plot of the residuals of modelo1 (olsrr), used to judge normality.
ols_plot_resid_qq(modelo1)

Prueba de Normalidad

\[ H_0: \text{Los residuales siguen una distribución normal} \]

# Run the residual normality tests for modelo1; the printed table reports
# Shapiro-Wilk, Kolmogorov-Smirnov, Cramer-von Mises and Anderson-Darling.
ols_test_normality(modelo1)
## -----------------------------------------------
##        Test             Statistic       pvalue  
## -----------------------------------------------
## Shapiro-Wilk              0.8959         0.0000 
## Kolmogorov-Smirnov        0.1593         0.0416 
## Cramer-von Mises          6.3706         0.0000 
## Anderson-Darling          2.4253         0.0000 
## -----------------------------------------------

Correlación entre residuos observados y esperados

# Correlation between the observed residuals of modelo1 and their expected
# values; prints a single number.
ols_test_correlation(modelo1)
## [1] 0.9423747

Gráfico residuales vs ajustados

# Residuals versus fitted values plot for modelo1.
ols_plot_resid_fit(modelo1)

Histograma de los residuales

# Histogram of the residuals of modelo1.
ols_plot_resid_hist(modelo1)

# olsrr's combined set of diagnostic plots for modelo1.
ols_plot_diagnostics(modelo1)

Modelo de Regresión distinguiendo procedencia del auto.

Diagrama de Dispersión

# Scatter plot of mpg against weight, with points coloured by foreign.
ggplot(data = auto, mapping = aes(x = weight, y = mpg, color = foreign)) +
  geom_point()

# Same scatter, split into one panel per value of foreign, side by side.
ggplot(data = auto, mapping = aes(x = weight, y = mpg)) +
  geom_point(mapping = aes(color = foreign)) +
  facet_wrap(~foreign, nrow = 1, ncol = 2)

Modelo Autos Domésticos

# Fit mpg ~ weight using only the rows whose foreign value is "Domestic".
# subset() stays inline so the printed Call matches the model specification.
modelo.domestico <- lm(
  formula = mpg ~ weight,
  data = subset(auto, foreign == "Domestic")
)
summary(modelo.domestico)
## 
## Call:
## lm(formula = mpg ~ weight, data = subset(auto, foreign == "Domestic"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8130 -1.5041 -0.2844  0.4845  7.8318 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 39.6469645  1.5766224   25.15   <2e-16 ***
## weight      -0.0059751  0.0004654  -12.84   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.311 on 50 degrees of freedom
## Multiple R-squared:  0.7673, Adjusted R-squared:  0.7626 
## F-statistic: 164.8 on 1 and 50 DF,  p-value: < 2.2e-16

Modelo Autos Domésticos

\[ mpg = 39.64696 - 0.0059751*weight \]

Evaluación de los residuales

Normalidad

Gráfico Q-Q

# Q-Q plot of the residuals of the domestic-cars model.
ols_plot_resid_qq(modelo.domestico)

# Residual normality tests for the domestic-cars model; the printed table
# reports Shapiro-Wilk, Kolmogorov-Smirnov, Cramer-von Mises and
# Anderson-Darling.
ols_test_normality(modelo.domestico)
## -----------------------------------------------
##        Test             Statistic       pvalue  
## -----------------------------------------------
## Shapiro-Wilk              0.8369         0.0000 
## Kolmogorov-Smirnov        0.1741         0.0754 
## Cramer-von Mises          5.2468         0.0000 
## Anderson-Darling          2.3618         0.0000 
## -----------------------------------------------

Gráfico residuales vs ajustados

# Residuals versus fitted values plot for the domestic-cars model.
ols_plot_resid_fit(modelo.domestico)

# olsrr's combined set of diagnostic plots for the domestic-cars model.
ols_plot_diagnostics(modelo.domestico)

Modelo con foreign

# Regress mpg on weight plus the car-origin variable.
# NOTE(review): as.numeric() on a factor returns its integer level codes, so
# foreign presumably enters as 1/2 rather than as a 0/1 dummy -- confirm with
# levels(auto$foreign) before interpreting the intercept.
modelo.foreign <- lm(
  formula = mpg ~ weight + as.numeric(foreign),
  data = auto
)
summary(modelo.foreign)
## 
## Call:
## lm(formula = mpg ~ weight + as.numeric(foreign), data = auto)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.1529 -1.9712 -0.4534  0.8083 14.4096 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         43.3297314  2.9983312  14.451  < 2e-16 ***
## weight              -0.0065879  0.0006371 -10.340 8.28e-16 ***
## as.numeric(foreign) -1.6500291  1.0759941  -1.533     0.13    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.407 on 71 degrees of freedom
## Multiple R-squared:  0.6627, Adjusted R-squared:  0.6532 
## F-statistic: 69.75 on 2 and 71 DF,  p-value: < 2.2e-16

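\[ mpg=43.3297-0.0065879*weight-1.6500291*foreign \]

Nota: en este modelo foreign entra como as.numeric(foreign), es decir, con los códigos numéricos del factor y no como una variable indicadora 0/1; esto debe tenerse en cuenta al interpretar el intercepto.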

Modelo Cuadrático

# Store the squared weight as its own column so it can enter the model as a
# quadratic term named weight2.
auto[["weight2"]] <- auto[["weight"]]^2

# Quadratic-in-weight model, also including the numeric-coded origin variable.
modelo.cuadratico <- lm(
  formula = mpg ~ weight + weight2 + as.numeric(foreign),
  data = auto
)
summary(modelo.cuadratico)
## 
## Call:
## lm(formula = mpg ~ weight + weight2 + as.numeric(foreign), data = auto)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.6361 -1.8989 -0.3020  0.8105 13.8517 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          5.874e+01  6.707e+00   8.758 7.34e-13 ***
## weight              -1.657e-02  3.969e-03  -4.175 8.43e-05 ***
## weight2              1.591e-06  6.249e-07   2.546   0.0131 *  
## as.numeric(foreign) -2.204e+00  1.059e+00  -2.080   0.0412 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.283 on 70 degrees of freedom
## Multiple R-squared:  0.6913, Adjusted R-squared:  0.6781 
## F-statistic: 52.25 on 3 and 70 DF,  p-value: < 2.2e-16

\[ mpg=58.74-0.01657*weight + 0.000001591*weight^2-2.204*foreign \]

# olsrr's combined set of diagnostic plots for the quadratic model.
ols_plot_diagnostics(modelo.cuadratico)