library(rio)
# Load the dataset from the zipped archive and list its column names.
data <- import("data.zip")
names(data)
##  [1] "Country"                         "Year"                           
##  [3] "Status"                          "Life.expectancy"                
##  [5] "Adult.Mortality"                 "infant.deaths"                  
##  [7] "Alcohol"                         "percentage.expenditure"         
##  [9] "Hepatitis.B"                     "Measles"                        
## [11] "BMI"                             "under-five.deaths"              
## [13] "Polio"                           "Total.expenditure"              
## [15] "Diphtheria"                      "HIV/AIDS"                       
## [17] "GDP"                             "Population"                     
## [19] "thinness.1.19.years"             "thinness.5.9.years"             
## [21] "Income.composition.of.resources" "Schooling"
# Baseline model: life expectancy explained by schooling and alcohol.
# Columns are resolved through the `data` argument instead of `data$...`
# inside the formula, so the coefficients get clean names (`Schooling`,
# `Alcohol`) and predict(newdata = ...) can find the predictors.
MODELO <- lm(Life.expectancy ~ Schooling + Alcohol, data = data)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v ggplot2 3.3.6     v purrr   0.3.4
## v tibble  3.1.8     v dplyr   1.0.9
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Two-predictor model fitted through the pipe; the first three columns are
# dropped before the data frame reaches lm().
MODELO1 <- data %>%
  select(-(1:3)) %>%
  lm(Life.expectancy ~ Schooling + Alcohol, data = .)
summary(MODELO1)
## 
## Call:
## lm(formula = Life.expectancy ~ Schooling + Alcohol, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.394  -2.908   0.636   3.897  32.039 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 43.30759    0.48442  89.400   <2e-16 ***
## Schooling    2.19926    0.04485  49.040   <2e-16 ***
## Alcohol     -0.09159    0.03664  -2.499   0.0125 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.243 on 2581 degrees of freedom
##   (354 observations deleted due to missingness)
## Multiple R-squared:  0.5577, Adjusted R-squared:  0.5573 
## F-statistic:  1627 on 2 and 2581 DF,  p-value: < 2.2e-16
# Full model: regress life expectancy on every column that remains once the
# first three columns have been dropped.
MODELO2 <- data %>%
  select(-(1:3)) %>%
  lm(Life.expectancy ~ ., data = .)
summary(MODELO2)
## 
## Call:
## lm(formula = Life.expectancy ~ ., data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.0176  -2.0454  -0.0185   2.2260  11.9157 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      5.328e+01  7.358e-01  72.412  < 2e-16 ***
## Adult.Mortality                 -1.689e-02  9.473e-04 -17.828  < 2e-16 ***
## infant.deaths                    9.369e-02  1.068e-02   8.776  < 2e-16 ***
## Alcohol                         -5.435e-02  3.061e-02  -1.776   0.0760 .  
## percentage.expenditure           3.777e-04  1.805e-04   2.093   0.0365 *  
## Hepatitis.B                     -5.582e-03  4.446e-03  -1.256   0.2095    
## Measles                         -8.617e-06  1.081e-05  -0.797   0.4253    
## BMI                              3.350e-02  6.011e-03   5.573 2.92e-08 ***
## `under-five.deaths`             -7.047e-02  7.728e-03  -9.119  < 2e-16 ***
## Polio                            7.836e-03  5.163e-03   1.518   0.1293    
## Total.expenditure                7.975e-02  4.074e-02   1.958   0.0505 .  
## Diphtheria                       1.439e-02  5.938e-03   2.423   0.0155 *  
## `HIV/AIDS`                      -4.383e-01  1.788e-02 -24.519  < 2e-16 ***
## GDP                              1.383e-05  2.838e-05   0.487   0.6260    
## Population                      -6.917e-10  1.753e-09  -0.395   0.6931    
## thinness.1.19.years             -8.670e-03  5.310e-02  -0.163   0.8703    
## thinness.5.9.years              -5.123e-02  5.242e-02  -0.977   0.3286    
## Income.composition.of.resources  9.824e+00  8.340e-01  11.780  < 2e-16 ***
## Schooling                        8.783e-01  5.939e-02  14.789  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.596 on 1630 degrees of freedom
##   (1289 observations deleted due to missingness)
## Multiple R-squared:  0.8347, Adjusted R-squared:  0.8329 
## F-statistic: 457.4 on 18 and 1630 DF,  p-value: < 2.2e-16
# Reduced model: life expectancy on nine predictors taken from the full
# data frame. This is the model examined by the diagnostics that follow.
modelo <- data %>%
  lm(
    Life.expectancy ~ Adult.Mortality + infant.deaths + BMI +
      `under-five.deaths` + `HIV/AIDS` + Income.composition.of.resources +
      Schooling + percentage.expenditure + Diphtheria,
    data = .
  )
summary(modelo)
## 
## Call:
## lm(formula = Life.expectancy ~ Adult.Mortality + infant.deaths + 
##     BMI + `under-five.deaths` + `HIV/AIDS` + Income.composition.of.resources + 
##     Schooling + percentage.expenditure + Diphtheria, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -21.1576  -2.1440  -0.0853   2.2235  20.9098 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      5.212e+01  4.502e-01 115.790  < 2e-16 ***
## Adult.Mortality                 -1.702e-02  7.893e-04 -21.566  < 2e-16 ***
## infant.deaths                    8.639e-02  7.789e-03  11.091  < 2e-16 ***
## BMI                              3.792e-02  4.665e-03   8.129 6.51e-16 ***
## `under-five.deaths`             -6.545e-02  5.754e-03 -11.375  < 2e-16 ***
## `HIV/AIDS`                      -4.866e-01  1.683e-02 -28.913  < 2e-16 ***
## Income.composition.of.resources  7.332e+00  6.051e-01  12.117  < 2e-16 ***
## Schooling                        9.472e-01  4.096e-02  23.123  < 2e-16 ***
## percentage.expenditure           3.755e-04  3.984e-05   9.427  < 2e-16 ***
## Diphtheria                       3.877e-02  3.628e-03  10.686  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.859 on 2718 degrees of freedom
##   (210 observations deleted due to missingness)
## Multiple R-squared:  0.8294, Adjusted R-squared:  0.8288 
## F-statistic:  1468 on 9 and 2718 DF,  p-value: < 2.2e-16
library(ggfortify)
library(see)
library(patchwork)
library(performance)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
library(lmtest)#para usar breusch pagan
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(nortest)#para usar Kolmogorov-Smirnov
# Diagnostic plots for the reduced model; the panel selection is spelled out
# (it is the default set used when `which` is omitted).
autoplot(modelo, which = c(1:3, 5))

Revisamos los supuestos

# Normality of residuals ----
# Diagnostic panel 2 (normal Q-Q plot)
autoplot(modelo, which = 2)

# Lilliefors (Kolmogorov-Smirnov) normality test on the model residuals
lillie.test(modelo$residuals)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  modelo$residuals
## D = 0.045222, p-value = 4.628e-14

Dado que tenemos un p-value (4.628e-14) MENOR a (<) 0.05, se rechaza H0 (los residuos siguen una distribución normal) y podemos aceptar H1 (los residuos no siguen una distribución normal), por lo que nuestro modelo no cumpliría con el supuesto de normalidad.

# Homoscedasticity ----
# Diagnostic panel 3
autoplot(modelo, which = 3)

# Studentized Breusch-Pagan test
bptest(modelo)
## 
##  studentized Breusch-Pagan test
## 
## data:  modelo
## BP = 288.87, df = 9, p-value < 2.2e-16

En este caso, con un p-value (< 2.2e-16) MENOR a (<) 0.05, se RECHAZA H0 (homocedasticidad), por lo que nuestro modelo tendría un problema de heterocedasticidad.

# Multicollinearity ----
# Variance inflation factor (VIF) for each predictor of the reduced model
vif(modelo)
##                 Adult.Mortality                   infant.deaths 
##                        1.715669                      162.943709 
##                             BMI             `under-five.deaths` 
##                        1.572115                      164.331771 
##                      `HIV/AIDS` Income.composition.of.resources 
##                        1.414493                        2.851972 
##                       Schooling          percentage.expenditure 
##                        3.141859                        1.224715 
##                      Diphtheria 
##                        1.291935

Las variables infant.deaths y under-five.deaths no cumplen con el supuesto, pues su VIF (162.94 y 164.33, respectivamente) es mayor a 5. Por tanto, el modelo no cumpliría con el supuesto de ausencia de multicolinealidad.

# Independence of residuals ----
# Durbin-Watson test for lag-1 autocorrelation
durbinWatsonTest(modelo)
##  lag Autocorrelation D-W Statistic p-value
##    1       0.6648133      0.667676       0
##  Alternative hypothesis: rho != 0

Dado que la prueba de Durbin-Watson presenta un p-value (0) menor a (<) 0.05, podemos rechazar la hipótesis nula (no hay autocorrelación), por lo que existe autocorrelación en los residuos.

# Overall visual check of the reduced model's assumptions
# (check_model() is presumably provided by the `performance` package loaded above).
check_model(modelo)

El modelo cumple con la condición de apalancamiento, puesto que los valores extremos no afectan el modelo.