library(rio)
# Load the data set from the zipped archive and list its column names.
data <- import(file = "data.zip")
names(data)
## [1] "Country" "Year"
## [3] "Status" "Life.expectancy"
## [5] "Adult.Mortality" "infant.deaths"
## [7] "Alcohol" "percentage.expenditure"
## [9] "Hepatitis.B" "Measles"
## [11] "BMI" "under-five.deaths"
## [13] "Polio" "Total.expenditure"
## [15] "Diphtheria" "HIV/AIDS"
## [17] "GDP" "Population"
## [19] "thinness.1.19.years" "thinness.5.9.years"
## [21] "Income.composition.of.resources" "Schooling"
# Linear model of life expectancy on schooling and alcohol consumption.
# Columns are referenced by name and resolved through `data =` rather than as
# `data$col` vectors: the fit is the same, but the coefficients get clean
# names (`Schooling`, not `data$Schooling`) and predict(MODELO, newdata = ...)
# actually uses the new data instead of silently reusing the global vectors.
MODELO <- lm(Life.expectancy ~ Schooling + Alcohol, data = data)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v ggplot2 3.3.6 v purrr 0.3.4
## v tibble 3.1.8 v dplyr 1.0.9
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Two-predictor model fitted through a pipeline: drop the first three columns
# by position, then hand the remaining frame to lm() through the `.`
# placeholder.
MODELO1 <- data %>%
  select(-(1:3)) %>%
  lm(Life.expectancy ~ Schooling + Alcohol, data = .)
summary(MODELO1)
##
## Call:
## lm(formula = Life.expectancy ~ Schooling + Alcohol, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.394 -2.908 0.636 3.897 32.039
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 43.30759 0.48442 89.400 <2e-16 ***
## Schooling 2.19926 0.04485 49.040 <2e-16 ***
## Alcohol -0.09159 0.03664 -2.499 0.0125 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.243 on 2581 degrees of freedom
## (354 observations deleted due to missingness)
## Multiple R-squared: 0.5577, Adjusted R-squared: 0.5573
## F-statistic: 1627 on 2 and 2581 DF, p-value: < 2.2e-16
# Full model: after dropping the first three columns by position, regress
# Life.expectancy on every remaining column (formula `Life.expectancy ~ .`).
MODELO2 <- data %>%
  select(-(1:3)) %>%
  lm(Life.expectancy ~ ., data = .)
summary(MODELO2)
##
## Call:
## lm(formula = Life.expectancy ~ ., data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.0176 -2.0454 -0.0185 2.2260 11.9157
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.328e+01 7.358e-01 72.412 < 2e-16 ***
## Adult.Mortality -1.689e-02 9.473e-04 -17.828 < 2e-16 ***
## infant.deaths 9.369e-02 1.068e-02 8.776 < 2e-16 ***
## Alcohol -5.435e-02 3.061e-02 -1.776 0.0760 .
## percentage.expenditure 3.777e-04 1.805e-04 2.093 0.0365 *
## Hepatitis.B -5.582e-03 4.446e-03 -1.256 0.2095
## Measles -8.617e-06 1.081e-05 -0.797 0.4253
## BMI 3.350e-02 6.011e-03 5.573 2.92e-08 ***
## `under-five.deaths` -7.047e-02 7.728e-03 -9.119 < 2e-16 ***
## Polio 7.836e-03 5.163e-03 1.518 0.1293
## Total.expenditure 7.975e-02 4.074e-02 1.958 0.0505 .
## Diphtheria 1.439e-02 5.938e-03 2.423 0.0155 *
## `HIV/AIDS` -4.383e-01 1.788e-02 -24.519 < 2e-16 ***
## GDP 1.383e-05 2.838e-05 0.487 0.6260
## Population -6.917e-10 1.753e-09 -0.395 0.6931
## thinness.1.19.years -8.670e-03 5.310e-02 -0.163 0.8703
## thinness.5.9.years -5.123e-02 5.242e-02 -0.977 0.3286
## Income.composition.of.resources 9.824e+00 8.340e-01 11.780 < 2e-16 ***
## Schooling 8.783e-01 5.939e-02 14.789 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.596 on 1630 degrees of freedom
## (1289 observations deleted due to missingness)
## Multiple R-squared: 0.8347, Adjusted R-squared: 0.8329
## F-statistic: 457.4 on 18 and 1630 DF, p-value: < 2.2e-16
# Reduced model with a hand-picked subset of nine predictors.
# Back-ticks are required for the non-syntactic column names
# `under-five.deaths` and `HIV/AIDS`.
modelo <- data %>%
  lm(
    Life.expectancy ~ Adult.Mortality + infant.deaths + BMI +
      `under-five.deaths` + `HIV/AIDS` + Income.composition.of.resources +
      Schooling + percentage.expenditure + Diphtheria,
    data = .
  )
summary(modelo)
##
## Call:
## lm(formula = Life.expectancy ~ Adult.Mortality + infant.deaths +
## BMI + `under-five.deaths` + `HIV/AIDS` + Income.composition.of.resources +
## Schooling + percentage.expenditure + Diphtheria, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.1576 -2.1440 -0.0853 2.2235 20.9098
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.212e+01 4.502e-01 115.790 < 2e-16 ***
## Adult.Mortality -1.702e-02 7.893e-04 -21.566 < 2e-16 ***
## infant.deaths 8.639e-02 7.789e-03 11.091 < 2e-16 ***
## BMI 3.792e-02 4.665e-03 8.129 6.51e-16 ***
## `under-five.deaths` -6.545e-02 5.754e-03 -11.375 < 2e-16 ***
## `HIV/AIDS` -4.866e-01 1.683e-02 -28.913 < 2e-16 ***
## Income.composition.of.resources 7.332e+00 6.051e-01 12.117 < 2e-16 ***
## Schooling 9.472e-01 4.096e-02 23.123 < 2e-16 ***
## percentage.expenditure 3.755e-04 3.984e-05 9.427 < 2e-16 ***
## Diphtheria 3.877e-02 3.628e-03 10.686 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.859 on 2718 degrees of freedom
## (210 observations deleted due to missingness)
## Multiple R-squared: 0.8294, Adjusted R-squared: 0.8288
## F-statistic: 1468 on 9 and 2718 DF, p-value: < 2.2e-16
library(ggfortify)
library(see)
library(patchwork)
library(performance)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
library(lmtest)#para usar breusch pagan
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(nortest)#para usar Kolmogorov-Smirnov
# Default set of diagnostic plots for the reduced model.
autoplot(object = modelo)
Revisamos los supuestos del modelo de regresión lineal:
# Normality of residuals ----
# Diagnostic panel 2 (the normal Q-Q plot in the plot.lm numbering), followed
# by the Lilliefors (Kolmogorov-Smirnov) normality test on the residuals.
autoplot(modelo, which = 2)
lillie.test(x = modelo$residuals)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: modelo$residuals
## D = 0.045222, p-value = 4.628e-14
Dado que el p-value (4.628e-14) es menor que (<) 0.05, se rechaza H0 (los residuos siguen una distribución normal) y se acepta H1 (los residuos no siguen una distribución normal), por lo que nuestro modelo no cumpliría con el supuesto de normalidad.
# Homoscedasticity ----
# Diagnostic panel 3 (scale-location in the plot.lm numbering), followed by
# the studentized Breusch-Pagan test.
autoplot(modelo, which = 3)
bptest(modelo, studentize = TRUE)
##
## studentized Breusch-Pagan test
##
## data: modelo
## BP = 288.87, df = 9, p-value < 2.2e-16
En este caso, con un p-value (< 2.2e-16) menor que (<) 0.05, se rechaza H0 (homocedasticidad), por lo que nuestro modelo tendría un problema de heterocedasticidad.
# Multicollinearity ----
# Variance inflation factor for each predictor of the reduced model.
car::vif(modelo)
## Adult.Mortality infant.deaths
## 1.715669 162.943709
## BMI `under-five.deaths`
## 1.572115 164.331771
## `HIV/AIDS` Income.composition.of.resources
## 1.414493 2.851972
## Schooling percentage.expenditure
## 3.141859 1.224715
## Diphtheria
## 1.291935
Las variables infant.deaths y under-five.deaths no cumplen con el supuesto, pues su VIF es mayor a 5. Por tanto, el modelo no cumpliría con el supuesto de ausencia de multicolinealidad.
# Independence of residuals ----
# Durbin-Watson test for first-order autocorrelation.
car::durbinWatsonTest(modelo)
## lag Autocorrelation D-W Statistic p-value
## 1 0.6648133 0.667676 0
## Alternative hypothesis: rho != 0
Dado que la prueba de Durbin-Watson presenta un p-value (0) menor que (<) 0.05, podemos rechazar la hipótesis nula (ausencia de autocorrelación), por lo que existe autocorrelación en los residuos.
# Combined visual check of the model assumptions.
performance::check_model(modelo)
El modelo cumple con la condición de apalancamiento, puesto que los valores extremos no afectan al modelo.