Load Data
# Read the country-level indicator workbook into a tibble.
data <- read_excel(path = "D:/data for my project/Abhi11.xlsx")
# Preview the first six rows.
head(data, n = 6L)
## # A tibble: 6 × 10
## Country `GDP per capita` `Life Expectancy` `Infant Mortality`
## <chr> <dbl> <dbl> <dbl>
## 1 Afghanistan 414. 63 36.5
## 2 Albania 8575. 79 8.8
## 3 Algeria 5364. 77 19.2
## 4 Angola 2310. 62 51.1
## 5 Antigua and Barbuda 21495. 78 6.8
## 6 Argentina 13466. 76 8.2
## # ℹ 6 more variables: `Under-5 Mortality` <dbl>, `DPT Immunization` <dbl>,
## # `Measles Immunization` <dbl>, `HIV Prevalence` <dbl>,
## # `Tuberculosis Incidence` <dbl>, `Anemia Prevalence` <dbl>
# Compact overview of each column's type and leading values.
utils::str(data)
## tibble [181 × 10] (S3: tbl_df/tbl/data.frame)
## $ Country : chr [1:181] "Afghanistan" "Albania" "Algeria" "Angola" ...
## $ GDP per capita : num [1:181] 414 8575 5364 2310 21495 ...
## $ Life Expectancy : num [1:181] 63 79 77 62 78 76 75 84 82 74 ...
## $ Infant Mortality : num [1:181] 36.5 8.8 19.2 51.1 6.8 8.2 9.9 3 2.5 14.7 ...
## $ Under-5 Mortality : num [1:181] 48.3 10.8 21.8 68.1 8.2 9.5 11.4 3.5 3.1 16.3 ...
## $ DPT Immunization : num [1:181] 60 97 92 42 95 93 96 95 97 92 ...
## $ Measles Immunization : num [1:181] 55 97 86 42 94 92 95 94 96 91 ...
## $ HIV Prevalence : num [1:181] 0.1 0.1 0.1 1.6 1.1 0.3 0.1 0.1 0.1 0.1 ...
## $ Tuberculosis Incidence: num [1:181] 189 12 68 316 15 39 33 5 4 63 ...
## $ Anemia Prevalence : num [1:181] 45.4 24.7 31.6 43.8 20.3 21.1 25.6 13.7 14.1 28.4 ...
Regression Model
# Linear regression of life expectancy on GDP per capita and the seven
# health indicators present in the data set.
model <- lm(
  formula = `Life Expectancy` ~ `GDP per capita` + `Infant Mortality` +
    `Under-5 Mortality` + `DPT Immunization` + `Measles Immunization` +
    `HIV Prevalence` + `Tuberculosis Incidence` + `Anemia Prevalence`,
  data = data
)
# Residual summary, coefficient table and overall fit statistics.
summary(model)
##
## Call:
## lm(formula = `Life Expectancy` ~ `GDP per capita` + `Infant Mortality` +
## `Under-5 Mortality` + `DPT Immunization` + `Measles Immunization` +
## `HIV Prevalence` + `Tuberculosis Incidence` + `Anemia Prevalence`,
## data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.2689 -1.1101 0.0576 1.2817 5.0615
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.831e+01 2.607e+00 30.035 < 2e-16 ***
## `GDP per capita` 5.798e-05 1.044e-05 5.553 1.05e-07 ***
## `Infant Mortality` 4.136e-02 8.704e-02 0.475 0.635275
## `Under-5 Mortality` -1.997e-01 5.787e-02 -3.451 0.000702 ***
## `DPT Immunization` -2.454e-01 2.048e-01 -1.199 0.232361
## `Measles Immunization` 2.671e-01 2.006e-01 1.332 0.184748
## `HIV Prevalence` -2.409e-01 5.992e-02 -4.020 8.69e-05 ***
## `Tuberculosis Incidence` -3.729e-03 1.923e-03 -1.940 0.054045 .
## `Anemia Prevalence` -9.392e-02 4.285e-02 -2.192 0.029753 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.932 on 172 degrees of freedom
## Multiple R-squared: 0.9356, Adjusted R-squared: 0.9326
## F-statistic: 312.3 on 8 and 172 DF, p-value: < 2.2e-16
Diagnostic Plots
# Draw the diagnostic plots for the fitted model on a 2 x 2 grid.
# par() returns the settings it replaced, so keep them and hand them back
# afterwards instead of assuming the layout was 1 x 1 beforehand.
previous_par <- par(mfrow = c(2, 2))
plot(model)
par(previous_par)
Actual vs Predicted Values
# Scatter of observed life expectancy against the model's predictions.
# Predicting on `newdata = data` yields one value per row of `data` (NA where
# a predictor is missing), so x and y always have equal length even if lm()
# dropped incomplete rows when fitting; plot() skips the NA pairs.
plot(
  x = data$`Life Expectancy`,
  y = predict(model, newdata = data),
  xlab = "Actual Life Expectancy",
  ylab = "Predicted Life Expectancy",
  col = "blue",
  pch = 19,
  main = "Actual vs Predicted Life Expectancy"
)
# Reference line y = x: points on it are predicted exactly.
abline(a = 0, b = 1, col = "red", lwd = 2)
