# Embed the header image for the report.
# NOTE(review): this is an absolute, machine-specific path; it will not
# resolve on another machine.
knitr::include_graphics(path = "/Users/Nusrat/Desktop/Life.png")

Life expectancy is the average length of time a person may expect to live. It is one of the most important measures of health. Statistics from the World Health Organization (WHO) suggest that human beings currently have an average life span of 64.3 years. The purpose of this assignment is to study various factors, at both the individual level and the country level, which might influence life expectancy across the globe. Additionally, I will look at the trend in life expectancy over the last fifteen years (2000 to 2015). The dataset I will be using for this study is from the World Health Organization. The data can be accessed through the following link: https://www.kaggle.com/kumarajarshi/life-expectancy-who.

Variable Index:

  1. Life_expectancy (dependent variable): Life expectancy in years
  2. Year (independent variable): 2000 to 2015
  3. Thinness (independent variable): Prevalence of thinness among children and adolescents aged 10 to 19 (recorded as a percentage)
  4. Status_Rec (independent variable): 0 = Developing country, 1 = Developed country
  5. Total_expenditure (independent variable): Government expenditure on health as a percentage of total government expenditure (recorded as a percentage)

Research Questions:

The research questions of this project are as follows: 1. Is life expectancy significantly impacted by the prevalence of thinness among children and adolescents aged 10 to 19 years? 2. Does life expectancy depend on how much the government spends on public health care? To be more specific, should a country with a low life expectancy increase its health care expenditure in order to improve its average lifespan? 3. What is the historical trend of life expectancy? 4. Does life expectancy vary between developed and developing countries?

Hypothesis:

The hypotheses for this study are as follows: 1. Life expectancy is significantly impacted by the prevalence of thinness among children and adolescents aged 10 to 19 years; 2. Countries with a low life expectancy should increase their health care expenditure in order to improve their average lifespan; 3. Life expectancy has increased over the course of the past fifteen years; 4. Life expectancy is greater in developed countries than in developing countries.

library(dplyr)
library(magrittr)
library(tidyr)
library(haven)
library(ggplot2)
library(texreg)
library(gridExtra)
library(readr)
# Load the life-expectancy data from a local CSV file.
# NOTE(review): the path is absolute and machine-specific.
LifeExpectancy <- read_csv(
  file = "C:/Users/Nusrat/Desktop/MA - 3rd semester, Spring 19/SOC 712 - Advanced Analytics (R)/Assignment 9 - Advanced VIsualization/LifeExpectancy.csv"
)
# Preview the first rows to check column names and parsed types.
head(x = LifeExpectancy)
## # A tibble: 6 x 24
##   Country Country_Rec  Year Status Status_Rec Life_expectancy
##   <chr>         <dbl> <dbl> <chr>       <dbl>           <dbl>
## 1 Afghan~           1  2015 Devel~          0            65  
## 2 Afghan~           1  2014 Devel~          0            59.9
## 3 Afghan~           1  2013 Devel~          0            59.9
## 4 Afghan~           1  2012 Devel~          0            59.5
## 5 Afghan~           1  2011 Devel~          0            59.2
## 6 Afghan~           1  2010 Devel~          0            58.8
## # ... with 18 more variables: `Adult Mortality` <dbl>, `infant
## #   deaths` <dbl>, Alcohol <dbl>, `percentage expenditure` <dbl>,
## #   `Hepatitis B` <dbl>, Measles <dbl>, BMI <dbl>, `under-five
## #   deaths` <dbl>, Polio <dbl>, Total_expenditure <dbl>, Diphtheria <dbl>,
## #   `HIV/AIDS` <dbl>, GDP <dbl>, Population <dbl>, Thinness <dbl>,
## #   `thinness 5-9 years` <dbl>, `Income composition of resources` <dbl>,
## #   Schooling <dbl>

Model 1: Effect of Thinness on Life Expectancy

# Model 1: simple linear regression of life expectancy on thinness.
# NOTE(review): `LifeExpectancyy` (double "y") is not created in the visible
# code; presumably a cleaned copy of `LifeExpectancy` -- confirm where it is
# defined before renaming anything.
m1 <- lm(
  formula = Life_expectancy ~ Thinness,
  data = LifeExpectancyy
)
summary(object = m1)
## 
## Call:
## lm(formula = Life_expectancy ~ Thinness, data = LifeExpectancyy)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.699  -4.607   1.501   5.466  19.650 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  73.5500     0.2800   262.6   <2e-16 ***
## Thinness     -0.8757     0.0419   -20.9   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.823 on 1647 degrees of freedom
## Multiple R-squared:  0.2096, Adjusted R-squared:  0.2091 
## F-statistic: 436.8 on 1 and 1647 DF,  p-value: < 2.2e-16

Model 2: Effect of Thinness and Total Govt. Expenditure on Life Expectancy

# Model 2: adds government health expenditure as a second predictor.
m2 <- lm(
  formula = Life_expectancy ~ Thinness + Total_expenditure,
  data = LifeExpectancyy
)
summary(object = m2)
## 
## Call:
## lm(formula = Life_expectancy ~ Thinness + Total_expenditure, 
##     data = LifeExpectancyy)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.759  -4.192   1.444   5.393  19.878 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       71.51563    0.61853 115.621  < 2e-16 ***
## Thinness          -0.84268    0.04269 -19.739  < 2e-16 ***
## Total_expenditure  0.31468    0.08539   3.685 0.000236 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.793 on 1646 degrees of freedom
## Multiple R-squared:  0.2161, Adjusted R-squared:  0.2151 
## F-statistic: 226.9 on 2 and 1646 DF,  p-value: < 2.2e-16

Model 3: Effect of Thinness, Country Status and Total Govt. Expenditure on Life Expectancy

# Model 3: adds the country-status indicator (Status_Rec) to Model 2.
m3 <- lm(
  formula = Life_expectancy ~ Thinness + Status_Rec + Total_expenditure,
  data = LifeExpectancyy
)
summary(object = m3)
## 
## Call:
## lm(formula = Life_expectancy ~ Thinness + Status_Rec + Total_expenditure, 
##     data = LifeExpectancyy)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.826  -4.219   1.106   5.121  18.720 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       70.46746    0.58393 120.677   <2e-16 ***
## Thinness          -0.66707    0.04167 -16.008   <2e-16 ***
## Status_Rec         8.15116    0.53949  15.109   <2e-16 ***
## Total_expenditure  0.14680    0.08081   1.817   0.0695 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.305 on 1645 degrees of freedom
## Multiple R-squared:  0.3116, Adjusted R-squared:  0.3104 
## F-statistic: 248.2 on 3 and 1645 DF,  p-value: < 2.2e-16

Model 4: Adding Interaction Terms Between Total Govt. Expenditure on Health and Country Status

# Model 4: `*` expands to both main effects plus the
# Total_expenditure:Status_Rec interaction term.
m4 <- lm(
  formula = Life_expectancy ~ Thinness + Total_expenditure * Status_Rec,
  data = LifeExpectancyy
)
summary(object = m4)
## 
## Call:
## lm(formula = Life_expectancy ~ Thinness + Total_expenditure * 
##     Status_Rec, data = LifeExpectancyy)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.815  -4.185   1.173   5.134  18.627 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  70.59107    0.63472 111.216  < 2e-16 ***
## Thinness                     -0.66825    0.04175 -16.006  < 2e-16 ***
## Total_expenditure             0.12650    0.09055   1.397    0.163    
## Status_Rec                    7.47508    1.46224   5.112 3.56e-07 ***
## Total_expenditure:Status_Rec  0.09921    0.19942   0.497    0.619    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.307 on 1644 degrees of freedom
## Multiple R-squared:  0.3117, Adjusted R-squared:   0.31 
## F-statistic: 186.1 on 4 and 1644 DF,  p-value: < 2.2e-16

Model 5: Adding Interaction Terms Between Thinness and Total Govt. Expenditure on Health

# Model 5: Thinness and Total_expenditure main effects plus their
# interaction, with Status_Rec as an additive control.
m5 <- lm(
  formula = Life_expectancy ~ Thinness * Total_expenditure + Status_Rec,
  data = LifeExpectancyy
)
summary(object = m5)
## 
## Call:
## lm(formula = Life_expectancy ~ Thinness * Total_expenditure + 
##     Status_Rec, data = LifeExpectancyy)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -26.2183  -3.8125   0.9259   5.0646  25.9413 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                66.85283    0.71752  93.172  < 2e-16 ***
## Thinness                    0.09231    0.09972   0.926    0.355    
## Total_expenditure           0.79274    0.11071   7.160 1.21e-12 ***
## Status_Rec                  7.48753    0.53451  14.008  < 2e-16 ***
## Thinness:Total_expenditure -0.14317    0.01715  -8.347  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.157 on 1644 degrees of freedom
## Multiple R-squared:  0.3396, Adjusted R-squared:  0.338 
## F-statistic: 211.4 on 4 and 1644 DF,  p-value: < 2.2e-16

Model Comparison - Best fit

library(texreg)
# Side-by-side regression table for the five models, emitted as an HTML
# fragment (doctype = FALSE leaves out the DOCTYPE header).
htmlreg(l = list(m1, m2, m3, m4, m5), doctype = FALSE)
Statistical models
Model 1 Model 2 Model 3 Model 4 Model 5
(Intercept) 73.55*** 71.52*** 70.47*** 70.59*** 66.85***
(0.28) (0.62) (0.58) (0.63) (0.72)
Thinness -0.88*** -0.84*** -0.67*** -0.67*** 0.09
(0.04) (0.04) (0.04) (0.04) (0.10)
Total_expenditure 0.31*** 0.15 0.13 0.79***
(0.09) (0.08) (0.09) (0.11)
Status_Rec 8.15*** 7.48*** 7.49***
(0.54) (1.46) (0.53)
Total_expenditure:Status_Rec 0.10
(0.20)
Thinness:Total_expenditure -0.14***
(0.02)
R2 0.21 0.22 0.31 0.31 0.34
Adj. R2 0.21 0.22 0.31 0.31 0.34
Num. obs. 1649 1649 1649 1649 1649
RMSE 7.82 7.79 7.31 7.31 7.16
***p < 0.001, **p < 0.01, *p < 0.05
# Akaike information criterion for each of the five fitted models;
# the rows of the result are labelled by the argument names (m1 ... m5).
AIC(m1,m2,m3,m4,m5)

df AIC m1 3 11467.90 m2 4 11456.35 m3 5 11244.06 m4 6 11245.81 m5 6 11177.61

# Bayesian information criterion for each of the five fitted models;
# the rows of the result are labelled by the argument names (m1 ... m5).
BIC(m1,m2,m3,m4,m5)

df BIC m1 3 11484.12 m2 4 11477.98 m3 5 11271.10 m4 6 11278.26 m5 6 11210.06

GGPLOTS

Relationship between Life Expectancy and Thinness

# Scatter plot of life expectancy against thinness.
# Points are used instead of geom_line(): each row is a separate
# country-year observation, and a line would join unrelated observations in
# order of x, producing a zigzag that carries no meaning.
Lplot <- ggplot(
  data = LifeExpectancyy,
  aes(x = Thinness, y = Life_expectancy)
) +
  geom_point(alpha = .3)
Lplot

Distribution of Life Expectancy Across Developed Countries

# Fit one regression per Country_Rec group and keep each fitted model in the
# list-column `mod`.
# NOTE(review): no filter on Status_Rec is applied here, so every country in
# the data is included -- confirm this matches the "Developed Countries"
# section heading.
dcoef <- LifeExpectancyy %>% 
    group_by(Country_Rec) %>% 
    do(mod = lm(Life_expectancy ~ Status_Rec, data = .))
# Extract the first coefficient (the intercept) from each per-group model.
# NOTE(review): the result is stored under the name `coef`, the same name as
# the coef() function that is called on this line.
coef <- dcoef %>% do(data.frame(intc = coef(.$mod)[1]))
# Histogram of the per-group intercepts; the bin count is left at the
# ggplot2 default.
ggplot(coef, aes(x = intc)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Historical Trend in Life Expectancy

library(ggplot2)
# Base plot: life expectancy by year, using the `LifeExpectancy` data frame.
g <- ggplot(
  data = LifeExpectancy,
  mapping = aes(x = Year, y = Life_expectancy)
)
# Smoothed trend line with the raw observations as semi-transparent points.
g1 <- g +
  geom_smooth(color = "green", size = 1.5) +
  geom_point(alpha = .3) +
  theme_classic() +
  ggtitle("Overall Life Expectancy Over Time")
g1
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Historical Trend in Life Expectancy in a Developed Country: United States

# Load the US-only extract.
# NOTE(review): the path is absolute and machine-specific.
LifeExpectancyUS <- read_csv("C:/Users/Nusrat/Desktop/MA - 3rd semester, Spring 19/SOC 712 - Advanced Analytics (R)/Assignment 9 - Advanced VIsualization/LifeExpectancyUS.csv")
gg <- ggplot(LifeExpectancyUS, aes(x = Year, y = Life_expectancy))
# Smoothed trend plus the raw observations as semi-transparent points,
# matching the overall plot `g1`. The second layer used to be another
# geom_smooth(), which fitted the smoother twice and drew no data points.
g2 <- gg +
  geom_smooth(color = "blue", size = 1.5) +
  geom_point(alpha = .3) +
  theme_classic() +
  ggtitle("Life Expectancy Over Time in the US")
g2
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Historical Trend in Life Expectancy in a Developing Country: Bangladesh

# Load the Bangladesh-only extract.
# NOTE(review): the path is absolute and machine-specific.
LifeExpectancyBD <- read_csv("C:/Users/Nusrat/Desktop/MA - 3rd semester, Spring 19/SOC 712 - Advanced Analytics (R)/Assignment 9 - Advanced VIsualization/LifeExpectancyBD.csv")
ggg <- ggplot(LifeExpectancyBD, aes(x = Year, y = Life_expectancy))
# Smoothed trend plus the raw observations as semi-transparent points,
# matching the overall plot `g1`. The second layer used to be another
# geom_smooth(), which fitted the smoother twice and drew no data points.
g3 <- ggg +
  geom_smooth(color = "blue", size = 1.5) +
  geom_point(alpha = .3) +
  theme_classic() +
  ggtitle("Life Expectancy Over Time in BD")
g3
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Comparing Life Expectancy in the U.S. with the World’s Overall Life Expectancy

# Overall trend (g1) and US trend (g2) side by side in two columns.
gridExtra::grid.arrange(g1, g2, ncol = 2)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Comparing Life Expectancy in Bangladesh with the World’s Overall Life Expectancy

# Overall trend (g1) and Bangladesh trend (g3) side by side in two columns.
gridExtra::grid.arrange(g1, g3, ncol = 2)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Life Expectancy in the U.S. vs. Bangladesh

# US trend (g2) and Bangladesh trend (g3) side by side in two columns.
gridExtra::grid.arrange(g2, g3, ncol = 2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'