# The data file is read with a relative path, so the script has to run from the
# project folder. Switch there only when that folder exists on this machine; on
# any other machine, start R in the folder that holds the data file instead.
# The workspace is intentionally not cleared: rm(list = ls()) would delete the
# caller's objects while leaving loaded packages and options untouched.
project_dir <- "C:/Users/AKASH/Desktop/Term IV/Applied Econometrics for managers/End-term/AEM Term Project"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}
library(foreign) 
library(psych) 
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(mfx)  
## Loading required package: sandwich
## Loading required package: MASS
## Loading required package: betareg
library(plm) 
library(car) 
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
library(stargazer) 
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(pglm)
## Loading required package: maxLik
## Loading required package: miscTools
## 
## Please cite the 'maxLik' package as:
## Henningsen, Arne and Toomet, Ott (2011). maxLik: A package for maximum likelihood estimation in R. Computational Statistics 26(3), 443-458. DOI 10.1007/s00180-010-0217-1.
## 
## If you have questions, suggestions, or comments regarding the 'maxLik' package, please use a forum or 'tracker' at maxLik's R-Forge site:
## https://r-forge.r-project.org/projects/maxlik/
library(readxl)
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(stargazer)
library(tidyverse)
## -- Attaching packages ---------------------------------------------- tidyverse 1.3.0 --
## v tibble  3.0.3     v dplyr   1.0.1
## v tidyr   1.1.1     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## v purrr   0.3.4
## -- Conflicts ------------------------------------------------- tidyverse_conflicts() --
## x ggplot2::%+%()   masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
## x dplyr::between() masks plm::between()
## x dplyr::filter()  masks stats::filter()
## x dplyr::lag()     masks plm::lag(), stats::lag()
## x dplyr::lead()    masks plm::lead()
## x dplyr::recode()  masks car::recode()
## x dplyr::select()  masks MASS::select()
## x purrr::some()    masks car::some()
library(dummies)
## dummies-1.5.6 provided by Decision Patterns
# Load the life-expectancy data from the current working directory
dataset <- read.csv(file = "Life Expectancy Data.csv")
# View(dataset)

# Subsetting data
# For 2000 and 2015, sort the countries by life expectancy (ascending) and keep
# the 25 lowest (`*_bot`) and the 25 highest (`*_top`).

# Rows of `df` that have a recorded life expectancy. order() places NA values
# last, so without this filter tail() would return countries with a missing
# value as if they had the highest life expectancy.
with_life_expectancy <- function(df) {
  df[!is.na(df$Life.expectancy), , drop = FALSE]
}

data00 <- subset(dataset, Year == 2000)
# View(data00)
data00 <- data00[order(data00$Life.expectancy), ]
data00_bot <- head(with_life_expectancy(data00), 25)
data00_top <- tail(with_life_expectancy(data00), 25)

data15 <- subset(dataset, Year == 2015)
# View(data15)
data15 <- data15[order(data15$Life.expectancy), ]
data15_bot <- head(with_life_expectancy(data15), 25)
data15_top <- tail(with_life_expectancy(data15), 25)

# Visual representation
# Dot plot of life expectancy by country. Countries are placed along the x-axis
# in increasing order of life expectancy and their names are rotated 45 degrees.
# `point_colour = NULL` keeps geom_point()'s default colour.
plot_life_expectancy <- function(df, point_colour = NULL) {
  point_layer <- if (is.null(point_colour)) {
    geom_point()
  } else {
    geom_point(color = point_colour)
  }
  ggplot(df, aes(reorder(Country, Life.expectancy), Life.expectancy)) +
    point_layer +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
}

# Bottom 25 countries by life expectancy in 2000
plot_life_expectancy(data00_bot)

# Top 25 countries by life expectancy in 2000
plot_life_expectancy(data00_top, point_colour = "darkred")

# Bottom 25 countries by life expectancy in 2015
plot_life_expectancy(data15_bot)

# Top 25 countries by life expectancy in 2015
plot_life_expectancy(data15_top, point_colour = "darkred")

# Checking for any missing values: summary() prints an "NA's" count under every
# column that has missing entries.
# NOTE(review): the disabled line below would keep only the rows that HAVE at
# least one missing value (because of the `!`); remove the `!` if the intent is
# to discard incomplete rows instead.
#dataset <- dataset[!complete.cases(dataset),]

summary(dataset)
##    Country               Year         Status          Life.expectancy
##  Length:2938        Min.   :2000   Length:2938        Min.   :36.30  
##  Class :character   1st Qu.:2004   Class :character   1st Qu.:63.10  
##  Mode  :character   Median :2008   Mode  :character   Median :72.10  
##                     Mean   :2008                      Mean   :69.22  
##                     3rd Qu.:2012                      3rd Qu.:75.70  
##                     Max.   :2015                      Max.   :89.00  
##                                                       NA's   :10     
##  Adult.Mortality infant.deaths       Alcohol        percentage.expenditure
##  Min.   :  1.0   Min.   :   0.0   Min.   : 0.0100   Min.   :    0.000     
##  1st Qu.: 74.0   1st Qu.:   0.0   1st Qu.: 0.8775   1st Qu.:    4.685     
##  Median :144.0   Median :   3.0   Median : 3.7550   Median :   64.913     
##  Mean   :164.8   Mean   :  30.3   Mean   : 4.6029   Mean   :  738.251     
##  3rd Qu.:228.0   3rd Qu.:  22.0   3rd Qu.: 7.7025   3rd Qu.:  441.534     
##  Max.   :723.0   Max.   :1800.0   Max.   :17.8700   Max.   :19479.912     
##  NA's   :10                       NA's   :194                             
##   Hepatitis.B       Measles              BMI        under.five.deaths
##  Min.   : 1.00   Min.   :     0.0   Min.   : 1.00   Min.   :   0.00  
##  1st Qu.:77.00   1st Qu.:     0.0   1st Qu.:19.30   1st Qu.:   0.00  
##  Median :92.00   Median :    17.0   Median :43.50   Median :   4.00  
##  Mean   :80.94   Mean   :  2419.6   Mean   :38.32   Mean   :  42.04  
##  3rd Qu.:97.00   3rd Qu.:   360.2   3rd Qu.:56.20   3rd Qu.:  28.00  
##  Max.   :99.00   Max.   :212183.0   Max.   :87.30   Max.   :2500.00  
##  NA's   :553                        NA's   :34                       
##      Polio       Total.expenditure   Diphtheria       HIV.AIDS     
##  Min.   : 3.00   Min.   : 0.370    Min.   : 2.00   Min.   : 0.100  
##  1st Qu.:78.00   1st Qu.: 4.260    1st Qu.:78.00   1st Qu.: 0.100  
##  Median :93.00   Median : 5.755    Median :93.00   Median : 0.100  
##  Mean   :82.55   Mean   : 5.938    Mean   :82.32   Mean   : 1.742  
##  3rd Qu.:97.00   3rd Qu.: 7.492    3rd Qu.:97.00   3rd Qu.: 0.800  
##  Max.   :99.00   Max.   :17.600    Max.   :99.00   Max.   :50.600  
##  NA's   :19      NA's   :226       NA's   :19                      
##       GDP              Population        thinness..1.19.years
##  Min.   :     1.68   Min.   :3.400e+01   Min.   : 0.10       
##  1st Qu.:   463.94   1st Qu.:1.958e+05   1st Qu.: 1.60       
##  Median :  1766.95   Median :1.387e+06   Median : 3.30       
##  Mean   :  7483.16   Mean   :1.275e+07   Mean   : 4.84       
##  3rd Qu.:  5910.81   3rd Qu.:7.420e+06   3rd Qu.: 7.20       
##  Max.   :119172.74   Max.   :1.294e+09   Max.   :27.70       
##  NA's   :448         NA's   :652         NA's   :34          
##  thinness.5.9.years Income.composition.of.resources   Schooling    
##  Min.   : 0.10      Min.   :0.0000                  Min.   : 0.00  
##  1st Qu.: 1.50      1st Qu.:0.4930                  1st Qu.:10.10  
##  Median : 3.30      Median :0.6770                  Median :12.30  
##  Mean   : 4.87      Mean   :0.6276                  Mean   :11.99  
##  3rd Qu.: 7.20      3rd Qu.:0.7790                  3rd Qu.:14.30  
##  Max.   :28.60      Max.   :0.9480                  Max.   :20.70  
##  NA's   :34         NA's   :167                     NA's   :163
# Getting summary statistics (n, mean, sd, min, max, range, se) per column.
# FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
describe(dataset, skew = FALSE)
##                                 vars    n        mean          sd     min
## Country*                           1 2938       96.09       56.25    1.00
## Year                               2 2938     2007.52        4.61 2000.00
## Status*                            3 2938        1.83        0.38    1.00
## Life.expectancy                    4 2928       69.22        9.52   36.30
## Adult.Mortality                    5 2928      164.80      124.29    1.00
## infant.deaths                      6 2938       30.30      117.93    0.00
## Alcohol                            7 2744        4.60        4.05    0.01
## percentage.expenditure             8 2938      738.25     1987.91    0.00
## Hepatitis.B                        9 2385       80.94       25.07    1.00
## Measles                           10 2938     2419.59    11467.27    0.00
## BMI                               11 2904       38.32       20.04    1.00
## under.five.deaths                 12 2938       42.04      160.45    0.00
## Polio                             13 2919       82.55       23.43    3.00
## Total.expenditure                 14 2712        5.94        2.50    0.37
## Diphtheria                        15 2919       82.32       23.72    2.00
## HIV.AIDS                          16 2938        1.74        5.08    0.10
## GDP                               17 2490     7483.16    14270.17    1.68
## Population                        18 2286 12753375.12 61012096.51   34.00
## thinness..1.19.years              19 2904        4.84        4.42    0.10
## thinness.5.9.years                20 2904        4.87        4.51    0.10
## Income.composition.of.resources   21 2771        0.63        0.21    0.00
## Schooling                         22 2775       11.99        3.36    0.00
##                                          max        range         se
## Country*                        1.930000e+02 1.920000e+02       1.04
## Year                            2.015000e+03 1.500000e+01       0.09
## Status*                         2.000000e+00 1.000000e+00       0.01
## Life.expectancy                 8.900000e+01 5.270000e+01       0.18
## Adult.Mortality                 7.230000e+02 7.220000e+02       2.30
## infant.deaths                   1.800000e+03 1.800000e+03       2.18
## Alcohol                         1.787000e+01 1.786000e+01       0.08
## percentage.expenditure          1.947991e+04 1.947991e+04      36.68
## Hepatitis.B                     9.900000e+01 9.800000e+01       0.51
## Measles                         2.121830e+05 2.121830e+05     211.56
## BMI                             8.730000e+01 8.630000e+01       0.37
## under.five.deaths               2.500000e+03 2.500000e+03       2.96
## Polio                           9.900000e+01 9.600000e+01       0.43
## Total.expenditure               1.760000e+01 1.723000e+01       0.05
## Diphtheria                      9.900000e+01 9.700000e+01       0.44
## HIV.AIDS                        5.060000e+01 5.050000e+01       0.09
## GDP                             1.191727e+05 1.191711e+05     285.98
## Population                      1.293859e+09 1.293859e+09 1276079.80
## thinness..1.19.years            2.770000e+01 2.760000e+01       0.08
## thinness.5.9.years              2.860000e+01 2.850000e+01       0.08
## Income.composition.of.resources 9.500000e-01 9.500000e-01       0.00
## Schooling                       2.070000e+01 2.070000e+01       0.06
# Creating dummy for the categorical variable Status
# Status becomes a two-level factor so lm() can treat it as a dummy; the second
# level shows up in regression output as "Statusdummy2".
# NOTE(review): the labels "Status" and "2" hide the real category names, and
# the first level (in sorted order of the Status values) is the baseline;
# confirm which category that is before interpreting the coefficient.
dataset$Statusdummy <- factor(dataset$Status, labels = c("Status", "2"))

# Share of observations in each Status category
prop.table(table(dataset$Statusdummy))
# All models take their variables from `dataset` through the `data` argument,
# so coefficient names are the plain column names and predict() can be given
# new data. Rows with a missing value in any model variable are dropped by lm().

# Linear regression with percentage expenditure as the only independent variable
reg1 <- lm(Life.expectancy ~ percentage.expenditure, data = dataset)

# Linear regression with percentage expenditure and alcohol consumption
reg2 <- lm(Life.expectancy ~ percentage.expenditure + Alcohol, data = dataset)

# Linear regression with all the variables (each regressor listed once)
reg3 <- lm(
  Life.expectancy ~ percentage.expenditure + Alcohol + Adult.Mortality +
    infant.deaths + Hepatitis.B + Measles + BMI + under.five.deaths + Polio +
    Total.expenditure + Diphtheria + HIV.AIDS + GDP + Population +
    thinness..1.19.years + thinness.5.9.years +
    Income.composition.of.resources + Schooling + Statusdummy,
  data = dataset
)

# Side-by-side text table of the three models, reporting only the number of
# observations and the adjusted R-squared
stargazer(list(reg1, reg2, reg3), keep.stat = c("n", "adj.rsq"), type = "text")
## 
## =============================================================
##                                      Dependent variable:     
##                                 -----------------------------
##                                        Life.expectancy       
##                                    (1)       (2)       (3)   
## -------------------------------------------------------------
## percentage.expenditure          0.002***  0.001***  0.0004** 
##                                 (0.0001)  (0.0001)  (0.0002) 
##                                                              
## Alcohol                                   0.718***  -0.091***
##                                            (0.042)   (0.033) 
##                                                              
## Adult.Mortality                                     -0.017***
##                                                      (0.001) 
##                                                              
## infant.deaths                                       0.093*** 
##                                                      (0.011) 
##                                                              
## Hepatitis.B                                          -0.007  
##                                                      (0.004) 
##                                                              
## Measles                                             -0.00001 
##                                                     (0.00001)
##                                                              
## BMI                                                 0.034*** 
##                                                      (0.006) 
##                                                              
## under.five.deaths                                   -0.070***
##                                                      (0.008) 
##                                                              
## Polio                                                 0.008  
##                                                      (0.005) 
##                                                              
## Total.expenditure                                    0.076*  
##                                                      (0.041) 
##                                                              
## Diphtheria                                           0.015** 
##                                                      (0.006) 
##                                                              
## HIV.AIDS                                            -0.437***
##                                                      (0.018) 
##                                                              
## GDP                                                  0.00001 
##                                                     (0.00003)
##                                                              
## Population                                           -0.000  
##                                                      (0.000) 
##                                                              
## thinness..1.19.years                                 -0.012  
##                                                      (0.053) 
##                                                              
## thinness.5.9.years                                   -0.048  
##                                                      (0.052) 
##                                                              
## Income.composition.of.resources                     9.817*** 
##                                                      (0.832) 
##                                                              
## Schooling                                           0.867*** 
##                                                      (0.059) 
##                                                              
## Statusdummy2                                        -0.968***
##                                                      (0.338) 
##                                                              
## Constant                        67.873*** 64.769*** 54.451***
##                                  (0.174)   (0.241)   (0.840) 
##                                                              
## -------------------------------------------------------------
## Observations                      2,928     2,735     1,649  
## Adjusted R2                       0.146     0.240     0.834  
## =============================================================
## Note:                             *p<0.1; **p<0.05; ***p<0.01
# Reduced model: keep only the regressors judged significant and relevant in
# the full model, namely percentage.expenditure, Alcohol, BMI, HIV.AIDS,
# Income.composition.of.resources, Schooling and Statusdummy.
# Variables come from `dataset` through the `data` argument so that coefficient
# names are the plain column names.
reg4 <- lm(
  Life.expectancy ~ percentage.expenditure + Alcohol + BMI + HIV.AIDS +
    Income.composition.of.resources + Schooling + Statusdummy,
  data = dataset
)

# NOTE(review): this prints the full model (reg3), not the reduced model just
# fitted; confirm that is intended. The reduced model is tabulated further down.
summary(reg3)
## 
## Call:
## lm(formula = dataset$Life.expectancy ~ dataset$percentage.expenditure + 
##     dataset$Alcohol + dataset$Adult.Mortality + dataset$infant.deaths + 
##     dataset$Hepatitis.B + dataset$Measles + dataset$BMI + dataset$under.five.deaths + 
##     dataset$Polio + dataset$Total.expenditure + dataset$Diphtheria + 
##     dataset$HIV.AIDS + dataset$GDP + dataset$Population + dataset$thinness..1.19.years + 
##     dataset$thinness..1.19.years + dataset$thinness.5.9.years + 
##     dataset$Income.composition.of.resources + dataset$Schooling + 
##     dataset$Statusdummy)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16.9597  -2.0621  -0.0147   2.2751  11.7115 
## 
## Coefficients:
##                                           Estimate Std. Error t value Pr(>|t|)
## (Intercept)                              5.445e+01  8.400e-01  64.822  < 2e-16
## dataset$percentage.expenditure           3.673e-04  1.801e-04   2.040  0.04156
## dataset$Alcohol                         -9.140e-02  3.316e-02  -2.756  0.00592
## dataset$Adult.Mortality                 -1.663e-02  9.494e-04 -17.517  < 2e-16
## dataset$infant.deaths                    9.350e-02  1.065e-02   8.777  < 2e-16
## dataset$Hepatitis.B                     -6.525e-03  4.449e-03  -1.467  0.14265
## dataset$Measles                         -7.865e-06  1.079e-05  -0.729  0.46597
## dataset$BMI                              3.376e-02  5.998e-03   5.628 2.15e-08
## dataset$under.five.deaths               -7.035e-02  7.711e-03  -9.123  < 2e-16
## dataset$Polio                            7.935e-03  5.152e-03   1.540  0.12370
## dataset$Total.expenditure                7.586e-02  4.067e-02   1.865  0.06236
## dataset$Diphtheria                       1.490e-02  5.928e-03   2.513  0.01205
## dataset$HIV.AIDS                        -4.370e-01  1.784e-02 -24.490  < 2e-16
## dataset$GDP                              8.738e-06  2.837e-05   0.308  0.75813
## dataset$Population                      -6.425e-10  1.749e-09  -0.367  0.71337
## dataset$thinness..1.19.years            -1.238e-02  5.300e-02  -0.234  0.81527
## dataset$thinness.5.9.years              -4.798e-02  5.231e-02  -0.917  0.35917
## dataset$Income.composition.of.resources  9.817e+00  8.321e-01  11.797  < 2e-16
## dataset$Schooling                        8.665e-01  5.940e-02  14.587  < 2e-16
## dataset$Statusdummy2                    -9.684e-01  3.379e-01  -2.865  0.00422
##                                            
## (Intercept)                             ***
## dataset$percentage.expenditure          *  
## dataset$Alcohol                         ** 
## dataset$Adult.Mortality                 ***
## dataset$infant.deaths                   ***
## dataset$Hepatitis.B                        
## dataset$Measles                            
## dataset$BMI                             ***
## dataset$under.five.deaths               ***
## dataset$Polio                              
## dataset$Total.expenditure               .  
## dataset$Diphtheria                      *  
## dataset$HIV.AIDS                        ***
## dataset$GDP                                
## dataset$Population                         
## dataset$thinness..1.19.years               
## dataset$thinness.5.9.years                 
## dataset$Income.composition.of.resources ***
## dataset$Schooling                       ***
## dataset$Statusdummy2                    ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.588 on 1629 degrees of freedom
##   (1289 observations deleted due to missingness)
## Multiple R-squared:  0.8356, Adjusted R-squared:  0.8336 
## F-statistic: 435.7 on 19 and 1629 DF,  p-value: < 2.2e-16
# Text table for the reduced model: observations and adjusted R-squared only
stargazer(list(reg4), type = "text", keep.stat = c("n", "adj.rsq"))
## 
## ===========================================================
##                                     Dependent variable:    
##                                 ---------------------------
##                                       Life.expectancy      
## -----------------------------------------------------------
## percentage.expenditure                   0.0004***         
##                                          (0.00005)         
##                                                            
## Alcohol                                  -0.114***         
##                                           (0.029)          
##                                                            
## BMI                                      0.063***          
##                                           (0.005)          
##                                                            
## HIV.AIDS                                 -0.676***         
##                                           (0.017)          
##                                                            
## Income.composition.of.resources          9.264***          
##                                           (0.698)          
##                                                            
## Schooling                                1.095***          
##                                           (0.049)          
##                                                            
## Statusdummy2                             -1.705***         
##                                           (0.323)          
##                                                            
## Constant                                 50.820***         
##                                           (0.569)          
##                                                            
## -----------------------------------------------------------
## Observations                               2,569           
## Adjusted R2                                0.773           
## ===========================================================
## Note:                           *p<0.1; **p<0.05; ***p<0.01