# Session setup.
# The workspace is no longer cleared with rm(list = ls()): that silently deletes
# the caller's objects and still leaves attached packages and options in place.
# The project path below is specific to the original author's machine, so the
# working directory is switched only when that folder exists; elsewhere, run the
# script from the folder that contains "Life Expectancy Data.csv".
project_dir <- "C:/Users/AKASH/Desktop/Term IV/Applied Econometrics for managers/End-term/AEM Term Project"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}
library(foreign)
library(psych)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(mfx)
## Loading required package: sandwich
## Loading required package: MASS
## Loading required package: betareg
library(plm)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(pglm)
## Loading required package: maxLik
## Loading required package: miscTools
##
## Please cite the 'maxLik' package as:
## Henningsen, Arne and Toomet, Ott (2011). maxLik: A package for maximum likelihood estimation in R. Computational Statistics 26(3), 443-458. DOI 10.1007/s00180-010-0217-1.
##
## If you have questions, suggestions, or comments regarding the 'maxLik' package, please use a forum or 'tracker' at maxLik's R-Forge site:
## https://r-forge.r-project.org/projects/maxlik/
library(readxl)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(stargazer)
library(tidyverse)
## -- Attaching packages ---------------------------------------------- tidyverse 1.3.0 --
## v tibble 3.0.3 v dplyr 1.0.1
## v tidyr 1.1.1 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## v purrr 0.3.4
## -- Conflicts ------------------------------------------------- tidyverse_conflicts() --
## x ggplot2::%+%() masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
## x dplyr::between() masks plm::between()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks plm::lag(), stats::lag()
## x dplyr::lead() masks plm::lead()
## x dplyr::recode() masks car::recode()
## x dplyr::select() masks MASS::select()
## x purrr::some() masks car::some()
library(dummies)
## dummies-1.5.6 provided by Decision Patterns
# Load the life-expectancy data from the CSV in the working directory.
dataset <- read.csv(file = "Life Expectancy Data.csv")
# View(dataset)
# Subsetting data
# For each of the years 2000 and 2015: keep that year's rows, sort them by
# ascending life expectancy, then take the 25 lowest (`*_bot`) and the 25
# highest (`*_top`) rows.
# na.last = NA drops rows whose Life.expectancy is missing. With order()'s
# default (na.last = TRUE) such rows sort to the end and would be picked up by
# tail() as if they were the highest values.
data00 <- subset(dataset, Year == 2000)
# View(data00)
data00 <- data00[order(data00$Life.expectancy, na.last = NA), ]
data00_bot <- head(data00, 25)
data00_top <- tail(data00, 25)

data15 <- subset(dataset, Year == 2015)
# View(data15)
data15 <- data15[order(data15$Life.expectancy, na.last = NA), ]
data15_bot <- head(data15, 25)
data15_top <- tail(data15, 25)
#Visual Representation
# Dot plot of life expectancy per country. Countries are placed along the x
# axis in order of increasing life expectancy and their labels are rotated by
# 45 degrees. `point_colour = NULL` leaves geom_point() at its default colour.
plot_life_expectancy <- function(country_rows, point_colour = NULL) {
  points_layer <- if (is.null(point_colour)) {
    geom_point()
  } else {
    geom_point(color = point_colour)
  }
  ggplot(
    country_rows,
    aes(reorder(Country, Life.expectancy), Life.expectancy)
  ) +
    points_layer +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
}

# 25 countries with the lowest life expectancy in 2000
plot_life_expectancy(data00_bot)

# 25 countries with the highest life expectancy in 2000
plot_life_expectancy(data00_top, point_colour = "darkred")

# 25 countries with the lowest life expectancy in 2015
plot_life_expectancy(data15_bot)

# 25 countries with the highest life expectancy in 2015
plot_life_expectancy(data15_top, point_colour = "darkred")

# Checking for any missing values: summary() prints the distribution of every
# column and, for numeric columns, the number of NA's.
# The disabled line below would overwrite `dataset` with only the rows that
# contain at least one missing value (note the `!` in front of complete.cases).
#dataset <- dataset[!complete.cases(dataset),]
summary(dataset)
## Country Year Status Life.expectancy
## Length:2938 Min. :2000 Length:2938 Min. :36.30
## Class :character 1st Qu.:2004 Class :character 1st Qu.:63.10
## Mode :character Median :2008 Mode :character Median :72.10
## Mean :2008 Mean :69.22
## 3rd Qu.:2012 3rd Qu.:75.70
## Max. :2015 Max. :89.00
## NA's :10
## Adult.Mortality infant.deaths Alcohol percentage.expenditure
## Min. : 1.0 Min. : 0.0 Min. : 0.0100 Min. : 0.000
## 1st Qu.: 74.0 1st Qu.: 0.0 1st Qu.: 0.8775 1st Qu.: 4.685
## Median :144.0 Median : 3.0 Median : 3.7550 Median : 64.913
## Mean :164.8 Mean : 30.3 Mean : 4.6029 Mean : 738.251
## 3rd Qu.:228.0 3rd Qu.: 22.0 3rd Qu.: 7.7025 3rd Qu.: 441.534
## Max. :723.0 Max. :1800.0 Max. :17.8700 Max. :19479.912
## NA's :10 NA's :194
## Hepatitis.B Measles BMI under.five.deaths
## Min. : 1.00 Min. : 0.0 Min. : 1.00 Min. : 0.00
## 1st Qu.:77.00 1st Qu.: 0.0 1st Qu.:19.30 1st Qu.: 0.00
## Median :92.00 Median : 17.0 Median :43.50 Median : 4.00
## Mean :80.94 Mean : 2419.6 Mean :38.32 Mean : 42.04
## 3rd Qu.:97.00 3rd Qu.: 360.2 3rd Qu.:56.20 3rd Qu.: 28.00
## Max. :99.00 Max. :212183.0 Max. :87.30 Max. :2500.00
## NA's :553 NA's :34
## Polio Total.expenditure Diphtheria HIV.AIDS
## Min. : 3.00 Min. : 0.370 Min. : 2.00 Min. : 0.100
## 1st Qu.:78.00 1st Qu.: 4.260 1st Qu.:78.00 1st Qu.: 0.100
## Median :93.00 Median : 5.755 Median :93.00 Median : 0.100
## Mean :82.55 Mean : 5.938 Mean :82.32 Mean : 1.742
## 3rd Qu.:97.00 3rd Qu.: 7.492 3rd Qu.:97.00 3rd Qu.: 0.800
## Max. :99.00 Max. :17.600 Max. :99.00 Max. :50.600
## NA's :19 NA's :226 NA's :19
## GDP Population thinness..1.19.years
## Min. : 1.68 Min. :3.400e+01 Min. : 0.10
## 1st Qu.: 463.94 1st Qu.:1.958e+05 1st Qu.: 1.60
## Median : 1766.95 Median :1.387e+06 Median : 3.30
## Mean : 7483.16 Mean :1.275e+07 Mean : 4.84
## 3rd Qu.: 5910.81 3rd Qu.:7.420e+06 3rd Qu.: 7.20
## Max. :119172.74 Max. :1.294e+09 Max. :27.70
## NA's :448 NA's :652 NA's :34
## thinness.5.9.years Income.composition.of.resources Schooling
## Min. : 0.10 Min. :0.0000 Min. : 0.00
## 1st Qu.: 1.50 1st Qu.:0.4930 1st Qu.:10.10
## Median : 3.30 Median :0.6770 Median :12.30
## Mean : 4.87 Mean :0.6276 Mean :11.99
## 3rd Qu.: 7.20 3rd Qu.:0.7790 3rd Qu.:14.30
## Max. :28.60 Max. :0.9480 Max. :20.70
## NA's :34 NA's :167 NA's :163
# Getting Summary Statistics
# Per-column n, mean, sd, min, max, range and standard error; skew = FALSE
# leaves out the skewness and kurtosis columns. FALSE is spelled out because
# `F` is an ordinary variable that can be reassigned.
describe(dataset, skew = FALSE)
## vars n mean sd min
## Country* 1 2938 96.09 56.25 1.00
## Year 2 2938 2007.52 4.61 2000.00
## Status* 3 2938 1.83 0.38 1.00
## Life.expectancy 4 2928 69.22 9.52 36.30
## Adult.Mortality 5 2928 164.80 124.29 1.00
## infant.deaths 6 2938 30.30 117.93 0.00
## Alcohol 7 2744 4.60 4.05 0.01
## percentage.expenditure 8 2938 738.25 1987.91 0.00
## Hepatitis.B 9 2385 80.94 25.07 1.00
## Measles 10 2938 2419.59 11467.27 0.00
## BMI 11 2904 38.32 20.04 1.00
## under.five.deaths 12 2938 42.04 160.45 0.00
## Polio 13 2919 82.55 23.43 3.00
## Total.expenditure 14 2712 5.94 2.50 0.37
## Diphtheria 15 2919 82.32 23.72 2.00
## HIV.AIDS 16 2938 1.74 5.08 0.10
## GDP 17 2490 7483.16 14270.17 1.68
## Population 18 2286 12753375.12 61012096.51 34.00
## thinness..1.19.years 19 2904 4.84 4.42 0.10
## thinness.5.9.years 20 2904 4.87 4.51 0.10
## Income.composition.of.resources 21 2771 0.63 0.21 0.00
## Schooling 22 2775 11.99 3.36 0.00
## max range se
## Country* 1.930000e+02 1.920000e+02 1.04
## Year 2.015000e+03 1.500000e+01 0.09
## Status* 2.000000e+00 1.000000e+00 0.01
## Life.expectancy 8.900000e+01 5.270000e+01 0.18
## Adult.Mortality 7.230000e+02 7.220000e+02 2.30
## infant.deaths 1.800000e+03 1.800000e+03 2.18
## Alcohol 1.787000e+01 1.786000e+01 0.08
## percentage.expenditure 1.947991e+04 1.947991e+04 36.68
## Hepatitis.B 9.900000e+01 9.800000e+01 0.51
## Measles 2.121830e+05 2.121830e+05 211.56
## BMI 8.730000e+01 8.630000e+01 0.37
## under.five.deaths 2.500000e+03 2.500000e+03 2.96
## Polio 9.900000e+01 9.600000e+01 0.43
## Total.expenditure 1.760000e+01 1.723000e+01 0.05
## Diphtheria 9.900000e+01 9.700000e+01 0.44
## HIV.AIDS 5.060000e+01 5.050000e+01 0.09
## GDP 1.191727e+05 1.191711e+05 285.98
## Population 1.293859e+09 1.293859e+09 1276079.80
## thinness..1.19.years 2.770000e+01 2.760000e+01 0.08
## thinness.5.9.years 2.860000e+01 2.850000e+01 0.08
## Income.composition.of.resources 9.500000e-01 9.500000e-01 0.00
## Schooling 2.070000e+01 2.070000e+01 0.06
# Creating Dummy for categorical variables #
# Status is turned into a two-level factor. factor() applies the labels to the
# sorted unique values of Status, so the first value becomes "Status" (the
# reference level in the regressions) and the second becomes "2"; the latter is
# what appears as the `Statusdummy2` term in the regression tables.
# NOTE(review): the two underlying values are presumably "Developed" and
# "Developing" -- confirm against the data before renaming the labels.
dataset$Statusdummy <- factor(dataset$Status, labels = c("Status", "2"))
# Share of observations in each level. This previously tabulated
# `dataset$generationdummy`, a column that does not exist, so `$` returned NULL
# and the result was an empty numeric(0).
prop.table(table(dataset$Statusdummy))
# The models below use bare column names with `data = dataset` instead of
# `dataset$column` terms. The fitted coefficients are the same; the term names
# reported by summary() simply no longer carry the `dataset$` prefix, and the
# fitted models can be used with predict(newdata = ...).

# Linear Regression with independent variable as Percentage expenditure
reg1 <- lm(Life.expectancy ~ percentage.expenditure, data = dataset)

# Linear Regression with independent variable as Percentage expenditure and Alcohol consumption
reg2 <- lm(Life.expectancy ~ percentage.expenditure + Alcohol, data = dataset)

# Linear Regression with all the variables
# (thinness..1.19.years was listed twice in the original formula; the duplicate
# term is removed, which does not change the fitted model.)
reg3 <- lm(
  Life.expectancy ~ percentage.expenditure + Alcohol + Adult.Mortality +
    infant.deaths + Hepatitis.B + Measles + BMI + under.five.deaths + Polio +
    Total.expenditure + Diphtheria + HIV.AIDS + GDP + Population +
    thinness..1.19.years + thinness.5.9.years +
    Income.composition.of.resources + Schooling + Statusdummy,
  data = dataset
)

# Side-by-side text table of the three models, reporting the number of
# observations and adjusted R-squared for each.
stargazer(list(reg1, reg2, reg3), keep.stat = c("n", "adj.rsq"), type = "text")
##
## =============================================================
## Dependent variable:
## -----------------------------
## Life.expectancy
## (1) (2) (3)
## -------------------------------------------------------------
## percentage.expenditure 0.002*** 0.001*** 0.0004**
## (0.0001) (0.0001) (0.0002)
##
## Alcohol 0.718*** -0.091***
## (0.042) (0.033)
##
## Adult.Mortality -0.017***
## (0.001)
##
## infant.deaths 0.093***
## (0.011)
##
## Hepatitis.B -0.007
## (0.004)
##
## Measles -0.00001
## (0.00001)
##
## BMI 0.034***
## (0.006)
##
## under.five.deaths -0.070***
## (0.008)
##
## Polio 0.008
## (0.005)
##
## Total.expenditure 0.076*
## (0.041)
##
## Diphtheria 0.015**
## (0.006)
##
## HIV.AIDS -0.437***
## (0.018)
##
## GDP 0.00001
## (0.00003)
##
## Population -0.000
## (0.000)
##
## thinness..1.19.years -0.012
## (0.053)
##
## thinness.5.9.years -0.048
## (0.052)
##
## Income.composition.of.resources 9.817***
## (0.832)
##
## Schooling 0.867***
## (0.059)
##
## Statusdummy2 -0.968***
## (0.338)
##
## Constant 67.873*** 64.769*** 54.451***
## (0.174) (0.241) (0.840)
##
## -------------------------------------------------------------
## Observations 2,928 2,735 1,649
## Adjusted R2 0.146 0.240 0.834
## =============================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Now we will consider only the significant and relevant variables and do linear regression
# with them. The variables retained are: percentage.expenditure, Alcohol, BMI, HIV.AIDS,
# Income.composition.of.resources, Schooling and Statusdummy.
# The formula uses bare column names with `data = dataset` rather than
# `dataset$column` terms; the fitted coefficients are the same.
reg4 <- lm(
  Life.expectancy ~ percentage.expenditure + Alcohol + BMI + HIV.AIDS +
    Income.composition.of.resources + Schooling + Statusdummy,
  data = dataset
)
# Full coefficient table of the all-variables model (reg3), for reference when
# judging which regressors are significant.
summary(reg3)
##
## Call:
## lm(formula = dataset$Life.expectancy ~ dataset$percentage.expenditure +
## dataset$Alcohol + dataset$Adult.Mortality + dataset$infant.deaths +
## dataset$Hepatitis.B + dataset$Measles + dataset$BMI + dataset$under.five.deaths +
## dataset$Polio + dataset$Total.expenditure + dataset$Diphtheria +
## dataset$HIV.AIDS + dataset$GDP + dataset$Population + dataset$thinness..1.19.years +
## dataset$thinness..1.19.years + dataset$thinness.5.9.years +
## dataset$Income.composition.of.resources + dataset$Schooling +
## dataset$Statusdummy)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.9597 -2.0621 -0.0147 2.2751 11.7115
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.445e+01 8.400e-01 64.822 < 2e-16
## dataset$percentage.expenditure 3.673e-04 1.801e-04 2.040 0.04156
## dataset$Alcohol -9.140e-02 3.316e-02 -2.756 0.00592
## dataset$Adult.Mortality -1.663e-02 9.494e-04 -17.517 < 2e-16
## dataset$infant.deaths 9.350e-02 1.065e-02 8.777 < 2e-16
## dataset$Hepatitis.B -6.525e-03 4.449e-03 -1.467 0.14265
## dataset$Measles -7.865e-06 1.079e-05 -0.729 0.46597
## dataset$BMI 3.376e-02 5.998e-03 5.628 2.15e-08
## dataset$under.five.deaths -7.035e-02 7.711e-03 -9.123 < 2e-16
## dataset$Polio 7.935e-03 5.152e-03 1.540 0.12370
## dataset$Total.expenditure 7.586e-02 4.067e-02 1.865 0.06236
## dataset$Diphtheria 1.490e-02 5.928e-03 2.513 0.01205
## dataset$HIV.AIDS -4.370e-01 1.784e-02 -24.490 < 2e-16
## dataset$GDP 8.738e-06 2.837e-05 0.308 0.75813
## dataset$Population -6.425e-10 1.749e-09 -0.367 0.71337
## dataset$thinness..1.19.years -1.238e-02 5.300e-02 -0.234 0.81527
## dataset$thinness.5.9.years -4.798e-02 5.231e-02 -0.917 0.35917
## dataset$Income.composition.of.resources 9.817e+00 8.321e-01 11.797 < 2e-16
## dataset$Schooling 8.665e-01 5.940e-02 14.587 < 2e-16
## dataset$Statusdummy2 -9.684e-01 3.379e-01 -2.865 0.00422
##
## (Intercept) ***
## dataset$percentage.expenditure *
## dataset$Alcohol **
## dataset$Adult.Mortality ***
## dataset$infant.deaths ***
## dataset$Hepatitis.B
## dataset$Measles
## dataset$BMI ***
## dataset$under.five.deaths ***
## dataset$Polio
## dataset$Total.expenditure .
## dataset$Diphtheria *
## dataset$HIV.AIDS ***
## dataset$GDP
## dataset$Population
## dataset$thinness..1.19.years
## dataset$thinness.5.9.years
## dataset$Income.composition.of.resources ***
## dataset$Schooling ***
## dataset$Statusdummy2 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.588 on 1629 degrees of freedom
## (1289 observations deleted due to missingness)
## Multiple R-squared: 0.8356, Adjusted R-squared: 0.8336
## F-statistic: 435.7 on 19 and 1629 DF, p-value: < 2.2e-16
# Text table for the reduced model (reg4): coefficients with standard errors,
# plus the number of observations and adjusted R-squared.
stargazer(reg4, keep.stat = c("n", "adj.rsq"), type = "text")
##
## ===========================================================
## Dependent variable:
## ---------------------------
## Life.expectancy
## -----------------------------------------------------------
## percentage.expenditure 0.0004***
## (0.00005)
##
## Alcohol -0.114***
## (0.029)
##
## BMI 0.063***
## (0.005)
##
## HIV.AIDS -0.676***
## (0.017)
##
## Income.composition.of.resources 9.264***
## (0.698)
##
## Schooling 1.095***
## (0.049)
##
## Statusdummy2 -1.705***
## (0.323)
##
## Constant 50.820***
## (0.569)
##
## -----------------------------------------------------------
## Observations 2,569
## Adjusted R2 0.773
## ===========================================================
## Note: *p<0.1; **p<0.05; ***p<0.01