## Load Data ##
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(gtools)
library(SciViews)
# Read the World Development Indicators extract.
HDI.dt <- read_excel("C:/Users/Joseph/Dropbox/Boston College/Development Economics/HDI/Data_Extract_From_World_Development_Indicators.xlsx")
## Get List of Countries ##
countries <- unique(HDI.dt[, c("Country Name", "Income Level")])
### Break into Income Brackets ###
# `%in%` rather than `==`: a row whose income level is NA is dropped instead of
# becoming an all-NA row that sample() could later draw as a "country".
LowIncome <- countries[countries$`Income Level` %in% 1, ]
MedIncome <- countries[countries$`Income Level` %in% 2, ]
HighIncome <- countries[countries$`Income Level` %in% 3, ]
## Sample ##
# Fixed seed so the same 100 countries (32 + 36 + 32) are drawn on every run
# and the reported figures can be reproduced; change it to draw another sample.
set.seed(2017)
LowIncome <- sample(LowIncome$`Country Name`, 32)
MedIncome <- sample(MedIncome$`Country Name`, 36)
HighIncome <- sample(HighIncome$`Country Name`, 32)
HDI.Sample <- c(LowIncome, MedIncome, HighIncome)
#Filter Original Data
# HDI.Sample is a bare vector of names, so merge() coerces it to a one-column
# data frame and joins it to the first column of HDI.dt; all.x = TRUE keeps
# every sampled name even if it has no matching row.
HDI.Sample.dt <- merge(
  x = HDI.Sample,
  y = HDI.dt,
  by = 1,
  all.x = TRUE
)
# The key column inherits a placeholder name from the coerced vector; restore
# the real one without touching the other names.
colnames(HDI.Sample.dt)[1] <- "Country Name"
## Calculating Life Expectancy
# Min-max scaling of life expectancy between two goalposts.
#
# Ob:     observed value(s); arithmetic is element-wise, so a vector works.
# setMin: lower goalpost (an observation equal to it scores 0).
# setMax: upper goalpost (an observation equal to it scores 1).
# Returns (Ob - setMin) / (setMax - setMin); NA observations stay NA.
LifeExpectancyIndex <- function(Ob, setMin, setMax) {
  years_above_floor <- Ob - setMin
  goalpost_span <- setMax - setMin
  years_above_floor / goalpost_span
}
# Keep only the life-expectancy rows of the sample. `%in%` (unlike `==`) never
# yields NA, so rows with a missing series name cannot slip through as all-NA
# rows.
LifeExpectancy.dt <- HDI.Sample.dt[
  HDI.Sample.dt$`Series Name` %in% "Life expectancy at birth, total (years)",
]
# Goalposts: fixed floor of 20, ceiling at the largest value in the sample.
setMin <- 20
setMax <- max(LifeExpectancy.dt$Data, na.rm = TRUE)
# The index is element-wise and the goalposts are scalars, so no grouping is
# needed; the table is left ungrouped.
LifeExpectancy.dt <- LifeExpectancy.dt %>%
  mutate(LifeIndex = LifeExpectancyIndex(Data, setMin, setMax))
### Calculating Education Index
# Generic min-max index used for the schooling series.
#
# Ob:     observed value(s), processed element-wise.
# setMin: value that maps to 0.
# setMax: value that maps to 1.
# Returns the position of Ob between setMin and setMax as a fraction of the
# distance between them.
Index <- function(Ob, setMin, setMax) {
  numerator <- Ob - setMin
  denominator <- setMax - setMin
  numerator / denominator
}
# Mean years of schooling, scaled from 0 to the largest value in the sample.
# `%in%` keeps rows with a missing series name out of the subset.
MeanYears.dt <- HDI.Sample.dt[
  HDI.Sample.dt$`Series Name` %in% "Barro-Lee: Mean years of schooling",
]
setMin <- 0
setMax <- max(MeanYears.dt$Data, na.rm = TRUE)
# Element-wise calculation with scalar goalposts: no grouping required.
MeanYears.dt <- MeanYears.dt %>%
  mutate(MeanIndex = Index(Data, setMin, setMax))
# Expected years of school, scaled the same way against its own maximum.
ExpectedYears.dt <- HDI.Sample.dt[
  HDI.Sample.dt$`Series Name` %in%
    "Human Capital Index (HCI): Expected Years of School, Total",
]
setMin <- 0
setMax <- max(ExpectedYears.dt$Data, na.rm = TRUE)
ExpectedYears.dt <- ExpectedYears.dt %>%
  mutate(ExpectedIndex = Index(Data, setMin, setMax))
# Put both schooling indices side by side, one join on the country name.
# Columns 3 and 4 of the expected-years table are dropped before the join
# (NOTE(review): presumably series identifiers - confirm against the extract).
# The mean-years columns are picked by name so a change in the width of the
# extract cannot silently select the wrong column.
CombinedEducation.dt <- merge(
  x = ExpectedYears.dt[, c(-3, -4)],
  y = MeanYears.dt[, c("Country Name", "MeanIndex")],
  by = "Country Name"
)
# Combine the two schooling indices into one education index.
#
# ExpectedIndex: index of expected years of school.
# MeanIndex:     index of mean years of schooling.
# Returns the square root of their product (their geometric mean),
# element-wise.
EducationIndex <- function(ExpectedIndex, MeanIndex) {
  index_product <- ExpectedIndex * MeanIndex
  sqrt(index_product)
}
# Add the combined education index row by row. EducationIndex() works
# element-wise, so the table is not grouped and nothing is left grouped for
# later steps.
CombinedEducation.dt <- CombinedEducation.dt %>%
  mutate(CombinedEdIndex = EducationIndex(ExpectedIndex, MeanIndex))
### Calculating the Income Index ###
# Income index on a logarithmic scale.
#
# Ob:     observed income value(s), processed element-wise.
# setMin: lower goalpost (maps to 0).
# setMax: upper goalpost (maps to 1).
# Returns (log(Ob) - log(setMin)) / (log(setMax) - log(setMin)).
#
# Base R's log() is the natural logarithm, so the function no longer needs an
# add-on package to be attached just for `ln`.
# NOTE(review): non-positive inputs give -Inf/NaN; the caller is assumed to
# pass positive incomes - confirm against the data.
IncomeIndex <- function(Ob, setMin, setMax) {
  log_floor <- log(setMin)
  (log(Ob) - log_floor) / (log(setMax) - log_floor)
}
# Keep only the GNI-per-capita rows of the sample. `%in%` never yields NA, so
# rows with a missing series name are excluded instead of becoming all-NA rows.
IncomeIndex.dt <- HDI.Sample.dt[
  HDI.Sample.dt$`Series Name` %in% "GNI per capita (constant 2010 US$)",
]
# Goalposts: fixed floor of 100, ceiling at the largest value in the sample.
setMin <- 100
setMax <- max(IncomeIndex.dt$Data, na.rm = TRUE)
# Element-wise calculation with scalar goalposts: no grouping required, and
# the table is left ungrouped.
IncomeIndex.dt <- IncomeIndex.dt %>%
  mutate(IncomeIndex = IncomeIndex(Data, setMin, setMax))
## Calculating HDI for each Country ##
# Assemble one table holding the three dimension indices.
# The join key is spelled out so that the merge cannot start matching on an
# extra shared column, and the index columns are selected by name rather than
# by position.
# Columns 3 and 4 of the life-expectancy table are dropped before the join
# (NOTE(review): presumably series identifiers - confirm against the extract).
HDI.Index.Dt <- merge(
  x = LifeExpectancy.dt[, c(-3, -4)],
  y = IncomeIndex.dt[, c("Country Name", "IncomeIndex")],
  by = "Country Name"
)
HDI.Index.Dt <- merge(
  x = HDI.Index.Dt,
  y = CombinedEducation.dt[, c("Country Name", "CombinedEdIndex")],
  by = "Country Name"
)
# Composite index from the three dimension indices.
#
# Life, Education, Income: dimension indices, combined element-wise.
# Returns the cube root of their product (their geometric mean). A zero in any
# one dimension therefore gives an overall value of zero.
HDI.func <- function(Life, Education, Income) {
  dimension_product <- Life * Education * Income
  dimension_product^(1/3)
}
# Add the HDI column. group_by() also turns the merged data frame into a
# tibble (compact printing below); ungroup() then releases the grouping so it
# does not leak into later steps.
HDI.Index.Dt <- HDI.Index.Dt %>%
  group_by(`Country Name`) %>%
  mutate(HDI = HDI.func(LifeIndex, CombinedEdIndex, IncomeIndex)) %>%
  ungroup()
# Show the resulting index per country.
HDI.Index.Dt[, c("Country Name", "HDI")]
## # A tibble: 100 x 2
## # Groups: Country Name [100]
## `Country Name` HDI
## <fct> <dbl>
## 1 Afghanistan NA
## 2 Albania 0.757
## 3 Andorra NA
## 4 Angola 0
## 5 Armenia 0.720
## 6 Bahrain 0.799
## 7 Belarus 0
## 8 Belize 0
## 9 Benin 0.435
## 10 Bermuda NA
## # ... with 90 more rows
###Difficulties:
##Grabbing consistent data was difficult and the datasets I used even had some holes which could skew the results.
#2) There are plenty of differences in the numbers between the two methods, but I would say that my index ranks countries rather similarly to the UN.
#3)
# Read the 2017 GDP (PPP) sheet.
GDP.dt <- read_excel("C:/Users/Joseph/Dropbox/Boston College/Development Economics/HDI/GDPPPP2017.xlsx")
# Rename the fifth column in place. Assigning to position 5 leaves every other
# name where it is, whatever the number of columns in the sheet.
colnames(GDP.dt)[5] <- "GDP_PPP_2017"
# Attach the GDP figure to each country, joining on the first column of both
# tables.
HDI.Index.Dt <- merge(
  x = HDI.Index.Dt,
  y = GDP.dt[, c(1, 5)],
  by = 1
)
# Values that are not numbers become NA here, hence the recorded warning.
HDI.Index.Dt$GDP_PPP_2017 <- as.numeric(HDI.Index.Dt$GDP_PPP_2017)
## Warning: NAs introduced by coercion
HDI.Index.Dt$logGDP_PPP_2017 <- ln(HDI.Index.Dt$GDP_PPP_2017)
#GDP PPP Index correlation
# `use` is named explicitly; "na.or.complete" restricts the calculation to
# complete pairs.
cor(HDI.Index.Dt$HDI, HDI.Index.Dt$GDP_PPP_2017, use = "na.or.complete")
## [1] 0.2257043
#log GDP PPP Index
cor(HDI.Index.Dt$HDI, HDI.Index.Dt$logGDP_PPP_2017, use = "na.or.complete")
## [1] 0.5909102
#The Log is stronger because this is how I calculated income with the index
#4)
plot(HDI.Index.Dt$HDI, HDI.Index.Dt$logGDP_PPP_2017)

#5) Making this change did not change much for the HDI in my sample and calculations; this is likely due to the quality of the data that I was able to obtain.
#6) There are a number of fields that could be added, but I believe that adding an income inequality index would help describe the distribution of wealth and paint a more detailed picture of a country. For my calculations below I will be using the GINI index, a World Bank estimate.
#3)
# Load the GINI sheet and give its two columns fixed names.
GINI.dt <- read_excel(
  "C:/Users/Joseph/Dropbox/Boston College/Development Economics/HDI/GINI Index.xlsx"
)
names(GINI.dt) <- c("Country Name", "GINI")
# Join on the leading column of each table; only countries present in both
# tables remain.
HDI.Index.Dt <- merge(HDI.Index.Dt, GINI.dt, by = 1)
# Convert the GINI column to numeric; entries that are not numbers become NA.
HDI.Index.Dt[["GINI"]] <- as.numeric(HDI.Index.Dt[["GINI"]])
## Warning: NAs introduced by coercion
# Min-max scaling of the GINI value between two goalposts.
#
# Ob:     observed GINI value(s), processed element-wise.
# setMin: value that maps to 0.
# setMax: value that maps to 1.
# Returns (Ob - setMin) / (setMax - setMin).
# NOTE(review): a higher GINI value means more inequality, so this index rises
# with inequality - confirm that is the direction wanted where it is combined
# with the other indices.
GiniIndex <- function(Ob, setMin, setMax) {
  offset_from_min <- Ob - setMin
  full_range <- setMax - setMin
  offset_from_min / full_range
}
# Goalposts for the GINI index: floor of 0, ceiling at the largest GINI value
# among the merged countries.
setMin <- 0
setMax <- max(HDI.Index.Dt$GINI, na.rm = TRUE)
# Element-wise calculation with scalar goalposts: no grouping required.
HDI.Index.Dt <- HDI.Index.Dt %>%
  mutate(GiniIndex = GiniIndex(GINI, setMin, setMax))
# Four-dimension variant of the composite index.
#
# Life, Education, Income, Inequality: dimension indices, combined
# element-wise.
# Returns the fourth root of their product (their geometric mean); an NA in
# any dimension gives NA.
MyIndex <- function(Life, Education, Income, Inequality) {
  dimension_product <- Life * Education * Income * Inequality
  dimension_product^(1/4)
}
# Add the four-dimension index. group_by() keeps the table a tibble for the
# compact print below; ungroup() releases the grouping afterwards so it does
# not carry over into later steps.
HDI.Index.Dt <- HDI.Index.Dt %>%
  group_by(`Country Name`) %>%
  mutate(MyIndex = MyIndex(LifeIndex, CombinedEdIndex, IncomeIndex, GiniIndex)) %>%
  ungroup()
#Correlation between Indices
# `use` is named explicitly; "na.or.complete" restricts the calculation to
# countries where both indices are available.
cor(HDI.Index.Dt$HDI, HDI.Index.Dt$MyIndex, use = "na.or.complete")
## [1] 0.9792878
HDI.Index.Dt[, c("Country Name", "HDI", "MyIndex")]
## # A tibble: 81 x 3
## # Groups: Country Name [81]
## `Country Name` HDI MyIndex
## <fct> <dbl> <dbl>
## 1 Afghanistan NA NA
## 2 Albania 0.757 NA
## 3 Angola 0 NA
## 4 Armenia 0.720 0.699
## 5 Belarus 0 0
## 6 Belize 0 NA
## 7 Benin 0.435 0.527
## 8 Bulgaria 0.789 NA
## 9 Burkina Faso 0 NA
## 10 Burundi 0.287 NA
## # ... with 71 more rows
#The biggest difference between the two Indices is that there is significantly less equality data to use for analysis.