library(stringr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
# Load the World Bank GINI index extract and preview its first rows.
# fileEncoding = "UTF-8-BOM" strips the byte-order mark at the start of the
# file; without it the mark is read as part of the first header, which then
# shows up as "ï..Series.Name". stringsAsFactors = FALSE keeps text columns as
# character on every R version, so later reshaping/joining does not have to
# reconcile factor levels.
# NOTE(review): the path is machine-specific — adjust before running elsewhere.
AllGiniData <- read.csv(
  file = "C:/MSDA/D607/Project2/Data_Extract_From_World_Development_Indicators/GiniData.csv",
  fileEncoding = "UTF-8-BOM",
  stringsAsFactors = FALSE
)
head(AllGiniData)
## ï..Series.Name Series.Code Country.Name Country.Code
## 1 GINI index (World Bank estimate) SI.POV.GINI Afghanistan AFG
## 2 GINI index (World Bank estimate) SI.POV.GINI Albania ALB
## 3 GINI index (World Bank estimate) SI.POV.GINI Algeria DZA
## 4 GINI index (World Bank estimate) SI.POV.GINI American Samoa ASM
## 5 GINI index (World Bank estimate) SI.POV.GINI Andorra ADO
## 6 GINI index (World Bank estimate) SI.POV.GINI Angola AGO
## X1990..YR1990. X2000..YR2000. X2007..YR2007. X2008..YR2008.
## 1 .. .. .. ..
## 2 .. .. .. 29.98
## 3 .. .. .. ..
## 4 .. .. .. ..
## 5 .. .. .. ..
## 6 .. 51.96 .. 42.72
## X2009..YR2009. X2010..YR2010. X2011..YR2011. X2012..YR2012.
## 1 .. .. .. ..
## 2 .. .. .. 28.96
## 3 .. .. .. ..
## 4 .. .. .. ..
## 5 .. .. .. ..
## 6 .. .. .. ..
## X2013..YR2013. X2014..YR2014. X2015..YR2015. X2016..YR2016.
## 1 .. .. .. ..
## 2 .. .. .. ..
## 3 .. .. .. ..
## 4 .. .. .. ..
## 5 .. .. .. ..
## 6 .. .. .. ..
# Drop the first two columns (series name and series code) by position, then
# discard rows whose country name is empty.
# NOTE(review): the empty-name rows are presumably the export's trailing
# footer lines — confirm against the raw file.
GiniRelevantData <- AllGiniData %>%
  select(-(1:2)) %>%
  filter(Country.Name != "")
head(GiniRelevantData)
## Country.Name Country.Code X1990..YR1990. X2000..YR2000. X2007..YR2007.
## 1 Afghanistan AFG .. .. ..
## 2 Albania ALB .. .. ..
## 3 Algeria DZA .. .. ..
## 4 American Samoa ASM .. .. ..
## 5 Andorra ADO .. .. ..
## 6 Angola AGO .. 51.96 ..
## X2008..YR2008. X2009..YR2009. X2010..YR2010. X2011..YR2011.
## 1 .. .. .. ..
## 2 29.98 .. .. ..
## 3 .. .. .. ..
## 4 .. .. .. ..
## 5 .. .. .. ..
## 6 42.72 .. .. ..
## X2012..YR2012. X2013..YR2013. X2014..YR2014. X2015..YR2015.
## 1 .. .. .. ..
## 2 28.96 .. .. ..
## 3 .. .. .. ..
## 4 .. .. .. ..
## 5 .. .. .. ..
## 6 .. .. .. ..
## X2016..YR2016.
## 1 ..
## 2 ..
## 3 ..
## 4 ..
## 5 ..
## 6 ..
# Reshape from wide to long: every year column between X1990..YR1990. and
# X2016..YR2016. becomes a (Year, GiniCoeff) pair, one row per country/year.
# convert = FALSE leaves the Year key as the original column name.
GiniTidy <- GiniRelevantData %>%
  gather(
    key = Year,
    value = GiniCoeff,
    X1990..YR1990.:X2016..YR2016.,
    convert = FALSE
  )
## Warning: attributes are not identical across measure variables; they will
## be dropped
# Keep only observations that carry a value: drop the ".." placeholder the
# extract uses for missing data, as well as genuine NA entries.
GiniTidy <- filter(GiniTidy, GiniCoeff != "..", !is.na(GiniCoeff))
head(GiniTidy)
## Country.Name Country.Code Year GiniCoeff
## 1 Bolivia BOL X1990..YR1990. 42.04
## 2 Brazil BRA X1990..YR1990. 60.49
## 3 Chile CHL X1990..YR1990. 57.25
## 4 Costa Rica CRI X1990..YR1990. 45.3
## 5 Honduras HND X1990..YR1990. 57.36
## 6 Iran, Islamic Rep. IRN X1990..YR1990. 43.6
# Clean the two value columns in one step:
#  * Year: the gathered key still holds the original header text
#    (e.g. "X1990..YR1990."); keep only the digits that follow "YR".
#    The result stays a character column, as before.
#  * GiniCoeff: convert the textual values to numbers.
# A single regex extraction replaces the previous chain of three separate()
# calls, each of which created a throwaway Tmp column.
GiniTidy <- GiniTidy %>%
  mutate(
    Year = str_extract(Year, "(?<=YR)[0-9]+"),
    GiniCoeff = as.numeric(GiniCoeff)
  )
head(GiniTidy)
## Country.Name Country.Code Year GiniCoeff
## 1 Bolivia BOL 1990 42.04
## 2 Brazil BRA 1990 60.49
## 3 Chile CHL 1990 57.25
## 4 Costa Rica CRI 1990 45.30
## 5 Honduras HND 1990 57.36
## 6 Iran, Islamic Rep. IRN 1990 43.60
# Load the country metadata that accompanies the indicator extract and preview
# its first rows. stringsAsFactors = FALSE keeps text columns (including the
# Code join key) as character on every R version, so the later join does not
# have to coerce factors with different level sets.
# NOTE(review): a value printed as "1991<U+0096>96" suggests this file is
# Windows-1252 encoded — confirm and set fileEncoding accordingly if so.
# NOTE(review): the path is machine-specific — adjust before running elsewhere.
AllGiniMetaData <- read.csv(
  file = "C:/MSDA/D607/Project2/Data_Extract_From_World_Development_Indicators/Data_Extract_From_World_Development_Indicators_Metadata.csv",
  stringsAsFactors = FALSE
)
head(AllGiniMetaData)
## Code Long.Name Income.Group
## 1 AFG Islamic State of Afghanistan Low income
## 2 ALB Republic of Albania Upper middle income
## 3 DZA People's Democratic Republic of Algeria Upper middle income
## 4 ASM American Samoa Upper middle income
## 5 ADO Principality of Andorra High income
## 6 AGO People's Republic of Angola Upper middle income
## Region Lending.category Other.groups Currency.Unit
## 1 South Asia IDA HIPC Afghan afghani
## 2 Europe & Central Asia IBRD Albanian lek
## 3 Middle East & North Africa IBRD Algerian dinar
## 4 East Asia & Pacific U.S. dollar
## 5 Europe & Central Asia Euro
## 6 Sub-Saharan Africa IBRD Angolan kwanza
## Latest.population.census
## 1 1979
## 2 2011
## 3 2008
## 4 2010
## 5 2011. Population data compiled from administrative registers.
## 6 2014
## Latest.household.survey
## 1 Demographic and Health Survey, 2015
## 2 Demographic and Health Survey, 2008/09
## 3 Multiple Indicator Cluster Survey, 2012/13
## 4
## 5
## 6 Demographic and Health Survey, 2015
## Special.Notes
## 1 Fiscal year end: March 20; reporting period for national accounts data is calendar year, estimated to insure consistency between national accounts and fiscal data. National accounts data are sourced from the IMF and differ from the Central Statistics Organization numbers due to exclusion of the opium economy.
## 2
## 3
## 4
## 5 The base year has changed to 2000. Price valuation is in basic prices.
## 6 April 2013 database update: Based on IMF data, national accounts data were revised for 2000 onward; the base year changed to 2002.
## National.accounts.base.year
## 1 2002/03
## 2 Original chained constant price data are rescaled.
## 3 1980
## 4
## 5 2000
## 6 2002
## National.accounts.reference.year
## 1
## 2 1996
## 3
## 4
## 5
## 6
## System.of.National.Accounts
## 1 Country uses the 1993 System of National Accounts methodology.
## 2 Country uses the 1993 System of National Accounts methodology.
## 3 Country uses the 1968 System of National Accounts methodology.
## 4 Country uses the 1968 System of National Accounts methodology.
## 5 Country uses the 1968 System of National Accounts methodology.
## 6 Country uses the 1993 System of National Accounts methodology.
## SNA.price.valuation Alternative.conversion.factor
## 1 Value added at basic prices (VAB)
## 2 Value added at basic prices (VAB)
## 3 Value added at basic prices (VAB)
## 4
## 5 Value added at basic prices (VAB)
## 6 Value added at producer prices (VAP) 1991<U+0096>96
## PPP.survey.year
## 1
## 2 Rolling
## 3 2011
## 4 2011 (household consumption only).
## 5
## 6 2011
## Balance.of.Payments.Manual.in.use
## 1 IMF Balance of Payments Manual, 6th edition.
## 2 IMF Balance of Payments Manual, 6th edition.
## 3 IMF Balance of Payments Manual, 6th edition.
## 4
## 5
## 6 IMF Balance of Payments Manual, 6th edition.
## External.debt.Reporting.status System.of.trade
## 1 Actual General trade system
## 2 Actual Special trade system
## 3 Actual Special trade system
## 4
## 5
## 6 Actual
## Government.Accounting.concept IMF.data.dissemination.standard
## 1 Consolidated central government General Data Dissemination System (GDDS)
## 2 Consolidated central government General Data Dissemination System (GDDS)
## 3 Budgetary central government General Data Dissemination System (GDDS)
## 4
## 5
## 6 Budgetary central government General Data Dissemination System (GDDS)
## Source.of.most.recent.Income.and.expenditure.data
## 1 Integrated household survey (IHS), 2011
## 2 Living Standards Measurement Study Survey (LSMS), 2012
## 3 Integrated household survey (IHS), 1995
## 4
## 5
## 6 Integrated household survey (IHS), 2008/09
## Vital.registration.complete Latest.agricultural.census
## 1
## 2 Yes 2012
## 3
## 4 Yes 2008
## 5 Yes
## 6
## Latest.industrial.data Latest.trade.data Latest.water.withdrawal.data
## 1 NA 2014 2000
## 2 2012 2014 2006
## 3 2010 2014 2012
## 4 NA NA NA
## 5 NA 2014 NA
## 6 NA NA 2005
## X2.alpha.code WB.2.code Table.Name Short.Name
## 1 AF AF Afghanistan Afghanistan
## 2 AL AL Albania Albania
## 3 DZ DZ Algeria Algeria
## 4 AS AS American Samoa American Samoa
## 5 AD AD Andorra Andorra
## 6 AO AO Angola Angola
# Narrow the metadata to the country code (join key) plus the descriptive
# fields carried into the combined table.
GiniRelevantMetaData <- select(
  AllGiniMetaData,
  Code, Long.Name, Income.Group, Region, Currency.Unit
)
head(GiniRelevantMetaData)
## Code Long.Name Income.Group
## 1 AFG Islamic State of Afghanistan Low income
## 2 ALB Republic of Albania Upper middle income
## 3 DZA People's Democratic Republic of Algeria Upper middle income
## 4 ASM American Samoa Upper middle income
## 5 ADO Principality of Andorra High income
## 6 AGO People's Republic of Angola Upper middle income
## Region Currency.Unit
## 1 South Asia Afghan afghani
## 2 Europe & Central Asia Albanian lek
## 3 Middle East & North Africa Algerian dinar
## 4 East Asia & Pacific U.S. dollar
## 5 Europe & Central Asia Euro
## 6 Sub-Saharan Africa Angolan kwanza
# Align the key name with the metadata table: Country.Code becomes Code.
# Renaming by name (not by column position) keeps this correct if the column
# order changes, and it is a no-op when the script is re-run.
names(GiniTidy)[names(GiniTidy) == "Country.Code"] <- "Code"
# Attach the metadata to every Gini observation. left_join keeps all rows of
# GiniTidy; the key is listed once (a repeated key name is redundant and is
# rejected as a duplicate join column by newer dplyr releases).
GiniCombinedData <- left_join(x = GiniTidy, y = GiniRelevantMetaData, by = "Code")
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factors with different levels, coercing to character vector
head(GiniCombinedData)
## Country.Name Code Year GiniCoeff Long.Name
## 1 Bolivia BOL 1990 42.04 Plurinational State of Bolivia
## 2 Brazil BRA 1990 60.49 Federative Republic of Brazil
## 3 Chile CHL 1990 57.25 Republic of Chile
## 4 Costa Rica CRI 1990 45.30 Republic of Costa Rica
## 5 Honduras HND 1990 57.36 Republic of Honduras
## 6 Iran, Islamic Rep. IRN 1990 43.60 Islamic Republic of Iran
## Income.Group Region Currency.Unit
## 1 Lower middle income Latin America & Caribbean Bolivian Boliviano
## 2 Upper middle income Latin America & Caribbean Brazilian real
## 3 High income Latin America & Caribbean Chilean peso
## 4 Upper middle income Latin America & Caribbean Costa Rican colon
## 5 Lower middle income Latin America & Caribbean Honduran lempira
## 6 Upper middle income Middle East & North Africa Iranian rial
# Average Gini coefficient per region, taken over all country/year
# observations in the combined table.
GiniByRegion <- summarise(
  group_by(GiniCombinedData, Region),
  AvgGini = mean(GiniCoeff)
)
# Print the regions in ascending order of average Gini (result is not stored).
arrange(GiniByRegion, AvgGini)
## # A tibble: 7 × 2
## Region AvgGini
## <fctr> <dbl>
## 1 Europe & Central Asia 31.55733
## 2 South Asia 34.22067
## 3 North America 37.85143
## 4 East Asia & Pacific 39.20587
## 5 Middle East & North Africa 40.21143
## 6 Sub-Saharan Africa 43.84947
## 7 Latin America & Caribbean 49.60098
# Average Gini coefficient per income group, taken over all country/year
# observations in the combined table.
GiniByIncome <- summarise(
  group_by(GiniCombinedData, Income.Group),
  AvgGini = mean(GiniCoeff)
)
# Print the income groups in ascending order of average Gini (result is not
# stored).
arrange(GiniByIncome, AvgGini)
## # A tibble: 4 × 2
## Income.Group AvgGini
## <fctr> <dbl>
## 1 High income 32.68040
## 2 Lower middle income 38.48853
## 3 Low income 41.72471
## 4 Upper middle income 42.91836