Basic Setup

Set up libraries

library(stringr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)

Read Data and Select Relevant Columns

# Load the World Bank Gini extract. Without an explicit encoding the first
# header came back as "ï..Series.Name", the signature of a UTF-8 byte-order
# mark being decoded as Latin-1; "UTF-8-BOM" makes R strip the mark so the
# column is named Series.Name.
# NOTE(review): the absolute path only resolves on the original author's
# machine - confirm the location before re-running.
AllGiniData <- read.csv(
  file = "C:/MSDA/D607/Project2/Data_Extract_From_World_Development_Indicators/GiniData.csv",
  fileEncoding = "UTF-8-BOM"
)
head(AllGiniData)
##                        Series.Name Series.Code   Country.Name Country.Code
## 1 GINI index (World Bank estimate) SI.POV.GINI    Afghanistan          AFG
## 2 GINI index (World Bank estimate) SI.POV.GINI        Albania          ALB
## 3 GINI index (World Bank estimate) SI.POV.GINI        Algeria          DZA
## 4 GINI index (World Bank estimate) SI.POV.GINI American Samoa          ASM
## 5 GINI index (World Bank estimate) SI.POV.GINI        Andorra          ADO
## 6 GINI index (World Bank estimate) SI.POV.GINI         Angola          AGO
##   X1990..YR1990. X2000..YR2000. X2007..YR2007. X2008..YR2008.
## 1             ..             ..             ..             ..
## 2             ..             ..             ..          29.98
## 3             ..             ..             ..             ..
## 4             ..             ..             ..             ..
## 5             ..             ..             ..             ..
## 6             ..          51.96             ..          42.72
##   X2009..YR2009. X2010..YR2010. X2011..YR2011. X2012..YR2012.
## 1             ..             ..             ..             ..
## 2             ..             ..             ..          28.96
## 3             ..             ..             ..             ..
## 4             ..             ..             ..             ..
## 5             ..             ..             ..             ..
## 6             ..             ..             ..             ..
##   X2013..YR2013. X2014..YR2014. X2015..YR2015. X2016..YR2016.
## 1             ..             ..             ..             ..
## 2             ..             ..             ..             ..
## 3             ..             ..             ..             ..
## 4             ..             ..             ..             ..
## 5             ..             ..             ..             ..
## 6             ..             ..             ..             ..
# Drop the first two columns (the series name and series code, selected by
# position) and discard rows whose country name is empty.
GiniRelevantData <- AllGiniData %>%
  select(-1, -2) %>%
  filter(Country.Name != "")
head(GiniRelevantData)
##     Country.Name Country.Code X1990..YR1990. X2000..YR2000. X2007..YR2007.
## 1    Afghanistan          AFG             ..             ..             ..
## 2        Albania          ALB             ..             ..             ..
## 3        Algeria          DZA             ..             ..             ..
## 4 American Samoa          ASM             ..             ..             ..
## 5        Andorra          ADO             ..             ..             ..
## 6         Angola          AGO             ..          51.96             ..
##   X2008..YR2008. X2009..YR2009. X2010..YR2010. X2011..YR2011.
## 1             ..             ..             ..             ..
## 2          29.98             ..             ..             ..
## 3             ..             ..             ..             ..
## 4             ..             ..             ..             ..
## 5             ..             ..             ..             ..
## 6          42.72             ..             ..             ..
##   X2012..YR2012. X2013..YR2013. X2014..YR2014. X2015..YR2015.
## 1             ..             ..             ..             ..
## 2          28.96             ..             ..             ..
## 3             ..             ..             ..             ..
## 4             ..             ..             ..             ..
## 5             ..             ..             ..             ..
## 6             ..             ..             ..             ..
##   X2016..YR2016.
## 1             ..
## 2             ..
## 3             ..
## 4             ..
## 5             ..
## 6             ..

Tidy the data

# Reshape from one column per year to one row per country-year: the year
# column labels go into `Year` and the cell contents into `GiniCoeff`.
GiniTidy <- GiniRelevantData %>%
  gather(
    key = Year,
    value = GiniCoeff,
    X1990..YR1990.:X2016..YR2016.,
    convert = FALSE
  )
## Warning: attributes are not identical across measure variables; they will
## be dropped
# Keep only rows that carry a value: ".." is the placeholder shown in the
# empty cells of the extract, and NA entries are excluded as well.
GiniTidy <- filter(GiniTidy, !is.na(GiniCoeff), GiniCoeff != "..")
head(GiniTidy)
##         Country.Name Country.Code           Year GiniCoeff
## 1            Bolivia          BOL X1990..YR1990.     42.04
## 2             Brazil          BRA X1990..YR1990.     60.49
## 3              Chile          CHL X1990..YR1990.     57.25
## 4         Costa Rica          CRI X1990..YR1990.      45.3
## 5           Honduras          HND X1990..YR1990.     57.36
## 6 Iran, Islamic Rep.          IRN X1990..YR1990.      43.6
# Reduce the wide-format column labels (e.g. "X1990..YR1990.") to the
# four-digit year that follows "YR", and convert the Gini values from text to
# numbers. A single regex extraction replaces the earlier chain of three
# separate() calls, which created and dropped a throwaway column each time.
# Year is left as character, exactly as before.
GiniTidy <- GiniTidy %>%
  mutate(
    Year = str_extract(Year, "(?<=YR)[0-9]{4}"),
    GiniCoeff = as.numeric(GiniCoeff)
  )
head(GiniTidy)
##         Country.Name Country.Code Year GiniCoeff
## 1            Bolivia          BOL 1990     42.04
## 2             Brazil          BRA 1990     60.49
## 3              Chile          CHL 1990     57.25
## 4         Costa Rica          CRI 1990     45.30
## 5           Honduras          HND 1990     57.36
## 6 Iran, Islamic Rep.          IRN 1990     43.60

Analysis of Data -

1. Average Gini Coefficient Region-wise

2. Average Gini Coefficient by income category, to see whether any conclusions can be drawn

Read Metadata and Select relevant columns

# Load the country-level metadata that accompanies the Gini extract and
# preview its first rows.
AllGiniMetaData <- read.csv(
  "C:/MSDA/D607/Project2/Data_Extract_From_World_Development_Indicators/Data_Extract_From_World_Development_Indicators_Metadata.csv"
)
head(AllGiniMetaData)
##   Code                               Long.Name        Income.Group
## 1  AFG            Islamic State of Afghanistan          Low income
## 2  ALB                     Republic of Albania Upper middle income
## 3  DZA People's Democratic Republic of Algeria Upper middle income
## 4  ASM                          American Samoa Upper middle income
## 5  ADO                 Principality of Andorra         High income
## 6  AGO             People's Republic of Angola Upper middle income
##                       Region Lending.category Other.groups  Currency.Unit
## 1                 South Asia              IDA         HIPC Afghan afghani
## 2      Europe & Central Asia             IBRD                Albanian lek
## 3 Middle East & North Africa             IBRD              Algerian dinar
## 4        East Asia & Pacific                                  U.S. dollar
## 5      Europe & Central Asia                                         Euro
## 6         Sub-Saharan Africa             IBRD              Angolan kwanza
##                                        Latest.population.census
## 1                                                          1979
## 2                                                          2011
## 3                                                          2008
## 4                                                          2010
## 5 2011. Population data compiled from administrative registers.
## 6                                                          2014
##                      Latest.household.survey
## 1        Demographic and Health Survey, 2015
## 2     Demographic and Health Survey, 2008/09
## 3 Multiple Indicator Cluster Survey, 2012/13
## 4                                           
## 5                                           
## 6        Demographic and Health Survey, 2015
##                                                                                                                                                                                                                                                                                                            Special.Notes
## 1 Fiscal year end: March 20; reporting period for national accounts data is calendar year, estimated to insure consistency between national accounts and fiscal data. National accounts data are sourced from the IMF and differ from the Central Statistics Organization numbers due to exclusion of the opium economy.
## 2                                                                                                                                                                                                                                                                                                                       
## 3                                                                                                                                                                                                                                                                                                                       
## 4                                                                                                                                                                                                                                                                                                                       
## 5                                                                                                                                                                                                                                                 The base year has changed to 2000. Price valuation is in basic prices.
## 6                                                                                                                                                                                     April 2013 database update: Based on IMF data, national accounts data were revised for 2000 onward; the base year changed to 2002.
##                          National.accounts.base.year
## 1                                            2002/03
## 2 Original chained constant price data are rescaled.
## 3                                               1980
## 4                                                   
## 5                                               2000
## 6                                               2002
##   National.accounts.reference.year
## 1                                 
## 2                             1996
## 3                                 
## 4                                 
## 5                                 
## 6                                 
##                                      System.of.National.Accounts
## 1 Country uses the 1993 System of National Accounts methodology.
## 2 Country uses the 1993 System of National Accounts methodology.
## 3 Country uses the 1968 System of National Accounts methodology.
## 4 Country uses the 1968 System of National Accounts methodology.
## 5 Country uses the 1968 System of National Accounts methodology.
## 6 Country uses the 1993 System of National Accounts methodology.
##                    SNA.price.valuation Alternative.conversion.factor
## 1    Value added at basic prices (VAB)                              
## 2    Value added at basic prices (VAB)                              
## 3    Value added at basic prices (VAB)                              
## 4                                                                   
## 5    Value added at basic prices (VAB)                              
## 6 Value added at producer prices (VAP)                       1991<U+0096>96
##                      PPP.survey.year
## 1                                   
## 2                            Rolling
## 3                               2011
## 4 2011 (household consumption only).
## 5                                   
## 6                               2011
##              Balance.of.Payments.Manual.in.use
## 1 IMF Balance of Payments Manual, 6th edition.
## 2 IMF Balance of Payments Manual, 6th edition.
## 3 IMF Balance of Payments Manual, 6th edition.
## 4                                             
## 5                                             
## 6 IMF Balance of Payments Manual, 6th edition.
##   External.debt.Reporting.status      System.of.trade
## 1                         Actual General trade system
## 2                         Actual Special trade system
## 3                         Actual Special trade system
## 4                                                    
## 5                                                    
## 6                         Actual                     
##     Government.Accounting.concept          IMF.data.dissemination.standard
## 1 Consolidated central government General Data Dissemination System (GDDS)
## 2 Consolidated central government General Data Dissemination System (GDDS)
## 3    Budgetary central government General Data Dissemination System (GDDS)
## 4                                                                         
## 5                                                                         
## 6    Budgetary central government General Data Dissemination System (GDDS)
##        Source.of.most.recent.Income.and.expenditure.data
## 1                Integrated household survey (IHS), 2011
## 2 Living Standards Measurement Study Survey (LSMS), 2012
## 3                Integrated household survey (IHS), 1995
## 4                                                       
## 5                                                       
## 6             Integrated household survey (IHS), 2008/09
##   Vital.registration.complete Latest.agricultural.census
## 1                                                       
## 2                         Yes                       2012
## 3                                                       
## 4                         Yes                       2008
## 5                         Yes                           
## 6                                                       
##   Latest.industrial.data Latest.trade.data Latest.water.withdrawal.data
## 1                     NA              2014                         2000
## 2                   2012              2014                         2006
## 3                   2010              2014                         2012
## 4                     NA                NA                           NA
## 5                     NA              2014                           NA
## 6                     NA                NA                         2005
##   X2.alpha.code WB.2.code     Table.Name     Short.Name
## 1            AF        AF    Afghanistan    Afghanistan
## 2            AL        AL        Albania        Albania
## 3            DZ        DZ        Algeria        Algeria
## 4            AS        AS American Samoa American Samoa
## 5            AD        AD        Andorra        Andorra
## 6            AO        AO         Angola         Angola
# Keep only the metadata fields used below: the country code (join key),
# the long name, the income group, the region and the currency unit.
GiniRelevantMetaData <- select(
  AllGiniMetaData,
  Code,
  Long.Name,
  Income.Group,
  Region,
  Currency.Unit
)
head(GiniRelevantMetaData)
##   Code                               Long.Name        Income.Group
## 1  AFG            Islamic State of Afghanistan          Low income
## 2  ALB                     Republic of Albania Upper middle income
## 3  DZA People's Democratic Republic of Algeria Upper middle income
## 4  ASM                          American Samoa Upper middle income
## 5  ADO                 Principality of Andorra         High income
## 6  AGO             People's Republic of Angola Upper middle income
##                       Region  Currency.Unit
## 1                 South Asia Afghan afghani
## 2      Europe & Central Asia   Albanian lek
## 3 Middle East & North Africa Algerian dinar
## 4        East Asia & Pacific    U.S. dollar
## 5      Europe & Central Asia           Euro
## 6         Sub-Saharan Africa Angolan kwanza

Region Analysis

# Give the country-code column the same name as the metadata key so both
# tables join on "Code". The column is located by name rather than by
# position, so the rename stays correct if the column order changes and is a
# no-op when re-run.
names(GiniTidy)[names(GiniTidy) == "Country.Code"] <- "Code"
# Join on the single shared key. The previous by = c("Code", "Code") listed
# the same column twice, which dplyr 1.0 and later reject as duplicated join
# columns.
GiniCombinedData <- left_join(
  x = GiniTidy,
  y = GiniRelevantMetaData,
  by = "Code"
)
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factors with different levels, coercing to character vector
head(GiniCombinedData)
##         Country.Name Code Year GiniCoeff                      Long.Name
## 1            Bolivia  BOL 1990     42.04 Plurinational State of Bolivia
## 2             Brazil  BRA 1990     60.49  Federative Republic of Brazil
## 3              Chile  CHL 1990     57.25              Republic of Chile
## 4         Costa Rica  CRI 1990     45.30         Republic of Costa Rica
## 5           Honduras  HND 1990     57.36           Republic of Honduras
## 6 Iran, Islamic Rep.  IRN 1990     43.60       Islamic Republic of Iran
##          Income.Group                     Region      Currency.Unit
## 1 Lower middle income  Latin America & Caribbean Bolivian Boliviano
## 2 Upper middle income  Latin America & Caribbean     Brazilian real
## 3         High income  Latin America & Caribbean       Chilean peso
## 4 Upper middle income  Latin America & Caribbean  Costa Rican colon
## 5 Lower middle income  Latin America & Caribbean   Honduran lempira
## 6 Upper middle income Middle East & North Africa       Iranian rial
# Mean Gini coefficient per region, taken over every country-year row in the
# combined data, then listed from the lowest average to the highest.
GiniByRegion <- summarise(
  group_by(GiniCombinedData, Region),
  AvgGini = mean(GiniCoeff)
)
arrange(GiniByRegion, AvgGini)
## # A tibble: 7 × 2
##                       Region  AvgGini
##                       <fctr>    <dbl>
## 1      Europe & Central Asia 31.55733
## 2                 South Asia 34.22067
## 3              North America 37.85143
## 4        East Asia & Pacific 39.20587
## 5 Middle East & North Africa 40.21143
## 6         Sub-Saharan Africa 43.84947
## 7  Latin America & Caribbean 49.60098

The average Gini coefficient is lowest in Europe & Central Asia, possibly reflecting the region's more ‘socialistic’ structure, whereas the high averages for Sub-Saharan Africa and Latin America & Caribbean reflect more unequal societies.

Analysis by income

# Mean Gini coefficient per income group, taken over every country-year row
# in the combined data, then listed from the lowest average to the highest.
GiniByIncome <- summarise(
  group_by(GiniCombinedData, Income.Group),
  AvgGini = mean(GiniCoeff)
)
arrange(GiniByIncome, AvgGini)
## # A tibble: 4 × 2
##          Income.Group  AvgGini
##                <fctr>    <dbl>
## 1         High income 32.68040
## 2 Lower middle income 38.48853
## 3          Low income 41.72471
## 4 Upper middle income 42.91836

The lowest average Gini is in high income countries, but surprisingly upper middle income countries show the highest average Gini. This should be investigated further to identify which countries are driving it.