Gini Coefficient Analysis

library(ineq)

# Load the Ilocos data set that ships with the `ineq` package.
data(Ilocos)

# Plot Pen's Parade of income.
# The column is referenced explicitly as `Ilocos$income` rather than through
# attach(), so nothing is added to the search path and no other object can
# mask (or be masked by) the data frame's columns.
Pen(Ilocos$income)

# Same parade, drawn with a fill colour given in HSV coordinates.
Pen(Ilocos$income, fill = hsv(0.1, 0.3, 1))

# Income distribution of the USA in 1968, given as grouped data (10 classes).
# x: vector of class means, n: vector of class frequencies (same length as x).
x <- c(541, 1463, 2445, 3438, 4437, 5401, 6392, 8304, 11904, 22261)
n <- c(482, 825, 722, 690, 661, 760, 745, 2140, 1911, 1024)
# Pen's Parade of the grouped data: each class mean is weighted by its
# frequency through the `n` argument.
Pen(x, n = n)

# Create an artificial grouping variable: classes 1-3 form group 1,
# classes 4-6 group 2 and classes 7-10 group 3.
myfac <- factor(c(1, 1, 1, 2, 2, 2, 3, 3, 3, 3))
# Same parade, with the grouping factor passed through `group`.
Pen(x, n = n, group = myfac)

## Load the income (and metadata) data set from Ilocos, Philippines.
## Columns are referenced explicitly through `Ilocos$...` rather than via
## attach(): attaching puts the data frame on the search path, where its
## columns can mask, or be masked by, other objects of the same name.
data(Ilocos)

## extract and rescale income for the provinces "Pangasinan" and "La Union"
income.p <- Ilocos$income[Ilocos$province == "Pangasinan"] / 10000
income.u <- Ilocos$income[Ilocos$province == "La Union"] / 10000
## compute the Lorenz curves
Lc.p <- Lc(income.p)
Lc.u <- Lc(income.u)
## plot both Lorenz curves: Pangasinan first, La Union overlaid in colour 2
plot(Lc.p)
lines(Lc.u, col = 2)

# generate vectors (of incomes)
x <- c(541, 1463, 2445, 3438, 4437, 5401, 6392, 8304, 11904, 22261)
y <- c(841, 2063, 2445, 3438, 4437, 5401, 6392, 8304, 11304, 21961)
# compute Watts index with poverty line 2000
pov(x, 2000)

## [1] 0.1620141

pov(y, 2000)

## [1] 0.08663108

# compute headcount ratio with poverty line 2000 for both vectors.
# Each result is printed directly instead of being stored, so the value for
# x appears next to its call and no variable named `t` (the name of base R's
# transpose function) is created.
pov(x, 2000, parameter = 1, type = "Foster")

## [1] 0.2

pov(y, 2000, parameter = 1, type = "Foster")

## [1] 0.1

# Print the definition of theorLc(): it dispatches on `type` and returns a
# function of p with class "theorLc".
theorLc

## function (type = c("Singh-Maddala", "Dagum", "lognorm", "Pareto", 
##     "exponential"), parameter = 0) 
## {
##     switch(match.arg(type), `Singh-Maddala` = rval <- function(p) {
##         Lc.singh(p, parameter = parameter)
##     }, Dagum = rval <- function(p) {
##         Lc.dagum(p, parameter = parameter)
##     }, lognorm = rval <- function(p) {
##         Lc.lognorm(p, parameter = parameter)
##     }, Pareto = rval <- function(p) {
##         Lc.pareto(p, parameter = parameter)
##     }, exponential = rval <- function(p) {
##         Lc.exp(p)
##     })
##     class(rval) <- "theorLc"
##     return(rval)
## }
## <bytecode: 0x000000001894b8d0>
## <environment: namespace:ineq>

## Figure 2
# Two toy distributions of ten values each:
#   x: nine values of 50/9 and a single value of 50
#   y: five values of 2 and five values of 18
x <- rep(c(50/9, 50), c(9, 1))
y <- rep(c(2, 18), c(5, 5))
# Frequency plots of the two distributions.
plot(table(x))

plot(table(y))

## statistics
# Both distributions have the same mean ...
mean(x)

## [1] 10

mean(y)

## [1] 10

# ... and the same (bias-corrected, corr = TRUE) Gini coefficient ...
Gini(x, corr = TRUE)

## [1] 0.4444444

Gini(y, corr = TRUE)

## [1] 0.4444444

# ... but Lasym() tells them apart: above 1 for x, below 1 for y.
Lasym(x)

## [1] 1.46

Lasym(y)

## [1] 0.74

## Figure 3
# Lorenz curves of x and y (y in slate gray), plus a dashed reference line
# with intercept 1 and slope -1.
plot(Lc(x))
lines(Lc(y), col = "slategray")
abline(1, -1, lty = 2)

library(readr)
# Read the wealth data set into `gini`.
# NOTE(review): the path is specific to the original author's machine; adjust
# it before running elsewhere.
gini <- read_csv("~/SA projects 2019/Nombulelo/gini.csv")

## Parsed with column specification:
## cols(
##   Gender = col_character(),
##   Race = col_character(),
##   Year = col_character(),
##   Total_Assets = col_double(),
##   Total_Liabilities = col_double(),
##   Net_worthy = col_double(),
##   Business_Assets = col_double(),
##   Real_Estate = col_double(),
##   Vehichle_Assets = col_double(),
##   Financial_Assets = col_double(),
##   Superannuation_Assets = col_double(),
##   Possessions_Assets = col_double(),
##   Real_Estate_Debt = col_double(),
##   Business_Debt = col_double(),
##   Financial_Debt = col_double(),
##   Self_Employed = col_character(),
##   Casual_Labour = col_character(),
##   Districts = col_character()
## )

## Warning: 1 parsing failure.
##   row           col expected  actual                                    file
## 20004 Business_Debt a double Missing '~/SA projects 2019/Nombulelo/gini.csv'

# NOTE(review): row 20004 holds the text "Missing" in Business_Debt; that cell
# could not be parsed as a double (see the warning above).

# Open the data viewer only in interactive sessions, so the script can also
# be run non-interactively (e.g. via Rscript).
if (interactive()) {
  View(gini)
}
names(gini)

##  [1] "Gender"                "Race"                 
##  [3] "Year"                  "Total_Assets"         
##  [5] "Total_Liabilities"     "Net_worthy"           
##  [7] "Business_Assets"       "Real_Estate"          
##  [9] "Vehichle_Assets"       "Financial_Assets"     
## [11] "Superannuation_Assets" "Possessions_Assets"   
## [13] "Real_Estate_Debt"      "Business_Debt"        
## [15] "Financial_Debt"        "Self_Employed"        
## [17] "Casual_Labour"         "Districts"

# Columns are accessed through the data frame (with() / `$`) instead of
# attach(), so no column can be masked by, or mask, another object.
# with() keeps the bare column name in the call, so the axis labels still
# read "Net_worthy" / "table(Net_worthy)".
with(gini, plot(table(Net_worthy)))

## statistics
mean(gini$Net_worthy)

## [1] 210.997

##Gini(, corr = TRUE)

##Lasym(x)

## Lorenz curve of net worth
plot(Lc(gini$Net_worthy))

## Pen's Parade of net worth
Pen(gini$Net_worthy, fill = hsv(0.1, 0.3, 1))

Using the revised data set

library(readr)
# Read the revised data set into `gni_revised`.
# NOTE(review): the path is specific to the original author's machine; adjust
# it before running elsewhere.
gni_revised <- read_csv("~/SA projects 2019/Nombulelo/gni_revised.csv")

## Parsed with column specification:
## cols(
##   .default = col_double(),
##   Gender = col_character(),
##   Race = col_character(),
##   Year = col_character(),
##   Districts = col_character(),
##   Self_Employed = col_character(),
##   Casual_Labour = col_character()
## )

## See spec(...) for full column specifications.

## Warning: 4 parsing failures.
##   row           col expected  actual                                           file
## 20004 Total_Assets  a double #VALUE! '~/SA projects 2019/Nombulelo/gni_revised.csv'
## 20004 Assets        a double #VALUE! '~/SA projects 2019/Nombulelo/gni_revised.csv'
## 20004 Business_Debt a double Missing '~/SA projects 2019/Nombulelo/gni_revised.csv'
## 20004 gini          a double #VALUE! '~/SA projects 2019/Nombulelo/gni_revised.csv'

# NOTE(review): row 20004 contains spreadsheet residue ("#VALUE!", "Missing")
# that could not be parsed as doubles (see the warning above); in particular
# the `gini` column has an unparsed cell in that row.

# Open the data viewer only in interactive sessions, so the script can also
# be run non-interactively (e.g. via Rscript).
if (interactive()) {
  View(gni_revised)
}

names(gni_revised)

##  [1] "Gender"                "Race"                 
##  [3] "Year"                  "Districts"            
##  [5] "Total_Assets"          "Total_Liabilities"    
##  [7] "Net_worthy"            "Business_Assets"      
##  [9] "Real_Estate"           "Vehichle_Assets"      
## [11] "Financial_Assets"      "Superannuation_Assets"
## [13] "Assets"                "Possessions_Assets"   
## [15] "Real_Estate_Debt"      "Business_Debt"        
## [17] "Financial_Debt"        "Self_Employed"        
## [19] "Casual_Labour"         "gini"

# Convert the grouping columns to factors inside the data frame itself.
# Assigning `Gender <- as.factor(Gender)` on an attach()-ed data frame would
# only create a separate global copy and leave `gni_revised` (the object the
# ggplot() calls read from) unchanged, so the columns are updated in place.
gni_revised$Gender <- as.factor(gni_revised$Gender)
gni_revised$Race <- as.factor(gni_revised$Race)
gni_revised$Year <- as.factor(gni_revised$Year)
gni_revised$Districts <- as.factor(gni_revised$Districts)

library(ggplot2)

# Lorenz curve of net worth in the revised data.
# The column is referenced explicitly instead of through attach(), so it
# cannot be confused with a same-named column of another data frame.
plot(Lc(gni_revised$Net_worthy))

# Pen's Parade of net worth in the revised data.
Pen(gni_revised$Net_worthy, fill = hsv(0.1, 0.3, 1))

# Gini coefficient by year, with colour, point shape and smoother fill all
# mapped to gender.
# NOTE(review): with a discrete `Year` axis the lm smoother is fitted per
# Year x Gender group; confirm that this is the intended trend display.
ggplot(gni_revised, aes(x = Year, y = gini, color = Gender)) +
  geom_point(size = 3, aes(shape = Gender)) +
  geom_smooth(
    method = lm, position = "jitter", aes(fill = Gender), level = 0.95
  ) +
  ylab("The Gini Coefficient") +
  xlab("Year") +
  # Dashed red reference line at a Gini coefficient of 0.
  # geom_abline() has no `xintercept` parameter: it ignored the argument with
  # a warning and drew its default y = x line instead. A horizontal line at 0
  # is presumably what was intended -- TODO confirm.
  geom_hline(yintercept = 0, linetype = 2, color = "red", size = 1)

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

########################

# Use the gray theme with a 10 pt base font for all subsequent plots.
theme_set(theme_gray(base_size = 10))

# Box plots of the Gini coefficient by gender, coloured by year, with the
# overall mean overlaid as points and as a line per year.
# The `gini` column contains a missing value, so the mean must be computed
# with na.rm = TRUE; without it the mean is NA and every point and line
# vertex of the overlay is dropped from the plot.
ggplot(gni_revised, aes(x = Gender, y = gini, colour = Year)) +
  geom_boxplot(size = 1.0, varwidth = TRUE) +
  geom_point(aes(y = mean(gini, na.rm = TRUE))) +
  geom_line(aes(y = mean(gini, na.rm = TRUE), group = Year)) +
  ylab("The Gini Coefficient") +
  xlab("Gender")

## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

Gini Coefficient Analysis

Lovemore Chipindu [lovemore.datascience@gmail.com], [0778796212]

25 March 2019