library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.1     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ineq)
library(reshape2)
## 
## Присоединяю пакет: 'reshape2'
## 
## Следующий объект скрыт от 'package:tidyr':
## 
##     smiths
library(dplyr)
# Read the GCIP workbook; `skip = 2` makes readxl ignore the first two rows
# of the sheet before it starts reading.
df <- read_excel(
  path = "/Users/DP/Downloads/GCIPrawdata.xlsx",
  skip = 2
)

# Preview the first six rows to check the import
head(df, n = 6)
## # A tibble: 6 × 14
##   Country      Year `Decile 1 Income` `Decile 2 Income` `Decile 3 Income`
##   <chr>       <dbl>             <dbl>             <dbl>             <dbl>
## 1 Afghanistan  1980               206               350               455
## 2 Afghanistan  1981               212               361               469
## 3 Afghanistan  1982               221               377               490
## 4 Afghanistan  1983               238               405               527
## 5 Afghanistan  1984               249               424               551
## 6 Afghanistan  1985               256               435               566
## # ℹ 9 more variables: `Decile 4 Income` <dbl>, `Decile 5 Income` <dbl>,
## #   `Decile 6 Income` <dbl>, `Decile 7 Income` <dbl>, `Decile 8 Income` <dbl>,
## #   `Decile 9 Income` <dbl>, `Decile 10 Income` <dbl>, `Mean Income` <dbl>,
## #   Population <dbl>
# Countries and years to compare
sel_Country <- c("Russian Federation", "Ukraine")
sel_Year <- c(1980, 2014)

# Keep only the rows that match both selections (filter() combines its
# conditions with AND), then print the result
temp <- filter(df, Country %in% sel_Country, Year %in% sel_Year)
temp
## # A tibble: 4 × 14
##   Country             Year `Decile 1 Income` `Decile 2 Income` `Decile 3 Income`
##   <chr>              <dbl>             <dbl>             <dbl>             <dbl>
## 1 Russian Federation  1980              1126              1555              1891
## 2 Russian Federation  2014              1781              2880              3740
## 3 Ukraine             1980              1572              2260              2702
## 4 Ukraine             2014              1026              1553              1976
## # ℹ 9 more variables: `Decile 4 Income` <dbl>, `Decile 5 Income` <dbl>,
## #   `Decile 6 Income` <dbl>, `Decile 7 Income` <dbl>, `Decile 8 Income` <dbl>,
## #   `Decile 9 Income` <dbl>, `Decile 10 Income` <dbl>, `Mean Income` <dbl>,
## #   Population <dbl>
# Print large numbers in full instead of scientific notation
options(scipen = 999)

# Total income per selected country-year: mean income times population.
# The product of two one-column tables keeps the first column's name, so the
# result is printed under the heading "Mean Income".
total_income <- temp["Mean Income"] * temp["Population"]
total_income
##    Mean Income
## 1 391702000000
## 2 944909800000
## 3 232704752827
## 4 207127001400
# Toy example: cumsum() returns the running total of a vector, which is the
# operation used below to accumulate decile incomes.
test <- c(2, 4, 10, 22)  
cumsum(test)  
## [1]  2  6 16 38
# Cumulative income shares (the Lorenz-curve ordinates) for the four selected
# country-years. Each country-year is modelled as a population of 20 people,
# 2 per decile, so every decile income is doubled and total income is
# 20 * mean income.

# Decile incomes and cumulative income shares for one country-year.
#   data:              table with Country, Year, the ten "Decile k Income"
#                      columns and "Mean Income"
#   country, year:     key of the row to use; must match exactly one row
#   people_per_decile: number of people assumed in each decile
# Returns a list with `decs` (scaled decile incomes) and `cum_share`
# (cumulative share of total income held by deciles 1..k).
lorenz_inputs <- function(data, country, year, people_per_decile = 2) {
  decile_cols <- paste("Decile", 1:10, "Income")
  # which() drops NA comparisons, so missing keys cannot select a row
  idx <- which(data$Country == country & data$Year == year)
  if (length(idx) != 1L) {
    stop(
      "Expected exactly one row for ", country, " in ", year,
      ", found ", length(idx),
      call. = FALSE
    )
  }
  decs <- unlist(data[idx, decile_cols]) * people_per_decile
  # Total income of the modelled population (10 deciles)
  total <- 10 * people_per_decile * data[["Mean Income"]][idx]
  list(decs = decs, cum_share = cumsum(decs) / total)
}

# Russian Federation, 1980
lorenz_RF80 <- lorenz_inputs(temp, "Russian Federation", 1980)
decs_RF80 <- lorenz_RF80$decs
cum_inc_share_RF80 <- lorenz_RF80$cum_share
cum_inc_share_RF80
##  Decile 1 Income  Decile 2 Income  Decile 3 Income  Decile 4 Income 
##       0.03995742       0.09513840       0.16224273       0.23977999 
##  Decile 5 Income  Decile 6 Income  Decile 7 Income  Decile 8 Income 
##       0.32732434       0.42523066       0.53477644       0.65890703 
##  Decile 9 Income Decile 10 Income 
##       0.80479063       1.00000000

# Russian Federation, 2014
lorenz_RF14 <- lorenz_inputs(temp, "Russian Federation", 2014)
decs_RF14 <- lorenz_RF14$decs
cum_inc_share_RF14 <- lorenz_RF14$cum_share

# Ukraine, 1980
lorenz_Ua80 <- lorenz_inputs(temp, "Ukraine", 1980)
decs_Ua80 <- lorenz_Ua80$decs
cum_inc_share_Ua80 <- lorenz_Ua80$cum_share

# Ukraine, 2014
lorenz_Ua14 <- lorenz_inputs(temp, "Ukraine", 2014)
decs_Ua14 <- lorenz_Ua14$decs
cum_inc_share_Ua14 <- lorenz_Ua14$cum_share
# Lorenz curve for the Russian Federation in 1980: cumulative income share
# against decile number (the x-axis is the vector index, 1 to 10).
plot(cum_inc_share_RF80, type = "l", col = "blue",
  lwd = 2, xlab = "Deciles", ylab = "Cumulative income share")

# Add the perfect equality line: every decile adds 10% of total income
abline(a = 0, b = 0.1, col = "black", lwd = 2)

title("Lorenz curve, Russia Federation, 1980")

Interpretation: The straight line represents perfect equality, so the bend of the Lorenz curve away from it shows inequality. For example, the poorest 40% of the population (x-axis) receive only a little over 20% (about 24%) of all income (y-axis).

# Lorenz curves for all four country-years on one chart.
# 1980 curves are dashed (lty = 2); 2014 curves are solid (lty = 1).
plot(cum_inc_share_RF80, type = "l", col = "blue",
  lty = 2, lwd = 2, xlab = "Deciles",
  ylab = "Cumulative income share")

# Add the perfect equality line
abline(a = 0, b = 0.1, col = "black", lwd = 2)

lines(cum_inc_share_RF14, col = "green", lty = 1, lwd = 2)

lines(cum_inc_share_Ua80, col = "magenta", lty = 2, lwd = 2)

lines(cum_inc_share_Ua14, col = "orange", lty = 1, lwd = 2)

title("Lorenz curves, Russia Federation and the Ukraine (1980 and 2014)")

# One line type per legend entry, in the same order as the labels and
# colours, so the legend matches the curves explicitly.
legend("topleft", lty = c(2, 1, 2, 1), lwd = 2, cex = 1.2, legend =
  c("RF, 1980", "RF, 2014",
    "Ukraine, 1980", "Ukraine, 2014"),
  col = c("blue", "green", "magenta", "orange"))

Here we can immediately see that the Russian Federation in 1980 was closest to economic equality, since its curve lies closest to the straight line. In turn, Ukraine in 2014 shows the greatest inequality, since its curve lies furthest from the straight line: the poorest 50% of the population receive about 20% of income. The curve for Ukraine in 1980 almost completely coincides with that for Russia in 2014; in both cases the poorest 80% of the population receive about 50% of income.

This may be explained by several historical facts. The deterioration of the economic situation in Russia is associated with the crisis of 2008-2009 (the global financial crisis worsened the state of the economy and, accordingly, the incomes of the population) and with 2014, when the annexation of Crimea by Russia and the subsequent comprehensive sanctions reduced the incomes of the population and consequently affected the level of economic inequality. The situation in Ukraine may be explained by the fact that in 1980 Ukraine was part of the USSR, so the region received external support and was better funded. By 2014 the situation had worsened: the collapse of the USSR affected the development of Ukraine's economy in the long term, and the global financial crisis also affected economic inequality. It may also be related to the unstable political situation (Euromaidan and the revolutionary events in the country) and to the beginning of the Russian Federation's armed invasion of Ukrainian territory.

# Gini coefficient of each selected country-year, computed from its ten
# decile incomes
g_RF80 <- Gini(decs_RF80)
g_RF14 <- Gini(decs_RF14)
g_Ua80 <- Gini(decs_Ua80)
g_Ua14 <- Gini(decs_Ua14)

# Values rounded to two decimals for the printed summary
gini_RF <- round(c(g_RF80, g_RF14), 2)
gini_Ua <- round(c(g_Ua80, g_Ua14), 2)

paste("Gini coefficients")
## [1] "Gini coefficients"
paste("RF - 1980: ", gini_RF[1], ", 2014: ", gini_RF[2])
## [1] "RF - 1980:  0.24 , 2014:  0.33"
paste("Ukraine - 1980: ", gini_Ua[1], ", 2014: ", gini_Ua[2])
## [1] "Ukraine - 1980:  0.32 , 2014:  0.43"
# Lorenz curves for all four country-years, annotated with their Gini
# coefficients. 1980 curves are dashed (lty = 2); 2014 curves are solid
# (lty = 1).
plot(cum_inc_share_RF80, type = "l", col = "blue", lty = 2,
  lwd = 2, xlab = "Deciles",
  ylab = "Cumulative income share")

# Add the perfect equality line
abline(a = 0, b = 0.1, col = "black", lwd = 2)

lines(cum_inc_share_RF14, col = "green", lty = 1, lwd = 2)

lines(cum_inc_share_Ua80, col = "magenta", lty = 2, lwd = 2)

lines(cum_inc_share_Ua14, col = "orange", lty = 1, lwd = 2)

title("Lorenz curves, RF and Ukraine (1980 and 2014)")

# One line type per legend entry, in the same order as the labels and
# colours, so the legend matches the curves explicitly.
legend("topleft", lty = c(2, 1, 2, 1), lwd = 2, cex = 1.2, legend =
  c("RF, 1980", "RF, 2014",
    "Ukraine, 1980", "Ukraine, 2014"),
  col = c("blue", "green", "magenta", "orange"))

# Print each Gini coefficient in the colour of its curve; the coordinates
# are hand-placed positions in plot units.
text(8.4, 0.78, round(g_RF80, digits = 3), col = "blue")
text(9.4, 0.6, round(g_RF14, digits = 3), col = "green")
text(5.2, 0.37, round(g_Ua80, digits = 3), col = "magenta")
text(6.4, 0.31, round(g_Ua14, digits = 3), col = "orange")

This graph, annotated with the Gini coefficients, confirms the earlier observations, including the observation that the values for Russia in 2014 (0.325) and Ukraine in 1980 (0.32) are almost identical.

# Gini coefficient for every country-year in the full data set.

# Number of rows in df
noc <- nrow(df)

# For each row, take the ten decile incomes (columns 3 to 12) and compute
# their Gini coefficient. seq_len() yields an empty sequence when df has no
# rows, and vapply() guarantees one numeric value per row.
df$gini <- vapply(
  seq_len(noc),
  function(i) Gini(unlist(df[i, 3:12])),
  numeric(1)
)
summary(df$gini)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1791  0.3470  0.4814  0.4617  0.5700  0.7386
# Gini time series for four post-Soviet countries, one line per country
gini_selected <- filter(
  df,
  Country %in% c("Russian Federation", "Ukraine", "Turkmenistan", "Kazakhstan")
)

ggplot(gini_selected, aes(x = Year, y = gini, color = Country)) +
  geom_line(linewidth = 1) +
  labs(y = "Gini", title = "Gini coefficients for the post-Soviet countries") +
  theme_bw()

This graph almost duplicates the graph with Gini coefficients, since the dynamics are noticeably similar: for example, the peak of inequality in Kazakhstan (1996-1997) appears both here and in the previous graph, as does the sharp decrease in inequality in Ukraine, the absolute minimum on the graph (2004-2005). This graph also shows a fairly uniform and stable situation before 1990 and a sharp increase in inequality afterwards, as a consequence of the collapse of the USSR. The difference between the two charts is the gap between the peaks of Russia and Kazakhstan (1996-1997): in Russia the richest 10% earned 30 times more than the poorest 10%, whereas in Kazakhstan it was more than 50 times. On the graph with Gini coefficients, the value is approximately 0.5 for both Russia and Kazakhstan in this period. This may be because the Gini coefficient takes into account the incomes of all population groups, whereas here we compare only the two “extreme points”.

# Interdecile income ratios for every country-year.
# NOTE(review): the Decile 10 / 5 / 1 income columns are used as stand-ins
# for P90 / P50 / P10 -- confirm this matches the labels on the plots.
df <- df %>%
  mutate(
    ratio90_10 = `Decile 10 Income` / `Decile 1 Income`,
    ratio90_50 = `Decile 10 Income` / `Decile 5 Income`,
    ratio50_10 = `Decile 5 Income` / `Decile 1 Income`
  )

# Same four post-Soviet countries as in the Gini chart
ratios_selected <- filter(
  df,
  Country %in% c("Russian Federation", "Ukraine", "Turkmenistan", "Kazakhstan")
)

# Top-to-bottom ratio over time, one line per country
ggplot(ratios_selected, aes(x = Year, y = ratio90_10, color = Country)) +
  geom_line(linewidth = 1) +
  labs(
    y = "Interdecile P90/P10",
    title = "Interdecile P90/P10 for the post-Soviet countries"
  ) +
  theme_bw()

This graph almost duplicates the graph with Gini coefficients, since the dynamics are noticeably similar: for example, the peak of inequality in Kazakhstan appears both here and in the previous graph, as does the sharp decrease in inequality in Ukraine, the absolute minimum on the graph. This graph also shows a fairly uniform and stable situation before 1990 and a sharp increase in inequality afterwards, as a consequence of the collapse of the USSR.

# Line chart of one interdecile ratio over time, one line per country.
#   data:      table with Year, Country and the ratio column
#   ratio_col: name of the ratio column, given as a string
#   label:     text used for the y-axis label and as the start of the title
# Returns the ggplot object; at top level it is printed automatically.
plot_interdecile_ratio <- function(data, ratio_col, label) {
  # .data[[ratio_col]] looks the column up by its string name
  ggplot(data, aes(x = Year, y = .data[[ratio_col]], color = Country)) +
    geom_line(linewidth = 1) +
    theme_bw() +
    ylab(label) +
    ggtitle(paste(label, "for the post-Soviet countries"))
}

# Top decile relative to the middle
plot_interdecile_ratio(ratios_selected, "ratio90_50", "Interdecile P90/P50")

# Middle relative to the bottom decile
plot_interdecile_ratio(ratios_selected, "ratio50_10", "Interdecile P50/P10")

In general, we can trace a trend similar to that in the other graphs. As for the differences between these two graphs, the peaks are distributed differently: in the second graph the inequality that arose in 1996-1997 is more clearly visible, which may be because it affected the poorest strata most. However, on the same graph after 2000, inequality appears to change more “smoothly”, without sharp jumps; perhaps this is because later crises affected the poorest and the “middle” equally. In the first chart there are more kinks and sharp drops after 2000, that is, it is noticeable how the ratio between the “middle” and the richest changed: in some periods the situation improved and in others it worsened, but overall it developed in leaps and bounds.