We will need the `reldist` and `IC2` libraries to get them…
# install.packages('reldist', dependencies = T)
# install.packages('IC2', dependencies = T)
library(reldist)
library(IC2)
library(ggplot2)
library(dplyr)
…we will abandon the exponential notation (e.g. e+10) for prettier graphs…
# A large `scipen` penalty makes R prefer fixed notation over scientific
# notation when printing numbers (including plot axis labels).
options(scipen = 999)
…and we will need some data.
# Fix the RNG seed so the simulated incomes are reproducible.
set.seed(42)

# Eight city labels, "A" through "H".
city <- LETTERS[1:8]

# 160 incomes drawn uniformly, with replacement, from 1..100000.
income <- sample(seq_len(100000), size = 160, replace = TRUE)

# `city` has 8 elements and `income` has 160, so data.frame() recycles the
# labels: every city ends up with 20 income observations.
cities <- data.frame(city = city, income = income)
(Hint: always run the code together with `set.seed()` to get exactly the same data.
When you run it on your machine, you may get different numbers!)
# Draw one Lorenz curve per city in a 2 x 4 grid of base-graphics panels.
# par() returns the previous settings, which are restored after the loop so
# that later base plots are not forced into the same grid.
old_par <- par(mfrow = c(2, 4))
for (i in LETTERS[1:8]) {
  # Select rows through the data frame's own `city` column. The free-standing
  # `city` vector has only 8 elements and would match the 160 rows solely
  # through silent recycling of the logical index.
  curveLorenz(cities[cities$city == i, "income"], col = "red")
  title(paste("City", i))
}
par(old_par)
# One Gini coefficient of income per city. aggregate() looks up the function
# named "gini" among the attached packages (presumably reldist's gini()).
# The aggregated column is renamed from "income" to "gini" in the same step.
ginicities <- setNames(
  aggregate(income ~ city, data = cities, FUN = "gini"),
  c("city", "gini")
)

# Render the cities as a left-aligned table, highest Gini coefficient first.
knitr::kable(arrange(ginicities, desc(gini)), align = "l")
| city | gini |
|---|---|
| H | 0.4549123 |
| C | 0.3505498 |
| B | 0.3184892 |
| G | 0.3037768 |
| E | 0.3022083 |
| D | 0.2759564 |
| A | 0.2710777 |
| F | 0.1637490 |
# Print row 26 of the data frame (all columns).
cities[26,]
## city income
## 26 B 24609
# Overwrite the income in row 26, addressing the column by name rather than
# by position so the edit survives any change in column order.
cities[26, "income"] <- 120000

# Scale every income in city D by 1.5. Rows are selected through the data
# frame's own `city` column; the free-standing `city` vector has only 8
# elements and would match the rows solely through silent recycling.
in_city_d <- cities$city == "D"
cities$income[in_city_d] <- cities$income[in_city_d] * 1.5
| city | gini |
|---|---|
| H | 0.4549123 |
| C | 0.3505498 |
| B | 0.3331599 |
| G | 0.3037768 |
| E | 0.3022083 |
| D | 0.2759564 |
| A | 0.2710777 |
| F | 0.1637490 |
# Income distribution per city: a 20-bin histogram on the density scale with
# a kernel density curve on top, one facet per city arranged in two columns.
# after_stat(density) replaces the deprecated `..density..` notation, which
# triggers a deprecation warning in ggplot2 3.4.0 and later.
ggplot(cities, aes(income)) +
  geom_histogram(aes(y = after_stat(density)), bins = 20) +
  geom_density() +
  facet_wrap(~ city, ncol = 2)