This Notebook plots relevant graphs using data within the Wealth Inequality Database, Thomas Piketty’s book: Capital in the 21st Century (2014), and the QJE journal article: Capital Is Back [1].
[1] Piketty, T., and G. Zucman, “Capital is Back: Wealth-Income Ratios in Rich Countries, 1700-2010”, Quarterly Journal of Economics, 2014, vol.129, no.3, p.1155-1210
library(tidyverse)
get_wid_country_var <- function(cntry, varib) {
# Read the data file for cntry
data <- read.csv(file = paste0("WID_data_", cntry, ".csv"), sep = ";")
# Filter for varible and create a column to indicate country
data <- data %>%
filter(variable == varib) %>%
mutate(country = cntry)
}
# Country abbreviations
countries <- c("DE", "FR", "GB")
# Call the helper function on each country abbrev
cir_data <- map_df(countries, get_wid_country_var, varib = "wwealpi999")
# Filtering years and creating decade column
cir_data <- cir_data %>%
filter(year >= 1870 & year <= 2019) %>%
mutate(decade = 10 * (year %/% 10))
# Calculating decennial average data
decennial_cir_data <- cir_data %>%
group_by(country, decade) %>%
summarize(average_value = mean(value, na.rm = TRUE), .groups = 'drop')
# Customizing shapes, colors, and fill for each country
shapes <- c('DE' = 17, 'GB' = 0, 'FR' = 16) # triangle, square, circle
ggplot(decennial_cir_data,
aes(x = decade,
y = average_value,
color = country,
shape = country)) +
geom_line() +
geom_point(aes(fill = country), size = 2) +
scale_shape_manual(values = shapes) +
labs(
title = "The Capital/Income Ratio in Europe 1870 - 2010",
y = "Market value of private capital (% national income)",
x = "Year"
) +
scale_y_continuous(
limits = c(1, 8),
breaks = seq(1, 8, by = 1),
labels = scales::percent_format(scale = 100)
) +
scale_x_continuous(
limits = c(1870, 2010),
breaks = seq(1870, 2010, by = 20)
) +
theme_minimal()
pwr_data <- readxl::read_excel(
"data/CapitalIsBack/AppendixTables.xls",
sheet = "Table A145",
skip = 3
)
colnames(pwr_data)[1] <- "Year"
tidy_pwr <- pwr_data %>%
pivot_longer(cols = -Year, names_to = "Country", values_to = "Return") %>%
filter(Year >= 1975)
ggplot(tidy_pwr, aes(x = Year, y = Return, color = Country, group = Country)) +
geom_line() +
geom_point() +
labs(
title = "Average return on private wealth 1975-2010",
x = "Year",
y = "Return (%)"
) +
scale_y_continuous(
limits = c(0, 0.12),
breaks = seq(0, 0.12, by = 0.02),
labels = scales::percent_format(scale = 100)
) +
scale_x_continuous(
limits = c(1975, 2010),
breaks = seq(1975, 2010, by = 5)
) +
theme_minimal() +
theme(
legend.position = c(0.5, 0.15),
legend.background = element_rect(fill = alpha('white', 0.75))
) +
guides(
colour = guide_legend(nrow = 1, title = NULL, label.position = "bottom")
)
topUS_data <- readxl::read_excel(
"data/CapitalInThe21stCentury/Chapter8TablesFigures.xlsx",
sheet = "TS8.2",
skip = 4,
n_max = 101
)
colnames(topUS_data)[1] <- "Year"
top1_data <- topUS_data %>%
select(
"Year",
"Top 1% income share",
"Top 1% income share (excl. capital gains)",
"Top 1% wage share"
) %>%
filter(Year >= 1910)
top10_data <- topUS_data %>%
select(
"Year",
"Top 1% income share",
"Top 5%-1% income share",
"Top 10%-5% income share"
) %>%
filter(Year >= 1910)
# Tidy
tidy_top1 <- top1_data %>%
pivot_longer(cols = -Year, names_to = "Type", values_to = "Value")
tidy_top10 <- top10_data %>%
pivot_longer(cols = -Year, names_to = "Type", values_to = "Value")
# Plot Top 1%
ggplot(tidy_top1, aes(x = Year, y = Value, color = Type, group = Type)) +
geom_line() +
labs(
title = "The transformation of the top 1% in the United States",
x = "Year",
y = "Share of top percentile in total (incomes or wages)"
) +
scale_y_continuous(
limits = c(0, 0.24),
breaks = seq(0, 0.24, by = 0.02),
labels = scales::percent_format(scale = 100)
) +
scale_x_continuous(
limits = c(1910, 2010),
breaks = seq(1910, 2010, by = 10)
) +
theme_minimal() +
theme(
legend.position = c(0.55, 0.85),
legend.background = element_rect(fill = alpha('white', 0.75))
) +
guides(colour = guide_legend(title = NULL))
# Plot Top 10%
ggplot(tidy_top10, aes(x = Year, y = Value, color = Type, group = Type)) +
geom_line() +
labs(
title = "Decomposition of the top decile, U.S. 1910-2010",
x = "Year",
y = "Share of the different groups in total income"
) +
scale_y_continuous(
limits = c(0, 0.25),
breaks = seq(0, 0.25, by = 0.05),
labels = scales::percent_format(scale = 100)
) +
scale_x_continuous(
limits = c(1910, 2010),
breaks = seq(1910, 2010, by = 10)
) +
theme_minimal() +
theme(
legend.position = c(0.5, 0.2),
legend.background = element_rect(fill = alpha('white', 0.75))
) +
guides(colour = guide_legend(title = NULL)) +
scale_color_discrete(
labels = c("Top 1% (annual incomes above $352 000 in 2010)",
"Top 5%-1% (annual incomes between $150 000 and $352 000 in 2010)",
"Top 10%-5% (annual incomes between $108 000 and $150 000 in 2010)"
)
)
# Country abbreviations
countries <- c("DE", "FR", "GB", "IT", "US")
# Call the helper function on each country abbrev
all_cir_data <- map_df(countries, get_wid_country_var, varib = "wwealpi999")
# Filtering years and creating decade column
all_cir_data <- all_cir_data %>%
filter(year >= 1870 & year <= 2019) %>%
mutate(decade = 10 * (year %/% 10))
# Calculating decennial average data
decennial_all_cir_data <- all_cir_data %>%
group_by(country, decade) %>%
summarize(average = mean(value, na.rm = TRUE), .groups = 'drop')
# Filtering for European countries
eu_cir_data <- decennial_all_cir_data %>%
filter(country %in% c("DE", "FR", "GB", "IT")) %>%
group_by(decade) %>%
summarize(average = mean(average, na.rm = TRUE), .groups = 'drop')
# Filtering for the US
us_cir_data <- decennial_all_cir_data %>%
filter(country == "US")
# Combining the data frames for plotting
combined_cir_data <- bind_rows(
eu_cir_data %>% mutate(region = "Europe"),
us_cir_data %>% mutate(region = "US")
)
ggplot(combined_cir_data, aes(x = decade, y = average, color = region, group = region)) +
geom_line() +
geom_point() +
labs(
title = "Private wealth/national income ratios 1870 - 2010: Europe vs. USA",
x = "",
y = ""
) +
scale_y_continuous(
limits = c(1, 8),
breaks = seq(1, 8, by = 1),
labels = scales::percent_format(scale = 100)
) +
scale_x_continuous(
breaks = seq(min(combined_cir_data$decade), max(combined_cir_data$decade), by = 20)
) +
theme_minimal() +
theme(
legend.position = c(0.6, 0.75),
legend.background = element_rect(fill = alpha('white', 0.75))
) +
guides(colour = guide_legend(title = NULL, nrow = 1))
[2] Alstadsæter, A., Johannesen, N. and Zucman, G., 2018. Who owns the wealth in tax havens? Macro evidence and implications for global inequality. Journal of Public Economics, 162, pp.89-100.
top001_data <- readxl::read_excel(
"data/WhoOwnsTheWealthInTaxHavens/AJZ2017bData.xlsx",
sheet = "DataF7",
skip = 5,
n_max = 2
)
colnames(top001_data)[1] <- "wealth"
# Tidy the data
tidy_top001 <- top001_data %>%
pivot_longer(cols = -1, names_to = "country", values_to = "value") %>%
filter(country %in% c("Spain", "UK", "Scandinavia", "France", "USA", "Russia"))
# Change the stacking order (Offshore on top)
tidy_top001$wealth <- factor(tidy_top001$wealth, levels = c("Offshore", "Excl"))
# Plotting
ggplot(tidy_top001, aes(x = reorder(country, value, FUN = sum), y = value, fill = wealth)) +
geom_bar(stat = "identity") +
labs(title = "Wealth of the top 0.01%",
x = "",
y = "% of total household wealth") +
scale_y_continuous(
limits = c(0, 0.13),
breaks = seq(0, 0.12, by = 0.02),
labels = scales::percent_format(scale = 100)
) +
theme_minimal() +
theme(
legend.position = c(0.2, 0.7),
legend.background = element_rect(fill = alpha('white', 0.75))
) +
guides(fill = guide_legend(title = NULL)) +
scale_fill_discrete(
labels = c("Offshore wealth", "All wealth excluding offshore")
)
return_growth_data <- readxl::read_excel(
"data/CapitalInThe21stCentury/Chapter10TablesFigures.xlsx",
sheet = "TS10.3",
range = "F6:I14"
)
colnames(return_growth_data)[1] <- "year"
# Plotting
ggplot(return_growth_data, aes(x = year)) +
geom_line(aes(y = `r (after taxes)`, color = "After Taxes", group = 1)) +
geom_line(aes(y = g, color = "Growth", group = 1)) +
geom_point(aes(y = `r (after taxes)`, color = "After Taxes")) +
geom_point(aes(y = g, color = "Growth")) +
labs(title = "After tax rate of return vs. growth rate at the world level, from Antiquity until 2200",
x = "",
y = "Annual rate of return or rate of growth") +
scale_y_continuous(
limits = c(0, 0.06),
breaks = seq(0, 0.06, by = 0.010),
labels = scales::percent_format(scale = 100)
) +
theme_minimal()
de_gb_inherit_data <- readxl::read_excel(
"data/CapitalInThe21stCentury/Chapter11TablesFigures.xlsx",
sheet = "TS11.3",
skip = 5
) %>%
select(1:3) %>%
rename(
Year = 1,
Britain = 2,
Germany = 3
)
fr_inherit_data <- readxl::read_excel(
"data/CapitalInThe21stCentury/Chapter11TablesFigures.xlsx",
sheet = "TS11.1",
skip = 5
) %>%
select(1:2) %>%
rename(
Year = 1,
France = 2
) %>%
mutate_all(as.numeric)
merged_inherit_data <- full_join(fr_inherit_data, de_gb_inherit_data, by = "Year") %>%
filter(Year >= 1900 & Year <= 2010)
tidy_inherit <- merged_inherit_data %>%
pivot_longer(cols = -1, names_to = "Country", values_to = "Value")
# Plotting (removing the NA Germany rows to "interpolate")
tidy_inherit %>%
filter(!is.na(Value)) %>%
ggplot(aes(x = Year, y = Value, color = Country, group = Country)) +
geom_line() +
geom_point() +
labs(
title = "Inheritance as % of national income in France, Germany and Britain",
x = "",
y = "Annual value of bequests and gifts (% national income)"
) +
scale_y_continuous(
limits = c(0, 0.25),
breaks = seq(0, 0.24, by = 0.04),
labels = scales::percent_format(scale = 100)
) +
scale_x_continuous(
limits = c(1900, 2010),
breaks = seq(1900, 2010, by = 10)
) +
theme_minimal() +
theme(
legend.position = c(0.6, 0.6),
legend.background = element_rect(fill = alpha('white', 0.75))
) +
guides(
colour = guide_legend(nrow = 1, title = NULL, label.position = "bottom")
)