The Piketty Plots

This Notebook plots relevant graphs using data within the Wealth Inequality Database, Thomas Piketty’s book: Capital in the 21st Century (2014), and the QJE journal article: Capital Is Back [1].

[1] Piketty, T., and G. Zucman, “Capital is Back: Wealth-Income Ratios in Rich Countries, 1700-2010”, Quarterly Journal of Economics, 2014, vol.129, no.3, p.1155-1210

Set up

Loading required libraries

library(tidyverse)

Defining Helper Functions

get_wid_country_var <- function(cntry, varib) {
  # Read the data file for cntry
  data <- read.csv(file = paste0("WID_data_", cntry, ".csv"), sep = ";")
  
  # Filter for varible and create a column to indicate country
  data <- data %>%
    filter(variable == varib) %>% 
    mutate(country = cntry)
}

1.1 Capital/Income Ratio

$WID variable for capital income ratio$

Loading the WID Data

# Country abbreviations
countries <- c("DE", "FR", "GB")

# Call the helper function on each country abbrev
cir_data <- map_df(countries, get_wid_country_var, varib = "wwealpi999")

Calculating Decennial Average

# Filtering years and creating decade column
cir_data <- cir_data %>%
  filter(year >= 1870 & year <= 2019) %>%
  mutate(decade = 10 * (year %/% 10))

# Calculating decennial average data
decennial_cir_data <- cir_data %>%
  group_by(country, decade) %>%
  summarize(average_value = mean(value, na.rm = TRUE), .groups = 'drop')

Plotting

# Customizing shapes, colors, and fill for each country
shapes <- c('DE' = 17, 'GB' = 0, 'FR' = 16)  # triangle, square, circle

ggplot(decennial_cir_data, 
       aes(x = decade, 
           y = average_value, 
           color = country, 
           shape = country)) +
  geom_line() +
  geom_point(aes(fill = country), size = 2) +
  scale_shape_manual(values = shapes) +
  labs(
    title = "The Capital/Income Ratio in Europe 1870 - 2010",
    y = "Market value of private capital (% national income)",
    x = "Year"
  ) +
  scale_y_continuous(
    limits = c(1, 8), 
    breaks = seq(1, 8, by = 1), 
    labels = scales::percent_format(scale = 100)
  ) +
  scale_x_continuous(
    limits = c(1870, 2010),
    breaks = seq(1870, 2010, by = 20)
  ) +
  theme_minimal()

1.2 Capital shares

Loading the WID data

countries <- c("DE", "FR", "GB", "JP", "US", "CA", "AU", "IT")

csh_data <- map_df(countries, get_wid_country_var, varib = "wcapshi999")

csh_data <- csh_data %>%
  filter(year >= 1975 & year <= 2020)

Plotting

ggplot(csh_data, aes(x = year, y = value, color = country)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Capital Share in National Income (1975 - 2020)",
    x = "Year",
    y = "Value (%)") +
  scale_y_continuous(
    limits = c(0.05, 0.40), 
    breaks = seq(0.05, 0.40, by = 0.05), 
    labels = scales::percent_format(scale = 100)
  ) +
  scale_x_continuous(
    limits = c(1975, 2010),
    breaks = seq(1975, 2010, by = 5)
  ) +
  theme_minimal() + 
  theme(
    legend.position = c(0.8, 0.2),
    legend.background = element_rect(fill = alpha('white', 0.75))
  ) +
  guides(
    colour = guide_legend(nrow = 3, title = NULL, label.position = "right")
  )

1.3 Return on private wealth

Loading the paper data

pwr_data <- readxl::read_excel(
  "data/CapitalIsBack/AppendixTables.xls", 
  sheet = "Table A145", 
  skip = 3
)
colnames(pwr_data)[1] <- "Year"

Tidying the data

tidy_pwr <- pwr_data %>% 
  pivot_longer(cols = -Year, names_to = "Country", values_to = "Return") %>% 
  filter(Year >= 1975)

Plotting

ggplot(tidy_pwr, aes(x = Year, y = Return, color = Country, group = Country)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Average return on private wealth 1975-2010",
    x = "Year",
    y = "Return (%)"
  ) +
  scale_y_continuous(
    limits = c(0, 0.12),
    breaks = seq(0, 0.12, by = 0.02),
    labels = scales::percent_format(scale = 100)
  ) +
  scale_x_continuous(
    limits = c(1975, 2010),
    breaks = seq(1975, 2010, by = 5)
  ) +
  theme_minimal() + 
  theme(
    legend.position = c(0.5, 0.15),
    legend.background = element_rect(fill = alpha('white', 0.75))
  ) +
  guides(
    colour = guide_legend(nrow = 1, title = NULL, label.position = "bottom")
  )

1.4 Top 1% and Top 10% in the U.S.

Loading the book data

topUS_data <- readxl::read_excel(
  "data/CapitalInThe21stCentury/Chapter8TablesFigures.xlsx",
  sheet = "TS8.2",
  skip = 4,
  n_max = 101
)
colnames(topUS_data)[1] <- "Year"

top1_data <- topUS_data %>% 
  select(
    "Year",
    "Top 1% income share",
    "Top 1% income share (excl. capital gains)",
    "Top 1% wage share"
  ) %>% 
  filter(Year >= 1910)

top10_data <- topUS_data %>% 
  select(
    "Year",
    "Top 1% income share",
    "Top 5%-1% income share",
    "Top 10%-5% income share"
  ) %>% 
  filter(Year >= 1910)

Tidying the data

# Tidy
tidy_top1 <- top1_data %>% 
  pivot_longer(cols = -Year, names_to = "Type", values_to = "Value")


tidy_top10 <- top10_data %>% 
  pivot_longer(cols = -Year, names_to = "Type", values_to = "Value")

Plotting

# Plot Top 1%
ggplot(tidy_top1, aes(x = Year, y = Value, color = Type, group = Type)) +
  geom_line() + 
  labs(
    title = "The transformation of the top 1% in the United States",
    x = "Year",
    y = "Share of top percentile in total (incomes or wages)"
  ) +
  scale_y_continuous(
    limits = c(0, 0.24),
    breaks = seq(0, 0.24, by = 0.02),
    labels = scales::percent_format(scale = 100)
  ) +
  scale_x_continuous(
    limits = c(1910, 2010),
    breaks = seq(1910, 2010, by = 10)
  ) +
  theme_minimal() + 
  theme(
    legend.position = c(0.55, 0.85),
    legend.background = element_rect(fill = alpha('white', 0.75))
  ) + 
  guides(colour = guide_legend(title = NULL))

# Plot Top 10%
ggplot(tidy_top10, aes(x = Year, y = Value, color = Type, group = Type)) +
  geom_line() + 
  labs(
    title = "Decomposition of the top decile, U.S. 1910-2010",
    x = "Year",
    y = "Share of the different groups in total income"
  ) +
  scale_y_continuous(
    limits = c(0, 0.25),
    breaks = seq(0, 0.25, by = 0.05),
    labels = scales::percent_format(scale = 100)
  ) +
  scale_x_continuous(
    limits = c(1910, 2010),
    breaks = seq(1910, 2010, by = 10)
  ) +
  theme_minimal() + 
  theme(
    legend.position = c(0.5, 0.2),
    legend.background = element_rect(fill = alpha('white', 0.75))
  ) + 
  guides(colour = guide_legend(title = NULL)) + 
  scale_color_discrete(
    labels = c("Top 1% (annual incomes above $352 000 in 2010)",
               "Top 5%-1% (annual incomes between $150 000 and $352 000 in 2010)",
               "Top 10%-5% (annual incomes between $108 000 and $150 000 in 2010)"
    )
  )

1.5 Wealth Income Ratio (Europe vs U.S.)

Loading the WID data

# Country abbreviations
countries <- c("DE", "FR", "GB", "IT", "US")

# Call the helper function on each country abbrev
all_cir_data <- map_df(countries, get_wid_country_var, varib = "wwealpi999")

Calculating Decennial Averages

# Filtering years and creating decade column
all_cir_data <- all_cir_data %>%
  filter(year >= 1870 & year <= 2019) %>%
  mutate(decade = 10 * (year %/% 10))

# Calculating decennial average data
decennial_all_cir_data <- all_cir_data %>%
  group_by(country, decade) %>%
  summarize(average = mean(value, na.rm = TRUE), .groups = 'drop')

# Filtering for European countries
eu_cir_data <- decennial_all_cir_data %>%
  filter(country %in% c("DE", "FR", "GB", "IT")) %>%
  group_by(decade) %>%
  summarize(average = mean(average, na.rm = TRUE), .groups = 'drop')

# Filtering for the US
us_cir_data <- decennial_all_cir_data %>%
  filter(country == "US")

# Combining the data frames for plotting
combined_cir_data <- bind_rows(
  eu_cir_data %>% mutate(region = "Europe"),
  us_cir_data %>% mutate(region = "US")
)

Plotting

ggplot(combined_cir_data, aes(x = decade, y = average, color = region, group = region)) +
  geom_line() + 
  geom_point() +
  labs(
    title = "Private wealth/national income ratios 1870 - 2010: Europe vs. USA",
    x = "",
    y = ""
  ) +
  scale_y_continuous(
    limits = c(1, 8),
    breaks = seq(1, 8, by = 1),
    labels = scales::percent_format(scale = 100)
  ) +
  scale_x_continuous(
    breaks = seq(min(combined_cir_data$decade), max(combined_cir_data$decade), by = 20)
  ) +
  theme_minimal() + 
  theme(
    legend.position = c(0.6, 0.75),
    legend.background = element_rect(fill = alpha('white', 0.75))
  ) + 
  guides(colour = guide_legend(title = NULL, nrow = 1))

1.6 Who Owns the Wealth in Tax Havens

Loading the paper [2] data

[2] Alstadsæter, A., Johannesen, N. and Zucman, G., 2018. Who owns the wealth in tax havens? Macro evidence and implications for global inequality. Journal of Public Economics, 162, pp.89-100.

top001_data <- readxl::read_excel(
  "data/WhoOwnsTheWealthInTaxHavens/AJZ2017bData.xlsx",
  sheet = "DataF7",
  skip = 5,
  n_max = 2
)
colnames(top001_data)[1] <- "wealth"

Tidying up the data

# Tidy the data
tidy_top001 <- top001_data %>%
  pivot_longer(cols = -1, names_to = "country", values_to = "value") %>% 
  filter(country %in% c("Spain", "UK", "Scandinavia", "France", "USA", "Russia"))

# Change the stacking order (Offshore on top)
tidy_top001$wealth <- factor(tidy_top001$wealth, levels = c("Offshore", "Excl"))

Plotting

# Plotting
ggplot(tidy_top001, aes(x = reorder(country, value, FUN = sum), y = value, fill = wealth)) +
  geom_bar(stat = "identity") +
  labs(title = "Wealth of the top 0.01%",
       x = "",
       y = "% of total household wealth") +
  scale_y_continuous(
    limits = c(0, 0.13),
    breaks = seq(0, 0.12, by = 0.02),
    labels = scales::percent_format(scale = 100)
  ) +
  theme_minimal() +
  theme(
    legend.position = c(0.2, 0.7),
    legend.background = element_rect(fill = alpha('white', 0.75))
  ) + 
  guides(fill = guide_legend(title = NULL)) + 
  scale_fill_discrete(
    labels = c("Offshore wealth", "All wealth excluding offshore")
  )

1.7 Hidden Wealth

Loading the paper [3] data

[3] Alstadsæter, A., Johannesen, N. and Zucman, G., 2019. Tax evasion and inequality. American Economic Review, 109(6), pp.2073-2103.

hsbc_data <- readxl::read_excel(
  "data/TaxEvasionAndInequality/AJZ2017AppendixE.xlsx",
  sheet = "T.E6",
  skip = 4,
  n_max = 16
)

hsbc_data <- hsbc_data %>% 
  na.omit() %>% 
  mutate(`Fraction of wealth hidden` = as.numeric(`Fraction of wealth hidden`))

Plotting

# Plotting
ggplot(hsbc_data, aes(x = `Wealth group`, y = `Fraction of wealth hidden`, group = 1)) +
  geom_line() +
  geom_point() +
  labs(title = "Average wealth hidden at HSBC, by wealth group (percent of total wealth
(including held at HSBC))",
       x = "Wealth Group",
       y = "") +
  scale_y_continuous(
    limits = c(0, 50),
    breaks = seq(0, 55, by = 10),
    labels = scales::percent_format(scale = 1)
  ) +
  theme_minimal()

1.8 Rate of Return and Growth

Loading the book data

return_growth_data <- readxl::read_excel(
  "data/CapitalInThe21stCentury/Chapter10TablesFigures.xlsx",
  sheet = "TS10.3",
  range = "F6:I14"
)
colnames(return_growth_data)[1] <- "year"

Plotting

# Plotting
ggplot(return_growth_data, aes(x = year)) +
  geom_line(aes(y = `r (after taxes)`, color = "After Taxes", group = 1)) +
  geom_line(aes(y = g, color = "Growth", group = 1)) +
  geom_point(aes(y = `r (after taxes)`, color = "After Taxes")) +
  geom_point(aes(y = g, color = "Growth")) +
  labs(title = "After tax rate of return vs. growth rate at the world level, from Antiquity until 2200",
       x = "",
       y = "Annual rate of return or rate of growth") +
  scale_y_continuous(
    limits = c(0, 0.06),
    breaks = seq(0, 0.06, by = 0.010),
    labels = scales::percent_format(scale = 100)
  ) +
  theme_minimal()

1.9 Inheritance as % national income

Loading and merging the book data

de_gb_inherit_data <- readxl::read_excel(
  "data/CapitalInThe21stCentury/Chapter11TablesFigures.xlsx",
  sheet = "TS11.3",
  skip = 5
) %>%
  select(1:3) %>%
  rename(
    Year = 1,
    Britain = 2,
    Germany = 3
  )

fr_inherit_data <- readxl::read_excel(
  "data/CapitalInThe21stCentury/Chapter11TablesFigures.xlsx",
  sheet = "TS11.1",
  skip = 5
) %>%
  select(1:2) %>% 
  rename(
    Year = 1,
    France = 2
  ) %>% 
  mutate_all(as.numeric)

merged_inherit_data <- full_join(fr_inherit_data, de_gb_inherit_data, by = "Year") %>%
  filter(Year >= 1900 & Year <= 2010)

Tidying the data

tidy_inherit <- merged_inherit_data %>%
  pivot_longer(cols = -1, names_to = "Country", values_to = "Value")

Plotting

# Plotting (removing the NA Germany rows to "interpolate")
tidy_inherit %>% 
  filter(!is.na(Value)) %>% 
  ggplot(aes(x = Year, y = Value, color = Country, group = Country)) +
    geom_line() +
    geom_point() +
    labs(
      title = "Inheritance as % of national income in France, Germany and Britain",
      x = "",
      y = "Annual value of bequests and gifts (% national income)"
    ) +
    scale_y_continuous(
      limits = c(0, 0.25),
      breaks = seq(0, 0.24, by = 0.04),
      labels = scales::percent_format(scale = 100)
    ) +
    scale_x_continuous(
      limits = c(1900, 2010),
      breaks = seq(1900, 2010, by = 10)
    ) +
    theme_minimal() + 
    theme(
      legend.position = c(0.6, 0.6),
      legend.background = element_rect(fill = alpha('white', 0.75))
    ) +
    guides(
      colour = guide_legend(nrow = 1, title = NULL, label.position = "bottom")
    )

R Notebook

The Piketty Plots

Set up

Loading required libraries

Defining Helper Functions

1.1 Capital/Income Ratio

Loading the WID Data

Calculating Decennial Average

Plotting

1.2 Capital shares

Loading the WID data

Plotting

1.3 Return on private wealth

Loading the paper data

Tidying the data

Plotting

1.4 Top 1% and Top 10% in the U.S.

Loading the book data

Tidying the data

Plotting

1.5 Wealth Income Ratio (Europe vs U.S.)

Loading the WID data

Calculating Decennial Averages

Plotting

1.6 Who Owns the Wealth in Tax Havens

Loading the paper [2] data

Tidying up the data

Plotting

1.7 Hidden Wealth

Loading the paper [3] data

Plotting

1.8 Rate of Return and Growth

Loading the book data

Plotting

1.9 Inheritance as % national income

Loading and merging the book data

Tidying the data

Plotting