The Piketty Plots

This Notebook plots relevant graphs using data within the Wealth Inequality Database, Thomas Piketty’s book: Capital in the 21st Century (2014), and the QJE journal article: Capital Is Back [1].

[1] Piketty, T., and G. Zucman, “Capital is Back: Wealth-Income Ratios in Rich Countries, 1700-2010”, Quarterly Journal of Economics, 2014, vol.129, no.3, p.1155-1210

Set up

Loading required libraries

library(tidyverse)

Defining Helper Functions

get_wid_country_var <- function(cntry, varib) {
  # Read the data file for cntry
  data <- read.csv(file = paste0("WID_data_", cntry, ".csv"), sep = ";")
  
  # Filter for varible and create a column to indicate country
  data <- data %>%
    filter(variable == varib) %>% 
    mutate(country = cntry)
}

1.1 Capital/Income Ratio

WID variable for capital income ratio

Loading the WID Data

# Country abbreviations
countries <- c("DE", "FR", "GB")

# Call the helper function on each country abbrev
cir_data <- map_df(countries, get_wid_country_var, varib = "wwealpi999")

Calculating Decennial Average

# Filtering years and creating decade column
cir_data <- cir_data %>%
  filter(year >= 1870 & year <= 2019) %>%
  mutate(decade = 10 * (year %/% 10))

# Calculating decennial average data
decennial_cir_data <- cir_data %>%
  group_by(country, decade) %>%
  summarize(average_value = mean(value, na.rm = TRUE), .groups = 'drop')

Plotting

# Customizing shapes, colors, and fill for each country
shapes <- c('DE' = 17, 'GB' = 0, 'FR' = 16)  # triangle, square, circle

ggplot(decennial_cir_data, 
       aes(x = decade, 
           y = average_value, 
           color = country, 
           shape = country)) +
  geom_line() +
  geom_point(aes(fill = country), size = 2) +
  scale_shape_manual(values = shapes) +
  labs(
    title = "The Capital/Income Ratio in Europe 1870 - 2010",
    y = "Market value of private capital (% national income)",
    x = "Year"
  ) +
  scale_y_continuous(
    limits = c(1, 8), 
    breaks = seq(1, 8, by = 1), 
    labels = scales::percent_format(scale = 100)
  ) +
  scale_x_continuous(
    limits = c(1870, 2010),
    breaks = seq(1870, 2010, by = 20)
  ) +
  theme_minimal()

1.2 Capital shares

Variable for capital share of factor-price national income

Loading the WID data

countries <- c("DE", "FR", "GB", "JP", "US", "CA", "AU", "IT")

csh_data <- map_df(countries, get_wid_country_var, varib = "wcapshi999")

csh_data <- csh_data %>%
  filter(year >= 1975 & year <= 2020)

Plotting

ggplot(csh_data, aes(x = year, y = value, color = country)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Capital Share in National Income (1975 - 2020)",
    x = "Year",
    y = "Value (%)") +
  scale_y_continuous(
    limits = c(0.05, 0.40), 
    breaks = seq(0.05, 0.40, by = 0.05), 
    labels = scales::percent_format(scale = 100)
  ) +
  scale_x_continuous(
    limits = c(1975, 2010),
    breaks = seq(1975, 2010, by = 5)
  ) +
  theme_minimal() + 
  theme(
    legend.position = c(0.8, 0.2),
    legend.background = element_rect(fill = alpha('white', 0.75))
  ) +
  guides(
    colour = guide_legend(nrow = 3, title = NULL, label.position = "right")
  )

1.3 Return on private wealth

Loading the paper data

pwr_data <- readxl::read_excel(
  "data/CapitalIsBack/AppendixTables.xls", 
  sheet = "Table A145", 
  skip = 3
)
colnames(pwr_data)[1] <- "Year"

Tidying the data

tidy_pwr <- pwr_data %>% 
  pivot_longer(cols = -Year, names_to = "Country", values_to = "Return") %>% 
  filter(Year >= 1975)

Plotting

ggplot(tidy_pwr, aes(x = Year, y = Return, color = Country, group = Country)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Average return on private wealth 1975-2010",
    x = "Year",
    y = "Return (%)"
  ) +
  scale_y_continuous(
    limits = c(0, 0.12),
    breaks = seq(0, 0.12, by = 0.02),
    labels = scales::percent_format(scale = 100)
  ) +
  scale_x_continuous(
    limits = c(1975, 2010),
    breaks = seq(1975, 2010, by = 5)
  ) +
  theme_minimal() + 
  theme(
    legend.position = c(0.5, 0.15),
    legend.background = element_rect(fill = alpha('white', 0.75))
  ) +
  guides(
    colour = guide_legend(nrow = 1, title = NULL, label.position = "bottom")
  )

1.4 Top 1% and Top 10% in the U.S.

Loading the book data

topUS_data <- readxl::read_excel(
  "data/CapitalInThe21stCentury/Chapter8TablesFigures.xlsx",
  sheet = "TS8.2",
  skip = 4,
  n_max = 101
)
colnames(topUS_data)[1] <- "Year"

top1_data <- topUS_data %>% 
  select(
    "Year",
    "Top 1% income share",
    "Top 1% income share (excl. capital gains)",
    "Top 1% wage share"
  ) %>% 
  filter(Year >= 1910)

top10_data <- topUS_data %>% 
  select(
    "Year",
    "Top 1% income share",
    "Top 5%-1% income share",
    "Top 10%-5% income share"
  ) %>% 
  filter(Year >= 1910)

Tidying the data

# Tidy
tidy_top1 <- top1_data %>% 
  pivot_longer(cols = -Year, names_to = "Type", values_to = "Value")


tidy_top10 <- top10_data %>% 
  pivot_longer(cols = -Year, names_to = "Type", values_to = "Value")

Plotting

# Plot Top 1%
ggplot(tidy_top1, aes(x = Year, y = Value, color = Type, group = Type)) +
  geom_line() + 
  labs(
    title = "The transformation of the top 1% in the United States",
    x = "Year",
    y = "Share of top percentile in total (incomes or wages)"
  ) +
  scale_y_continuous(
    limits = c(0, 0.24),
    breaks = seq(0, 0.24, by = 0.02),
    labels = scales::percent_format(scale = 100)
  ) +
  scale_x_continuous(
    limits = c(1910, 2010),
    breaks = seq(1910, 2010, by = 10)
  ) +
  theme_minimal() + 
  theme(
    legend.position = c(0.55, 0.85),
    legend.background = element_rect(fill = alpha('white', 0.75))
  ) + 
  guides(colour = guide_legend(title = NULL))

# Plot Top 10%
ggplot(tidy_top10, aes(x = Year, y = Value, color = Type, group = Type)) +
  geom_line() + 
  labs(
    title = "Decomposition of the top decile, U.S. 1910-2010",
    x = "Year",
    y = "Share of the different groups in total income"
  ) +
  scale_y_continuous(
    limits = c(0, 0.25),
    breaks = seq(0, 0.25, by = 0.05),
    labels = scales::percent_format(scale = 100)
  ) +
  scale_x_continuous(
    limits = c(1910, 2010),
    breaks = seq(1910, 2010, by = 10)
  ) +
  theme_minimal() + 
  theme(
    legend.position = c(0.5, 0.2),
    legend.background = element_rect(fill = alpha('white', 0.75))
  ) + 
  guides(colour = guide_legend(title = NULL)) + 
  scale_color_discrete(
    labels = c("Top 1% (annual incomes above $352 000 in 2010)",
               "Top 5%-1% (annual incomes between $150 000 and $352 000 in 2010)",
               "Top 10%-5% (annual incomes between $108 000 and $150 000 in 2010)"
    )
  )

1.5 Wealth Income Ratio (Europe vs U.S.)

Loading the WID data

# Country abbreviations
countries <- c("DE", "FR", "GB", "IT", "US")

# Call the helper function on each country abbrev
all_cir_data <- map_df(countries, get_wid_country_var, varib = "wwealpi999")

Calculating Decennial Averages

# Filtering years and creating decade column
all_cir_data <- all_cir_data %>%
  filter(year >= 1870 & year <= 2019) %>%
  mutate(decade = 10 * (year %/% 10))

# Calculating decennial average data
decennial_all_cir_data <- all_cir_data %>%
  group_by(country, decade) %>%
  summarize(average = mean(value, na.rm = TRUE), .groups = 'drop')

# Filtering for European countries
eu_cir_data <- decennial_all_cir_data %>%
  filter(country %in% c("DE", "FR", "GB", "IT")) %>%
  group_by(decade) %>%
  summarize(average = mean(average, na.rm = TRUE), .groups = 'drop')

# Filtering for the US
us_cir_data <- decennial_all_cir_data %>%
  filter(country == "US")

# Combining the data frames for plotting
combined_cir_data <- bind_rows(
  eu_cir_data %>% mutate(region = "Europe"),
  us_cir_data %>% mutate(region = "US")
)

Plotting

ggplot(combined_cir_data, aes(x = decade, y = average, color = region, group = region)) +
  geom_line() + 
  geom_point() +
  labs(
    title = "Private wealth/national income ratios 1870 - 2010: Europe vs. USA",
    x = "",
    y = ""
  ) +
  scale_y_continuous(
    limits = c(1, 8),
    breaks = seq(1, 8, by = 1),
    labels = scales::percent_format(scale = 100)
  ) +
  scale_x_continuous(
    breaks = seq(min(combined_cir_data$decade), max(combined_cir_data$decade), by = 20)
  ) +
  theme_minimal() + 
  theme(
    legend.position = c(0.6, 0.75),
    legend.background = element_rect(fill = alpha('white', 0.75))
  ) + 
  guides(colour = guide_legend(title = NULL, nrow = 1))

1.6 Who Owns the Wealth in Tax Havens

Loading the paper [2] data

[2] Alstadsæter, A., Johannesen, N. and Zucman, G., 2018. Who owns the wealth in tax havens? Macro evidence and implications for global inequality. Journal of Public Economics, 162, pp.89-100.

top001_data <- readxl::read_excel(
  "data/WhoOwnsTheWealthInTaxHavens/AJZ2017bData.xlsx",
  sheet = "DataF7",
  skip = 5,
  n_max = 2
)
colnames(top001_data)[1] <- "wealth"

Tidying up the data

# Tidy the data
tidy_top001 <- top001_data %>%
  pivot_longer(cols = -1, names_to = "country", values_to = "value") %>% 
  filter(country %in% c("Spain", "UK", "Scandinavia", "France", "USA", "Russia"))

# Change the stacking order (Offshore on top)
tidy_top001$wealth <- factor(tidy_top001$wealth, levels = c("Offshore", "Excl"))

Plotting

# Plotting
ggplot(tidy_top001, aes(x = reorder(country, value, FUN = sum), y = value, fill = wealth)) +
  geom_bar(stat = "identity") +
  labs(title = "Wealth of the top 0.01%",
       x = "",
       y = "% of total household wealth") +
  scale_y_continuous(
    limits = c(0, 0.13),
    breaks = seq(0, 0.12, by = 0.02),
    labels = scales::percent_format(scale = 100)
  ) +
  theme_minimal() +
  theme(
    legend.position = c(0.2, 0.7),
    legend.background = element_rect(fill = alpha('white', 0.75))
  ) + 
  guides(fill = guide_legend(title = NULL)) + 
  scale_fill_discrete(
    labels = c("Offshore wealth", "All wealth excluding offshore")
  )

1.7 Hidden Wealth

Loading the paper [3] data

[3] Alstadsæter, A., Johannesen, N. and Zucman, G., 2019. Tax evasion and inequality. American Economic Review, 109(6), pp.2073-2103.

hsbc_data <- readxl::read_excel(
  "data/TaxEvasionAndInequality/AJZ2017AppendixE.xlsx",
  sheet = "T.E6",
  skip = 4,
  n_max = 16
)

hsbc_data <- hsbc_data %>% 
  na.omit() %>% 
  mutate(`Fraction of wealth hidden` = as.numeric(`Fraction of wealth hidden`))

Plotting

# Plotting
ggplot(hsbc_data, aes(x = `Wealth group`, y = `Fraction of wealth hidden`, group = 1)) +
  geom_line() +
  geom_point() +
  labs(title = "Average wealth hidden at HSBC, by wealth group (percent of total wealth
(including held at HSBC))",
       x = "Wealth Group",
       y = "") +
  scale_y_continuous(
    limits = c(0, 50),
    breaks = seq(0, 55, by = 10),
    labels = scales::percent_format(scale = 1)
  ) +
  theme_minimal()

1.8 Rate of Return and Growth

Loading the book data

return_growth_data <- readxl::read_excel(
  "data/CapitalInThe21stCentury/Chapter10TablesFigures.xlsx",
  sheet = "TS10.3",
  range = "F6:I14"
)
colnames(return_growth_data)[1] <- "year"

Plotting

# Plotting
ggplot(return_growth_data, aes(x = year)) +
  geom_line(aes(y = `r (after taxes)`, color = "After Taxes", group = 1)) +
  geom_line(aes(y = g, color = "Growth", group = 1)) +
  geom_point(aes(y = `r (after taxes)`, color = "After Taxes")) +
  geom_point(aes(y = g, color = "Growth")) +
  labs(title = "After tax rate of return vs. growth rate at the world level, from Antiquity until 2200",
       x = "",
       y = "Annual rate of return or rate of growth") +
  scale_y_continuous(
    limits = c(0, 0.06),
    breaks = seq(0, 0.06, by = 0.010),
    labels = scales::percent_format(scale = 100)
  ) +
  theme_minimal()

1.9 Inheritance as % national income

Loading and merging the book data

de_gb_inherit_data <- readxl::read_excel(
  "data/CapitalInThe21stCentury/Chapter11TablesFigures.xlsx",
  sheet = "TS11.3",
  skip = 5
) %>%
  select(1:3) %>%
  rename(
    Year = 1,
    Britain = 2,
    Germany = 3
  )

fr_inherit_data <- readxl::read_excel(
  "data/CapitalInThe21stCentury/Chapter11TablesFigures.xlsx",
  sheet = "TS11.1",
  skip = 5
) %>%
  select(1:2) %>% 
  rename(
    Year = 1,
    France = 2
  ) %>% 
  mutate_all(as.numeric)

merged_inherit_data <- full_join(fr_inherit_data, de_gb_inherit_data, by = "Year") %>%
  filter(Year >= 1900 & Year <= 2010)

Tidying the data

tidy_inherit <- merged_inherit_data %>%
  pivot_longer(cols = -1, names_to = "Country", values_to = "Value")

Plotting

# Plotting (removing the NA Germany rows to "interpolate")
tidy_inherit %>% 
  filter(!is.na(Value)) %>% 
  ggplot(aes(x = Year, y = Value, color = Country, group = Country)) +
    geom_line() +
    geom_point() +
    labs(
      title = "Inheritance as % of national income in France, Germany and Britain",
      x = "",
      y = "Annual value of bequests and gifts (% national income)"
    ) +
    scale_y_continuous(
      limits = c(0, 0.25),
      breaks = seq(0, 0.24, by = 0.04),
      labels = scales::percent_format(scale = 100)
    ) +
    scale_x_continuous(
      limits = c(1900, 2010),
      breaks = seq(1900, 2010, by = 10)
    ) +
    theme_minimal() + 
    theme(
      legend.position = c(0.6, 0.6),
      legend.background = element_rect(fill = alpha('white', 0.75))
    ) +
    guides(
      colour = guide_legend(nrow = 1, title = NULL, label.position = "bottom")
    )