Representativeness
function income_distribution
This function returns a table with the share of the total sample that
falls into each income category.
#' Tabulate the share of a sample that falls into each income bracket.
#'
#' Brackets are half-open: below 800, [800, 1400), [1400, 2000) and 2000 or
#' more, so every non-missing income lands in exactly one bracket. Rows with
#' a missing income count towards `N` but fall into no bracket.
#'
#' @param df A data frame with a numeric column `income`.
#' @return A data frame with one character column `value` and the rows
#'   `below_800`, `from_800_to_1399`, `from_1400_to_1999`, `2000_and_above`
#'   (shares formatted as percentages with two decimals) and `N` (number of
#'   rows in `df`).
income_distribution <- function(df) {
  if (!"income" %in% names(df)) {
    stop("`df` must contain an `income` column.", call. = FALSE)
  }
  income <- df[["income"]]
  total <- nrow(df)

  # Percentage of all rows for which `in_bracket` is TRUE (NA counts as FALSE).
  share <- function(in_bracket) sum(in_bracket, na.rm = TRUE) / total * 100

  shares <- c(
    share(income < 800),
    share(income >= 800 & income < 1400),
    share(income >= 1400 & income < 2000),
    share(income >= 2000)
  )

  data.frame(
    value = c(sprintf("%.2f%%", shares), as.character(total)),
    row.names = c(
      "below_800", "from_800_to_1399", "from_1400_to_1999",
      "2000_and_above", "N"
    )
  )
}
NL 1935 vs. surveys
# Adjust NL598 incomes, per band of A, using the share of households taxed
# (to account for household income from wife/children): every income in a
# band is shifted by 800 minus the band's (1 - taxed share) income quantile.
# Households with A >= 3000 keep their income unchanged.
df.NL598_temp <- df.NL598 %>%
  mutate(
    income = case_when(
      A < 1400 ~
        income + 800 - quantile(filter(df.NL598, A < 1400)$income, probs = 1 - (88 / 167)),
      A >= 1400 & A < 1800 ~
        income + 800 - quantile(filter(df.NL598, A >= 1400 & A < 1800)$income, probs = 1 - (91 / 109)),
      A >= 1800 & A < 2300 ~
        income + 800 - quantile(filter(df.NL598, A >= 1800 & A < 2300)$income, probs = 1 - (89 / 103)),
      A >= 2300 & A < 3000 ~
        income + 800 - quantile(filter(df.NL598, A >= 2300 & A < 3000)$income, probs = 1 - (51 / 60)),
      A >= 3000 ~ income
    )
  )
# One column per source: the national 1935 distribution followed by the
# bracket shares of each survey sample.
cbind(
  # National distribution: first CSV row, collapsed into the four brackets.
  read.csv("data/NL_income_distribution_1935.csv", dec = ".") %>%
    slice(1) %>%
    mutate(
      X800.1400 = X800.AC0.1400,
      X.1400.2000 = X1400.AC0.2000,
      X.2000 = X2000.AC0.5000 + X5000.AC0.10000 + X10000.AC0.20000 + X.AD4.20000,
      # The share of workers above the tax threshold of 800 is 1464/3394 in 1935.
      X.800 = (X800.1400 + X.1400.2000 + X.2000) / (1464 / 3394) * (1 - (1464 / 3394)),
      Total = (Total / 1000000) / (1464 / 3394)
    ) %>%
    # Convert the four bracket counts into shares of their sum.
    mutate(across(c(X.800, X800.1400, X.1400.2000, X.2000), ~ .x / (X.800 + X800.1400 + X.1400.2000 + X.2000))) %>%
    select(X.800, X800.1400, X.1400.2000, X.2000, Total) %>%
    t() %>%
    as.data.frame() %>%
    # First four rows are shares (formatted as %), the last is the total in millions.
    mutate(V1 = ifelse(row_number() <= 4, percent(V1, accuracy = 0.01), round(V1, 2))),
  # Work classes 1 and 3 are pooled as blue collar.
  income_distribution(filter(df.NL598_temp, work_class %in% c(1, 3))),
  income_distribution(filter(df.NL598_temp, work_class == 2)),
  # income_distribution(df.NL598_temp %>% filter(work_class == 3)),
  income_distribution(
    mutate(df.NL700, income = support + public_employment + income.HH.head + income.own.company)
  ),
  income_distribution(
    mutate(df.A1934.unemployed, income = Man + social_security + Invaliditeits..of.weezenrente..Pensioen.e.d.)
  )
) %>%
  setNames(c("NL (mln.)", "NL: blue", "NL: white", "NL: un.", "A: un.")) %>%
  `rownames<-`(c("<800", "800-1400", "1400-2000", ">2000", "N"))
## NL (mln.) NL: blue NL: white NL: un. A: un.
## <800 56.87% 33.44% 4.46% 86.71% 53.85%
## 800-1400 19.55% 65.94% 51.98% 13.14% 46.15%
## 1400-2000 11.96% 0.00% 1.98% 0.14% 0.00%
## >2000 11.63% 0.62% 41.58% 0.00% 0.00%
## N 3.14 323 202 700 78
rm(df.NL598_temp)
Income distribution NL vs. Amsterdam surveys
These figures show the income distribution of NL vs. Amsterdam for two
time periods (NL 1920 vs. A 1923, and NL 1934-1935 vs. A 1934). They
show that the employed Amsterdam surveys sit at the top of the income
distribution when compared to NL. They also show that both Amsterdam
surveys occupy a similar position in the income distribution over time,
which makes the two groups more comparable.
# Amsterdam 1923 vs. NL 1920.
# Source of the distribution: De socialistische gids, maandschrift der
# sociaaldemocratische arbeiderspartij, Maart 1923
cbind(
  # National shares: taxpayers per bracket divided by the hard-coded total.
  read.csv("data/income_distribution1920-21.csv") %>%
    mutate(share.NL = aantal.aangeslagenen / 1368293) %>%
    subset(inkomens != "totaal") %>%
    select(inkomens, share.NL),
  # Amsterdam shares per bracket, relative to all rows of df.A1923.
  # Brackets are half-open, [lower, upper), so an income that sits exactly on
  # a boundary (e.g. 1000) is counted once instead of being dropped.
  data.frame(
    share.A = local({
      income <- df.A1923$income
      lower <- c(800, 1000, 2000, 5000, 10000, 50000, 100000)
      upper <- c(lower[-1], Inf)
      vapply(
        seq_along(lower),
        function(i) sum(income >= lower[i] & income < upper[i], na.rm = TRUE) / length(income),
        numeric(1)
      )
    })
  )
) %>%
  mutate(share.NL = scales::percent(share.NL), share.A = scales::percent(share.A))
## inkomens share.NL share.A
## 1 800-1000 9.094% 0.47%
## 2 1000-2000 59.780% 18.87%
## 3 2000-5000 24.336% 70.75%
## 4 5000-10000 4.348% 7.55%
## 5 10000-50000 2.159% 1.42%
## 6 50000-100000 0.180% 0.00%
## 7 >100000 0.104% 0.00%
# Amsterdam 1934 (employed) vs. NL 1934-35.
# Source of the distribution: CBS, Statistiek der inkomens en vermogens in
# Nederland 1934/1935
read.csv("data/income_distribution1934-35.csv", dec = ",") %>%
  mutate(share.NL = aantal.aangeslagenen / 1445019) %>%
  subset(inkomens != "totaal") %>%
  mutate(
    # The survey columns are stored as percentages; convert to fractions.
    arbeiders = arbeiders / 100,
    ambtenaren = ambtenaren / 100,
    # Combined Amsterdam share, weighting the two groups 75:109
    # (presumably their sample sizes -- TODO confirm).
    total = arbeiders * 75 / 184 + ambtenaren * 109 / 184
  ) %>%
  rename("blue collar" = arbeiders, "white collar" = ambtenaren, share.A = total) %>%
  select(inkomens, share.NL, share.A, "blue collar", "white collar") %>%
  # Format every numeric column as a percentage string. The former second
  # across(where(is.numeric), round) step was removed: once the columns are
  # strings it selects nothing.
  mutate(across(where(is.numeric), percent))
## inkomens share.NL share.A blue collar white collar
## 1 800-1400 44.9% 7.89% 15.0% 3.00%
## 2 1400-2000 27.3% 16.48% 27.2% 9.10%
## 3 2000-3000 15.0% 39.37% 29.0% 46.52%
## 4 3000-5000 7.9% 12.74% 3.8% 18.88%
## 5 5000-10000 3.5% 11.35% 0.0% 19.17%
## 6 >10000 1.4% 7.31% 0.0% 12.33%
# Within the Netherlands, the Amsterdam surveys fall in the top 30% of the income distribution.
Eindhoven (Verwey-Jonker)
# Import the Verwey-Jonker dataset, dropping its "totaal" row.
df.verwey <- read.csv("data/verwey_jonker.csv") %>% filter(threshold != "totaal")
# Collapse the income thresholds into the four buckets used for the NL income
# distribution, then add the household counts of NL598 and NL700.
df.eindhoven <- df.verwey %>%
  mutate(
    # New bucket label for every original threshold; anything not listed
    # explicitly ends up in ">2000".
    bucket = case_when(
      threshold %in% c("<200", "200-400", "400-600", "600-800") ~ "<800",
      threshold %in% c("800-1000", "1000-1200", "1200-1400") ~ "800-1400",
      threshold %in% c("1400-1600", "1600-1800", "1800-2000") ~ "1400-2000",
      TRUE ~ ">2000"
    )
  ) %>%
  group_by(bucket) %>%
  summarise(
    "VJ (HH)" = sum(HH),
    "VJ (singles)" = sum(singles),
    .groups = "drop"
  ) %>%
  rename(threshold = bucket) %>%
  # Counts from df.NL598 (rows with location "Eindhoven"), bucketed with
  # left-closed intervals: [800, 1400), [1400, 2000), ...
  left_join(
    df.NL598 %>%
      subset(location == "Eindhoven") %>%
      mutate(
        threshold = cut(
          income,
          breaks = c(-Inf, 800, 1400, 2000, Inf),
          labels = c("<800", "800-1400", "1400-2000", ">2000"),
          right = FALSE
        )
      ) %>%
      group_by(threshold) %>%
      summarise(NL598 = n(), .groups = "drop") %>%
      mutate(threshold = as.character(threshold)),
    by = "threshold"
  ) %>%
  # Counts from df.NL700, restricted to HHid 254-302 (the Eindhoven
  # households according to the original comment). The key is converted to
  # character, like the NL598 side, so both joins match on the same type.
  left_join(
    df.NL700 %>%
      subset(HHid > 253 & HHid < 303) %>%
      mutate(
        threshold = cut(
          income,
          breaks = c(-Inf, 800, 1400, 2000, Inf),
          labels = c("<800", "800-1400", "1400-2000", ">2000"),
          right = FALSE
        )
      ) %>%
      group_by(threshold) %>%
      summarise(NL700 = n(), .groups = "drop") %>%
      mutate(threshold = as.character(threshold)),
    by = "threshold"
  )
# Final Eindhoven table: per source the percentage of households in each
# income bucket, plus a row "N" with the column totals.
rbind(
  # Row "N": column totals of the raw counts (missing counts are ignored).
  df.eindhoven %>%
    summarise(across(where(is.numeric), ~ sum(.x, na.rm = TRUE))) %>%
    mutate(threshold = "N") %>%
    select(threshold, everything()),
  # Bucket rows: each count as a percentage of its own column total, with
  # missing counts treated as 0, formatted with two decimals and a "%" sign.
  # The total is computed per column inside the lambda, so this no longer
  # depends on a helper column whose length had to equal the number of rows.
  df.eindhoven %>%
    mutate(across(
      where(is.numeric),
      ~ sprintf("%.2f%%", round(replace_na(.x, 0) / sum(.x, na.rm = TRUE) * 100, 2))
    ))
) %>%
  mutate(threshold = factor(threshold, levels = c("<800", "800-1400", "1400-2000", ">2000", "N"))) %>% # impose order
  arrange(threshold) %>% # sort by factor
  # , This is NL as a whole. We drop it because these are individuals, other datasets are households.
  #  read.csv("data/NL_income_distribution_1935.csv", dec = ".") %>% slice(1) %>%
  #  mutate(X800.1400 = X800.AC0.1400,
  #  X.1400.2000 = X1400.AC0.2000,
  #  X.2000 = X2000.AC0.5000 + X5000.AC0.10000 + X10000.AC0.20000 + X.AD4.20000,
  #  X.800 = (X800.1400 + X.1400.2000 + X.2000)/(1464/3394)*(1-(1464/3394)), # The share of workers above the tax threshold of 800 is 1464/3394 in 1935.
  #  Total = (Total / 1000000)/(1464/3394)) %>%
  #  mutate(across(c(X.800, X800.1400, X.1400.2000, X.2000), ~ .x / (X.800 + X800.1400+ X.1400.2000 + X.2000))) %>%
  #  select(X.800, X800.1400, X.1400.2000 , X.2000, Total) %>% t() %>% as.data.frame %>%
  #  mutate(V1 = ifelse(row_number() <=4, percent(V1, accuracy = 0.01),round(V1,2))) %>% rename("NL (mln.)" = V1)
  #  select("threshold","NL (mln.)", everything())
  as_tibble() %>%
  rename("Household income" = threshold)
## # A tibble: 5 × 5
## `Household income` `VJ (HH)` `VJ (singles)` NL598 NL700
## <fct> <chr> <chr> <chr> <chr>
## 1 <800 10.27% 49.17% 0.00% 20.41%
## 2 800-1400 27.30% 35.68% 16.95% 75.51%
## 3 1400-2000 25.78% 9.44% 37.29% 4.08%
## 4 >2000 36.65% 5.72% 45.76% 0.00%
## 5 N 22990 9600 59 49
rm(df.verwey)
Income distribution
Table income composition
# Income composition table: for every sample, the number of households, the
# mean total income and the mean share of each income component.
bind_rows(
  # ----- df.NL598 -------------------------------------------------
  df.NL598 %>%
    mutate(
      work_class = recode(
        work_class,
        `1` = "NL 1936 (blue collar)",
        `2` = "NL 1936 (white collar)",
        `3` = "NL 1936 (blue collar)", # code 3 is pooled with blue collar
        `4` = "Farmers" # filtered out below
      ),
      wage = A - A2 - A3 - A4 - A5,
      support = A2,
      public_employment = 0,
      production = A4 + A5,
      other = 0
    ) %>%
    select(wage, support, public_employment, production, gifts, other, work_class),
  # ----- df.NL700 -------------------------------------------------
  df.NL700 %>%
    mutate(
      wage = income.HH.head + other.HH.income,
      support = support + fuel_subsidy + food_distr,
      production = income.own.company,
      other = 0,
      work_class = "NL 1937 (unemployed)"
    ) %>%
    select(wage, support, public_employment, production, gifts, other, work_class),
  # ----- df.A1934.unemployed --------------------------------------
  df.A1934.unemployed %>%
    mutate(
      wage = HHincome,
      support = social_security,
      public_employment = 0,
      production = 0,
      other = financial + other_income,
      work_class = "A'dam 1934 (unemployed)"
    ) %>%
    select(wage, support, public_employment, production, gifts, other, work_class),
  # ----- Utrecht 1936 (unemployed) --------------------------------
  # Only weekly averages are available; they are annualised (x 52) and the
  # single resulting row is repeated 77 times so that N is reported as 77.
  # local() keeps the helper objects out of the global environment.
  local({
    weekly <- c(
      wage = 3.75,
      support = 13.72,
      public_employment = 0,
      production = 0, # not provided, assumed to be 0
      gifts = 0.32,
      other = 0.14
    )
    utrecht_one <- data.frame(
      as.list(weekly * 52),
      work_class = "Utrecht 1936 (unemployed)",
      stringsAsFactors = FALSE
    )
    utrecht_one[rep(1, 77), ]
  }),
  # ----- df.DH1932 ------------------------------------------------
  df.DH1932 %>%
    mutate(
      wage = HHincome,
      support = social_security,
      public_employment = 0,
      production = 0, # not mentioned in the source, assumed to be 0
      other = financial + other + stocks,
      work_class = "DH 1932 (unemployed)"
    ) %>%
    select(wage, support, public_employment, production, gifts, other, work_class)
) %>%
  filter(work_class != "Farmers") %>%
  # Total income per household, then every component as a share of it.
  mutate(
    income = wage + support + public_employment + production + gifts + other,
    across(
      c(wage, support, public_employment, production, gifts, other),
      ~ .x / income
    )
  ) %>%
  group_by(work_class) %>%
  summarise(
    n = n(),
    Avg_income = mean(income, na.rm = TRUE),
    avg_wage = mean(wage, na.rm = TRUE),
    avg_support = mean(support, na.rm = TRUE),
    avg_public_emp = mean(public_employment, na.rm = TRUE),
    avg_production = mean(production, na.rm = TRUE),
    avg_gifts = mean(gifts, na.rm = TRUE),
    avg_other = mean(other, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Presentation columns: rounded counts/income and shares formatted as "x.x%".
  mutate(
    N = round(n, 0),
    Income = round(Avg_income, 0),
    `Wage (%)` = sprintf("%.1f%%", avg_wage * 100),
    `Support (%)` = sprintf("%.1f%%", avg_support * 100),
    `Public employment (%)` = sprintf("%.1f%%", avg_public_emp * 100),
    `Production (%)` = sprintf("%.1f%%", avg_production * 100),
    `Gifts (%)` = sprintf("%.1f%%", avg_gifts * 100),
    `Other (%)` = sprintf("%.1f%%", avg_other * 100)
  ) %>%
  select(
    work_class, N, Income,
    `Wage (%)`, `Support (%)`, `Public employment (%)`,
    `Production (%)`, `Gifts (%)`, `Other (%)`
  ) %>%
  # Row order: unemployed samples chronologically, then the NL 1936 employed
  # samples. The factor replaces work_class in place, so the table no longer
  # carries a duplicate `Work_class` column.
  mutate(
    work_class = factor(
      work_class,
      levels = c(
        "DH 1932 (unemployed)",
        "A'dam 1934 (unemployed)",
        "Utrecht 1936 (unemployed)",
        "NL 1937 (unemployed)",
        "NL 1936 (white collar)",
        "NL 1936 (blue collar)"
      )
    )
  ) %>%
  arrange(work_class)
## # A tibble: 6 × 10
## work_class N Income `Wage (%)` `Support (%)` Public employment (%…¹
## <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 DH 1932 (unemplo… 90 1104 5.1% 86.7% 0.0%
## 2 A'dam 1934 (unem… 78 978 12.1% 82.0% 0.0%
## 3 Utrecht 1936 (un… 77 932 20.9% 76.5% 0.0%
## 4 NL 1937 (unemplo… 700 816 13.6% 62.3% 18.9%
## 5 NL 1936 (white c… 202 3477 99.0% 0.1% 0.0%
## 6 NL 1936 (blue co… 323 1455 96.6% 1.0% 0.0%
## # ℹ abbreviated name: ¹`Public employment (%)`
## # ℹ 4 more variables: `Production (%)` <chr>, `Gifts (%)` <chr>,
## # `Other (%)` <chr>, Work_class <fct>
Total household income
# Boxplot of total household income per sample.
# Codes on the x axis: 1 = blue collar (NL598 classes 1 and 3), 2 = white
# collar, 3 = NL700 unemployed, 4 = Amsterdam 1934 unemployed. NL598 class 4
# is dropped before codes 3 and 4 are reused for the unemployed samples.
rbind(
  df.NL598 %>%
    select(income, work_class) %>%
    mutate(work_class = ifelse(work_class == 3, 1, work_class)) %>%
    subset(work_class != 4),
  df.NL700 %>% mutate(work_class = 3) %>% select(work_class, income),
  df.A1934.unemployed %>% mutate(work_class = 4) %>% select(work_class, income)
) %>%
  # The former `weight = weight` argument to ggplot() sat outside aes() and
  # was silently ignored, so it has been removed; the plot is unweighted.
  ggplot(aes(y = income, x = factor(work_class))) +
  geom_boxplot() +
  ggtitle("Total household income") +
  xlab("") +
  ylab("Guilders") +
  ylim(0, 5000) +
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)) +
  scale_x_discrete(
    breaks = c(1, 2, 3, 4),
    labels = c("Blue collar", "White collar", "Unemployed NL", "Unemployed A.")
  ) +
  labs(caption = str_wrap(
    "For the national sample of employed total income includes wages, support, own production, gifts and income from renters, both in kind and financial flows. For the national sample of unemployed income includes support, income from public employment, fuel subsidies, wage income of the household head, own company and other family members and gifts. For the Amsterdam sample of unemployed income includes wage income, social security, financial income household income from other family members and gifts. Only incomes below 5000 guilders are shown.",
    width = 120
  ))

Total income household head
# Boxplot of the (imputed) income of the household head per sample.
# I assume implicit rent is taxed via income taxation.
df.NL598_temp <- df.NL598 %>% mutate(income = A - A1 - A2)
rbind(
  # NL598: adjust, per band of A, based on the share of households taxed to
  # account for household income from wife/children. Farmers (work_class 4)
  # are left unadjusted because no information on their taxed share is
  # available; they are dropped again before plotting.
  df.NL598_temp %>%
    mutate(
      income = case_when(
        A < 1400 & work_class != 4 ~
          income + 800 - quantile((df.NL598_temp %>% subset(A < 1400))$income, probs = 1 - (88 / 167)),
        A >= 1400 & A < 1800 & work_class != 4 ~
          income + 800 - quantile((df.NL598_temp %>% subset(A >= 1400 & A < 1800))$income, probs = 1 - (91 / 109)),
        A >= 1800 & A < 2300 & work_class != 4 ~
          income + 800 - quantile((df.NL598_temp %>% subset(A >= 1800 & A < 2300))$income, probs = 1 - (89 / 103)),
        A >= 2300 & A < 3000 & work_class != 4 ~
          income + 800 - quantile((df.NL598_temp %>% subset(A >= 2300 & A < 3000))$income, probs = 1 - (51 / 60)),
        A >= 3000 & work_class != 4 ~ income,
        work_class == 4 ~ income
      )
    ) %>%
    select(work_class, income) %>%
    mutate(work_class = ifelse(work_class == 3, 1, work_class)) %>%
    subset(work_class != 4),
  df.NL700 %>%
    mutate(income = public_employment + income.HH.head + income.own.company, work_class = 5) %>%
    select(work_class, income),
  df.A1934.unemployed %>%
    mutate(income = Man, work_class = 6) %>%
    select(work_class, income)
) %>%
  # `weight = weight` used to be passed to ggplot() outside aes(), where it
  # is ignored; it has been removed together with a duplicated ylim() call.
  ggplot(aes(y = income, x = factor(work_class))) +
  geom_boxplot() +
  ggtitle("Income household head") +
  xlab("") +
  ylab("Guilders") +
  ylim(0, 5000) +
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)) +
  scale_x_discrete(
    breaks = c(1, 2, 3, 4, 5, 6),
    labels = c("Blue collar", "White collar", "Agricultural workers", "Farmers", "Unemployed NL", "Unemployed A.")
  ) +
  labs(caption = str_wrap(
    "Household head income for the national survey of employed households is imputed using the share of household income for different income groups above 800 guilders, the income tax threshold. This allows for an estimation of income sources of other household members. For farmers total income is shown as no distinction between the household head and other family members is possible. For the national unemployed survey income of the household head includes income from wages of the household head and public employment and for the Amsterdam survey income from wages of the household head. Only incomes below 5000 guilders are shown",
    width = 115
  ))

rm(df.NL598_temp)
Total income wife/children
# Boxplot of the (imputed) income of the other household members per sample.
df.NL598_temp <- df.NL598 %>% mutate(income = A - A1 - A2)
rbind(
  # NL598: the income of wife/children is the negative of the per-band
  # adjustment applied to the household head, i.e. one constant per band of
  # A. It is set to 0 for A >= 3000 and for farmers (work_class 4).
  df.NL598_temp %>%
    mutate(
      income = case_when(
        A < 1400 & work_class != 4 ~
          -(800 - quantile((df.NL598_temp %>% subset(A < 1400))$income, probs = 1 - (88 / 167))),
        A >= 1400 & A < 1800 & work_class != 4 ~
          -(800 - quantile((df.NL598_temp %>% subset(A >= 1400 & A < 1800))$income, probs = 1 - (91 / 109))),
        A >= 1800 & A < 2300 & work_class != 4 ~
          -(800 - quantile((df.NL598_temp %>% subset(A >= 1800 & A < 2300))$income, probs = 1 - (89 / 103))),
        A >= 2300 & A < 3000 & work_class != 4 ~
          -(800 - quantile((df.NL598_temp %>% subset(A >= 2300 & A < 3000))$income, probs = 1 - (51 / 60))),
        A >= 3000 & work_class != 4 ~ 0,
        work_class == 4 ~ 0
      )
    ) %>%
    select(work_class, income) %>%
    mutate(work_class = ifelse(work_class == 3, 1, work_class)) %>%
    subset(work_class != 4),
  df.NL700 %>%
    mutate(income = other.HH.income, work_class = 5) %>%
    select(work_class, income),
  df.A1934.unemployed %>%
    mutate(income = Vrouw + Kinderen, work_class = 6) %>%
    select(work_class, income)
) %>%
  # `weight = weight` used to be passed to ggplot() outside aes(), where it
  # is ignored; it has been removed.
  ggplot(aes(y = income, x = factor(work_class))) +
  geom_boxplot() +
  ggtitle("Total household income other family members") +
  xlab("") +
  ylab("Guilders") +
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)) +
  ylim(0, 5000) +
  scale_x_discrete(
    breaks = c(1, 2, 3, 4, 5, 6),
    labels = c("Blue collar", "White collar", "Agricultural workers", "Farmers", "Unemployed NL", "Unemployed A.")
  ) +
  labs(caption = str_wrap(
    "Income of other family members from the national survey of employed households is imputed using the share of household income for different income groups above 800 guilders, the income tax threshold. Farmer's income for other household members is set to 0 as no information was available to impute their respective income share. For the national unemployed survey and the Amsterdam unemployed survey wage income of women and children are reported separately. Only incomes below 5000 guilders are shown",
    width = 115
  ))

rm(df.NL598_temp)
Other income sources
# Boxplot of the remaining income sources per sample: A1 + A2 for NL598,
# fuel subsidy + food distribution + support for NL700, and gifts + social
# security + financial income for the Amsterdam 1934 unemployed.
rbind(
  df.NL598 %>%
    mutate(income = A1 + A2) %>%
    select(work_class, income) %>%
    mutate(work_class = ifelse(work_class == 3, 1, work_class)) %>%
    subset(work_class != 4),
  df.NL700 %>%
    mutate(income = fuel_subsidy + food_distr + support, work_class = 5) %>%
    select(work_class, income),
  df.A1934.unemployed %>%
    mutate(income = gifts + social_security + financial, work_class = 6) %>%
    select(work_class, income)
) %>%
  # `weight = weight` used to be passed to ggplot() outside aes(), where it
  # is ignored; it has been removed together with a duplicated ylim() call.
  ggplot(aes(y = income, x = factor(work_class))) +
  geom_boxplot() +
  ggtitle("Other income sources") +
  xlab("") +
  ylab("Guilders") +
  ylim(0, 5000) +
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)) +
  scale_x_discrete(
    breaks = c(1, 2, 3, 4, 5, 6),
    labels = c("Blue collar", "White collar", "Agricultural workers", "Farmers", "Unemployed NL", "Unemployed A.")
  ) +
  labs(caption = str_wrap(
    "For the national survey of employed households, other income sources include in kind income, including, wages, support, own production and gifts. For the national unemployed survey other income sources are fuel subsidies, food distribution and support. For the Amsterdam survey other income sources are gifts, social security and financial income. Only incomes below 5000 guilders are shown",
    width = 115
  ))

Share of contribution household head
Total income household head
# Boxplot of the household head's share of household income per sample.
# I assume implicit rent is taxed via income taxation.
df.NL598_temp <- df.NL598 %>% mutate(income = A - A1 - A2)
rbind(
  # NL598: the imputed income of the household head (same per-band
  # adjustment as in the level plot) divided by wage + support + production.
  # Farmers (work_class 4) get a share of 1 and are dropped before plotting.
  df.NL598_temp %>%
    mutate(
      income = case_when(
        A < 1400 & work_class != 4 ~
          (income + 800 - quantile((df.NL598_temp %>% subset(A < 1400))$income, probs = 1 - (88 / 167))) /
            (wage + support + production),
        A >= 1400 & A < 1800 & work_class != 4 ~
          (income + 800 - quantile((df.NL598_temp %>% subset(A >= 1400 & A < 1800))$income, probs = 1 - (91 / 109))) /
            (wage + support + production),
        A >= 1800 & A < 2300 & work_class != 4 ~
          (income + 800 - quantile((df.NL598_temp %>% subset(A >= 1800 & A < 2300))$income, probs = 1 - (89 / 103))) /
            (wage + support + production),
        A >= 2300 & A < 3000 & work_class != 4 ~
          (income + 800 - quantile((df.NL598_temp %>% subset(A >= 2300 & A < 3000))$income, probs = 1 - (51 / 60))) /
            (wage + support + production),
        A >= 3000 & work_class != 4 ~ income / (wage + support + production),
        work_class == 4 ~ income / income
      )
    ) %>%
    select(work_class, income) %>%
    mutate(work_class = ifelse(work_class == 3, 1, work_class)) %>%
    subset(work_class != 4),
  df.NL700 %>%
    mutate(income = (public_employment + income.HH.head + income.own.company) / income, work_class = 5) %>%
    select(work_class, income),
  df.A1934.unemployed %>%
    mutate(income = Man / income, work_class = 6) %>%
    select(work_class, income)
) %>%
  # Fixes relative to the earlier version: the ignored `weight = weight`
  # argument and the overridden ylim(0, 5000) are gone, and the title, y-axis
  # label and last caption sentence now describe shares instead of guilders.
  ggplot(aes(y = income, x = factor(work_class))) +
  geom_boxplot() +
  ggtitle("Share household income household head") +
  xlab("") +
  ylab("Share of household income") +
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)) +
  ylim(0, 1) +
  scale_x_discrete(
    breaks = c(1, 2, 3, 4, 5, 6),
    labels = c("Blue collar", "White collar", "Agricultural workers", "Farmers", "Unemployed NL", "Unemployed A.")
  ) +
  labs(caption = str_wrap(
    "Household head income for the national survey of employed households is imputed using the share of household income for different income groups above 800 guilders, the income tax threshold. This allows for an estimation of income sources of other household members. For farmers total income is shown as no distinction between the household head and other family members is possible. For the national unemployed survey income of the household head includes income from wages of the household head and public employment and for the Amsterdam survey income from wages of the household head. Only shares between 0 and 1 are shown",
    width = 115
  ))

rm(df.NL598_temp)
Share of contribution wife/children.
# Boxplot of the other household members' share of household income.
df.NL598_temp <- df.NL598 %>% mutate(income = A - A1 - A2)
rbind(
  # NL598: the per-band imputed income of wife/children divided by
  # wage + support + production; 0 for A >= 3000 and for farmers
  # (work_class 4), who are dropped before plotting.
  df.NL598_temp %>%
    mutate(
      income = case_when(
        A < 1400 & work_class != 4 ~
          -(800 - quantile((df.NL598_temp %>% subset(A < 1400))$income, probs = 1 - (88 / 167))) /
            (wage + support + production),
        A >= 1400 & A < 1800 & work_class != 4 ~
          -(800 - quantile((df.NL598_temp %>% subset(A >= 1400 & A < 1800))$income, probs = 1 - (91 / 109))) /
            (wage + support + production),
        A >= 1800 & A < 2300 & work_class != 4 ~
          -(800 - quantile((df.NL598_temp %>% subset(A >= 1800 & A < 2300))$income, probs = 1 - (89 / 103))) /
            (wage + support + production),
        A >= 2300 & A < 3000 & work_class != 4 ~
          -(800 - quantile((df.NL598_temp %>% subset(A >= 2300 & A < 3000))$income, probs = 1 - (51 / 60))) /
            (wage + support + production),
        A >= 3000 & work_class != 4 ~ 0,
        work_class == 4 ~ 0
      )
    ) %>%
    select(work_class, income) %>%
    mutate(work_class = ifelse(work_class == 3, 1, work_class)) %>%
    subset(work_class != 4),
  df.NL700 %>%
    mutate(income = other.HH.income / income, work_class = 5) %>%
    select(work_class, income),
  df.A1934.unemployed %>%
    mutate(income = (Vrouw + Kinderen) / income, work_class = 6) %>%
    select(work_class, income)
) %>%
  # Fixes relative to the earlier version: the ignored `weight = weight`
  # argument is gone, the y axis is labelled as a share, and the caption no
  # longer claims a 5000 guilder cut-off that this plot does not apply.
  ggplot(aes(y = income, x = factor(work_class))) +
  geom_boxplot() +
  ggtitle("Share household income other family members") +
  xlab("") +
  ylab("Share of household income") +
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)) +
  scale_x_discrete(
    breaks = c(1, 2, 3, 4, 5, 6),
    labels = c("Blue collar", "White collar", "Agricultural workers", "Farmers", "Unemployed NL", "Unemployed A.")
  ) +
  labs(caption = str_wrap(
    "Income of other family members from the national survey of employed households is imputed using the share of household income for different income groups above 800 guilders, the income tax threshold. Farmer's income for other household members is set to 0 as no information was available to impute their respective income share. For the national unemployed survey and the Amsterdam unemployed survey wage income of women and children are reported separately.",
    width = 115
  ))

rm(df.NL598_temp)
Income
For this analysis I use data from the 598, split between 4 groups
(blue collar, white collar, farm workers and farmers) and the 700
(long-term) unemployed. These are the samples for which we have adequate
information on the income composition.
wages, own production & public support
Here I look at wages, own production and public support.
# Boxplot of wages as a share of income. For NL598 income is redefined as
# wage + support + production; NL700 keeps its own income column and gets
# production = 0. Codes: 1 = blue collar (classes 1 and 3), 2 = white
# collar, 3 = NL700 unemployed (NL598 class 4 is dropped).
rbind(
  df.NL598 %>%
    mutate(income = wage + support + production) %>%
    select(wage, production, support, work_class, income) %>%
    mutate(work_class = ifelse(work_class == 3, 1, work_class)) %>%
    subset(work_class != 4),
  df.NL700 %>%
    mutate(production = 0, work_class = 3) %>%
    select(wage, support, production, work_class, income)
) %>%
  # `weight = weight` used to be passed to ggplot() outside aes(), where it
  # is ignored; it has been removed.
  ggplot(aes(y = wage / income, x = factor(work_class))) +
  geom_boxplot() +
  ggtitle("Wages") +
  xlab("") +
  ylab("Share of income") +
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)) +
  scale_x_discrete(
    breaks = c(1, 2, 3),
    labels = c("Blue collar", "White collar", "Unemployed")
  )

# Boxplot of own production as a share of income. For NL598 income is
# redefined as wage + support + production; NL700 keeps its own income
# column and gets production = 0. Codes: 1 = blue collar (classes 1 and 3),
# 2 = white collar, 3 = NL700 unemployed (NL598 class 4 is dropped).
rbind(
  df.NL598 %>%
    mutate(income = wage + support + production) %>%
    select(wage, production, support, work_class, income) %>%
    mutate(work_class = ifelse(work_class == 3, 1, work_class)) %>%
    subset(work_class != 4),
  df.NL700 %>%
    mutate(production = 0, work_class = 3) %>%
    select(wage, support, production, work_class, income)
) %>%
  # `weight = weight` used to be passed to ggplot() outside aes(), where it
  # is ignored; it has been removed.
  ggplot(aes(y = production / income, x = factor(work_class))) +
  geom_boxplot() +
  ggtitle("Own production") +
  xlab("") +
  ylab("Share of income") +
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)) +
  scale_x_discrete(
    breaks = c(1, 2, 3),
    labels = c("Blue collar", "White collar", "Unemployed")
  )

# Boxplot of public support as a share of income. For NL598 income is
# redefined as wage + support + production; NL700 keeps its own income
# column and gets production = 0. Codes: 1 = blue collar (classes 1 and 3),
# 2 = white collar, 3 = NL700 unemployed (NL598 class 4 is dropped).
rbind(
  df.NL598 %>%
    mutate(income = wage + support + production) %>%
    select(wage, production, support, work_class, income) %>%
    mutate(work_class = ifelse(work_class == 3, 1, work_class)) %>%
    subset(work_class != 4),
  df.NL700 %>%
    mutate(production = 0, work_class = 3) %>%
    select(wage, support, production, work_class, income)
) %>%
  # `weight = weight` used to be passed to ggplot() outside aes(), where it
  # is ignored; it has been removed.
  ggplot(aes(y = support / income, x = factor(work_class))) +
  geom_boxplot() +
  ggtitle("Support") +
  xlab("") +
  ylab("Share of income") +
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)) +
  scale_x_discrete(
    breaks = c(1, 2, 3),
    labels = c("Blue collar", "White collar", "Unemployed")
  )

Consumption
For this analysis we focus on the Amsterdam surveys because they are
more comparable through time. We assume that we are comparing more or
less similar groups through time.
Main table consumption
library(dplyr)
library(tidyr) # for uncount()
# Main consumption table: for every sample, the number of households, the mean
# total consumption and the mean expenditure share of food, shelter, clothing
# and other.
bind_rows(
# ----- df.NL598 -------------------------------------------------
df.NL598 %>%
select(food, shelter, clothing, other, work_class) %>%
mutate(
work_class = recode(
work_class,
`1` = "NL 1936 (blue collar)",
`2` = "NL 1936 (white collar)",
`3` = "NL 1936 (blue collar)", # code 3 is pooled with blue collar
`4` = "Farmers" # filtered out below
)
),
# ----- df.NL700 -------------------------------------------------
df.NL700 %>%
select(food, shelter, clothing, other) %>%
mutate(work_class = "NL 1937 (unemployed)"),
# ----- df.A1934.employed ----------------------------------------
# Each row is repeated `weight` times, so the means and n below are
# weighted by that column.
df.A1934.employed %>%
select(food, shelter, clothing, other, work, weight) %>%
uncount(weights = weight, .remove = FALSE) %>%
mutate(
work_class = case_when(
work == "arbeider" ~ "A'dam 1934 (blue collar)",
work == "ambtenaar" ~ "A'dam 1934 (white collar)",
TRUE ~ NA_character_
)
) %>%
select(-work, -weight),
# ----- df.A1934.unemployed --------------------------------------
df.A1934.unemployed %>%
select(food, shelter, clothing, other) %>%
mutate(work_class = "A'dam 1934 (unemployed)")
) %>% # end bind_rows
# Drops the farmers; rows whose work_class is NA (any other value of `work`
# in df.A1934.employed) are dropped here as well, because filter() only keeps
# rows where the condition is TRUE.
filter(work_class != "Farmers") %>%
# Total consumption per household, then every category as a share of it.
mutate(
consumption = food + shelter + clothing + other,
across(c(food, shelter, clothing, other), ~ .x / consumption)
) %>%
group_by(work_class) %>%
summarise(
n = n(),
Total_consumption = mean(consumption, na.rm = TRUE),
avg_food = mean(food, na.rm = TRUE),
avg_shelter = mean(shelter, na.rm = TRUE),
avg_clothing = mean(clothing, na.rm = TRUE),
avg_other = mean(other, na.rm = TRUE),
.groups = "drop"
) %>%
# Presentation columns: rounded totals and shares formatted as "x.x%".
mutate(
n = round(n, 0),
Total_consumption = round(Total_consumption, 0),
`share of food` = sprintf("%.1f%%", avg_food * 100),
`share of shelter` = sprintf("%.1f%%", avg_shelter * 100),
`share of clothing` = sprintf("%.1f%%", avg_clothing* 100),
`share of other` = sprintf("%.1f%%", avg_other * 100)
) %>%
select(-avg_food, -avg_shelter, -avg_clothing, -avg_other) %>%
# -------------------------------------------------------------------------
# Ordering: unemployed samples first (NL 1937, A'dam 1934), then the
# employed samples: NL 1936 (white, blue) and A'dam 1934 (white, blue)
# -------------------------------------------------------------------------
mutate(
work_class = factor(
work_class,
levels = c(
"NL 1937 (unemployed)",
"A'dam 1934 (unemployed)",
# NL employed
"NL 1936 (white collar)",
"NL 1936 (blue collar)",
# A'dam employed
"A'dam 1934 (white collar)",
"A'dam 1934 (blue collar)"
)
)
) %>%
arrange(work_class) # final ordered table
## # A tibble: 6 × 7
## work_class n Total_consumption `share of food` `share of shelter`
## <fct> <dbl> <dbl> <chr> <chr>
## 1 NL 1937 (unemploye… 700 892 49.9% 27.9%
## 2 A'dam 1934 (unempl… 78 1004 40.3% 42.2%
## 3 NL 1936 (white col… 202 3365 24.3% 23.0%
## 4 NL 1936 (blue coll… 323 1469 41.1% 24.6%
## 5 A'dam 1934 (white … 218 4537 20.8% 17.0%
## 6 A'dam 1934 (blue c… 150 1895 34.8% 21.8%
## # ℹ 2 more variables: `share of clothing` <chr>, `share of other` <chr>
Food share Amsterdam
# Food share of total consumption in Amsterdam, one boxplot panel per group.
# Rows of the 1934 employed survey carry a survey weight. Each row is repeated
# `weight` times with uncount() (as in the expenditure-share table) so that
# the boxplot quantiles are weighted. Passing `weight = weight` to ggplot()
# outside aes() has no effect, so the weights were previously ignored.
# NOTE(review): HISCLASS == 5 falls in neither the blue- nor the white-collar
# panel -- confirm this is intended.
ggarrange(
  # Blue collar: work == "arbeider" (1934) vs. HISCLASS > 5 (1923-24)
  rbind(
    df.A1934.employed %>%
      subset(work == "arbeider") %>%
      subset(income.group != "Totaal") %>%
      select(food, weight, consumption) %>%
      # .remove = FALSE keeps `weight` so both rbind() inputs share columns
      uncount(weights = weight, .remove = FALSE) %>%
      mutate(data = "A 1934"),
    df.A1923 %>%
      subset(HISCLASS > 5) %>%
      select(food, consumption) %>%
      mutate(data = "A 1923-24", weight = 1)
  ) %>%
    ggplot(aes(y = food / consumption, x = factor(data))) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("Blue collar") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),
  # White collar: work == "ambtenaar" (1934) vs. HISCLASS < 5 (1923-24)
  rbind(
    df.A1934.employed %>%
      subset(work == "ambtenaar") %>%
      subset(income.group != "Totaal") %>%
      select(food, weight, consumption) %>%
      uncount(weights = weight, .remove = FALSE) %>%
      mutate(data = "A 1934"),
    df.A1923 %>%
      subset(HISCLASS < 5) %>%
      select(food, consumption) %>%
      mutate(data = "A 1923-24", weight = 1)
  ) %>%
    ggplot(aes(y = food / consumption, x = factor(data))) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("White collar") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),
  # Unemployed: 1934 survey only, unweighted
  df.A1934.unemployed %>%
    select(food, consumption) %>%
    mutate(data = "A 1934") %>%
    ggplot(aes(y = food / consumption, x = factor(data))) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("Unemployed") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),
  ncol = 3
) %>%
  annotate_figure(
    top = text_grob(
      "Share of food expenditure in Amsterdam",
      face = "bold", size = 14
    )
  )

Housing share Amsterdam
# Housing (shelter) share of total consumption in Amsterdam, one boxplot
# panel per group. Rows of the 1934 employed survey carry a survey weight.
# Each row is repeated `weight` times with uncount() (as in the
# expenditure-share table) so that the boxplot quantiles are weighted.
# Passing `weight = weight` to ggplot() outside aes() has no effect, so the
# weights were previously ignored.
# NOTE(review): HISCLASS == 5 falls in neither the blue- nor the white-collar
# panel -- confirm this is intended.
ggarrange(
  # Blue collar: work == "arbeider" (1934) vs. HISCLASS > 5 (1923-24)
  rbind(
    df.A1934.employed %>%
      subset(work == "arbeider") %>%
      subset(income.group != "Totaal") %>%
      select(shelter, weight, consumption) %>%
      # .remove = FALSE keeps `weight` so both rbind() inputs share columns
      uncount(weights = weight, .remove = FALSE) %>%
      mutate(data = "A 1934"),
    df.A1923 %>%
      subset(HISCLASS > 5) %>%
      select(shelter, consumption) %>%
      mutate(data = "A 1923-24", weight = 1)
  ) %>%
    ggplot(aes(y = shelter / consumption, x = factor(data))) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("Blue collar") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),
  # White collar: work == "ambtenaar" (1934) vs. HISCLASS < 5 (1923-24)
  rbind(
    df.A1934.employed %>%
      subset(work == "ambtenaar") %>%
      subset(income.group != "Totaal") %>%
      select(shelter, weight, consumption) %>%
      uncount(weights = weight, .remove = FALSE) %>%
      mutate(data = "A 1934"),
    df.A1923 %>%
      subset(HISCLASS < 5) %>%
      select(shelter, consumption) %>%
      mutate(data = "A 1923-24", weight = 1)
  ) %>%
    ggplot(aes(y = shelter / consumption, x = factor(data))) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("White collar") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),
  # Unemployed: 1934 survey only, unweighted
  df.A1934.unemployed %>%
    select(shelter, consumption) %>%
    mutate(data = "A 1934") %>%
    ggplot(aes(y = shelter / consumption, x = factor(data))) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("Unemployed") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),
  ncol = 3
) %>%
  annotate_figure(
    top = text_grob(
      "Share of housing expenditure in Amsterdam",
      face = "bold", size = 14
    )
  )

Clothing share Amsterdam
# Clothing share of total consumption in Amsterdam, one boxplot panel per
# group. Rows of the 1934 employed survey carry a survey weight. Each row is
# repeated `weight` times with uncount() (as in the expenditure-share table)
# so that the boxplot quantiles are weighted. Passing `weight = weight` to
# ggplot() outside aes() has no effect, so the weights were previously
# ignored.
# NOTE(review): HISCLASS == 5 falls in neither the blue- nor the white-collar
# panel -- confirm this is intended.
ggarrange(
  # Blue collar: work == "arbeider" (1934) vs. HISCLASS > 5 (1923-24)
  rbind(
    df.A1934.employed %>%
      subset(work == "arbeider") %>%
      subset(income.group != "Totaal") %>%
      select(clothing, weight, consumption) %>%
      # .remove = FALSE keeps `weight` so both rbind() inputs share columns
      uncount(weights = weight, .remove = FALSE) %>%
      mutate(data = "A 1934"),
    df.A1923 %>%
      subset(HISCLASS > 5) %>%
      select(clothing, consumption) %>%
      mutate(data = "A 1923-24", weight = 1)
  ) %>%
    ggplot(aes(y = clothing / consumption, x = factor(data))) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("Blue collar") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),
  # White collar: work == "ambtenaar" (1934) vs. HISCLASS < 5 (1923-24)
  rbind(
    df.A1934.employed %>%
      subset(work == "ambtenaar") %>%
      subset(income.group != "Totaal") %>%
      select(clothing, weight, consumption) %>%
      uncount(weights = weight, .remove = FALSE) %>%
      mutate(data = "A 1934"),
    df.A1923 %>%
      subset(HISCLASS < 5) %>%
      select(clothing, consumption) %>%
      mutate(data = "A 1923-24", weight = 1)
  ) %>%
    ggplot(aes(y = clothing / consumption, x = factor(data))) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("White collar") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),
  # Unemployed: 1934 survey only, unweighted
  df.A1934.unemployed %>%
    select(clothing, consumption) %>%
    mutate(data = "A 1934") %>%
    ggplot(aes(y = clothing / consumption, x = factor(data))) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("Unemployed") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),
  ncol = 3
) %>%
  annotate_figure(
    top = text_grob(
      "Share of clothing expenditure in Amsterdam",
      face = "bold", size = 14
    )
  )

Primary expenditure share Amsterdam
# Primary expenditure (food + shelter + clothing) as a share of total
# consumption in Amsterdam, one boxplot panel per group. Rows of the 1934
# employed survey carry a survey weight. Each row is repeated `weight` times
# with uncount() (as in the expenditure-share table) so that the boxplot
# quantiles are weighted. Passing `weight = weight` to ggplot() outside aes()
# has no effect, so the weights were previously ignored.
# NOTE(review): HISCLASS == 5 falls in neither the blue- nor the white-collar
# panel -- confirm this is intended.
ggarrange(
  # Blue collar: work == "arbeider" (1934) vs. HISCLASS > 5 (1923-24)
  rbind(
    df.A1934.employed %>%
      subset(work == "arbeider") %>%
      subset(income.group != "Totaal") %>%
      mutate(primary = food + shelter + clothing) %>%
      select(primary, weight, consumption) %>%
      # .remove = FALSE keeps `weight` so both rbind() inputs share columns
      uncount(weights = weight, .remove = FALSE) %>%
      mutate(data = "A 1934"),
    df.A1923 %>%
      subset(HISCLASS > 5) %>%
      mutate(primary = food + shelter + clothing) %>%
      select(primary, consumption) %>%
      mutate(data = "A 1923-24", weight = 1)
  ) %>%
    ggplot(aes(y = primary / consumption, x = factor(data))) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("Blue collar") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),
  # White collar: work == "ambtenaar" (1934) vs. HISCLASS < 5 (1923-24)
  rbind(
    df.A1934.employed %>%
      subset(work == "ambtenaar") %>%
      subset(income.group != "Totaal") %>%
      mutate(primary = food + shelter + clothing) %>%
      select(primary, weight, consumption) %>%
      uncount(weights = weight, .remove = FALSE) %>%
      mutate(data = "A 1934"),
    df.A1923 %>%
      subset(HISCLASS < 5) %>%
      mutate(primary = food + shelter + clothing) %>%
      select(primary, consumption) %>%
      mutate(data = "A 1923-24", weight = 1)
  ) %>%
    ggplot(aes(y = primary / consumption, x = factor(data))) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("White collar") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),
  # Unemployed: 1934 survey only, unweighted
  df.A1934.unemployed %>%
    mutate(primary = food + shelter + clothing) %>%
    select(primary, consumption) %>%
    mutate(data = "A 1934") %>%
    ggplot(aes(y = primary / consumption, x = factor(data))) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("Unemployed") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),
  ncol = 3
) %>%
  annotate_figure(
    top = text_grob(
      "Share of primary expenditure in Amsterdam",
      face = "bold", size = 14
    )
  )

Financial margin (income - primary expenditure)
# --------------------------------------------------------------
# 1️⃣ Load required packages
# --------------------------------------------------------------
library(dplyr)
library(tidyr) # only needed for `uncount()` elsewhere
# --------------------------------------------------------------
# 2️⃣ ONE‑PIPE PIPELINE (order: white, blue, Amsterdam, NL)
# --------------------------------------------------------------
# Financial margin (FM) per survey group.
# Each survey is first mapped onto the same six income components (wage,
# support, public_employment, production, gifts, other); FM is then total
# income minus primary expenditure (food + shelter + clothing). The result is
# one row per work_class with N and the min / quartiles / max of FM.
bind_rows(
  # NL 1936 survey (df.NL598)
  df.NL598 %>%
    mutate(
      # Codes 1 and 3 are both labelled blue collar; code 4 (farmers) is
      # removed after the surveys are stacked.
      work_class = recode(
        work_class,
        `1` = "NL 1936 (blue collar)",
        `3` = "NL 1936 (blue collar)",
        `2` = "NL 1936 (white collar)",
        `4` = "Farmers"
      ),
      # NOTE(review): A3 is subtracted from A but not assigned to any
      # component below; gifts comes from the existing `gifts` column.
      wage = A - A2 - A3 - A4 - A5,
      support = A2,
      production = A4 + A5,
      public_employment = 0,
      other = 0
    ) %>%
    select(
      work_class,
      wage, support, public_employment, production, gifts, other,
      food, shelter, clothing
    ),
  # NL 1937 unemployed survey (df.NL700); gifts and public_employment are
  # used as they are in the data.
  df.NL700 %>%
    mutate(
      work_class = "NL 1937 (unemployed)",
      wage = income.HH.head + other.HH.income,
      support = support + fuel_subsidy + food_distr,
      production = income.own.company,
      other = 0
    ) %>%
    select(
      work_class,
      wage, support, public_employment, production, gifts, other,
      food, shelter, clothing
    ),
  # Amsterdam 1934 unemployed survey (df.A1934.unemployed)
  df.A1934.unemployed %>%
    mutate(
      work_class = "A'dam 1934 (unemployed)",
      wage = HHincome,
      support = social_security,
      public_employment = 0,
      production = 0,
      other = financial + other_income
    ) %>%
    select(
      work_class,
      wage, support, public_employment, production, gifts, other,
      food, shelter, clothing
    )
) %>%
  filter(work_class != "Farmers") %>%
  mutate(
    # Factor levels fix the row order of the summary table.
    work_class = factor(
      work_class,
      levels = c(
        "NL 1936 (white collar)",
        "NL 1936 (blue collar)",
        "A'dam 1934 (unemployed)",
        "NL 1937 (unemployed)"
      )
    ),
    income = wage + support + public_employment +
      production + gifts + other,
    FM = income - (food + shelter + clothing)
  ) %>%
  group_by(work_class) %>%
  summarise(
    N = n(),
    Min = min(FM, na.rm = TRUE),
    Q25 = quantile(FM, probs = 0.25, na.rm = TRUE),
    Q50 = quantile(FM, probs = 0.50, na.rm = TRUE),
    Q75 = quantile(FM, probs = 0.75, na.rm = TRUE),
    Max = max(FM, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Round the FM statistics to two decimals; N is already an integer count.
  mutate(across(c(Min, Q25, Q50, Q75, Max), ~ round(.x, 2))) %>%
  print()
## # A tibble: 4 × 7
## work_class N Min Q25 Q50 Q75 Max
## <fct> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NL 1936 (white collar) 202 -6.97 678. 1075. 2135. 13650.
## 2 NL 1936 (blue collar) 323 -294. 185. 330. 494. 1680.
## 3 A'dam 1934 (unemployed) 78 -116. 71.7 96.3 147. 291.
## 4 NL 1937 (unemployed) 700 -375. 3.64 70.2 121. 630.
Credit
For the 598 survey (NL 1936) we can distinguish between savings
inflow/outflow and credit inflow/outflow. For the 700 survey (NL 1937) we
cannot: there we only observe a positive or negative difference between
income and consumption, which we take to be savings and a credit increase,
respectively.
A positive savings value denotes an inflow into the savings account.
A positive credit value denotes an increase in outstanding credit. If
savings - credit is positive, household net worth increases.
library(dplyr)
# Credit use per survey group: share of households with a non-zero credit
# change and the mean credit change as a percentage of consumption.
# credit1 is reported as "Cash" and credit2 as "Installment" in the output.
# NOTE(review): C1/C2 look like credit increases and F1/F2 like repayments
# (cf. the Krediet_up / Krediet_down rounding checks) -- confirm.
df.NL598 %>% # 1936 data
  mutate(
    # Codes 1 and 3 are both labelled blue collar; code 4 (farmers) is
    # dropped in the next step.
    work_class = recode(
      work_class,
      `1` = "NL 1936 (blue collar)",
      `2` = "NL 1936 (white collar)",
      `3` = "NL 1936 (blue collar)",
      `4` = "Farmers"
    ),
    credit1 = C1 - F1,
    credit2 = C2 - F2
  ) %>%
  filter(work_class != "Farmers") %>%
  # Append the NL 1937 unemployed survey. It only has a single `credit`
  # column, which is used as credit1; credit2 is filled with NA by
  # bind_rows().
  bind_rows(
    df.NL700 %>%
      mutate(
        work_class = "NL 1937 (unemployed)",
        credit1 = credit
      )
  ) %>%
  select(credit1, consumption, credit2, work_class) %>%
  # Factor levels fix the row order: white collar, blue collar, unemployed.
  mutate(
    work_class = factor(
      work_class,
      levels = c(
        "NL 1936 (white collar)",
        "NL 1936 (blue collar)",
        "NL 1937 (unemployed)"
      )
    )
  ) %>%
  group_by(work_class) %>%
  summarise(
    perc_credit1 = mean(credit1 != 0, na.rm = TRUE) * 100,
    avg_credit1_cons = mean(credit1 / consumption, na.rm = TRUE) * 100,
    perc_credit2 = mean(credit2 != 0, na.rm = TRUE) * 100,
    avg_credit2_cons = mean(credit2 / consumption, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  # Format as percentages. A group with no observations at all for a measure
  # (all values NA) yields NaN from mean(); show that as NA rather than the
  # literal string "NaN%".
  mutate(
    across(
      c(perc_credit1, avg_credit1_cons, perc_credit2, avg_credit2_cons),
      ~ if_else(is.nan(.x), NA_character_, paste0(round(.x, 2), "%"))
    )
  ) %>%
  # Rename columns (Survey first) and print.
  select(
    Survey = work_class,
    `Cash != 0` = perc_credit1,
    `Cash/consumption` = avg_credit1_cons,
    `Installment!=0` = perc_credit2,
    `Installment/consumption` = avg_credit2_cons
  ) %>%
  print()
## # A tibble: 3 × 5
## Survey `Cash != 0` `Cash/consumption` `Installment!=0` Installment/consumpt…¹
## <fct> <chr> <chr> <chr> <chr>
## 1 NL 193… 21.29% -0.07% 89.6% 0.87%
## 2 NL 193… 23.84% -0.49% 72.76% 0.12%
## 3 NL 193… 76.14% 8.92% NaN% NaN%
## # ℹ abbreviated name: ¹`Installment/consumption`
# Count, among non-zero and non-missing entries, how many values of C
# (reported as "Krediet_up") and F ("Krediet_down") are whole numbers and how
# many are not. Zero and NA entries drop out of every count.
df.NL598 %>%
  summarise(
    Krediet_up_round = sum(C != 0 & C == round(C), na.rm = TRUE),
    Krediet_down_round = sum(F != 0 & F == round(F), na.rm = TRUE),
    Krediet_up_notround = sum(C != 0 & C != round(C), na.rm = TRUE),
    Krediet_down_notround = sum(F != 0 & F != round(F), na.rm = TRUE)
  )
## Krediet_up_round Krediet_down_round Krediet_up_notround Krediet_down_notround
## 1 46 65 265 174
# Count, among non-zero and non-missing `credit` values, how many are whole
# numbers and how many are not. Zero and NA entries drop out of both counts.
df.NL700 %>%
  summarise(
    Krediet_up_rounded = sum(
      credit != 0 & credit == round(credit),
      na.rm = TRUE
    ),
    Krediet_up_notrounded = sum(
      credit != 0 & credit != round(credit),
      na.rm = TRUE
    )
  )
## Krediet_up_rounded Krediet_up_notrounded
## 1 10 523
Savings
library(dplyr)
# Savings per survey group: share of households with positive savings, share
# with negative savings, and mean savings as a percentage of consumption.
df.NL598 %>% # 1936 data
  mutate(
    # Codes 1 and 3 are both labelled blue collar; code 4 (farmers) is
    # dropped in the next step.
    work_class = recode(
      work_class,
      `1` = "NL 1936 (blue collar)",
      `3` = "NL 1936 (blue collar)",
      `2` = "NL 1936 (white collar)",
      `4` = "Farmers"
    )
  ) %>%
  filter(work_class != "Farmers") %>%
  # Stack the NL 1937 unemployed survey underneath.
  bind_rows(
    df.NL700 %>%
      mutate(work_class = "NL 1937 (unemployed)")
  ) %>%
  # Factor levels fix the row order: white collar, blue collar, unemployed.
  mutate(
    work_class = factor(
      work_class,
      levels = c(
        "NL 1936 (white collar)",
        "NL 1936 (blue collar)",
        "NL 1937 (unemployed)"
      )
    )
  ) %>%
  group_by(work_class) %>%
  # Compute the three measures directly under their final column names.
  summarise(
    `Savings > 0` = mean(savings > 0, na.rm = TRUE) * 100,
    `Savings < 0` = mean(savings < 0, na.rm = TRUE) * 100,
    `Avg Savings / Consumption (%)` =
      mean(savings / consumption, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  # Round to two decimals and append a percent sign to every measure.
  mutate(across(-work_class, ~ paste0(round(.x, 2), "%"))) %>%
  rename(Survey = work_class) %>%
  print()
## # A tibble: 3 × 4
## Survey `Savings > 0` `Savings < 0` Avg Savings / Consumption…¹
## <fct> <chr> <chr> <chr>
## 1 NL 1936 (white collar) 60.4% 37.13% 4.2%
## 2 NL 1936 (blue collar) 50.15% 42.72% 0.04%
## 3 NL 1937 (unemployed) 23.29% 0% 1.17%
## # ℹ abbreviated name: ¹`Avg Savings / Consumption (%)`
Figures: savings and credit
# Boxplot of (savings - credit) / income by work class for the 598 (employed)
# and 700 (unemployed) surveys; the unemployed are given work_class 5.
# For df.NL598 income is wage + support + production; for df.NL700 the
# existing `income` column is used as is.
# The former `weight = weight` argument to ggplot() was outside aes(), so it
# was ignored, and the plotted data has no weight column; it is removed along
# with the never-used `production = 0`. The figure is unchanged.
rbind(
  df.NL598 %>%
    mutate(income = (wage + support + production)) %>%
    select(income, credit, savings, work_class),
  df.NL700 %>%
    mutate(work_class = 5) %>%
    select(income, credit, savings, work_class)
) %>%
  ggplot(aes(y = (savings - credit) / income, x = factor(work_class))) +
  geom_boxplot() +
  xlab("") +
  ylab("Share of income") +
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)) +
  scale_x_discrete(
    breaks = c(1, 2, 3, 4, 5),
    labels = c(
      "Blue collar", "White collar", "Agricultural workers",
      "Farmers", "Unemployed"
    )
  ) +
  labs(
    title = "Savings - credit",
    caption = "Data 598 employed & 700 unemployed."
  )

# Boxplot of savings / income by work class for the 598 (employed) and 700
# (unemployed) surveys; the unemployed are given work_class 5.
# For df.NL598 income is wage + support + production; for df.NL700 the
# existing `income` column is used as is.
# The former `weight = weight` argument to ggplot() was outside aes(), so it
# was ignored, and the plotted data has no weight column; it is removed along
# with the never-used `production = 0`. The figure is unchanged.
rbind(
  df.NL598 %>%
    mutate(income = (wage + support + production)) %>%
    select(income, credit, savings, work_class),
  df.NL700 %>%
    mutate(work_class = 5) %>%
    select(income, credit, savings, work_class)
) %>%
  ggplot(aes(y = savings / income, x = factor(work_class))) +
  geom_boxplot() +
  xlab("") +
  ylab("Share of income") +
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)) +
  scale_x_discrete(
    breaks = c(1, 2, 3, 4, 5),
    labels = c(
      "Blue collar", "White collar", "Agricultural workers",
      "Farmers", "Unemployed"
    )
  ) +
  labs(
    title = "Savings",
    caption = "Data 598 employed & 700 unemployed."
  )

# Boxplot of credit / income by work class for the 598 (employed) and 700
# (unemployed) surveys; the unemployed are given work_class 5.
# For df.NL598 income is wage + support + production; for df.NL700 the
# existing `income` column is used as is.
# The former `weight = weight` argument to ggplot() was outside aes(), so it
# was ignored, and the plotted data has no weight column; it is removed along
# with the never-used `production = 0`. The figure is unchanged.
rbind(
  df.NL598 %>%
    mutate(income = (wage + support + production)) %>%
    select(income, credit, savings, work_class),
  df.NL700 %>%
    mutate(work_class = 5) %>%
    select(income, credit, savings, work_class)
) %>%
  ggplot(aes(y = credit / income, x = factor(work_class))) +
  geom_boxplot() +
  xlab("") +
  ylab("Share of income") +
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)) +
  scale_x_discrete(
    breaks = c(1, 2, 3, 4, 5),
    labels = c(
      "Blue collar", "White collar", "Agricultural workers",
      "Farmers", "Unemployed"
    )
  ) +
  labs(
    title = "Credit",
    caption = "Data 598 employed & 700 unemployed."
  )
