Functions

# Share of households (in percent) with a strictly positive value of `x`.
# Missing values are ignored in both the numerator and the denominator.
count_over_zero100 <- function(x) {
  observed <- x[!is.na(x)]
  100 * sum(observed > 0) / length(observed)
}

# Net present value
#
# cf0   : cash flow at time 0 (not discounted).
# cf    : cash flow received at the end of each period 1..times (a single value
#         is recycled over all periods).
# times : number of periods; a fractional value is truncated, 0 means there
#         are no future cash flows.
# i     : discount rate per period (e.g. 0.0332).
#
# Returns cf0 + sum(cf / (1 + i)^t) for t = 1..times, or NA when the horizon
# is unknown.
npv <- function(cf0, cf, times, i) {
  cf0 <- as.numeric(cf0)
  cf <- as.numeric(cf)

  # An unknown horizon cannot be discounted; return NA instead of failing.
  if (anyNA(times)) {
    return(NA_real_)
  }

  # seq_len() yields an empty sequence for times == 0 (1:0 would give c(1, 0)).
  periods <- seq_len(trunc(times))
  cf0 + sum(cf / (1 + i)^periods)
}

Setup surveys

NL 598 employed

# Load the NL598 budget survey, drop the X column and derive the income,
# cash-flow and consumption aggregates used in the rest of the analysis.
df.NL598 <- read.csv("data/NL598.csv", sep = ",") %>%
  select(-X) %>%
  mutate(
    # income categories
    gifts = geschenken,
    production = A4 + A5,
    support = A2,
    wage = A - A2 - A3 - A4 - A5,
    income = wage + support + production,

    # other cashflows
    savings = E1 - B1,
    credit = C - F,
    stocks = B2 - E2,

    # consumption categories
    food = D1_16,
    shelter = rowSums(across(c(D17, D18, D19, D20, D21a, D21b, D21c))),
    clothing = rowSums(across(c(D25, D26, D22))),
    other = rowSums(across(c(
      D23a, D23b, D24, D27, D28, D29, D30, D31, D34, D35,
      D36, D37, D38, D32, D33
    ))),
    consumption = food + shelter + clothing + other
  )

# # check on hisclass
# df.NL598 %>% mutate(
#       work_class = recode(
#         work_class,
#         `1` = "NL 1936 (blue collar)",
#         `2` = "NL 1936 (white collar)",
#         `3` = "NL 1936 (blue collar)",   # code 3 → Blue collar
#         `4` = "Farmers"                  # will be filtered out later
#       )) %>% select(HH,work_class, work) %>% filter(work_class !="Farmers") %>% write.csv(., "check_work.csv")

Wealth NL 598 employed

Parameters

Data manipulation: mean income per income group, excluding farmers (work_class = 4). These average income figures match CBS (1938).

# Mean total income (column A) per CBS income bracket, excluding farmers
# (work_class == 4) in EVERY bracket. Brackets are half-open, [lower, upper),
# so a household whose income is exactly on a boundary (1400, 1800, ...) is
# counted in exactly one bracket instead of being dropped from all of them.
# which() discards rows where the condition is NA (missing A or work_class).
A.1 <- mean(df.NL598$A[which(df.NL598$A < 1400 & df.NL598$work_class != 4)])
A.2 <- mean(df.NL598$A[which(df.NL598$A >= 1400 & df.NL598$A < 1800 &
                               df.NL598$work_class != 4)])
A.3 <- mean(df.NL598$A[which(df.NL598$A >= 1800 & df.NL598$A < 2300 &
                               df.NL598$work_class != 4)])
A.4 <- mean(df.NL598$A[which(df.NL598$A >= 2300 & df.NL598$A < 3000 &
                               df.NL598$work_class != 4)])
A.5 <- mean(df.NL598$A[which(df.NL598$A >= 3000 & df.NL598$A < 4000 &
                               df.NL598$work_class != 4)])
A.6 <- mean(df.NL598$A[which(df.NL598$A >= 4000 & df.NL598$A < 6000 &
                               df.NL598$work_class != 4)])
A.7 <- mean(df.NL598$A[which(df.NL598$A >= 6000 & df.NL598$work_class != 4)])

#### shares of premiums per income class.
# Percentage split of the insurance premium over three insurance types, per
# income bracket of A. For each bracket the three shares add up to 100
# (e.g. P1 + P11 + P12 = 8.28 + 78.91 + 12.81).
# Naming: P<k> = pension/old-age, P<k>1 = funeral/life, P<k>2 = health/other,
# where k = 1..7 is the income bracket.
# Pension and old-age
P1 <- 8.28  # < 1400
P2 <- 41.21 # > 1400 & < 1800
P3 <- 60.52 # > 1800 & < 2300
P4 <- 62.03 # > 2300 & < 3000
P5 <- 58.27 # > 3000 & < 4000
P6 <- 59.97 # > 4000 & < 6000
P7 <- 48.37 # > 6000

#Funeral and life-insurance
P11 <- 78.91 # < 1400
P21 <- 50.29 # > 1400 & < 1800
P31 <- 35.14 # > 1800 & < 2300
P41 <- 34.33 # > 2300 & < 3000
P51 <- 38.47 # > 3000 & < 4000
P61 <- 37.31 # > 4000 & < 6000
P71 <- 44.84 # > 6000

#Health and other
P12 <- 12.81  # < 1400
P22 <-  8.5   # > 1400 & < 1800
P32 <-  4.34  # > 1800 & < 2300
P42 <-  3.64  # > 2300 & < 3000
P52 <-  3.26  # > 3000 & < 4000
P62 <-  2.72  # > 4000 & < 6000
P72 <-  6.79  # > 6000

#### setup

# How I set the fraction spent on pension and old-age insurance:
# -   Up to A.1 (mean income of households with income < 1400): use P1;
# -   From A.1, apply growth rate S1 until A.2 (mean income of households with income > 1400 & income < 1800);
# -   From A.2, apply growth rate S2 until A.3;
# -   From A.3 until and including A.7, take the weighted average of the shares (P_weighted);
# -   From the weighted average to A.7, apply the growth rate.

#### Weighted average share between A3 and A7
# Number of non-farmer households (work_class != 4) in each income range.
# Rows with a missing condition are not counted. These counts are plain
# numerics, so the later numeric clean-up removes them again.
n_A3_3000 <- sum(df.NL598$A > A.3 & df.NL598$A <= 3000 &
                   df.NL598$work_class != 4, na.rm = TRUE)
n_3000_4000 <- sum(df.NL598$A > 3000 & df.NL598$A <= 4000 &
                     df.NL598$work_class != 4, na.rm = TRUE)
n_4000_A7 <- sum(df.NL598$A > 4000 & df.NL598$A <= A.7 &
                   df.NL598$work_class != 4, na.rm = TRUE)
n_A3_A7 <- sum(df.NL598$A > A.3 & df.NL598$A <= A.7 &
                 df.NL598$work_class != 4, na.rm = TRUE)

# Household-weighted mean share, expressed as a fraction (percent / 100).
# Pension and old-age
P_weighted <- (n_A3_3000 * P4/100 +
                 n_3000_4000 * P5/100 +
                 n_4000_A7 * P6/100) / n_A3_A7

# Funeral and life
P_weighted1 <- (n_A3_3000 * P41/100 +
                  n_3000_4000 * P51/100 +
                  n_4000_A7 * P61/100) / n_A3_A7

#### Change of P over change of income
# Slopes: change in premium share (percentage points) per unit of income
# between the mean incomes of two adjacent brackets.
# P_weighted / P_weighted1 are fractions, hence the * 100 to bring them back
# to percentage points before differencing against P2 / P21.

#Pension and old-age


S1 <- (P2 - P1)/(A.2-A.1)
S2 <- (P_weighted*100 - P2)/(A.3-A.2)
S3 <- (P4 - P3)/(A.4-A.3)
S4 <- (P5 - P4)/(A.5-A.4)
S5 <- (P6 - P5)/(A.6-A.5)
S6 <- (P7 - P6)/(A.7-A.6)

#Funeral and life
S11 <- (P21 - P11)/(A.2-A.1)
S21 <- (P_weighted1*100 - P21)/(A.3-A.2)
S31 <- (P41 - P31)/(A.4-A.3)
S41 <- (P51 - P41)/(A.5-A.4)
S51 <- (P61 - P51)/(A.6-A.5)
S61 <- (P71 - P61)/(A.7-A.6)

#### combine
# Piecewise premium fraction as a function of total income A:
#   A <  A.1          : flat share of the lowest bracket
#   A.1 <= A < A.2    : linear from P1 with slope S1
#   A.2 <= A < A.3    : linear from P2 with slope S2
#   A.3 <= A < A.6    : flat household-weighted share (already a fraction)
#   A.6 <= A < A.7    : linear from P6 with slope S6
#   A >= A.7          : flat share of the highest bracket
# Households with a missing A get NA. The fallback is NA_real_ (not "") so
# the result stays numeric and is never round-tripped through character,
# which would truncate the values to 15 significant digits.
# NOTE(review): the value jumps from P_weighted to P6 / 100 at A.6 - confirm
# that this discontinuity is intended.

#Pension and old-age
df.NL598$ins.frac <-
  ifelse(df.NL598$A < A.1, P1 / 100,
  ifelse(df.NL598$A >= A.1 & df.NL598$A < A.2,
         (P1 + ((df.NL598$A - A.1) * S1)) / 100,
  ifelse(df.NL598$A >= A.2 & df.NL598$A < A.3,
         (P2 + ((df.NL598$A - A.2) * S2)) / 100,
  ifelse(df.NL598$A >= A.3 & df.NL598$A < A.6, P_weighted,
  ifelse(df.NL598$A >= A.6 & df.NL598$A < A.7,
         (P6 + ((df.NL598$A - A.6) * S6)) / 100,
  ifelse(df.NL598$A >= A.7, P7 / 100, NA_real_))))))

#Funeral and life
df.NL598$ins.frac1 <-
  ifelse(df.NL598$A < A.1, P11 / 100,
  ifelse(df.NL598$A >= A.1 & df.NL598$A < A.2,
         (P11 + ((df.NL598$A - A.1) * S11)) / 100,
  ifelse(df.NL598$A >= A.2 & df.NL598$A < A.3,
         (P21 + ((df.NL598$A - A.2) * S21)) / 100,
  ifelse(df.NL598$A >= A.3 & df.NL598$A < A.6, P_weighted1,
  ifelse(df.NL598$A >= A.6 & df.NL598$A < A.7,
         (P61 + ((df.NL598$A - A.6) * S61)) / 100,
  ifelse(df.NL598$A >= A.7, P71 / 100, NA_real_))))))

#### Original CBS distribution
# Step function: every household gets the flat premium share of its CBS
# income bracket. Brackets are half-open, [lower, upper), so an income that
# is exactly on a boundary (1400, 1800, ...) is assigned to the bracket that
# starts there instead of ending up as NA. A missing A still gives NA.
cbs_breaks <- c(1400, 1800, 2300, 3000, 4000, 6000)
# Bracket index 1..7 (findInterval returns 0 below the first break).
cbs_bracket <- findInterval(df.NL598$A, cbs_breaks) + 1

#Pension and old-age
df.NL598$ins.frac.CBS <- c(P1, P2, P3, P4, P5, P6, P7)[cbs_bracket] / 100

#Funeral and life
df.NL598$ins.frac.CBS1 <- c(P11, P21, P31, P41, P51, P61, P71)[cbs_bracket] / 100

#Health and other
df.NL598$ins.frac.CBS2 <- c(P12, P22, P32, P42, P52, P62, P72)[cbs_bracket] / 100

Net Present Value calculations

Now I will calculate NPV following my own fractions and CBS fractions, both for pension and life insurance wealth.

Some households do not have a male household head; in that case we use the age of the female head instead. Here I calculate the number of years used for discounting (time to retirement, ttr).

df.NL598 <- df.NL598 %>%
  mutate(
    # Years until age 60, based on the male age and falling back to the
    # female age when the male age is missing; never negative.
    ttr = pmax(60 - ifelse(is.na(age_m), age_f, age_m), 0),

    #### Fraction * premium spent (D35)
    pension = ins.frac * D35,
    pension.CBS = ins.frac.CBS * D35,
    life = ins.frac1 * D35,
    life.CBS = ins.frac.CBS1 * D35
  )


#### NPV
# For every household, discount the yearly premium over the years left until
# retirement (ttr) at 3.32% and store the result as npv.<premium column>.
# A household with an unknown ttr gets NA in ALL four columns (previously only
# the first column was reset and the other three silently reused the values
# of the preceding household).
npv_rate <- 0.0332

for (premium_col in c("pension", "pension.CBS", "life", "life.CBS")) {
  df.NL598[[paste0("npv.", premium_col)]] <- vapply(
    seq_len(nrow(df.NL598)),
    function(row) {
      years <- df.NL598$ttr[row]
      if (is.na(years)) {
        return(NA_real_)
      }
      npv(0, df.NL598[[premium_col]][row], years, npv_rate)
    },
    numeric(1)
  )
}
# The loop variable is a character string, so the numeric clean-up further
# down would not remove it.
rm(premium_col)

#### Land wealth
# Lease price (pacht) per province; provinces without a lease price are dropped.
df.land <- read.csv("data/land.csv") %>%
  filter(!is.na(pacht)) %>%
  select(provincie, pacht) %>%
  as.data.frame()

# inner_join keeps only the households whose provincie occurs in df.land.
df.NL598 <- df.NL598 %>%
  inner_join(df.land, by = "provincie") %>%
  mutate(
    land = as.numeric(land),
    # land & pacht: yearly lease capitalised at 3.32%
    land_value = (land * pacht) / (0.0332 * 1000) * 1000,
    #### Home value
    # D17 times the own_house column, capitalised at 3.32%
    home_value = D17 * own_house / 0.0332
  )

# clean up
# Removes EVERY object of mode "numeric" listed by ls.str(), not only the
# helpers created above (A.*, P*, S*, loop temporaries): any other numeric
# object that exists at this point is deleted as well.
rm(list = ls.str(mode = 'numeric'))
rm(df.land)
# matches() takes a regular expression. "CBS" also matches npv.pension.CBS and
# npv.life.CBS, so only the non-CBS NPV columns remain in df.NL598.
df.NL598 <- df.NL598 %>% select(-matches("ins.frac"), -matches("CBS"), -pension, -life)

NL 700 unemployed

df.NL700 <- read.csv("data/NL700.csv") %>%
  select(-X) %>%
  # towards yearly figures: every amount is multiplied by 52, after which the
  # (yearly) stocks are subtracted from income
  mutate(
    across(
      c(support, public_employment, fuel_subsidy, food_distr, income.HH.head,
        income.own.company, other.HH.income, gifts, stocks, income, food,
        housing, fire, clothing, insurance, other, consumption),
      ~ .x * 52
    ),
    income = income - stocks
  ) %>%
  # a deficit is recorded as credit, a surplus as savings (never negative)
  mutate(
    credit = pmax(consumption - income, 0),
    savings = pmax(income - consumption, 0)
  ) %>%
  # add financial margin; income is rebuilt here from its components
  mutate(
    housing = housing + fire,
    income = support + public_employment + fuel_subsidy + food_distr +
      income.HH.head + income.own.company + other.HH.income + gifts,
    fm = income - food - housing - clothing,
    social_security = support + fuel_subsidy + food_distr,
    wage = income.HH.head + income.own.company + other.HH.income +
      public_employment
  ) %>%
  # harmonised consumption categories
  mutate(
    shelter = housing,
    other = consumption - food - shelter - clothing
  )

Amsterdam 1934 (Unemployed)

  # Amsterdam 1934 survey of unemployed households: build harmonised income
  # and consumption categories, then rescale the derived amounts from the
  # surveyed period (X.Weeks weeks) to 52 weeks.
  df.A1934.unemployed <- read.csv("data/Amsterdam1934.csv") %>% 
  mutate(HHincome = Man + Vrouw + Kinderen,
  social_security = Werkloosheidssteun + `Andere.steun` + Brandstoffentoeslag,
  financial = `Invaliditeits..of.weezenrente..Pensioen.e.d.`,
  other_income = Onderhuur + Overige,
  gifts = `Diverse.voorwerpen`+ Schoolvoeding + Schoolkleding,
  credit = `Geleend.of.uitbeleening`) %>%
  # rowSums(na.rm = TRUE): a missing component counts as 0 in the total
  mutate(income = select(., HHincome, social_security,financial,other_income, gifts) %>% rowSums(na.rm = TRUE)) %>%
  mutate(food = Food) %>% select(-Food) %>%
  mutate(shelter = Rent + `Gas.El.Fuel`) %>% select(-Rent, -`Gas.El.Fuel`) %>%
  mutate(clothing = Clothing + Footwear) %>% select(-Clothing, -Footwear) %>%
  mutate(insurance = `Funeral.Health`+Insurance) %>%
  mutate(other_cons = Other) %>% select(-Other) %>%
  mutate(consumption = select(., food, shelter, clothing,insurance,other_cons) %>% rowSums(na.rm = TRUE)) %>%
  # financial margin: income left after food, shelter and clothing
  mutate(fm = income - food - shelter - clothing) %>%
  # Only the columns listed here are annualised; the raw CSV columns
  # (e.g. Man, Vrouw) keep their original scale.
  mutate(across(c(income, consumption, HHincome, social_security, financial, other_income, gifts, 
                 credit,fm, food, shelter, clothing, insurance, other_cons), ~ . / `X.Weeks` * 52)) %>%
  rename(verbruikseenheid = Verbruikseenheden) %>%
  mutate(other = consumption - food - shelter - clothing)

Amsterdam 1934 (Employed)

# Amsterdam 1934 survey of employed households: harmonised consumption
# categories plus the weight, income group and worker type columns.
df.A1934.employed <- read.csv("data/Amsterdam1934_employed.csv", dec = ",") %>%
  mutate(
    consumption = Consumptie,
    food = Voeding,
    shelter = Huishuur + Gas + Electriciteit,
    clothing = Kleding + Schoeisel,
    other = consumption - food - shelter - clothing
  ) %>%
  select(
    consumption, food, shelter, clothing,
    weight = Aantal.gezinnen,
    income.group = group,
    work = Wie,
    other
  )

Den Haag 1932

# Den Haag 1932 survey: harmonised income categories, rescaled from the
# surveyed period (X.weken weeks) to 52 weeks.
df.DH1932 <- read.csv("data/DH1932.csv") %>%
  mutate(
    HHincome = Man_loon + Vrouw_loon + Zoonloon + Dochter_loon,
    social_security = Totaal_Steun,
    financial = Ouderdomsrente + Dividend + Restitutie + `Kerst.uitkering`,
    other = `Overig.inkomen`,
    gifts = Voeding + Kleding + Schoeisel + Huisraad + Rookwaren + Overig,
    stocks = Voedsel + Brandstof,
    credit = `Lening.1`
  ) %>%
  # a missing component counts as 0 in total income
  mutate(
    income = rowSums(
      select(., HHincome, social_security, financial, other, gifts, stocks),
      na.rm = TRUE
    ),
    consumption = Uitgaven
  ) %>%
  # mutate(fm = income - consumption) %>%
  mutate(across(
    c(income, consumption, HHincome, social_security, financial, other,
      gifts, stocks, credit),
    ~ .x / `X.weken` * 52
  ))

Den Haag 1935

# Den Haag 1935 survey: income (receipts plus gifts in kind), support,
# consumption and financial margin, all multiplied by 52.
df.DH1935 <- read.csv("data/DH1935.csv") %>%
  mutate(
    income = Ontvangsten + `Waarde.van.schenkingen.in.natura`,
    kind = `Waarde.van.schenkingen.in.natura`,
    social_security = `steunbedrag.per.week.bij.aanvang.onderzoek`,
    consumption = Uitgaven,
    fm = income - consumption
  ) %>%
  mutate(across(
    c(income, consumption, fm, social_security, kind),
    ~ .x * 52
  ))

Amsterdam 1923

# Amsterdam 1923 survey: keep only the harmonised income and consumption
# columns plus HISCLASS.
df.A1923 <- read.csv("data/Amsterdam1923.csv", dec = ",") %>%
  mutate(
    income = Total.income,
    consumption = Total.expenditure,
    food = Food,
    shelter = Rent,
    clothing = Clothing + Shoes,
    fm = income - consumption
  ) %>%
  select(income, consumption, food, shelter, clothing, fm, HISCLASS)

Figures in order of paper

national accounting

# Plot nominal and real GDP per capita for the years before 1940.
# The file is read with ';' separators and decimal commas, skipping the first
# four lines; the first data row and the volume-change column are dropped.
read.csv("data/bbp.csv", sep = ";", skip = 4, dec = ",") %>% slice(-1) %>% select(-Volumemutaties...) %>%
  rename(year = Soort.cijfers,
         gdp.nominal = Lopende.prijzen..mln.euro,
         deflator = Deflatoren....mutaties,
         pop = Gem..bevolking...1.000.personen) %>% subset(year < 1940) %>% 
  # columns that were read as text are converted to numbers
  mutate(across(where(is.character),
                ~ as.numeric(.))) %>%
  # Turn the yearly percentage changes into a price index: cumulate the
  # changes, then shift the series by one year so the first year equals 100.
  mutate(deflator = 100 * cumprod(1 + deflator/100),
         deflator = c(100, deflator[-length(deflator)]),
         gdp.real = gdp.nominal / deflator *100) %>% 
  # long format: one row per year and GDP type, so both can share one plot
  pivot_longer(
    cols = c(gdp.nominal, gdp.real),
    names_to = "type",
    values_to = "gdp"
  ) %>% 
   ggplot(aes(y = (gdp/pop), x= year, linetype = type)) + geom_line() + ylab("Gdp per capita")

Representativeness

function income_distribution

This function results in a table of shares of the total sample that belong to various income categories.

# Share of households per income bucket.
#
# df : a data frame with a numeric `income` column.
#
# Returns a one-column data frame (`value`, character) with five rows: the
# percentage of households with income < 800, in [800, 1400), in
# [1400, 2000) and >= 2000, followed by the number of rows in `df` ("N").
# Rows with a missing income are counted in N but fall in no bucket.
income_distribution <- function(df) {
  if (!"income" %in% names(df)) {
    stop("`df` must contain an `income` column.", call. = FALSE)
  }

  income <- df[["income"]]
  total <- nrow(df)

  # Percentage of all rows for which the bucket condition holds.
  share_of_total <- function(in_bucket) {
    sum(in_bucket, na.rm = TRUE) / total * 100
  }

  # The second bucket runs up to (but excluding) 1400, so that incomes in
  # [1399, 1400) are no longer left out of every bucket.
  shares <- c(
    share_of_total(income < 800),
    share_of_total(income >= 800 & income < 1400),
    share_of_total(income >= 1400 & income < 2000),
    share_of_total(income >= 2000)
  )

  # NOTE: the third row name says "from_800_to_1999" although the bucket is
  # 1400-1999; the label is kept unchanged so existing callers do not break.
  data.frame(
    value = c(sprintf("%.2f%%", shares), as.character(total)),
    row.names = c("below_800", "from_800_to_1399", "from_800_to_1999", "2000_and_above", "N")
  )
}

NL 1935 vs. surveys

# Here we adjust based on the share of households taxed, to account for
# household income from wife/children: within each bracket of A below 3000,
# income is shifted so that the given quantile of the bracket lands on 800.
# Incomes with A >= 3000 are left unchanged; a missing A gives NA.
df.NL598_temp <- df.NL598 %>%
  mutate(
    income = case_when(
      A < 1400 ~
        income + 800 - quantile(income[which(A < 1400)], probs = 1-(88/167)),
      A >= 1400 & A < 1800 ~
        income + 800 - quantile(income[which(A >= 1400 & A < 1800)], probs = 1-(91/109)),
      A >= 1800 & A < 2300 ~
        income + 800 - quantile(income[which(A >= 1800 & A < 2300)], probs = 1-(89/103)),
      A >= 2300 & A < 3000 ~
        income + 800 - quantile(income[which(A >= 2300 & A < 3000)], probs = 1-(51/60)),
      A >= 3000 ~ income
    )
  )

# Representativeness table: the national income distribution of 1935 next to
# the bucket shares of each survey (one column per source).
cbind(
# First row of the national distribution, regrouped into the four buckets.
read.csv("data/NL_income_distribution_1935.csv", dec = ".") %>% slice(1) %>% 
  mutate(X800.1400 = X800.AC0.1400,
         X.1400.2000 = X1400.AC0.2000,
         X.2000 = X2000.AC0.5000 + X5000.AC0.10000 + X10000.AC0.20000 + X.AD4.20000,
         X.800 =  (X800.1400 + X.1400.2000 + X.2000)/(1464/3394)*(1-(1464/3394)), # The share of workers above the tax threshold of 800 is 1464/3394 in 1935. 
         Total = (Total / 1000000)/(1464/3394)) %>% 
  # normalise the four buckets so they sum to 1
  mutate(across(c(X.800, X800.1400, X.1400.2000, X.2000), ~ .x / (X.800 + X800.1400+ X.1400.2000 + X.2000))) %>%
  # transpose to one column (V1); the first four rows are shares, the fifth is the total
  select(X.800, X800.1400, X.1400.2000 , X.2000, Total) %>% t() %>% as.data.frame %>%
  mutate(V1 = ifelse(row_number() <=4, percent(V1, accuracy = 0.01),round(V1,2))),
# work_class 1 and 3 are reported together as blue collar, 2 as white collar
income_distribution(df.NL598_temp %>% filter(work_class == 1 | work_class == 3)),
income_distribution(df.NL598_temp %>% filter(work_class == 2 )),
# income_distribution(df.NL598_temp %>% filter(work_class == 3)),
income_distribution(df.NL700 %>% mutate(income = (support + public_employment + income.HH.head + income.own.company))),
# NOTE(review): social_security was rescaled to 52 weeks when
# df.A1934.unemployed was built, whereas Man and the pension column are used
# here as read from the CSV - confirm that the units agree.
income_distribution(df.A1934.unemployed %>% mutate(income = (Man + social_security + Invaliditeits..of.weezenrente..Pensioen.e.d.)))
) %>%
  setNames(c("NL (mln.)", "NL: blue", "NL: white", "NL: un.", "A: un.")) %>%
  # replace the row names with display labels
  (\(x) {
    rownames(x) <- c("<800", "800-1400", "1400-2000", ">2000", "N")
    x
  })() 
##           NL (mln.) NL: blue NL: white NL: un. A: un.
## <800         56.87%   33.44%     4.46%  86.71% 53.85%
## 800-1400     19.55%   65.94%    51.98%  13.14% 46.15%
## 1400-2000    11.96%    0.00%     1.98%   0.14%  0.00%
## >2000        11.63%    0.62%    41.58%   0.00%  0.00%
## N              3.14      323       202     700     78
rm(df.NL598_temp)

Income distribution NL vs. Amsterdam surveys

These figures show the income distribution of NL vs. Amsterdam for two time periods (NL 1920 vs. A 1923, and NL 1934-1935 vs. A 1934). They show that the employed Amsterdam surveys are at the top of the income distribution when compared to NL. They also show that both Amsterdam surveys occupy a similar position in the income distribution over time, which makes the two groups more comparable.

# Amsterdam 1923 vs. NL 1920. 
# Source of the distribution: De socialistische gids, maandschrift der sociaaldemocratische arbeiderspartij, March 1923 
# Left: national share of assessed taxpayers per income class (count / 1368293).
# Right: share of all Amsterdam 1923 households per income class.
# NOTE(review): the class bounds are strict on both sides, so households with
# an income of 800 or less, or exactly on a class boundary, are counted in the
# denominator only - confirm that this is intended.
cbind(
read.csv("data/income_distribution1920-21.csv")  %>% mutate(share.NL = aantal.aangeslagenen / 1368293) %>% subset(inkomens != "totaal") %>% select(inkomens, share.NL),
rbind(
count(df.A1923 %>% select(income) %>% subset(income >800 & income < 1000)) / count(df.A1923 %>% select(income)),
count(df.A1923 %>% select(income) %>% subset(income >1000 & income < 2000)) / count(df.A1923 %>% select(income)),
count(df.A1923 %>% select(income) %>% subset(income >2000 & income < 5000)) / count(df.A1923 %>% select(income)),
count(df.A1923 %>% select(income) %>% subset(income >5000 & income < 10000)) / count(df.A1923 %>% select(income)),
count(df.A1923 %>% select(income) %>% subset(income >10000 & income < 50000)) / count(df.A1923 %>% select(income)),
count(df.A1923 %>% select(income) %>% subset(income >50000 & income < 100000)) / count(df.A1923 %>% select(income)),
count(df.A1923 %>% select(income) %>% subset(income >100000 )) / count(df.A1923 %>% select(income))
)) %>% rename(share.A = n) %>% mutate(share.NL = scales::percent(share.NL), share.A = scales::percent(share.A))
##       inkomens share.NL share.A
## 1     800-1000   9.094%   0.47%
## 2    1000-2000  59.780%  18.87%
## 3    2000-5000  24.336%  70.75%
## 4   5000-10000   4.348%   7.55%
## 5  10000-50000   2.159%   1.42%
## 6 50000-100000   0.180%   0.00%
## 7      >100000   0.104%   0.00%
# Amsterdam 1934 (employed) vs. NL 1934-35. 
# Source of the distribution: CBS, statistiek der inkomens en vermogens in nederland 1934/1935
# share.NL is the national share of assessed taxpayers per income class
# (count / 1445019); share.A combines the blue-collar (arbeiders) and
# white-collar (ambtenaren) shares with weights 75/184 and 109/184.
read.csv("data/income_distribution1934-35.csv", dec = ",")  %>% mutate(share.NL = aantal.aangeslagenen / 1445019) %>% subset(inkomens != "totaal") %>%
  mutate(arbeiders = arbeiders/100, ambtenaren = ambtenaren /100, total = arbeiders * 75/184 + ambtenaren * 109/184) %>%
  rename("blue collar" = arbeiders, "white collar" = ambtenaren, share.A = total) %>% select(inkomens, share.NL, share.A, "blue collar", "white collar") %>% mutate(across(where(is.numeric), percent), across(where(is.numeric), ~ round(., 2)))
##     inkomens share.NL share.A blue collar white collar
## 1   800-1400    44.9%   7.89%       15.0%        3.00%
## 2  1400-2000    27.3%  16.48%       27.2%        9.10%
## 3  2000-3000    15.0%  39.37%       29.0%       46.52%
## 4  3000-5000     7.9%  12.74%        3.8%       18.88%
## 5 5000-10000     3.5%  11.35%        0.0%       19.17%
## 6     >10000     1.4%   7.31%        0.0%       12.33%
# Within the Netherlands, the Amsterdam surveys are in the top 30% of the income distribution.

Eindhoven (Verwey-Jonker)

# import verwey dataset.
df.verwey <- read.csv("data/verwey_jonker.csv") %>% filter(threshold != "totaal")

# Change the income buckets to match the NL income distribution and add the
# household counts of NL598 and NL700. Both joined tables carry `threshold`
# as character so the join keys have the same type on every side.
df.eindhoven <- df.verwey %>%
  mutate(
    # define the new bucket label
    bucket = case_when(
      threshold %in% c("<200", "200-400", "400-600", "600-800") ~ "<800",
      threshold %in% c("800-1000", "1000-1200", "1200-1400") ~ "800-1400",
      threshold %in% c("1400-1600", "1600-1800", "1800-2000") ~ "1400-2000",
      TRUE ~ ">2000"
    )
  ) %>%
  group_by(bucket) %>%
  summarise(
    "VJ (HH)" = sum(HH),
    "VJ (singles)" = sum(singles),
    .groups = "drop"
  ) %>%
  rename(threshold = bucket) %>%
  # -----------------------------------------------------------------
  # 1. Counts from df.NL598 (filtered to Eindhoven)
  # -----------------------------------------------------------------
  left_join(
    df.NL598 %>%
      subset(location == "Eindhoven") %>%
      mutate(
        # half-open buckets: [800, 1400), [1400, 2000), ...
        threshold = cut(
          income,
          breaks = c(-Inf, 800, 1400, 2000, Inf),
          labels = c("<800", "800-1400", "1400-2000", ">2000"),
          right = FALSE
        )
      ) %>%
      group_by(threshold) %>%
      summarise(NL598 = n(), .groups = "drop") %>%
      mutate(threshold = as.character(threshold)),
    by = "threshold"
  ) %>%
  # -----------------------------------------------------------------
  # 2. Counts from df.NL700 (filtered to Eindhoven via the HHid range)
  # -----------------------------------------------------------------
  left_join(
    df.NL700 %>%
      subset(HHid > 253 & HHid < 303) %>%
      mutate(
        threshold = cut(
          income,
          breaks = c(-Inf, 800, 1400, 2000, Inf),
          labels = c("<800", "800-1400", "1400-2000", ">2000"),
          right = FALSE
        )
      ) %>%
      group_by(threshold) %>%
      summarise(NL700 = n(), .groups = "drop") %>%
      mutate(threshold = as.character(threshold)),
    by = "threshold"
  )



# Eindhoven table: per source, the percentage of households in each income
# bucket, plus a final row "N" with the column totals.
rbind(
  # This is the row with the total sum of every numeric column.
  df.eindhoven %>%
    summarise(across(where(is.numeric), ~ sum(.x, na.rm = TRUE))) %>%
    mutate(threshold = "N") %>%
    select(threshold, everything()),
  # Here the percentages of the column totals are calculated. Empty buckets
  # (NA after the joins) count as 0. Each column is divided by its own sum, so
  # this no longer depends on the number of columns matching the number of rows.
  df.eindhoven %>%
    mutate(across(where(is.numeric), ~ replace_na(.x, 0))) %>%
    mutate(across(
      where(is.numeric),
      ~ sprintf("%.2f%%", round(.x / sum(.x) * 100, 2))
    ))
) %>%
  mutate(threshold = factor(threshold, levels = c("<800", "800-1400", "1400-2000", ">2000", "N"))) %>%  # impose order
  arrange(threshold) %>%                                         # sort by factor
# , This is NL as a whole. We drop it because these are individuals, other datasets are households. 
# read.csv("data/NL_income_distribution_1935.csv", dec = ".") %>% slice(1) %>% 
#   mutate(X800.1400 = X800.AC0.1400,
#          X.1400.2000 = X1400.AC0.2000,
#          X.2000 = X2000.AC0.5000 + X5000.AC0.10000 + X10000.AC0.20000 + X.AD4.20000,
#          X.800 =  (X800.1400 + X.1400.2000 + X.2000)/(1464/3394)*(1-(1464/3394)), # The share of workers above the tax threshold of 800 is 1464/3394 in 1935. 
#          Total = (Total / 1000000)/(1464/3394)) %>% 
#   mutate(across(c(X.800, X800.1400, X.1400.2000, X.2000), ~ .x / (X.800 + X800.1400+ X.1400.2000 + X.2000))) %>%
#   select(X.800, X800.1400, X.1400.2000 , X.2000, Total) %>% t() %>% as.data.frame %>%
#   mutate(V1 = ifelse(row_number() <=4, percent(V1, accuracy = 0.01),round(V1,2))) %>% rename("NL (mln.)" = V1)
# select("threshold","NL (mln.)", everything()) 
  as_tibble() %>%
  rename("Household income" = threshold)
## # A tibble: 5 × 5
##   `Household income` `VJ (HH)` `VJ (singles)` NL598  NL700 
##   <fct>              <chr>     <chr>          <chr>  <chr> 
## 1 <800               10.27%    49.17%         0.00%  20.41%
## 2 800-1400           27.30%    35.68%         16.95% 75.51%
## 3 1400-2000          25.78%    9.44%          37.29% 4.08% 
## 4 >2000              36.65%    5.72%          45.76% 0.00% 
## 5 N                  22990     9600           59     49
rm(df.verwey)

Income distribution

Table income composition

# Income composition table.
# Stacks one row per household from every survey with six harmonised income
# components, turns the components into shares of the household's own total,
# and then averages those shares (and the total income) per group. The
# reported percentages are therefore means of household-level shares.
bind_rows(

  # ----- df.NL598 -------------------------------------------------
  df.NL598 %>% 
    mutate(
      work_class = recode(
        work_class,
        `1` = "NL 1936 (blue collar)",
        `2` = "NL 1936 (white collar)",
        `3` = "NL 1936 (blue collar)",   # code 3 → Blue collar
        `4` = "Farmers"                  # will be filtered out later
      ),
      wage               = A - A2 - A3 - A4 - A5,
      support            = A2,
      public_employment  = 0,
      production         = A4 + A5,
      gifts              = gifts,
      other              = 0
    ) %>% 
    select(wage, support, public_employment, production, gifts, other, work_class),

  # ----- df.NL700 -------------------------------------------------
  df.NL700 %>% 
    mutate(
      wage               = income.HH.head + other.HH.income,
      support            = support + fuel_subsidy + food_distr,
      production         = income.own.company,
      gifts              = gifts,
      other              = 0,
      public_employment  = public_employment,
      work_class         = "NL 1937 (unemployed)"
    ) %>% 
    select(wage, support, public_employment, production, gifts, other, work_class),

  # ----- df.A1934.unemployed (original block) --------------------
  df.A1934.unemployed %>% 
    mutate(
      wage               = HHincome,
      support            = social_security,
      public_employment  = 0,
      production         = 0,
      gifts              = gifts,
      other              = financial + other_income,
      work_class         = "A'dam 1934 (unemployed)"
    ) %>% 
    select(wage, support, public_employment, production, gifts, other, work_class),

  # ----- Utrecht 1936 (unemployed) – data.frame, double -------
  # No microdata here: a single row of hard-coded weekly amounts is
  # annualised and repeated 77 times.
  # The assignments inside the braces (wk_vals, utrecht_one) are ordinary
  # assignments, so both objects remain defined after this statement.
  {
    ## Weekly numbers multiplied by 52
    wk_vals <- c(
      wage               = 3.75 * 52,
      support            = 13.72 * 52,
      public_employment  = 0,
      production         = 0,               # not provided → assume 0
      gifts              = 0.32 * 52,
      other              = 0.14 * 52
    )

    utrecht_one <- data.frame(
      wage               = as.double(wk_vals["wage"]),
      support            = as.double(wk_vals["support"]),
      public_employment  = as.double(wk_vals["public_employment"]),
      production         = as.double(wk_vals["production"]),
      gifts              = as.double(wk_vals["gifts"]),
      other              = as.double(wk_vals["other"]),
      work_class         = "Utrecht 1936 (unemployed)",
      stringsAsFactors = FALSE
    )

    ## Replicate the row 77 times (still a data.frame)
    utrecht_one[rep(seq_len(nrow(utrecht_one)), each = 77), ]
  },

  # ----- df.DH1932 (new block) ----------------------------------
  df.DH1932 %>% 
    mutate(
      wage               = HHincome,                     # wages = HHincome
      support            = social_security,              # support = social_security
      public_employment  = 0,
      production         = 0,                            # not mentioned → 0
      gifts              = gifts,
      other              = financial + other + stocks,    # other = financial+other+stocks
      work_class         = "DH 1932 (unemployed)"        # label for ordering
    ) %>% 
    select(wage, support, public_employment, production, gifts, other, work_class)

) %>%                                             # end bind_rows
  filter(work_class != "Farmers") %>%             # drop unwanted class
  mutate(
    # total income is rebuilt from the six components, then each component
    # is expressed as a share of that total
    income = wage + support + public_employment + production + gifts + other,
    across(
      c(wage, support, public_employment, production, gifts, other),
      ~ .x / income
    )
  ) %>%                                           # component shares
  group_by(work_class) %>%
  summarise(
    n                 = n(),
    Avg_income        = mean(income,        na.rm = TRUE),
    avg_wage          = mean(wage,          na.rm = TRUE),
    avg_support       = mean(support,       na.rm = TRUE),
    avg_public_emp    = mean(public_employment, na.rm = TRUE),
    avg_production    = mean(production,    na.rm = TRUE),
    avg_gifts         = mean(gifts,         na.rm = TRUE),
    avg_other         = mean(other,         na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    N                 = round(n, 0),                # renamed from n
    Income            = round(Avg_income, 0),       # renamed from Avg_income

    # -----------------------------------------------------------------
    #  Capitalise all column names and add "(%)" suffix to the share cols
    # -----------------------------------------------------------------
    `Wage (%)`                = sprintf("%.1f%%", avg_wage      * 100),
    `Support (%)`             = sprintf("%.1f%%", avg_support   * 100),
    `Public employment (%)`   = sprintf("%.1f%%", avg_public_emp* 100),
    `Production (%)`          = sprintf("%.1f%%", avg_production* 100),
    `Gifts (%)`               = sprintf("%.1f%%", avg_gifts     * 100),
    `Other (%)`               = sprintf("%.1f%%", avg_other     * 100)
  ) %>%
  select(
    # Keep the newly‑named columns and drop the old interim ones
    work_class, N, Income,
    `Wage (%)`, `Support (%)`, `Public employment (%)`,
    `Production (%)`, `Gifts (%)`, `Other (%)`
  ) %>%

  # -----------------------------------------------------------------
  #  Row ordering: unemployed surveys from earliest to latest, then NL 1936
  #  Work_class is an extra factor column used only for sorting; the
  #  character column work_class stays in the table as well.
  # -----------------------------------------------------------------
  mutate(
    Work_class = factor(
      work_class,
      levels = c(
        "DH 1932 (unemployed)",          # earliest
        "A'dam 1934 (unemployed)",      # next
        "Utrecht 1936 (unemployed)",    # then Utrecht
        "NL 1937 (unemployed)",          # most recent
        "NL 1936 (white collar)",       # NL 1936 – white collar
        "NL 1936 (blue collar)"         # NL 1936 – blue collar
      )
    )
  ) %>%
  arrange(Work_class)                               # final ordered table
## # A tibble: 6 × 10
##   work_class            N Income `Wage (%)` `Support (%)` Public employment (%…¹
##   <chr>             <dbl>  <dbl> <chr>      <chr>         <chr>                 
## 1 DH 1932 (unemplo…    90   1104 5.1%       86.7%         0.0%                  
## 2 A'dam 1934 (unem…    78    978 12.1%      82.0%         0.0%                  
## 3 Utrecht 1936 (un…    77    932 20.9%      76.5%         0.0%                  
## 4 NL 1937 (unemplo…   700    816 13.6%      62.3%         18.9%                 
## 5 NL 1936 (white c…   202   3477 99.0%      0.1%          0.0%                  
## 6 NL 1936 (blue co…   323   1455 96.6%      1.0%          0.0%                  
## # ℹ abbreviated name: ¹​`Public employment (%)`
## # ℹ 4 more variables: `Production (%)` <chr>, `Gifts (%)` <chr>,
## #   `Other (%)` <chr>, Work_class <fct>

Total household income

# Boxplot of total household income, one box per group.
# Group codes on the x axis after the recodes below:
#   1 = blue collar (df.NL598 codes 1 and 3 pooled), 2 = white collar,
#   3 = national unemployed (df.NL700), 4 = Amsterdam unemployed.
# df.NL598 rows with work_class 4 (labelled "Farmers" elsewhere in this file)
# are dropped, so the codes 3 and 4 given to the unemployed samples do not
# collide with df.NL598 codes.
rbind(
  df.NL598 %>%
    select(income, work_class) %>%
    mutate(work_class = ifelse(work_class == 3, 1, work_class)) %>%
    subset(work_class != 4),
  df.NL700 %>%
    mutate(work_class = 3) %>%
    select(work_class, income),
  df.A1934.unemployed %>%
    mutate(work_class = 4) %>%
    select(work_class, income)
) %>%
  # The former `weight = weight` argument was passed to ggplot() outside aes(),
  # where it is not used, and the bound data carries no weight column, so it
  # has been removed. The boxplots are (and were) unweighted.
  ggplot(aes(y = income, x = factor(work_class))) +
  geom_boxplot() +
  ggtitle("Total household income") +
  xlab("") +
  ylab("Guilders") +
  ylim(0, 5000) + # values outside 0-5000 are not drawn (see caption)
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)) +
  scale_x_discrete(
    breaks = c(1, 2, 3, 4),
    labels = c("Blue collar", "White collar", "Unemployed NL", "Unemployed A.")
  ) +
  labs(caption = str_wrap(
    "For the national sample of employed total income includes wages, support, own production, gifts and income from renters, both in kind and financial flows. For the national sample of unemployed income includes support, income from public employment, fuel subsidies, wage income of the household head, own company and other family members and gifts. For the Amsterdam sample of unemployed income includes wage income, social security, financial income household income from other family members and gifts. Only incomes below 5000 guilders are shown.",
    width = 120
  ))

Total income household head

# Income of the household head, one box per group.
# For df.NL598 the head's income is imputed: `income` (A - A1 - A2) is shifted
# by 800 (the income tax threshold named in the caption) minus a quantile of
# `income` within the household's bracket of A.
# NOTE(review): the probabilities 1 - 88/167, 1 - 91/109, 1 - 89/103 and
# 1 - 51/60 presumably encode the share of taxed households per bracket --
# confirm against the source tables.
# NOTE(review): the bracket quantiles are computed over all df.NL598_temp rows
# in the bracket, including work_class 4 -- confirm this is intended.
df.NL598_temp <- df.NL598 %>%
  mutate(income = A - A1 - A2) # I assume implicit rent is taxed via income taxation.

rbind(
  # Adjust for the share of households taxed to account for household income
  # from wife/children. Rows with A >= 3000 and rows with work_class 4 keep
  # their income unchanged; work_class 4 is dropped before plotting.
  df.NL598_temp %>%
    mutate(
      income = case_when(
        A < 1400 & work_class != 4 ~
          income + 800 - quantile(
            (df.NL598_temp %>% subset(A < 1400))$income,
            probs = 1 - (88 / 167)
          ),
        A >= 1400 & A < 1800 & work_class != 4 ~
          income + 800 - quantile(
            (df.NL598_temp %>% subset(A >= 1400 & A < 1800))$income,
            probs = 1 - (91 / 109)
          ),
        A >= 1800 & A < 2300 & work_class != 4 ~
          income + 800 - quantile(
            (df.NL598_temp %>% subset(A >= 1800 & A < 2300))$income,
            probs = 1 - (89 / 103)
          ),
        A >= 2300 & A < 3000 & work_class != 4 ~
          income + 800 - quantile(
            (df.NL598_temp %>% subset(A >= 2300 & A < 3000))$income,
            probs = 1 - (51 / 60)
          ),
        A >= 3000 & work_class != 4 ~ income,
        work_class == 4 ~ income
      )
    ) %>%
    select(work_class, income) %>%
    mutate(work_class = ifelse(work_class == 3, 1, work_class)) %>%
    subset(work_class != 4),
  df.NL700 %>%
    mutate(
      income = public_employment + income.HH.head + income.own.company,
      work_class = 5
    ) %>%
    select(work_class, income),
  df.A1934.unemployed %>%
    mutate(income = Man, work_class = 6) %>%
    select(work_class, income)
) %>%
  # `weight = weight` used to be passed to ggplot() outside aes(), where it is
  # not used, and the bound data has no weight column; it has been removed.
  ggplot(aes(y = income, x = factor(work_class))) +
  geom_boxplot() +
  ggtitle("Income household head") +
  xlab("") +
  ylab("Guilders") +
  # ylim() was previously added twice with identical limits; once is enough
  # and avoids the "Scale for y is already present" message.
  ylim(0, 5000) +
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)) +
  # Only codes 1, 2, 5 and 6 remain in the data after the recodes above; the
  # labels for 3 and 4 are kept so the positional break/label pairing holds.
  scale_x_discrete(
    breaks = c(1, 2, 3, 4, 5, 6),
    labels = c("Blue collar", "White collar", "Agricultural workers", "Farmers", "Unemployed NL", "Unemployed A.")
  ) +
  labs(caption = str_wrap(
    "Household head income for the national survey of employed households is imputed using the share of household income for different income groups above 800 guilders, the income tax threshold. This allows for an estimation of income sources of other household members. For farmers total income is shown as no distinction between the household head and other family members is possible. For the national unemployed survey income of the household head includes income from wages of the household head and public employment and for the Amsterdam survey income from wages of the household head. Only incomes below 5000 guilders are shown",
    width = 115
  ))

rm(df.NL598_temp)

Total income wife/children

# Income of other family members (wife/children), one box per group.
# For df.NL598 this is the complement of the household-head imputation: within
# each bracket of A every household gets the same constant,
# -(800 - bracket quantile of A - A1 - A2). Households with A >= 3000 and
# work_class 4 are set to 0.
# NOTE(review): the probabilities 1 - 88/167 etc. presumably encode the share
# of taxed households per bracket -- confirm against the source tables.
df.NL598_temp <- df.NL598 %>%
  mutate(income = A - A1 - A2)

rbind(
  # Adjust for the share of households taxed to account for household income
  # from wife/children.
  df.NL598_temp %>%
    mutate(
      income = case_when(
        A < 1400 & work_class != 4 ~
          -(800 - quantile(
            (df.NL598_temp %>% subset(A < 1400))$income,
            probs = 1 - (88 / 167)
          )),
        A >= 1400 & A < 1800 & work_class != 4 ~
          -(800 - quantile(
            (df.NL598_temp %>% subset(A >= 1400 & A < 1800))$income,
            probs = 1 - (91 / 109)
          )),
        A >= 1800 & A < 2300 & work_class != 4 ~
          -(800 - quantile(
            (df.NL598_temp %>% subset(A >= 1800 & A < 2300))$income,
            probs = 1 - (89 / 103)
          )),
        A >= 2300 & A < 3000 & work_class != 4 ~
          -(800 - quantile(
            (df.NL598_temp %>% subset(A >= 2300 & A < 3000))$income,
            probs = 1 - (51 / 60)
          )),
        A >= 3000 & work_class != 4 ~ 0,
        work_class == 4 ~ 0
      )
    ) %>%
    select(work_class, income) %>%
    mutate(work_class = ifelse(work_class == 3, 1, work_class)) %>%
    subset(work_class != 4),
  df.NL700 %>%
    mutate(income = other.HH.income, work_class = 5) %>%
    select(work_class, income),
  df.A1934.unemployed %>%
    mutate(income = Vrouw + Kinderen, work_class = 6) %>%
    select(work_class, income)
) %>%
  # `weight = weight` used to be passed to ggplot() outside aes(), where it is
  # not used, and the bound data has no weight column; it has been removed.
  ggplot(aes(y = income, x = factor(work_class))) +
  geom_boxplot() +
  ggtitle("Total household income other family members") +
  xlab("") +
  ylab("Guilders") +
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)) +
  ylim(0, 5000) +
  # Only codes 1, 2, 5 and 6 remain in the data after the recodes above.
  scale_x_discrete(
    breaks = c(1, 2, 3, 4, 5, 6),
    labels = c("Blue collar", "White collar", "Agricultural workers", "Farmers", "Unemployed NL", "Unemployed A.")
  ) +
  labs(caption = str_wrap(
    "Income of other family members from the national survey of employed households is imputed using the share of household income for different income groups above 800 guilders, the income tax threshold. Farmer's income for other household members is set to 0 as no information was available to impute their respective income share. For the national unemployed survey and the Amsterdam unemployed survey wage income of women and children are reported seperately. Only incomes below 5000 guilders are shown",
    width = 115
  ))

rm(df.NL598_temp)

Other income sources

# Other income sources, one box per group:
#   df.NL598            -> A1 + A2
#   df.NL700            -> fuel_subsidy + food_distr + support
#   df.A1934.unemployed -> gifts + social_security + financial
# df.NL598 code 3 is pooled with code 1 and code 4 is dropped, so only codes
# 1, 2, 5 and 6 reach the plot.
rbind(
  df.NL598 %>%
    mutate(income = A1 + A2) %>%
    select(work_class, income) %>%
    mutate(work_class = ifelse(work_class == 3, 1, work_class)) %>%
    subset(work_class != 4),
  df.NL700 %>%
    mutate(income = fuel_subsidy + food_distr + support, work_class = 5) %>%
    select(work_class, income),
  df.A1934.unemployed %>%
    mutate(income = gifts + social_security + financial, work_class = 6) %>%
    select(work_class, income)
) %>%
  # `weight = weight` used to be passed to ggplot() outside aes(), where it is
  # not used, and the bound data has no weight column; it has been removed.
  ggplot(aes(y = income, x = factor(work_class))) +
  geom_boxplot() +
  ggtitle("Other income sources") +
  xlab("") +
  ylab("Guilders") +
  # ylim() was previously added twice with identical limits; once is enough
  # and avoids the "Scale for y is already present" message.
  ylim(0, 5000) +
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)) +
  scale_x_discrete(
    breaks = c(1, 2, 3, 4, 5, 6),
    labels = c("Blue collar", "White collar", "Agricultural workers", "Farmers", "Unemployed NL", "Unemployed A.")
  ) +
  labs(caption = str_wrap(
    "For the national survey of employed households, other income sources include in kind income, including, wages, support, own production and gifts. For the national unemployed survey other income sources are fuel subsidies, food distribution and support. For the Amsterdam survey other income sources are gifts, social security and financial income.  Only incomes below 5000 guilders are shown",
    width = 115
  ))

Share of contribution household head

Total income household head

# Share of household income contributed by the household head, per group.
# For df.NL598 the imputed head income (A - A1 - A2, shifted by 800 minus a
# within-bracket quantile) is divided by wage + support + production; for the
# two unemployed samples the head's income is divided by the sample's `income`.
# NOTE(review): the probabilities 1 - 88/167 etc. presumably encode the share
# of taxed households per bracket -- confirm against the source tables.
# NOTE(review): the plot title and caption below still describe guilder
# amounts ("Only incomes below 5000 guilders are shown") although this figure
# shows shares limited to 0-1 -- consider rewording.
df.NL598_temp <- df.NL598 %>%
  mutate(income = A - A1 - A2) # I assume implicit rent is taxed via income taxation.

rbind(
  # Adjust for the share of households taxed to account for household income
  # from wife/children. Rows with work_class 4 evaluate to income / income and
  # are dropped before plotting.
  df.NL598_temp %>%
    mutate(
      income = case_when(
        A < 1400 & work_class != 4 ~
          (income + 800 - quantile(
            (df.NL598_temp %>% subset(A < 1400))$income,
            probs = 1 - (88 / 167)
          )) / (wage + support + production),
        A >= 1400 & A < 1800 & work_class != 4 ~
          (income + 800 - quantile(
            (df.NL598_temp %>% subset(A >= 1400 & A < 1800))$income,
            probs = 1 - (91 / 109)
          )) / (wage + support + production),
        A >= 1800 & A < 2300 & work_class != 4 ~
          (income + 800 - quantile(
            (df.NL598_temp %>% subset(A >= 1800 & A < 2300))$income,
            probs = 1 - (89 / 103)
          )) / (wage + support + production),
        A >= 2300 & A < 3000 & work_class != 4 ~
          (income + 800 - quantile(
            (df.NL598_temp %>% subset(A >= 2300 & A < 3000))$income,
            probs = 1 - (51 / 60)
          )) / (wage + support + production),
        A >= 3000 & work_class != 4 ~ income / (wage + support + production),
        work_class == 4 ~ income / income
      )
    ) %>%
    select(work_class, income) %>%
    mutate(work_class = ifelse(work_class == 3, 1, work_class)) %>%
    subset(work_class != 4),
  df.NL700 %>%
    mutate(
      income = (public_employment + income.HH.head + income.own.company) / income,
      work_class = 5
    ) %>%
    select(work_class, income),
  df.A1934.unemployed %>%
    mutate(income = Man / income, work_class = 6) %>%
    select(work_class, income)
) %>%
  # `weight = weight` used to be passed to ggplot() outside aes(), where it is
  # not used, and the bound data has no weight column; it has been removed.
  ggplot(aes(y = income, x = factor(work_class))) +
  geom_boxplot() +
  ggtitle("Income household head") +
  xlab("") +
  # The plotted quantity is a ratio, so the axis is labelled as a share
  # (it was previously labelled "Guilders").
  ylab("Share of income") +
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)) +
  # A preceding ylim(0, 5000) was immediately replaced by this scale and only
  # produced a "Scale for y is already present" message; it has been removed.
  ylim(0, 1) +
  scale_x_discrete(
    breaks = c(1, 2, 3, 4, 5, 6),
    labels = c("Blue collar", "White collar", "Agricultural workers", "Farmers", "Unemployed NL", "Unemployed A.")
  ) +
  labs(caption = str_wrap(
    "Household head income for the national survey of employed households is imputed using the share of household income for different income groups above 800 guilders, the income tax threshold. This allows for an estimation of income sources of other household members. For farmers total income is shown as no distinction between the household head and other family members is possible. For the national unemployed survey income of the household head includes income from wages of the household head and public employment and for the Amsterdam survey income from wages of the household head. Only incomes below 5000 guilders are shown",
    width = 115
  ))

rm(df.NL598_temp)

Share of contribution wife/children.

# Share of household income contributed by other family members
# (wife/children), per group.
# For df.NL598 the imputed amount -(800 - bracket quantile of A - A1 - A2) is
# divided by wage + support + production; households with A >= 3000 and
# work_class 4 are set to 0. For the unemployed samples the reported income of
# other members is divided by the sample's `income`.
# NOTE(review): the probabilities 1 - 88/167 etc. presumably encode the share
# of taxed households per bracket -- confirm against the source tables.
# NOTE(review): the caption still says "Only incomes below 5000 guilders are
# shown" although no y limit is applied here -- consider rewording.
df.NL598_temp <- df.NL598 %>%
  mutate(income = A - A1 - A2)

rbind(
  # Adjust for the share of households taxed to account for household income
  # from wife/children.
  df.NL598_temp %>%
    mutate(
      income = case_when(
        A < 1400 & work_class != 4 ~
          -(800 - quantile(
            (df.NL598_temp %>% subset(A < 1400))$income,
            probs = 1 - (88 / 167)
          )) / (wage + support + production),
        A >= 1400 & A < 1800 & work_class != 4 ~
          -(800 - quantile(
            (df.NL598_temp %>% subset(A >= 1400 & A < 1800))$income,
            probs = 1 - (91 / 109)
          )) / (wage + support + production),
        A >= 1800 & A < 2300 & work_class != 4 ~
          -(800 - quantile(
            (df.NL598_temp %>% subset(A >= 1800 & A < 2300))$income,
            probs = 1 - (89 / 103)
          )) / (wage + support + production),
        A >= 2300 & A < 3000 & work_class != 4 ~
          -(800 - quantile(
            (df.NL598_temp %>% subset(A >= 2300 & A < 3000))$income,
            probs = 1 - (51 / 60)
          )) / (wage + support + production),
        A >= 3000 & work_class != 4 ~ 0,
        work_class == 4 ~ 0
      )
    ) %>%
    select(work_class, income) %>%
    mutate(work_class = ifelse(work_class == 3, 1, work_class)) %>%
    subset(work_class != 4),
  df.NL700 %>%
    mutate(income = other.HH.income / income, work_class = 5) %>%
    select(work_class, income),
  df.A1934.unemployed %>%
    mutate(income = (Vrouw + Kinderen) / income, work_class = 6) %>%
    select(work_class, income)
) %>%
  # `weight = weight` used to be passed to ggplot() outside aes(), where it is
  # not used, and the bound data has no weight column; it has been removed.
  ggplot(aes(y = income, x = factor(work_class))) +
  geom_boxplot() +
  ggtitle("Share household income other family members") +
  xlab("") +
  # The plotted quantity is a ratio, so the axis is labelled as a share
  # (it was previously labelled "Guilders").
  ylab("Share of income") +
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)) +
  scale_x_discrete(
    breaks = c(1, 2, 3, 4, 5, 6),
    labels = c("Blue collar", "White collar", "Agricultural workers", "Farmers", "Unemployed NL", "Unemployed A.")
  ) +
  labs(caption = str_wrap(
    "Income of other family members from the national survey of employed households is imputed using the share of household income for different income groups above 800 guilders, the income tax threshold. Farmer's income for other household members is set to 0 as no information was available to impute their respective income share. For the national unemployed survey and the Amsterdam unemployed survey wage income of women and children are reported seperately. Only incomes below 5000 guilders are shown",
    width = 115
  ))

rm(df.NL598_temp)

Income

For this analysis I use data from the survey of 598 employed households, split into four groups (blue collar, white collar, farm workers and farmers), and from the survey of 700 (long-term) unemployed households. These are the samples for which we have adequate information on the composition of income.

wages, own production & public support

Here I look at wages, own production and public support.

# Wage share of income (wage / income) per group.
# For df.NL598, income is redefined as wage + support + production, code 3 is
# pooled with code 1 and code 4 is dropped; df.NL700 (production set to 0)
# enters as group 3 with its own `income` column.
rbind(
  df.NL598 %>%
    mutate(income = (wage + support + production)) %>%
    select(wage, production, support, work_class, income) %>%
    mutate(work_class = ifelse(work_class == 3, 1, work_class)) %>%
    subset(work_class != 4),
  df.NL700 %>%
    mutate(production = 0, work_class = 3) %>%
    select(wage, support, production, work_class, income)
) %>%
  # `weight = weight` used to be passed to ggplot() outside aes(), where it is
  # not used, and the bound data has no weight column; it has been removed.
  ggplot(aes(y = wage / income, x = factor(work_class))) +
  geom_boxplot() +
  ggtitle("Wages") +
  xlab("") +
  ylab("Share of income") +
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)) +
  scale_x_discrete(
    breaks = c(1, 2, 3),
    labels = c("Blue collar", "White collar", "Unemployed")
  )

# Own-production share of income (production / income) per group.
# For df.NL598, income is redefined as wage + support + production, code 3 is
# pooled with code 1 and code 4 is dropped; df.NL700 enters as group 3 with
# production set to 0, so its share is 0 wherever income is non-zero.
rbind(
  df.NL598 %>%
    mutate(income = (wage + support + production)) %>%
    select(wage, production, support, work_class, income) %>%
    mutate(work_class = ifelse(work_class == 3, 1, work_class)) %>%
    subset(work_class != 4),
  df.NL700 %>%
    mutate(production = 0, work_class = 3) %>%
    select(wage, support, production, work_class, income)
) %>%
  # `weight = weight` used to be passed to ggplot() outside aes(), where it is
  # not used, and the bound data has no weight column; it has been removed.
  ggplot(aes(y = production / income, x = factor(work_class))) +
  geom_boxplot() +
  ggtitle("Own production") +
  xlab("") +
  ylab("Share of income") +
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)) +
  scale_x_discrete(
    breaks = c(1, 2, 3),
    labels = c("Blue collar", "White collar", "Unemployed")
  )

# Support share of income (support / income) per group.
# For df.NL598, income is redefined as wage + support + production, code 3 is
# pooled with code 1 and code 4 is dropped; df.NL700 (production set to 0)
# enters as group 3 with its own `income` column.
rbind(
  df.NL598 %>%
    mutate(income = (wage + support + production)) %>%
    select(wage, production, support, work_class, income) %>%
    mutate(work_class = ifelse(work_class == 3, 1, work_class)) %>%
    subset(work_class != 4),
  df.NL700 %>%
    mutate(production = 0, work_class = 3) %>%
    select(wage, support, production, work_class, income)
) %>%
  # `weight = weight` used to be passed to ggplot() outside aes(), where it is
  # not used, and the bound data has no weight column; it has been removed.
  ggplot(aes(y = support / income, x = factor(work_class))) +
  geom_boxplot() +
  ggtitle("Support") +
  xlab("") +
  ylab("Share of income") +
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)) +
  scale_x_discrete(
    breaks = c(1, 2, 3),
    labels = c("Blue collar", "White collar", "Unemployed")
  )

Consumption

For this analysis we focus on the Amsterdam surveys because they are more comparable over time. We assume that we are comparing broadly similar groups over time.

Main table consumption

library(dplyr)
library(tidyr)   # for uncount()

# Main consumption table: per group, the number of rows, mean total
# consumption, and the mean household-level share of food, shelter, clothing
# and other in consumption (mean of per-row shares, not share of the means).
bind_rows(
  # National employed survey: numeric work_class codes become labels;
  # code 3 is pooled with blue collar, code 4 is labelled for removal below.
  df.NL598 %>%
    select(food, shelter, clothing, other, work_class) %>%
    mutate(
      work_class = recode(
        work_class,
        `1` = "NL 1936 (blue collar)",
        `2` = "NL 1936 (white collar)",
        `3` = "NL 1936 (blue collar)",
        `4` = "Farmers"
      )
    ),

  # National unemployed survey
  df.NL700 %>%
    select(food, shelter, clothing, other) %>%
    mutate(work_class = "NL 1937 (unemployed)"),

  # Amsterdam employed survey: each row is repeated `weight` times so that
  # the unweighted means below act as weighted means. Rows whose `work` is
  # neither "arbeider" nor "ambtenaar" get NA and fall out at the filter.
  df.A1934.employed %>%
    select(food, shelter, clothing, other, work, weight) %>%
    uncount(weights = weight, .remove = FALSE) %>%
    mutate(
      work_class = case_when(
        work == "arbeider" ~ "A'dam 1934 (blue collar)",
        work == "ambtenaar" ~ "A'dam 1934 (white collar)",
        TRUE ~ NA_character_
      )
    ) %>%
    select(-work, -weight),

  # Amsterdam unemployed survey
  df.A1934.unemployed %>%
    select(food, shelter, clothing, other) %>%
    mutate(work_class = "A'dam 1934 (unemployed)")
) %>%
  # Keeps only rows where the comparison is TRUE, so NA labels go as well.
  filter(work_class != "Farmers") %>%
  # Total consumption first, then each category as a share of that total.
  mutate(
    consumption = food + shelter + clothing + other,
    across(c(food, shelter, clothing, other), ~ .x / consumption)
  ) %>%
  group_by(work_class) %>%
  summarise(
    n = n(),
    Total_consumption = mean(consumption, na.rm = TRUE),
    avg_food = mean(food, na.rm = TRUE),
    avg_shelter = mean(shelter, na.rm = TRUE),
    avg_clothing = mean(clothing, na.rm = TRUE),
    avg_other = mean(other, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Presentation: rounded counts/totals and shares as "xx.x%" strings.
  mutate(
    n = round(n, 0),
    Total_consumption = round(Total_consumption, 0),
    `share of food` = sprintf("%.1f%%", avg_food * 100),
    `share of shelter` = sprintf("%.1f%%", avg_shelter * 100),
    `share of clothing` = sprintf("%.1f%%", avg_clothing * 100),
    `share of other` = sprintf("%.1f%%", avg_other * 100)
  ) %>%
  select(-c(avg_food, avg_shelter, avg_clothing, avg_other)) %>%
  # Row order: the two unemployed samples first (NL, then A'dam), then the
  # NL employed (white, blue), then the A'dam employed (white, blue).
  mutate(
    work_class = factor(
      work_class,
      levels = c(
        "NL 1937 (unemployed)",
        "A'dam 1934 (unemployed)",
        "NL 1936 (white collar)",
        "NL 1936 (blue collar)",
        "A'dam 1934 (white collar)",
        "A'dam 1934 (blue collar)"
      )
    )
  ) %>%
  arrange(work_class)
## # A tibble: 6 × 7
##   work_class              n Total_consumption `share of food` `share of shelter`
##   <fct>               <dbl>             <dbl> <chr>           <chr>             
## 1 NL 1937 (unemploye…   700               892 49.9%           27.9%             
## 2 A'dam 1934 (unempl…    78              1004 40.3%           42.2%             
## 3 NL 1936 (white col…   202              3365 24.3%           23.0%             
## 4 NL 1936 (blue coll…   323              1469 41.1%           24.6%             
## 5 A'dam 1934 (white …   218              4537 20.8%           17.0%             
## 6 A'dam 1934 (blue c…   150              1895 34.8%           21.8%             
## # ℹ 2 more variables: `share of clothing` <chr>, `share of other` <chr>

Food share Amsterdam

# Share of food in total consumption in Amsterdam: three panels side by side
# (blue collar, white collar, unemployed). The employed panels compare the
# 1934 survey with the 1923-24 survey; the unemployed panel has 1934 only.
# NOTE(review): `weight = weight` is passed to ggplot() outside aes(), so it is
# not mapped as an aesthetic and the boxplots appear to be unweighted even
# though a weight column is prepared -- confirm whether weighting was intended.
ggarrange(
  # Blue collar: 1934 "arbeider" rows (excluding the "Totaal" income group)
  # and 1923-24 rows with HISCLASS > 5.
  rbind(
    df.A1934.employed %>%
      subset(work == "arbeider" & income.group != "Totaal") %>%
      select(food, weight, consumption) %>%
      mutate(data = "A 1934"),
    df.A1923 %>%
      subset(HISCLASS > 5) %>%
      select(food, consumption) %>%
      mutate(data = "A 1923-24", weight = 1)
  ) %>%
    ggplot(aes(y = food / consumption, x = factor(data)), weight = weight) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("Blue collar") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),

  # White collar: 1934 "ambtenaar" rows (excluding the "Totaal" income group)
  # and 1923-24 rows with HISCLASS < 5.
  rbind(
    df.A1934.employed %>%
      subset(work == "ambtenaar" & income.group != "Totaal") %>%
      select(food, weight, consumption) %>%
      mutate(data = "A 1934"),
    df.A1923 %>%
      subset(HISCLASS < 5) %>%
      select(food, consumption) %>%
      mutate(data = "A 1923-24", weight = 1)
  ) %>%
    ggplot(aes(y = food / consumption, x = factor(data)), weight = weight) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("White collar") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),

  # Unemployed: 1934 survey only.
  df.A1934.unemployed %>%
    select(food, consumption) %>%
    mutate(data = "A 1934") %>%
    ggplot(aes(y = food / consumption, x = factor(data))) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("Unemployed") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),

  ncol = 3
) %>%
  annotate_figure(
    top = text_grob(
      "Share of food expenditure in Amsterdam",
      face = "bold",
      size = 14
    )
  )

Housing share Amsterdam

# Share of shelter in total consumption in Amsterdam: three panels side by
# side (blue collar, white collar, unemployed). The employed panels compare
# the 1934 survey with the 1923-24 survey; the unemployed panel has 1934 only.
# NOTE(review): `weight = weight` is passed to ggplot() outside aes(), so it is
# not mapped as an aesthetic and the boxplots appear to be unweighted even
# though a weight column is prepared -- confirm whether weighting was intended.
ggarrange(
  # Blue collar: 1934 "arbeider" rows (excluding the "Totaal" income group)
  # and 1923-24 rows with HISCLASS > 5.
  rbind(
    df.A1934.employed %>%
      subset(work == "arbeider" & income.group != "Totaal") %>%
      select(shelter, weight, consumption) %>%
      mutate(data = "A 1934"),
    df.A1923 %>%
      subset(HISCLASS > 5) %>%
      select(shelter, consumption) %>%
      mutate(data = "A 1923-24", weight = 1)
  ) %>%
    ggplot(aes(y = shelter / consumption, x = factor(data)), weight = weight) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("Blue collar") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),

  # White collar: 1934 "ambtenaar" rows (excluding the "Totaal" income group)
  # and 1923-24 rows with HISCLASS < 5.
  rbind(
    df.A1934.employed %>%
      subset(work == "ambtenaar" & income.group != "Totaal") %>%
      select(shelter, weight, consumption) %>%
      mutate(data = "A 1934"),
    df.A1923 %>%
      subset(HISCLASS < 5) %>%
      select(shelter, consumption) %>%
      mutate(data = "A 1923-24", weight = 1)
  ) %>%
    ggplot(aes(y = shelter / consumption, x = factor(data)), weight = weight) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("White collar") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),

  # Unemployed: 1934 survey only.
  df.A1934.unemployed %>%
    select(shelter, consumption) %>%
    mutate(data = "A 1934") %>%
    ggplot(aes(y = shelter / consumption, x = factor(data))) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("Unemployed") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),

  ncol = 3
) %>%
  annotate_figure(
    top = text_grob(
      "Share of housing expenditure in Amsterdam",
      face = "bold",
      size = 14
    )
  )

Clothing share Amsterdam

# Share of clothing in total consumption in Amsterdam: three panels side by
# side (blue collar, white collar, unemployed). The employed panels compare
# the 1934 survey with the 1923-24 survey; the unemployed panel has 1934 only.
# NOTE(review): `weight = weight` is passed to ggplot() outside aes(), so it is
# not mapped as an aesthetic and the boxplots appear to be unweighted even
# though a weight column is prepared -- confirm whether weighting was intended.
ggarrange(
  # Blue collar: 1934 "arbeider" rows (excluding the "Totaal" income group)
  # and 1923-24 rows with HISCLASS > 5.
  rbind(
    df.A1934.employed %>%
      subset(work == "arbeider" & income.group != "Totaal") %>%
      select(clothing, weight, consumption) %>%
      mutate(data = "A 1934"),
    df.A1923 %>%
      subset(HISCLASS > 5) %>%
      select(clothing, consumption) %>%
      mutate(data = "A 1923-24", weight = 1)
  ) %>%
    ggplot(aes(y = clothing / consumption, x = factor(data)), weight = weight) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("Blue collar") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),

  # White collar: 1934 "ambtenaar" rows (excluding the "Totaal" income group)
  # and 1923-24 rows with HISCLASS < 5.
  rbind(
    df.A1934.employed %>%
      subset(work == "ambtenaar" & income.group != "Totaal") %>%
      select(clothing, weight, consumption) %>%
      mutate(data = "A 1934"),
    df.A1923 %>%
      subset(HISCLASS < 5) %>%
      select(clothing, consumption) %>%
      mutate(data = "A 1923-24", weight = 1)
  ) %>%
    ggplot(aes(y = clothing / consumption, x = factor(data)), weight = weight) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("White collar") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),

  # Unemployed: 1934 survey only.
  df.A1934.unemployed %>%
    select(clothing, consumption) %>%
    mutate(data = "A 1934") %>%
    ggplot(aes(y = clothing / consumption, x = factor(data))) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("Unemployed") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),

  ncol = 3
) %>%
  annotate_figure(
    top = text_grob(
      "Share of clothing expenditure in Amsterdam",
      face = "bold",
      size = 14
    )
  )

Primary expenditure share Amsterdam

# Share of primary expenditure (food + shelter + clothing) in total
# consumption in Amsterdam: three panels side by side (blue collar, white
# collar, unemployed). The employed panels compare the 1934 survey with the
# 1923-24 survey; the unemployed panel has 1934 only.
# NOTE(review): `weight = weight` is passed to ggplot() outside aes(), so it is
# not mapped as an aesthetic and the boxplots appear to be unweighted even
# though a weight column is prepared -- confirm whether weighting was intended.
ggarrange(
  # Blue collar: 1934 "arbeider" rows (excluding the "Totaal" income group)
  # and 1923-24 rows with HISCLASS > 5.
  rbind(
    df.A1934.employed %>%
      subset(work == "arbeider" & income.group != "Totaal") %>%
      mutate(primary = food + shelter + clothing) %>%
      select(primary, weight, consumption) %>%
      mutate(data = "A 1934"),
    df.A1923 %>%
      subset(HISCLASS > 5) %>%
      mutate(primary = food + shelter + clothing) %>%
      select(primary, consumption) %>%
      mutate(data = "A 1923-24", weight = 1)
  ) %>%
    ggplot(aes(y = primary / consumption, x = factor(data)), weight = weight) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("Blue collar") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),

  # White collar: 1934 "ambtenaar" rows (excluding the "Totaal" income group)
  # and 1923-24 rows with HISCLASS < 5.
  rbind(
    df.A1934.employed %>%
      subset(work == "ambtenaar" & income.group != "Totaal") %>%
      mutate(primary = food + shelter + clothing) %>%
      select(primary, weight, consumption) %>%
      mutate(data = "A 1934"),
    df.A1923 %>%
      subset(HISCLASS < 5) %>%
      mutate(primary = food + shelter + clothing) %>%
      select(primary, consumption) %>%
      mutate(data = "A 1923-24", weight = 1)
  ) %>%
    ggplot(aes(y = primary / consumption, x = factor(data)), weight = weight) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("White collar") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),

  # Unemployed: 1934 survey only.
  df.A1934.unemployed %>%
    mutate(primary = food + shelter + clothing) %>%
    select(primary, consumption) %>%
    mutate(data = "A 1934") %>%
    ggplot(aes(y = primary / consumption, x = factor(data))) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_boxplot() +
    ggtitle("Unemployed") +
    xlab("") +
    ylab("Share of expenditure") +
    theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1)),

  ncol = 3
) %>%
  annotate_figure(
    top = text_grob(
      "Share of primary expenditure in Amsterdam",
      face = "bold",
      size = 14
    )
  )

Financial margin (income - primary expenditure)

# --------------------------------------------------------------
# 1️⃣  Load required packages
# --------------------------------------------------------------
library(dplyr)
library(tidyr)   # only needed for `uncount()` elsewhere

# --------------------------------------------------------------
# 2️⃣  ONE‑PIPE PIPELINE (order: white, blue, Amsterdam, NL)
# --------------------------------------------------------------
# Financial margin (FM) per group: total income minus primary expenditure
# (food + shelter + clothing), summarised as N, minimum, quartiles and maximum.
# Each source is first mapped onto the same six income components
# (wage, support, public_employment, production, gifts, other); `gifts` is
# taken from each source as is.
bind_rows(
  # National employed survey: numeric work_class codes become labels; code 3
  # is pooled with blue collar, code 4 is labelled so it can be removed below.
  df.NL598 %>%
    mutate(
      work_class = recode(
        work_class,
        `1` = "NL 1936 (blue collar)",
        `2` = "NL 1936 (white collar)",
        `3` = "NL 1936 (blue collar)",
        `4` = "Farmers"
      ),
      wage = A - A2 - A3 - A4 - A5,
      support = A2,
      public_employment = 0,
      production = A4 + A5,
      other = 0
    ) %>%
    select(
      wage, support, public_employment, production, gifts, other,
      food, shelter, clothing, work_class
    ),

  # National unemployed survey: `public_employment` is used as reported.
  df.NL700 %>%
    mutate(
      wage = income.HH.head + other.HH.income,
      support = support + fuel_subsidy + food_distr,
      production = income.own.company,
      other = 0,
      work_class = "NL 1937 (unemployed)"
    ) %>%
    select(
      wage, support, public_employment, production, gifts, other,
      food, shelter, clothing, work_class
    ),

  # Amsterdam unemployed survey
  df.A1934.unemployed %>%
    mutate(
      wage = HHincome,
      support = social_security,
      public_employment = 0,
      production = 0,
      other = financial + other_income,
      work_class = "A'dam 1934 (unemployed)"
    ) %>%
    select(
      wage, support, public_employment, production, gifts, other,
      food, shelter, clothing, work_class
    )
) %>%
  filter(work_class != "Farmers") %>%
  mutate(
    # Factor levels fix the row order of the summary table.
    work_class = factor(
      work_class,
      levels = c(
        "NL 1936 (white collar)",
        "NL 1936 (blue collar)",
        "A'dam 1934 (unemployed)",
        "NL 1937 (unemployed)"
      )
    ),
    # Total income is the sum of the six harmonised components.
    income = wage + support + public_employment + production + gifts + other,
    FM = income - (food + shelter + clothing)
  ) %>%
  group_by(work_class) %>%
  summarise(
    N = n(),
    Min = min(FM, na.rm = TRUE),
    Q25 = quantile(FM, probs = 0.25, na.rm = TRUE),
    Q50 = quantile(FM, probs = 0.50, na.rm = TRUE),
    Q75 = quantile(FM, probs = 0.75, na.rm = TRUE),
    Max = max(FM, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Presentation: integer counts, statistics rounded to two decimals.
  mutate(
    N = as.integer(N),
    across(c(Min, Q25, Q50, Q75, Max), ~ round(.x, 2))
  ) %>%
  print()
## # A tibble: 4 × 7
##   work_class                  N     Min    Q25    Q50   Q75    Max
##   <fct>                   <int>   <dbl>  <dbl>  <dbl> <dbl>  <dbl>
## 1 NL 1936 (white collar)    202   -6.97 678.   1075.  2135. 13650.
## 2 NL 1936 (blue collar)     323 -294.   185.    330.   494.  1680.
## 3 A'dam 1934 (unemployed)    78 -116.    71.7    96.3  147.   291.
## 4 NL 1937 (unemployed)      700 -375.     3.64   70.2  121.   630.

Credit

For the survey of 598 households we can distinguish between savings inflows/outflows and credit inflows/outflows. For the survey of 700 households we cannot: there we only observe a positive or negative difference between income and consumption, which we take to be an increase in savings or an increase in credit, respectively.

A positive savings value denotes an inflow into the savings account. A positive credit value denotes an increase in outstanding credit. If savings - credit is positive, household net worth increases.

library(dplyr)

# Credit table: per survey group, the share of households with a non-zero
# credit flow and the mean credit flow relative to consumption, for the two
# credit types labelled "Cash" (C1 - F1) and "Installment" (C2 - F2).
df.NL598 %>%
  # Keep only what the table needs and label the occupational codes.
  transmute(
    work_class = recode(
      work_class,
      `1` = "NL 1936 (blue collar)",
      `2` = "NL 1936 (white collar)",
      `3` = "NL 1936 (blue collar)",   # code 3 is pooled with blue collar
      `4` = "Farmers"                  # removed in the next step
    ),
    credit1 = C1 - F1,
    credit2 = C2 - F2,
    consumption
  ) %>%
  filter(work_class != "Farmers") %>%
  # Append the unemployed survey. Its single `credit` figure is placed in the
  # credit1 column; it supplies no credit2, so those statistics come out as NaN.
  bind_rows(
    df.NL700 %>%
      transmute(
        work_class = "NL 1937 (unemployed)",
        credit1 = credit,
        consumption
      )
  ) %>%
  # Group in display order: white collar, blue collar, unemployed.
  group_by(
    Survey = factor(
      work_class,
      levels = c(
        "NL 1936 (white collar)",
        "NL 1936 (blue collar)",
        "NL 1937 (unemployed)"
      )
    )
  ) %>%
  # All statistics are expressed in percent.
  summarise(
    `Cash != 0`               = 100 * mean(credit1 != 0, na.rm = TRUE),
    `Cash/consumption`        = 100 * mean(credit1 / consumption, na.rm = TRUE),
    `Installment!=0`          = 100 * mean(credit2 != 0, na.rm = TRUE),
    `Installment/consumption` = 100 * mean(credit2 / consumption, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Round to two decimals and append a percent sign to every statistic.
  mutate(across(-Survey, ~ paste0(round(.x, 2), "%"))) %>%
  print()
## # A tibble: 3 × 5
##   Survey  `Cash != 0` `Cash/consumption` `Installment!=0` Installment/consumpt…¹
##   <fct>   <chr>       <chr>              <chr>            <chr>                 
## 1 NL 193… 21.29%      -0.07%             89.6%            0.87%                 
## 2 NL 193… 23.84%      -0.49%             72.76%           0.12%                 
## 3 NL 193… 76.14%      8.92%              NaN%             NaN%                  
## # ℹ abbreviated name: ¹​`Installment/consumption`
# Rounding check on the 598 survey: among the non-zero entries of C and F
# (labelled "Krediet_up" and "Krediet_down" below), count how many are whole
# numbers and how many are not. Zeros are turned into NA so they are not
# counted in either category.
df.NL598 %>%
  mutate(
    C = na_if(C, 0),
    F = na_if(F, 0)
  ) %>%
  summarise(
    Krediet_up_round      = sum(C == round(C), na.rm = TRUE),
    Krediet_down_round    = sum(F == round(F), na.rm = TRUE),
    Krediet_up_notround   = sum(C != round(C), na.rm = TRUE),
    Krediet_down_notround = sum(F != round(F), na.rm = TRUE)
  )
##   Krediet_up_round Krediet_down_round Krediet_up_notround Krediet_down_notround
## 1               46                 65                 265                   174
# Same rounding check for the 700 survey: among the non-zero `credit` values,
# count whole-number versus fractional amounts. Zero and missing values are
# dropped up front, so they fall into neither count.
df.NL700 %>%
  filter(credit != 0) %>%
  summarise(
    Krediet_up_rounded    = sum(credit == round(credit), na.rm = TRUE),
    Krediet_up_notrounded = sum(credit != round(credit), na.rm = TRUE)
  )
##   Krediet_up_rounded Krediet_up_notrounded
## 1                 10                   523

Savings

library(dplyr)

# Savings table: per survey group, the share of households with positive and
# with negative savings, and the mean ratio of savings to consumption.
df.NL598 %>%
  # Keep only what the table needs and label the occupational codes.
  transmute(
    work_class = recode(
      work_class,
      `1` = "NL 1936 (blue collar)",
      `2` = "NL 1936 (white collar)",
      `3` = "NL 1936 (blue collar)",   # code 3 is pooled with blue collar
      `4` = "Farmers"                  # removed in the next step
    ),
    savings,
    consumption
  ) %>%
  filter(work_class != "Farmers") %>%
  # Append the unemployed survey as its own group.
  bind_rows(
    df.NL700 %>%
      transmute(
        work_class = "NL 1937 (unemployed)",
        savings,
        consumption
      )
  ) %>%
  # Group in display order: white collar, blue collar, unemployed.
  group_by(
    Survey = factor(
      work_class,
      levels = c(
        "NL 1936 (white collar)",
        "NL 1936 (blue collar)",
        "NL 1937 (unemployed)"
      )
    )
  ) %>%
  # All statistics are expressed in percent.
  summarise(
    `Savings > 0` = 100 * mean(savings > 0, na.rm = TRUE),
    `Savings < 0` = 100 * mean(savings < 0, na.rm = TRUE),
    `Avg Savings / Consumption (%)` =
      100 * mean(savings / consumption, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Round to two decimals and append a percent sign to every statistic.
  mutate(across(-Survey, ~ paste0(round(.x, 2), "%"))) %>%
  print()
## # A tibble: 3 × 4
##   Survey                 `Savings > 0` `Savings < 0` Avg Savings / Consumption…¹
##   <fct>                  <chr>         <chr>         <chr>                      
## 1 NL 1936 (white collar) 60.4%         37.13%        4.2%                       
## 2 NL 1936 (blue collar)  50.15%        42.72%        0.04%                      
## 3 NL 1937 (unemployed)   23.29%        0%            1.17%                      
## # ℹ abbreviated name: ¹​`Avg Savings / Consumption (%)`

Figure: savings & credit

# Boxplot of (savings - credit) / income by occupational class, pooling the
# 598 survey with the 700 survey (the latter is assigned class code 5).
# NOTE(review): the boxes are unweighted. A `weight` argument placed outside
# aes() is ignored by ggplot(); to weight households, select a weight column
# and map it inside aes().
rbind(
  df.NL598 %>%
    mutate(income = wage + support + production) %>%
    select(income, credit, savings, work_class),
  df.NL700 %>%
    mutate(work_class = 5) %>%
    select(income, credit, savings, work_class)
) %>%
  ggplot(aes(x = factor(work_class), y = (savings - credit) / income)) +
  geom_boxplot() +
  # Breaks are given as strings so they match the factor levels exactly.
  scale_x_discrete(
    breaks = c("1", "2", "3", "4", "5"),
    labels = c("Blue collar", "White collar", "Agricultural workers",
               "Farmers", "Unemployed")
  ) +
  labs(
    x = "",
    y = "Share of income",
    title = "Savings - credit",
    caption = "Data 598 employed & 700 unemployed."
  ) +
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1))

# Boxplot of savings / income by occupational class, pooling the 598 survey
# with the 700 survey (the latter is assigned class code 5).
# NOTE(review): the boxes are unweighted. A `weight` argument placed outside
# aes() is ignored by ggplot(); to weight households, select a weight column
# and map it inside aes().
rbind(
  df.NL598 %>%
    mutate(income = wage + support + production) %>%
    select(income, credit, savings, work_class),
  df.NL700 %>%
    mutate(work_class = 5) %>%
    select(income, credit, savings, work_class)
) %>%
  ggplot(aes(x = factor(work_class), y = savings / income)) +
  geom_boxplot() +
  # Breaks are given as strings so they match the factor levels exactly.
  scale_x_discrete(
    breaks = c("1", "2", "3", "4", "5"),
    labels = c("Blue collar", "White collar", "Agricultural workers",
               "Farmers", "Unemployed")
  ) +
  labs(
    x = "",
    y = "Share of income",
    title = "Savings",
    caption = "Data 598 employed & 700 unemployed."
  ) +
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1))

# Boxplot of credit / income by occupational class, pooling the 598 survey
# with the 700 survey (the latter is assigned class code 5).
# NOTE(review): the boxes are unweighted. A `weight` argument placed outside
# aes() is ignored by ggplot(); to weight households, select a weight column
# and map it inside aes().
rbind(
  df.NL598 %>%
    mutate(income = wage + support + production) %>%
    select(income, credit, savings, work_class),
  df.NL700 %>%
    mutate(work_class = 5) %>%
    select(income, credit, savings, work_class)
) %>%
  ggplot(aes(x = factor(work_class), y = credit / income)) +
  geom_boxplot() +
  # Breaks are given as strings so they match the factor levels exactly.
  scale_x_discrete(
    breaks = c("1", "2", "3", "4", "5"),
    labels = c("Blue collar", "White collar", "Agricultural workers",
               "Farmers", "Unemployed")
  ) +
  labs(
    x = "",
    y = "Share of income",
    title = "Credit",
    caption = "Data 598 employed & 700 unemployed."
  ) +
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1))

Regression

models

library(modelsummary)

# Regression sample: the 598-survey households excluding work_class 4, with
# the income components, the FM measure and the regressors used further down.
df.NL598_reg <- df.NL598 %>%
  filter(work_class != 4) %>%
  mutate(
    # Income components.
    wage              = A - A2 - A3 - A4 - A5,
    support           = A2,
    public_employment = 0,
    production        = A4 + A5,
    # NOTE(review): `other` appears to hold a consumption category earlier in
    # the file; it is overwritten with 0 here. Confirm this is intended.
    other             = 0,
    # NOTE(review): `income` is not recomputed in this call, so FM uses the
    # `income` column already present in df.NL598. Confirm it matches the
    # components defined above.
    FM = income - (food + shelter + clothing),
    # Dependent variable: FM for households below the 33% quantile or above
    # the 66% quantile of FM; NaN for households in between.
    fffm.con = ifelse(
      FM < quantile(FM, probs = 0.33) | FM > quantile(FM, probs = 0.66),
      FM,
      NaN
    ),
    # One adult when exactly one of age_m / age_f is missing, otherwise two;
    # visitors and kids are added on top.
    HH_size = ifelse(xor(is.na(age_m), is.na(age_f)), 1, 2) + visitor + kids,
    healthcare = D28 / income,
    # Dummy: healthcare share above its 66% quantile.
    healthcare_shock = as.numeric(
      healthcare > quantile(healthcare, probs = 0.66)
    ),
    # Dummy: work_class code 2.
    white_collar = as.numeric(work_class == 2)
  )


# Descriptive statistics (mean, SD, min, max) of the regression covariates.
modelsummary::datasummary(
  formula =
    (`Av. age` = av_age) +
    (`Consumption unit` = verbruikseenheid) +
    (`HH size` = HH_size) +
    (`White collar (d)` = white_collar) +
    (`Large (d)` = location21) +
    (`Industrial (d)` = location22) +
    (`Newborn (d)` = newborn) +
    (`Healthcare (d)` = healthcare_shock) +
    (`Unemployment` = unemp) ~
    (Mean = mean) +
    (SD = sd) +
    (Min = min) +
    # Quartile columns are currently switched off:
    # (`Q25` = P25) + (`Median` = median) + (`Q75` = P75) +
    (Max = max),
  data = df.NL598_reg,
  title = "Summary statistics 525 Households in the Survey of 598 (1936-1937)",
  sparse_header = TRUE,
  escape = TRUE
)
Summary statistics 525 Households in the Survey of 598 (1936-1937)
Mean SD Min Max
Av. age 37.73 8.38 21.50 79.00
Consumption unit 3.66 1.52 1.80 11.06
HH size 4.86 2.18 2.00 15.00
White collar (d) 0.38 0.49 0.00 1.00
Large (d) 0.48 0.50 0.00 1.00
Industrial (d) 0.40 0.49 0.00 1.00
Newborn (d) 0.14 0.35 0.00 1.00
Healthcare (d) 0.34 0.47 0.00 1.00
Unemployment 2.50 8.65 0.00 52.00
# Wealth components: share of households with a positive value (via
# count_over_zero100), plus mean, SD, min and max.
modelsummary::datasummary(
  formula =
    (`Housing wealth` = home_value) +
    (`Land wealth` = land_value) +
    (`LI wealth` = npv.life) +
    (`Pension wealth` = npv.pension) ~
    (`Share (%)` = count_over_zero100) +
    (Mean = mean) +
    (SD = sd) +
    (Min = min) +
    # Quartile columns are currently switched off:
    # (`Q25` = P25) + (`Median` = median) + (`Q75` = P75) +
    (Max = max),
  data = df.NL598_reg,
  title = "The Estimated Wealth of 525 Households in the Survey of 598 (1936-1937)",
  sparse_header = TRUE,
  escape = TRUE
)
The Estimated Wealth of 525 Households in the Survey of 598 (1936-1937)
Share (%) Mean SD Min Max
Housing wealth 24.76 2316.68 4847.15 0.00 30120.48
Land wealth 40.19 13999.19 105489.73 0.00 1679329.82
LI wealth 93.33 925.63 1031.13 0.00 11175.71
Pension wealth 93.33 1088.20 1449.66 0.00 12590.88
## robustness check with consumption units 
# OLS regressions of fffm.con on household characteristics, using consumption
# units (verbruikseenheid) as the household-size control. Each model adds a
# block of regressors to the previous one; coefficients are tested with HC0
# heteroskedasticity-robust standard errors.

# Clear earlier model objects of the same name (warns if they do not exist).
rm(linear.1, linear.2, linear.3, linear.4)

# (1) occupation, consumption units, age
linear.1 <- lm(
  fffm.con ~ white_collar + verbruikseenheid + av_age,
  data = df.NL598_reg
) %>%
  coeftest(., vcov = vcovHC(., type = "HC0"))

# (2) + location dummies
linear.2 <- lm(
  fffm.con ~ white_collar + verbruikseenheid + av_age +
    location21 + location22,
  data = df.NL598_reg
) %>%
  coeftest(., vcov = vcovHC(., type = "HC0"))

# (3) + wealth components
linear.3 <- lm(
  fffm.con ~ white_collar + verbruikseenheid + av_age +
    location21 + location22 +
    home_value + land_value + npv.life + npv.pension,
  data = df.NL598_reg
) %>%
  coeftest(., vcov = vcovHC(., type = "HC0"))

# (4) + shock variables; each regressor appears exactly once in the formula
linear.4 <- lm(
  fffm.con ~ white_collar + verbruikseenheid + av_age +
    location21 + location22 +
    home_value + land_value + npv.life + npv.pension +
    newborn + healthcare_shock + unemp,
  data = df.NL598_reg
) %>%
  coeftest(., vcov = vcovHC(., type = "HC0"))

# Labels follow the order in which the regressors enter the formulas above.
stargazer(
  linear.1, linear.2, linear.3, linear.4,
  title = "The Determinants of Financial Fragility of Dutch Households in 1936-37",
  type = "text",
  align = TRUE,
  digits = 2,
  covariate.labels = c(
    "White collar (d)", "Consumption units", "Av. Age",
    "Large (d)", "Industrial (d)",
    "Housing wealth", "Land wealth", "LI wealth", "Pension wealth",
    "Newborn (d)", "Healthcare (d)", "Unemployment"
  )
)
## 
## The Determinants of Financial Fragility of Dutch Households in 1936-37
## ===================================================================
##                                  Dependent variable:               
##                   -------------------------------------------------
##                                                                    
##                       (1)         (2)         (3)          (4)     
## -------------------------------------------------------------------
## White collar (d)  1,634.19*** 1,626.16*** 1,013.99***  1,047.37*** 
##                    (154.91)    (157.36)     (157.40)     (160.75)  
##                                                                    
## Consumption units  -88.13**    -99.99***   -103.98***   -101.20*** 
##                     (34.21)     (35.98)     (34.37)      (33.78)   
##                                                                    
## Av. Age            23.24***    25.61***     42.21***     39.07***  
##                     (7.21)      (7.76)       (8.39)       (8.10)   
##                                                                    
## Large (d)                      347.58***     -37.49       -66.37   
##                                (129.65)     (104.59)     (101.23)  
##                                                                    
## Industrial (d)                   68.74      -114.52      -131.59   
##                                (103.51)     (100.18)     (104.94)  
##                                                                    
## Housing wealth                              0.04***      0.04***   
##                                              (0.01)       (0.01)   
##                                                                    
## Land wealth                                  0.0001       0.0001   
##                                             (0.0001)     (0.0001)  
##                                                                    
## LI wealth                                   1.32***      1.36***   
##                                              (0.37)       (0.36)   
##                                                                    
## Pension wealth                              -0.45**      -0.49**   
##                                              (0.19)       (0.19)   
##                                                                    
## Newborn (d)                                              -134.44   
##                                                          (129.92)  
##                                                                    
## Healthcare (d)                                          -303.28*** 
##                                                          (105.18)  
##                                                                    
## Unemployment                                              -0.05    
##                                                           (2.88)   
##                                                                    
## Constant            -241.81    -458.97*   -1,446.23*** -1,200.58***
##                    (238.29)    (266.64)     (401.64)     (382.60)  
##                                                                    
## ===================================================================
## ===================================================================
## Note:                                   *p<0.1; **p<0.05; ***p<0.01
# Same four specifications as the consumption-unit regressions, but with
# household size (HH_size) as the size control. Coefficients are tested with
# HC0 heteroskedasticity-robust standard errors.

# (1) occupation, household size, age
linear.1 <- lm(
  fffm.con ~ white_collar + HH_size + av_age,
  data = df.NL598_reg
) %>%
  coeftest(., vcov = vcovHC(., type = "HC0"))

# (2) + location dummies
linear.2 <- lm(
  fffm.con ~ white_collar + HH_size + av_age +
    location21 + location22,
  data = df.NL598_reg
) %>%
  coeftest(., vcov = vcovHC(., type = "HC0"))

# (3) + wealth components
linear.3 <- lm(
  fffm.con ~ white_collar + HH_size + av_age +
    location21 + location22 +
    home_value + land_value + npv.life + npv.pension,
  data = df.NL598_reg
) %>%
  coeftest(., vcov = vcovHC(., type = "HC0"))

# (4) + shock variables; each regressor appears exactly once in the formula
linear.4 <- lm(
  fffm.con ~ white_collar + HH_size + av_age +
    location21 + location22 +
    home_value + land_value + npv.life + npv.pension +
    newborn + healthcare_shock + unemp,
  data = df.NL598_reg
) %>%
  coeftest(., vcov = vcovHC(., type = "HC0"))

# Labels follow the order in which the regressors enter the formulas above.
stargazer(
  linear.1, linear.2, linear.3, linear.4,
  title = "The Determinants of Financial Fragility of Dutch Households in 1936-37",
  type = "text",
  align = TRUE,
  digits = 2,
  covariate.labels = c(
    "White collar (d)", "HH size", "Av. Age",
    "Large (d)", "Industrial (d)",
    "Housing wealth", "Land wealth", "LI wealth", "Pension wealth",
    "Newborn (d)", "Healthcare (d)", "Unemployment"
  )
)
## 
## The Determinants of Financial Fragility of Dutch Households in 1936-37
## ==================================================================
##                                 Dependent variable:               
##                  -------------------------------------------------
##                                                                   
##                      (1)         (2)         (3)          (4)     
## ------------------------------------------------------------------
## White collar (d) 1,623.38*** 1,617.68*** 1,000.85***  1,032.14*** 
##                   (151.89)    (155.04)     (155.33)     (158.53)  
##                                                                   
## HH size           -63.61***   -68.93***   -73.86***    -72.53***  
##                    (21.63)     (22.90)     (20.41)      (20.34)   
##                                                                   
## Av. Age           20.76***    22.53***     38.88***     35.89***  
##                    (6.88)      (7.28)       (8.23)       (7.91)   
##                                                                   
## Large (d)                     337.79***     -48.34       -76.40   
##                               (128.65)     (103.09)     (99.91)   
##                                                                   
## Industrial (d)                  65.56      -121.35      -137.38   
##                               (103.25)     (100.16)     (104.75)  
##                                                                   
## Housing wealth                             0.04***      0.04***   
##                                             (0.01)       (0.01)   
##                                                                   
## Land wealth                                 0.0001       0.0001   
##                                            (0.0001)     (0.0001)  
##                                                                   
## LI wealth                                  1.32***      1.36***   
##                                             (0.37)       (0.36)   
##                                                                   
## Pension wealth                             -0.45**      -0.49***  
##                                             (0.19)       (0.19)   
##                                                                   
## Newborn (d)                                             -131.60   
##                                                         (130.13)  
##                                                                   
## Healthcare (d)                                         -304.55*** 
##                                                         (105.82)  
##                                                                   
## Unemployment                                             -0.39    
##                                                          (2.90)   
##                                                                   
## Constant           -154.24     -361.30   -1,329.79*** -1,084.59***
##                   (238.36)    (263.94)     (404.64)     (386.32)  
##                                                                   
## ==================================================================
## ==================================================================
## Note:                                  *p<0.1; **p<0.05; ***p<0.01
# Remove the regression results and the regression sample from the workspace.
rm(list = c("linear.1", "linear.2", "linear.3", "linear.4", "df.NL598_reg"))