10th percentile expenses on food, shelter and clothing, scaled by consumption unit

De eerste tabel geeft het 10e percentiel van de uitgaven weer voor de 598, de tweede tabel voor de 700. Beide zijn geschaald naar verbruikseenheid.

# 10th percentile of food, shelter and clothing spending per consumption unit
# (verbruikseenheid) in df.data (the 598 sample), leaving out rows whose work
# is "landbouwer". Result: a long data frame with columns `values` and `ind`.
sub.598 <- df.data %>%
  filter(work != "landbouwer") %>%
  select(food, shelter, clothing, verbruikseenheid) %>%
  # Scale every spending column by the row's number of consumption units.
  mutate(across(c(food, shelter, clothing), ~ .x / verbruikseenheid)) %>%
  select(-verbruikseenheid) %>%
  # `probs` is spelled out in full; the former `prob` only reached quantile()
  # through partial argument matching.
  lapply(quantile, probs = 0.1, names = FALSE) %>%
  stack()

# 10th percentile of food, housing and clothing spending per consumption unit
# (verbruikseenheid) in df.700 (the 700 sample).
# Result: a long data frame with columns `values` and `ind`.
sub.700 <- df.700 %>%
  select(food, housing, clothing, verbruikseenheid) %>%
  # Scale every spending column by the row's number of consumption units.
  mutate(across(c(food, housing, clothing), ~ .x / verbruikseenheid)) %>%
  select(-verbruikseenheid) %>%
  # `probs` is spelled out in full; the former `prob` only reached quantile()
  # through partial argument matching.
  lapply(quantile, probs = 0.1, names = FALSE) %>%
  stack()

# Show the 10th-percentile table computed from the 598 sample.
sub.598
##      values      ind
## 1 125.90248     food
## 2  56.16327  shelter
## 3  21.86315 clothing
# Show the 10th-percentile table computed from the 700 sample.
sub.700
##      values      ind
## 1 90.128235     food
## 2 33.742813  housing
## 3  5.291765 clothing

Hier definieer ik, per familie, het verschil tussen de daadwerkelijke uitgaven aan voedsel, onderdak en kleren en het subsistence level voor voedsel, onderdak en kleren. Het subsistence level definieer ik op basis van de 700, niet de 598. Ik corrigeer voor verbruikseenheid. Ik laat dit alles in een scatterplot zien.

# Subsistence thresholds and margin per row of df.data (the 598 sample),
# leaving out rows whose work is "landbouwer".
# The thresholds are the sub.700 percentile values (rows 1-3: food, housing,
# clothing) multiplied by the row's consumption units; sub.margin is actual
# spending on the three categories minus the three thresholds.
sub.df.data <- df.data %>%
  filter(work != "landbouwer") %>%
  select(food, shelter, clothing, verbruikseenheid, net_income) %>%
  mutate(
    sub.food = verbruikseenheid * sub.700$values[1],
    sub.shelter = verbruikseenheid * sub.700$values[2],
    sub.clothing = verbruikseenheid * sub.700$values[3],
    sub.margin = food + shelter + clothing -
      sub.food - sub.shelter - sub.clothing
  )

# Subsistence thresholds and margin per row of df.700 (the 700 sample).
# The thresholds are the sub.700 percentile values (rows 1-3: food, housing,
# clothing) multiplied by the row's consumption units; sub.margin is actual
# spending on the three categories minus the three thresholds.
sub.df.700 <- df.700 %>%
  select(food, housing, clothing, verbruikseenheid, net_income) %>%
  mutate(
    sub.food = verbruikseenheid * sub.700$values[1],
    sub.housing = verbruikseenheid * sub.700$values[2],
    sub.clothing = verbruikseenheid * sub.700$values[3],
    sub.margin = food + housing + clothing -
      sub.food - sub.housing - sub.clothing
  )


# Scatter plot of the subsistence margin against log net income for both
# samples, with 2D density contours per sample and a dashed line at margin 0.
# The two samples are stacked into one data frame and tagged in `data`.
rbind(
  cbind(
    sub.df.700[c("sub.margin", "net_income")],
    data = rep("NL 700 (1936)", nrow(sub.df.700))
  ),
  cbind(
    sub.df.data[c("sub.margin", "net_income")],
    data = rep("NL 598 (1935)", nrow(sub.df.data))
  )
) %>%
  ggplot(aes(x = log(net_income), y = sub.margin, group = data)) +
  geom_point(aes(shape = data)) +
  scale_shape_manual(values = c(6, 1)) +
  labs(x = "ln net income", y = "Expenses over subsistence level") +
  geom_density_2d() +
  # Scale limits discard rows that fall outside them (ggplot2 reports these
  # as removed rows), so the contours only use the points inside the window.
  scale_x_continuous(limits = c(6, 8)) +
  scale_y_continuous(limits = c(-250, 800)) +
  theme_minimal() +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme(legend.position = "bottom") +
  # The sample is mapped to `shape`, not `color`, so the legend settings must
  # target the shape guide; a color guide has nothing to act on here.
  guides(shape = guide_legend("Data source", nrow = 2, byrow = TRUE))
## Warning: Removed 241 rows containing non-finite values (stat_density2d).
## Warning: Removed 241 rows containing missing values (geom_point).

Cross-table

# Rebuild the subsistence margins for df.data (the 598 sample, rows whose
# work is "landbouwer" left out) and add two class columns for cross-tables.
# Thresholds are the sub.700 percentile values (rows 1-3: food, housing,
# clothing) multiplied by the row's consumption units.
sub.df.data <- df.data %>%
  filter(work != "landbouwer") %>%
  select(food, shelter, clothing, verbruikseenheid, net_income) %>%
  mutate(
    sub.food = verbruikseenheid * sub.700$values[1],
    sub.shelter = verbruikseenheid * sub.700$values[2],
    sub.clothing = verbruikseenheid * sub.700$values[3],
    sub.margin = food + shelter + clothing -
      sub.food - sub.shelter - sub.clothing,
    # case_when() takes the first matching condition, so each bound only
    # needs its upper limit. Rows with a missing value match nothing and
    # stay NA. The top class includes the boundary value itself.
    net_income_class = case_when(
      net_income < 500 ~ "<500",
      net_income < 1000 ~ "500-1000",
      net_income < 1500 ~ "1000-1500",
      net_income >= 1500 ~ ">1500"
    ),
    sub_margin_class = case_when(
      sub.margin < 0 ~ "<0",
      sub.margin < 250 ~ "0-250",
      sub.margin < 500 ~ "250-500",
      sub.margin >= 500 ~ ">500"
    )
  )

# Row counts per net income class in the 598 sample.
table(sub.df.data[["net_income_class"]])
## 
##     >1500 1000-1500  500-1000 
##       316       135        74
# Rebuild the subsistence margins for df.700 (the 700 sample) and add two
# class columns for cross-tables.
# Thresholds are the sub.700 percentile values (rows 1-3: food, housing,
# clothing) multiplied by the row's consumption units.
sub.df.700 <- df.700 %>%
  select(food, housing, clothing, verbruikseenheid, net_income) %>%
  mutate(
    sub.food = verbruikseenheid * sub.700$values[1],
    sub.housing = verbruikseenheid * sub.700$values[2],
    sub.clothing = verbruikseenheid * sub.700$values[3],
    sub.margin = food + housing + clothing -
      sub.food - sub.housing - sub.clothing,
    # case_when() takes the first matching condition, so each bound only
    # needs its upper limit. Rows with a missing value match nothing and
    # stay NA. The top class includes the boundary value itself.
    net_income_class = case_when(
      net_income < 500 ~ "<500",
      net_income < 1000 ~ "500-1000",
      net_income < 1500 ~ "1000-1500",
      net_income >= 1500 ~ ">1500"
    ),
    sub_margin_class = case_when(
      sub.margin < 0 ~ "<0",
      sub.margin < 250 ~ "0-250",
      sub.margin < 500 ~ "250-500",
      sub.margin >= 500 ~ ">500"
    )
  )

# Quick visual checks of the distributions behind the class boundaries.
# hist() builds its default title and x-axis label from the argument
# expression, so the calls are kept exactly as written.

# Net income in the 598 sample.
hist(sub.df.data$net_income)

# Net income in the 700 sample.
hist(sub.df.700$net_income)

# Subsistence margin in the 598 sample.
hist(sub.df.data$sub.margin)

# Subsistence margin in the 700 sample.
hist(sub.df.700$sub.margin)

# table(sub.df.data$net_income_class)
# table(sub.df.700$net_income_class)
# 
# table(sub.df.data$sub_margin_class)
# table(sub.df.700$sub_margin_class)

# order_ind <- c("<0","0-250","250-500",">500")
# Cross-table for the 598 sample: net income class (rows) by subsistence
# margin class (columns), returned as a plain data frame.
as.data.frame(tabyl(sub.df.data, net_income_class, sub_margin_class))
##   net_income_class <0 >500 0-250 250-500
## 1            >1500  0  308     1       7
## 2        1000-1500  0   89     1      45
## 3         500-1000  3    8    18      45
# Cross-table for the 700 sample: net income class (rows) by subsistence
# margin class (columns), returned as a plain data frame.
as.data.frame(tabyl(sub.df.700, net_income_class, sub_margin_class))
##   net_income_class <0 >500 0-250 250-500
## 1             <500  0    0    27       8
## 2            >1500  0    2     2       3
## 3        1000-1500  2   12    27      44
## 4         500-1000 24    8   249     292