De eerste tabel geeft het 10e percentiel van de uitgaven weer voor de 598, de tweede tabel voor de 700. Beide zijn geschaald naar verbruikseenheid.
# 10th percentile of food, shelter and clothing spending per consumption
# unit (verbruikseenheid) in df.data, excluding rows whose work is
# "landbouwer". The result is stacked into a long table (values, ind).
sub.598 <- df.data %>%
  filter(work != "landbouwer") %>%
  select(food, shelter, clothing, verbruikseenheid) %>%
  # Dividing the whole frame by the column rescales every selected column;
  # verbruikseenheid is divided by itself and dropped on the next line.
  mutate(. / verbruikseenheid) %>%
  select(-verbruikseenheid) %>%
  # `probs` is spelled out in full instead of relying on partial matching.
  lapply(quantile, probs = 0.1, names = FALSE) %>%
  stack()
# 10th percentile of food, housing and clothing spending per consumption
# unit (verbruikseenheid) in df.700, stacked into a long table (values, ind).
sub.700 <- df.700 %>%
  select(food, housing, clothing, verbruikseenheid) %>%
  # Dividing the whole frame by the column rescales every selected column;
  # verbruikseenheid is divided by itself and dropped on the next line.
  mutate(. / verbruikseenheid) %>%
  select(-verbruikseenheid) %>%
  # `probs` is spelled out in full instead of relying on partial matching.
  lapply(quantile, probs = 0.1, names = FALSE) %>%
  stack()
# Display the 10th-percentile table computed from df.data (the 598).
sub.598
## values ind
## 1 125.90248 food
## 2 56.16327 shelter
## 3 21.86315 clothing
# Display the 10th-percentile table computed from df.700 (the 700).
sub.700
## values ind
## 1 90.128235 food
## 2 33.742813 housing
## 3 5.291765 clothing
Hier definieer ik, per familie, het verschil tussen de daadwerkelijke uitgaven aan voedsel, onderdak en kleren en het subsistence level voor voedsel, onderdak en kleren. Het subsistence level definieer ik op basis van de 700, niet de 598. Ik corrigeer voor verbruikseenheid. Ik laat dit alles in een scatterplot zien.
# Per-household subsistence margin for df.data (rows with work "landbouwer"
# excluded): actual spending on food, shelter and clothing minus the
# sub.700 per-unit levels scaled by the household's verbruikseenheid.
# Rows 1-3 of sub.700 are read positionally as food, housing and clothing.
sub.df.data <- df.data %>%
  filter(work != "landbouwer") %>%
  select(food, shelter, clothing, verbruikseenheid, net_income) %>%
  mutate(
    sub.food = verbruikseenheid * sub.700$values[1],
    sub.shelter = verbruikseenheid * sub.700$values[2],
    sub.clothing = verbruikseenheid * sub.700$values[3],
    sub.margin = food + shelter + clothing -
      sub.food - sub.shelter - sub.clothing
  )
# Per-household subsistence margin for df.700: actual spending on food,
# housing and clothing minus the sub.700 per-unit levels scaled by the
# household's verbruikseenheid.
# Rows 1-3 of sub.700 are read positionally as food, housing and clothing.
sub.df.700 <- df.700 %>%
  select(food, housing, clothing, verbruikseenheid, net_income) %>%
  mutate(
    sub.food = verbruikseenheid * sub.700$values[1],
    sub.housing = verbruikseenheid * sub.700$values[2],
    sub.clothing = verbruikseenheid * sub.700$values[3],
    sub.margin = food + housing + clothing -
      sub.food - sub.housing - sub.clothing
  )
# Scatterplot of the subsistence margin against log net income for both
# samples, with 2D density contours per sample and a dashed zero line.
# Each sample is tagged in a `data` column before the frames are stacked.
rbind(
  cbind(
    sub.df.700[c("sub.margin", "net_income")],
    data = rep("NL 700 (1936)", nrow(sub.df.700))
  ),
  cbind(
    sub.df.data[c("sub.margin", "net_income")],
    data = rep("NL 598 (1935)", nrow(sub.df.data))
  )
) %>%
  # Earlier colour-based variant, kept for reference:
  # ggplot(aes(x=net_income, y=sub.margin, color = data)) + geom_point(shape=1) +
  ggplot(aes(x = log(net_income), y = sub.margin, group = data)) +
  geom_point(aes(shape = data)) +
  scale_shape_manual(values = c(6, 1)) +
  labs(x = "ln net income", y = "Expenses over subsistance level") +
  geom_density_2d() +
  # Points outside these limits are dropped before the density is computed,
  # which is what triggers the "Removed ... rows" warnings.
  scale_x_continuous(limits = c(6, 8)) +
  scale_y_continuous(limits = c(-250, 800)) +
  theme_minimal() +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme(legend.position = "bottom") +
  # The samples are mapped to `shape`, not `color`, so the legend guide must
  # be attached to the shape scale for the title and layout to take effect.
  guides(shape = guide_legend("Data source", nrow = 2, byrow = TRUE))
## Warning: Removed 241 rows containing non-finite values (stat_density2d).
## Warning: Removed 241 rows containing missing values (geom_point).
Cross-table
# Rebuild the subsistence margin for df.data (rows with work "landbouwer"
# excluded) and bin both net income and the margin into labelled classes
# for the cross-table. Rows 1-3 of sub.700 are read positionally as food,
# housing and clothing.
sub.df.data <- df.data %>%
  filter(work != "landbouwer") %>%
  select(food, shelter, clothing, verbruikseenheid, net_income) %>%
  mutate(
    sub.food = verbruikseenheid * sub.700[[1]][1],
    sub.shelter = verbruikseenheid * sub.700[[1]][2],
    sub.clothing = verbruikseenheid * sub.700[[1]][3],
    sub.margin = food + shelter + clothing -
      sub.food - sub.shelter - sub.clothing,
    # case_when() evaluates top to bottom, so each upper bound is enough.
    # Missing values stay NA instead of falling into the catch-all class.
    # Note that ">1500" also contains exactly 1500.
    net_income_class = case_when(
      is.na(net_income) ~ NA_character_,
      net_income < 500 ~ "<500",
      net_income < 1000 ~ "500-1000",
      net_income < 1500 ~ "1000-1500",
      TRUE ~ ">1500"
    ),
    # Note that ">500" also contains exactly 500.
    sub_margin_class = case_when(
      is.na(sub.margin) ~ NA_character_,
      sub.margin < 0 ~ "<0",
      sub.margin < 250 ~ "0-250",
      sub.margin < 500 ~ "250-500",
      TRUE ~ ">500"
    )
  )
# Frequency of each net income class in the 598.
table(sub.df.data$net_income_class)
##
## >1500 1000-1500 500-1000
## 316 135 74
# Rebuild the subsistence margin for df.700 and bin both net income and the
# margin into labelled classes for the cross-table. Rows 1-3 of sub.700 are
# read positionally as food, housing and clothing.
sub.df.700 <- df.700 %>%
  select(food, housing, clothing, verbruikseenheid, net_income) %>%
  mutate(
    sub.food = verbruikseenheid * sub.700[[1]][1],
    sub.housing = verbruikseenheid * sub.700[[1]][2],
    sub.clothing = verbruikseenheid * sub.700[[1]][3],
    sub.margin = food + housing + clothing -
      sub.food - sub.housing - sub.clothing,
    # case_when() evaluates top to bottom, so each upper bound is enough.
    # Missing values stay NA instead of falling into the catch-all class.
    # Note that ">1500" also contains exactly 1500.
    net_income_class = case_when(
      is.na(net_income) ~ NA_character_,
      net_income < 500 ~ "<500",
      net_income < 1000 ~ "500-1000",
      net_income < 1500 ~ "1000-1500",
      TRUE ~ ">1500"
    ),
    # Note that ">500" also contains exactly 500.
    sub_margin_class = case_when(
      is.na(sub.margin) ~ NA_character_,
      sub.margin < 0 ~ "<0",
      sub.margin < 250 ~ "0-250",
      sub.margin < 500 ~ "250-500",
      TRUE ~ ">500"
    )
  )
# Histograms of net income and of the subsistence margin for both samples.
# The argument expressions are left as written because hist() derives its
# default title and x-axis label from them.
hist(sub.df.data$net_income)
hist(sub.df.700$net_income)
hist(sub.df.data$sub.margin)
hist(sub.df.700$sub.margin)
# table(sub.df.data$net_income_class)
# table(sub.df.700$net_income_class)
#
# table(sub.df.data$sub_margin_class)
# table(sub.df.700$sub_margin_class)
# order_ind <- c("<0","0-250","250-500",">500")
# Cross-table for the 598: net income class by subsistence-margin class.
as.data.frame(tabyl(sub.df.data, net_income_class, sub_margin_class))
## net_income_class <0 >500 0-250 250-500
## 1 >1500 0 308 1 7
## 2 1000-1500 0 89 1 45
## 3 500-1000 3 8 18 45
# Cross-table for the 700: net income class by subsistence-margin class.
as.data.frame(tabyl(sub.df.700, net_income_class, sub_margin_class))
## net_income_class <0 >500 0-250 250-500
## 1 <500 0 0 27 8
## 2 >1500 0 2 2 3
## 3 1000-1500 2 12 27 44
## 4 500-1000 24 8 249 292