DIY vs Pro Spending - R/R Model Rebuild Materials Spend Only Adjustment

Exploratory Plots

Ian Kennedy
2023-10-03
Show code
library(ggthemes)
library(openxlsx)
library(tidyverse)
library(add2ggplot)
library(priceR)
library(sysfonts)
library(conflicted)

# Prefer Dplyr over other packages for certain functions
conflicts_prefer(dplyr::summarize(), dplyr::rename(), dplyr::group_by(), dplyr::filter(), dplyr::mutate(), dplyr::select(), base::as.numeric())

# Read in the Raw AHS Data
Data <- read.xlsx("C:/Users/ikennedy/JBREC/BP research public use microdata coding and data - General/AHSProject/CleanedData/Data_1995_2021.xlsx")

# Convert JOBDIY to NA if it was Not Reported ('NR') and Create a 'Count' variable (this will be used later on)
Data <- Data %>%
  mutate(JOBDIY = ifelse(JOBDIY == 'NR', NA, JOBDIY),
         Count = 1)

Reweights <- read.csv("C:/Users/ikennedy/JBREC/BP research public use microdata coding and data - General/AHSGeneralInfo/2013 AHS_Harvard JCHS reweights.csv")
Reweights <- Reweights %>%
  rename(CONTROL = control) %>%
  select(CONTROL, jchsweight2)
Reweights$CONTROL <- as.character(Reweights$CONTROL)
Data_2013 <- Data %>%
  filter(AHSYEAR == 2013)

Data_2013 <- Data_2013 %>%
  left_join(Reweights, by = 'CONTROL')

Data_2013 <- Data_2013 %>%
  mutate(WEIGHT = jchsweight2) %>%
  select(-jchsweight2)

Data <- Data %>%
  filter(AHSYEAR != 2013)

Data <- Data %>%
  rbind(Data_2013) %>%
  arrange(AHSYEAR)

# Read in the Inflation Adjustment Spreadsheet
Inflation <- read.xlsx("C:/Users/ikennedy/JBREC/BP research public use microdata coding and data - General/AHSProject/InflationRate.xlsx")
# Select the year and rate columns
Inflation <- Inflation[,c(1,3)]

DataTest <- Data %>%
  filter(!is.na(JobCategory) & TENURE == "Owned/Bought" & JobCategory != 'DisRepairs' & JOBCOST > 0 & JOBCOST != MAINTAMT) %>%
  left_join(Inflation, by = c('AHSYEAR' = 'Year')) %>%
  mutate(JOBCOST = JOBCOST*Rate,
         JOBCOSTWeighted = JOBCOST*WEIGHT,
         JOBDIY = ifelse(JOBDIY == '-1', NA, JOBDIY)) %>%
  select(-Rate) %>%
  mutate(CostClass = case_when(JOBCOST <= 1800 ~ 'Mini',
                               JOBCOST > 1800 & JOBCOST <= 10000 ~ "Small",
                               JOBCOST > 10000 & JOBCOST <= 30000 ~ 'Medium',
                               JOBCOST > 30000 ~ 'Large'))

1 DIY v Pro Plots

1.1 Using Mean Project Spend

Show code
Summary <- DataTest %>%
  filter(!is.na(JOBDIY) & JOBCOST > 10) %>%
  group_by(AHSYEAR, JOBDIY, CostClass, JobCategory) %>%
  summarize(MeanSpend = mean(JOBCOST),
            MedianSpend = median(JOBCOST),
            Projects = round(sum(WEIGHT)))

Large_Medium <- Summary %>%
  filter(CostClass %in% c('Medium', 'Large'))
  
Small <- Summary %>%
  filter(CostClass == 'Small')

Mini <- Summary %>%
  filter(CostClass == 'Mini')

Large_Medium %>%
  filter(CostClass == 'Large') %>%
  ggplot(aes(AHSYEAR, MedianSpend, fill = JOBDIY)) +
  geom_col(position = 'dodge') +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
  scale_fill_jbrec() +
  theme_jbrec(basesize = 40, linewidth = 2) +
  labs(title = 'Median Spend by Project Category for Large Projects, DIY v Pro',
       y = 'Median Spend (Real $)',
       x = 'Year') +
  theme(axis.text.x = element_text(angle = 90),
        axis.ticks = element_blank()) +
  facet_wrap(~JobCategory, scales = 'free')
Show code
Large_Medium %>%
  filter(CostClass == 'Medium') %>%
  ggplot(aes(AHSYEAR, MedianSpend, fill = JOBDIY)) +
  geom_col(position = 'dodge') +
  scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
  scale_y_continuous(labels = scales::comma) +
  scale_fill_jbrec() +
  theme_jbrec(basesize = 40, linewidth = 2) +
  labs(title = 'Median Spend by Project Category for Medium Projects, DIY v Pro',
       y = 'Median Spend (Real $)',
       x = 'Year') +
  theme(axis.text.x = element_text(angle = 90),
        axis.ticks = element_blank()) +
  facet_wrap(~JobCategory, scales = 'free')
Show code
Small %>%
  ggplot(aes(AHSYEAR, MedianSpend, fill = JOBDIY)) +
  geom_col(position = 'dodge') +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
  scale_fill_jbrec() +
  theme_jbrec(basesize = 40, linewidth = 2) +
  theme(axis.text.x = element_text(angle = 90),
        axis.ticks = element_blank()) +
  labs(title = 'Median Spend by Project Category for Small Projects, DIY v Pro',
       y = 'Median Spend (Real $)',
       x = 'Year') +
  facet_wrap(~JobCategory, scales = 'free')
Show code
Mini %>%
  ggplot(aes(AHSYEAR, MedianSpend, fill = JOBDIY)) +
  geom_col(position = 'dodge') +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
  scale_fill_jbrec() +
  theme_jbrec(basesize = 40, linewidth = 2) +
  theme(axis.text.x = element_text(angle = 90),
        axis.ticks = element_blank()) +
  labs(title = 'Median Spend by Project Category for Mini Projects, DIY v Pro',
       y = 'Median Spend (Real $)',
       x = 'Year') +
  facet_wrap(~JobCategory, scales = 'free')

1.2 Using Median Project Spend

Show code
Large_Medium %>%
  filter(CostClass == 'Large') %>%
  ggplot(aes(AHSYEAR, MeanSpend, fill = JOBDIY)) +
  geom_col(position = 'dodge') +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
  scale_fill_jbrec() +
  theme_jbrec(basesize = 40, linewidth = 2) +
  labs(title = 'Mean Spend by Project Category for Large Projects, DIY v Pro',
       y = 'Mean Spend (Real $)',
       x = 'Year') +
  theme(axis.text.x = element_text(angle = 90),
        axis.ticks = element_blank()) +
  facet_wrap(~JobCategory, scales = 'free')
Show code
Large_Medium %>%
  filter(CostClass == 'Medium') %>%
  ggplot(aes(AHSYEAR, MeanSpend, fill = JOBDIY)) +
  geom_col(position = 'dodge') +
  scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
  scale_y_continuous(labels = scales::comma) +
  scale_fill_jbrec() +
  theme_jbrec(basesize = 40, linewidth = 2) +
  labs(title = 'Mean Spend by Project Category for Medium Projects, DIY v Pro',
       y = 'Mean Spend (Real $)',
       x = 'Year') +
  theme(axis.text.x = element_text(angle = 90),
        axis.ticks = element_blank()) +
  facet_wrap(~JobCategory, scales = 'free')
Show code
Small %>%
  ggplot(aes(AHSYEAR, MeanSpend, fill = JOBDIY)) +
  geom_col(position = 'dodge') +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
  scale_fill_jbrec() +
  theme_jbrec(basesize = 40, linewidth = 2) +
  theme(axis.text.x = element_text(angle = 90),
        axis.ticks = element_blank()) +
  labs(title = 'Mean Spend by Project Category for Small Projects, DIY v Pro',
       y = 'Mean Spend (Real $)',
       x = 'Year') +
  facet_wrap(~JobCategory, scales = 'free')
Show code
Mini %>%
  ggplot(aes(AHSYEAR, MeanSpend, fill = JOBDIY)) +
  geom_col(position = 'dodge') +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
  scale_fill_jbrec() +
  theme_jbrec(basesize = 40, linewidth = 2) +
  theme(axis.text.x = element_text(angle = 90),
        axis.ticks = element_blank()) +
  labs(title = 'Mean Spend by Project Category for Mini Projects, DIY v Pro',
       y = 'Mean Spend (Real $)',
       x = 'Year') +
  facet_wrap(~JobCategory, scales = 'free')

2 Labor Cost Visualization - Using Median Spend on Mini Projects

2.1 Exterior, Interior, Lot/Yard, and Systems Projects Only

2.1.1 $0 - $1,800

Show code
Summary <- DataTest %>%
  filter(!is.na(JOBDIY)) %>%
  group_by(AHSYEAR, JOBDIY, CostClass, JobCategory) %>%
  summarize(MeanSpend = mean(JOBCOST),
            MedianSpend = median(JOBCOST),
            Projects = round(sum(WEIGHT)))

Mini <- Summary %>%
  filter(CostClass == 'Mini')

Mini <- Mini %>%
  filter(JobCategory %in% c('Exterior', 'Interior', 'LotYardOther', 'Systems')) %>%
  select(AHSYEAR, JOBDIY, JobCategory, MedianSpend:Projects) %>%
  ungroup() %>%
  select(-CostClass)

Mini <- Mini %>%
  pivot_wider(id_cols = c(AHSYEAR, JobCategory), names_from = JOBDIY, values_from = c(MedianSpend, Projects))


Mini <- Mini %>%
  mutate(ProDifference = (`MedianSpend_Not DIY` - MedianSpend_DIY) / `MedianSpend_Not DIY`) %>%
  group_by(AHSYEAR) %>%
  mutate(ProjectProp = `Projects_Not DIY`/sum(`Projects_Not DIY`)) %>%
  ungroup() %>%
  mutate(LaborPropFinal = ProDifference*ProjectProp)

Mini <- Mini %>%
  group_by(AHSYEAR) %>%
  summarize(LaborProportionofProSpend = sum(LaborPropFinal))

Mini %>%
  ggplot(aes(AHSYEAR, LaborProportionofProSpend)) +
  geom_line(size = 2) +
  scale_y_continuous(labels = scales::percent, breaks = c(seq(.32, .52, .02)), limits = c(.32, .52)) +
  scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
  scale_fill_jbrec() +
  theme_jbrec(basesize = 40, linewidth = 2) +
  theme(axis.text.x = element_text(angle = 90),
        axis.ticks = element_blank()) +
  labs(title = 'Labor Costs as a Percentage of Pro Spend',
       y = 'Labor Costs as a Percentage of Pro Spend',
       x = 'Year')

2.1.2 $50 - $1,800

Show code
Summary <- DataTest %>%
  filter(!is.na(JOBDIY) & JOBCOST >= 50) %>%
  group_by(AHSYEAR, JOBDIY, CostClass, JobCategory) %>%
  summarize(MeanSpend = mean(JOBCOST),
            MedianSpend = median(JOBCOST),
            Projects = round(sum(WEIGHT)))

Mini <- Summary %>%
  filter(CostClass == 'Mini')

Mini <- Mini %>%
  filter(JobCategory %in% c('Exterior', 'Interior', 'LotYardOther', 'Systems')) %>%
  select(AHSYEAR, JOBDIY, JobCategory, MedianSpend:Projects) %>%
  ungroup() %>%
  select(-CostClass)

Mini <- Mini %>%
  pivot_wider(id_cols = c(AHSYEAR, JobCategory), names_from = JOBDIY, values_from = c(MedianSpend, Projects))


Mini <- Mini %>%
  mutate(ProDifference = (`MedianSpend_Not DIY` - MedianSpend_DIY) / `MedianSpend_Not DIY`) %>%
  group_by(AHSYEAR) %>%
  mutate(ProjectProp = `Projects_Not DIY`/sum(`Projects_Not DIY`)) %>%
  ungroup() %>%
  mutate(LaborPropFinal = ProDifference*ProjectProp)

Mini <- Mini %>%
  group_by(AHSYEAR) %>%
  summarize(LaborProportionofProSpend = sum(LaborPropFinal))

Mini %>%
  ggplot(aes(AHSYEAR, LaborProportionofProSpend)) +
  geom_line(size = 2) +
  scale_y_continuous(labels = scales::percent, breaks = c(seq(.32, .52, .02)), limits = c(.32, .52)) +
  scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
  scale_fill_jbrec() +
  theme_jbrec(basesize = 40, linewidth = 2) +
  theme(axis.text.x = element_text(angle = 90),
        axis.ticks = element_blank()) +
  labs(title = 'Labor Costs as a Percentage of Pro Spend',
       y = 'Labor Costs as a Percentage of Pro Spend',
       x = 'Year')

2.1.3 $100 - $1,800

Show code
Summary <- DataTest %>%
  filter(!is.na(JOBDIY) & JOBCOST >= 100) %>%
  group_by(AHSYEAR, JOBDIY, CostClass, JobCategory) %>%
  summarize(MeanSpend = mean(JOBCOST),
            MedianSpend = median(JOBCOST),
            Projects = round(sum(WEIGHT)))

Mini <- Summary %>%
  filter(CostClass == 'Mini')

Mini <- Mini %>%
  filter(JobCategory %in% c('Exterior', 'Interior', 'LotYardOther', 'Systems')) %>%
  select(AHSYEAR, JOBDIY, JobCategory, MedianSpend:Projects) %>%
  ungroup() %>%
  select(-CostClass)

Mini <- Mini %>%
  pivot_wider(id_cols = c(AHSYEAR, JobCategory), names_from = JOBDIY, values_from = c(MedianSpend, Projects))


Mini <- Mini %>%
  mutate(ProDifference = (`MedianSpend_Not DIY` - MedianSpend_DIY) / `MedianSpend_Not DIY`) %>%
  group_by(AHSYEAR) %>%
  mutate(ProjectProp = `Projects_Not DIY`/sum(`Projects_Not DIY`)) %>%
  ungroup() %>%
  mutate(LaborPropFinal = ProDifference*ProjectProp)

Mini <- Mini %>%
  group_by(AHSYEAR) %>%
  summarize(LaborProportionofProSpend = sum(LaborPropFinal))

Mini %>%
  ggplot(aes(AHSYEAR, LaborProportionofProSpend)) +
  geom_line(size = 2) +
  scale_y_continuous(labels = scales::percent, breaks = c(seq(.32, .52, .02)), limits = c(.32, .52)) +
  scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
  scale_fill_jbrec() +
  theme_jbrec(basesize = 40, linewidth = 2) +
  theme(axis.text.x = element_text(angle = 90),
        axis.ticks = element_blank()) +
  labs(title = 'Labor Costs as a Percentage of Pro Spend',
       y = 'Labor Costs as a Percentage of Pro Spend',
       x = 'Year')

2.2 All Mini Projects

2.2.1 $0 - $1,800

Show code
Summary <- DataTest %>%
  filter(!is.na(JOBDIY)) %>%
  group_by(AHSYEAR, JOBDIY, CostClass, JobCategory) %>%
  summarize(MeanSpend = mean(JOBCOST),
            MedianSpend = median(JOBCOST),
            Projects = round(sum(WEIGHT)))

Mini <- Summary %>%
  filter(CostClass == 'Mini')

Mini <- Mini %>%
  select(AHSYEAR, JOBDIY, JobCategory, MedianSpend:Projects) %>%
  ungroup() %>%
  select(-CostClass)

Mini <- Mini %>%
  pivot_wider(id_cols = c(AHSYEAR, JobCategory), names_from = JOBDIY, values_from = c(MedianSpend, Projects))


Mini <- Mini %>%
  mutate(ProDifference = (`MedianSpend_Not DIY` - MedianSpend_DIY) / `MedianSpend_Not DIY`) %>%
  group_by(AHSYEAR) %>%
  mutate(ProjectProp = `Projects_Not DIY`/sum(`Projects_Not DIY`)) %>%
  ungroup() %>%
  mutate(LaborPropFinal = ProDifference*ProjectProp)

Mini <- Mini %>%
  group_by(AHSYEAR) %>%
  summarize(LaborProportionofProSpend = sum(LaborPropFinal))

Mini %>%
  ggplot(aes(AHSYEAR, LaborProportionofProSpend)) +
  geom_line(size = 2) +
  scale_y_continuous(labels = scales::percent, breaks = c(seq(.32, .58, .02)), limits = c(.32, .58)) +
  scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
  scale_fill_jbrec() +
  theme_jbrec(basesize = 40, linewidth = 2) +
  theme(axis.text.x = element_text(angle = 90),
        axis.ticks = element_blank()) +
  labs(title = 'Labor Costs as a Percentage of Pro Spend',
       y = 'Labor Costs as a Percentage of Pro Spend',
       x = 'Year')

2.2.2 $50 - $1,800

Show code
Summary <- DataTest %>%
  filter(!is.na(JOBDIY) & JOBCOST >= 50) %>%
  group_by(AHSYEAR, JOBDIY, CostClass, JobCategory) %>%
  summarize(MeanSpend = mean(JOBCOST),
            MedianSpend = median(JOBCOST),
            Projects = round(sum(WEIGHT)))

Mini <- Summary %>%
  filter(CostClass == 'Mini')

Mini <- Mini %>%
  select(AHSYEAR, JOBDIY, JobCategory, MedianSpend:Projects) %>%
  ungroup() %>%
  select(-CostClass)

Mini <- Mini %>%
  pivot_wider(id_cols = c(AHSYEAR, JobCategory), names_from = JOBDIY, values_from = c(MedianSpend, Projects))


Mini <- Mini %>%
  mutate(ProDifference = (`MedianSpend_Not DIY` - MedianSpend_DIY) / `MedianSpend_Not DIY`) %>%
  group_by(AHSYEAR) %>%
  mutate(ProjectProp = `Projects_Not DIY`/sum(`Projects_Not DIY`)) %>%
  ungroup() %>%
  mutate(LaborPropFinal = ProDifference*ProjectProp)

Mini <- Mini %>%
  group_by(AHSYEAR) %>%
  summarize(LaborProportionofProSpend = sum(LaborPropFinal))

Mini %>%
  ggplot(aes(AHSYEAR, LaborProportionofProSpend)) +
  geom_line(size = 2) +
  scale_y_continuous(labels = scales::percent, breaks = c(seq(.32, .52, .02)), limits = c(.32, .52)) +
  scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
  scale_fill_jbrec() +
  theme_jbrec(basesize = 40, linewidth = 2) +
  theme(axis.text.x = element_text(angle = 90),
        axis.ticks = element_blank()) +
  labs(title = 'Labor Costs as a Percentage of Pro Spend',
       y = 'Labor Costs as a Percentage of Pro Spend',
       x = 'Year')

2.2.3 $100 - $1,800

Show code
Summary <- DataTest %>%
  filter(!is.na(JOBDIY) & JOBCOST >= 100) %>%
  group_by(AHSYEAR, JOBDIY, CostClass, JobCategory) %>%
  summarize(MeanSpend = mean(JOBCOST),
            MedianSpend = median(JOBCOST),
            Projects = round(sum(WEIGHT)))

Mini <- Summary %>%
  filter(CostClass == 'Mini')

Mini <- Mini %>%
  select(AHSYEAR, JOBDIY, JobCategory, MedianSpend:Projects) %>%
  ungroup() %>%
  select(-CostClass)

Mini <- Mini %>%
  pivot_wider(id_cols = c(AHSYEAR, JobCategory), names_from = JOBDIY, values_from = c(MedianSpend, Projects))


Mini <- Mini %>%
  mutate(ProDifference = (`MedianSpend_Not DIY` - MedianSpend_DIY) / `MedianSpend_Not DIY`) %>%
  group_by(AHSYEAR) %>%
  mutate(ProjectProp = `Projects_Not DIY`/sum(`Projects_Not DIY`)) %>%
  ungroup() %>%
  mutate(LaborPropFinal = ProDifference*ProjectProp)

Mini <- Mini %>%
  group_by(AHSYEAR) %>%
  summarize(LaborProportionofProSpend = sum(LaborPropFinal))

Mini %>%
  ggplot(aes(AHSYEAR, LaborProportionofProSpend)) +
  geom_line(size = 2) +
  scale_y_continuous(labels = scales::percent, breaks = c(seq(.32, .52, .02)), limits = c(.32, .52)) +
  scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
  scale_fill_jbrec() +
  theme_jbrec(basesize = 40, linewidth = 2) +
  theme(axis.text.x = element_text(angle = 90),
        axis.ticks = element_blank()) +
  labs(title = 'Labor Costs as a Percentage of Pro Spend',
       y = 'Labor Costs as a Percentage of Pro Spend',
       x = 'Year')