Exploratory Plots
library(ggthemes)
library(openxlsx)
library(tidyverse)
library(add2ggplot)
library(priceR)
library(sysfonts)
library(conflicted)
# Prefer Dplyr over other packages for certain functions
conflicts_prefer(dplyr::summarize(), dplyr::rename(), dplyr::group_by(), dplyr::filter(), dplyr::mutate(), dplyr::select(), base::as.numeric())
# Read in the Raw AHS Data
Data <- read.xlsx("C:/Users/ikennedy/JBREC/BP research public use microdata coding and data - General/AHSProject/CleanedData/Data_1995_2021.xlsx")
# Convert JOBDIY to NA if it was Not Reported ('NR') and Create a 'Count' variable (this will be used later on)
Data <- Data %>%
mutate(JOBDIY = ifelse(JOBDIY == 'NR', NA, JOBDIY),
Count = 1)
Reweights <- read.csv("C:/Users/ikennedy/JBREC/BP research public use microdata coding and data - General/AHSGeneralInfo/2013 AHS_Harvard JCHS reweights.csv")
Reweights <- Reweights %>%
rename(CONTROL = control) %>%
select(CONTROL, jchsweight2)
Reweights$CONTROL <- as.character(Reweights$CONTROL)
Data_2013 <- Data %>%
filter(AHSYEAR == 2013)
Data_2013 <- Data_2013 %>%
left_join(Reweights, by = 'CONTROL')
Data_2013 <- Data_2013 %>%
mutate(WEIGHT = jchsweight2) %>%
select(-jchsweight2)
Data <- Data %>%
filter(AHSYEAR != 2013)
Data <- Data %>%
rbind(Data_2013) %>%
arrange(AHSYEAR)
# Read in the Inflation Adjustment Spreadsheet
Inflation <- read.xlsx("C:/Users/ikennedy/JBREC/BP research public use microdata coding and data - General/AHSProject/InflationRate.xlsx")
# Select the year and rate columns
Inflation <- Inflation[,c(1,3)]
DataTest <- Data %>%
filter(!is.na(JobCategory) & TENURE == "Owned/Bought" & JobCategory != 'DisRepairs' & JOBCOST > 0 & JOBCOST != MAINTAMT) %>%
left_join(Inflation, by = c('AHSYEAR' = 'Year')) %>%
mutate(JOBCOST = JOBCOST*Rate,
JOBCOSTWeighted = JOBCOST*WEIGHT,
JOBDIY = ifelse(JOBDIY == '-1', NA, JOBDIY)) %>%
select(-Rate) %>%
mutate(CostClass = case_when(JOBCOST <= 1800 ~ 'Mini',
JOBCOST > 1800 & JOBCOST <= 10000 ~ "Small",
JOBCOST > 10000 & JOBCOST <= 30000 ~ 'Medium',
JOBCOST > 30000 ~ 'Large'))
Summary <- DataTest %>%
filter(!is.na(JOBDIY) & JOBCOST > 10) %>%
group_by(AHSYEAR, JOBDIY, CostClass, JobCategory) %>%
summarize(MeanSpend = mean(JOBCOST),
MedianSpend = median(JOBCOST),
Projects = round(sum(WEIGHT)))
Large_Medium <- Summary %>%
filter(CostClass %in% c('Medium', 'Large'))
Small <- Summary %>%
filter(CostClass == 'Small')
Mini <- Summary %>%
filter(CostClass == 'Mini')
Large_Medium %>%
filter(CostClass == 'Large') %>%
ggplot(aes(AHSYEAR, MedianSpend, fill = JOBDIY)) +
geom_col(position = 'dodge') +
scale_y_continuous(labels = scales::comma) +
scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
scale_fill_jbrec() +
theme_jbrec(basesize = 40, linewidth = 2) +
labs(title = 'Median Spend by Project Category for Large Projects, DIY v Pro',
y = 'Median Spend (Real $)',
x = 'Year') +
theme(axis.text.x = element_text(angle = 90),
axis.ticks = element_blank()) +
facet_wrap(~JobCategory, scales = 'free')
Large_Medium %>%
filter(CostClass == 'Medium') %>%
ggplot(aes(AHSYEAR, MedianSpend, fill = JOBDIY)) +
geom_col(position = 'dodge') +
scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
scale_y_continuous(labels = scales::comma) +
scale_fill_jbrec() +
theme_jbrec(basesize = 40, linewidth = 2) +
labs(title = 'Median Spend by Project Category for Medium Projects, DIY v Pro',
y = 'Median Spend (Real $)',
x = 'Year') +
theme(axis.text.x = element_text(angle = 90),
axis.ticks = element_blank()) +
facet_wrap(~JobCategory, scales = 'free')
Small %>%
ggplot(aes(AHSYEAR, MedianSpend, fill = JOBDIY)) +
geom_col(position = 'dodge') +
scale_y_continuous(labels = scales::comma) +
scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
scale_fill_jbrec() +
theme_jbrec(basesize = 40, linewidth = 2) +
theme(axis.text.x = element_text(angle = 90),
axis.ticks = element_blank()) +
labs(title = 'Median Spend by Project Category for Small Projects, DIY v Pro',
y = 'Median Spend (Real $)',
x = 'Year') +
facet_wrap(~JobCategory, scales = 'free')
Mini %>%
ggplot(aes(AHSYEAR, MedianSpend, fill = JOBDIY)) +
geom_col(position = 'dodge') +
scale_y_continuous(labels = scales::comma) +
scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
scale_fill_jbrec() +
theme_jbrec(basesize = 40, linewidth = 2) +
theme(axis.text.x = element_text(angle = 90),
axis.ticks = element_blank()) +
labs(title = 'Median Spend by Project Category for Mini Projects, DIY v Pro',
y = 'Median Spend (Real $)',
x = 'Year') +
facet_wrap(~JobCategory, scales = 'free')
Large_Medium %>%
filter(CostClass == 'Large') %>%
ggplot(aes(AHSYEAR, MeanSpend, fill = JOBDIY)) +
geom_col(position = 'dodge') +
scale_y_continuous(labels = scales::comma) +
scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
scale_fill_jbrec() +
theme_jbrec(basesize = 40, linewidth = 2) +
labs(title = 'Mean Spend by Project Category for Large Projects, DIY v Pro',
y = 'Mean Spend (Real $)',
x = 'Year') +
theme(axis.text.x = element_text(angle = 90),
axis.ticks = element_blank()) +
facet_wrap(~JobCategory, scales = 'free')
Large_Medium %>%
filter(CostClass == 'Medium') %>%
ggplot(aes(AHSYEAR, MeanSpend, fill = JOBDIY)) +
geom_col(position = 'dodge') +
scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
scale_y_continuous(labels = scales::comma) +
scale_fill_jbrec() +
theme_jbrec(basesize = 40, linewidth = 2) +
labs(title = 'Mean Spend by Project Category for Medium Projects, DIY v Pro',
y = 'Mean Spend (Real $)',
x = 'Year') +
theme(axis.text.x = element_text(angle = 90),
axis.ticks = element_blank()) +
facet_wrap(~JobCategory, scales = 'free')
Small %>%
ggplot(aes(AHSYEAR, MeanSpend, fill = JOBDIY)) +
geom_col(position = 'dodge') +
scale_y_continuous(labels = scales::comma) +
scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
scale_fill_jbrec() +
theme_jbrec(basesize = 40, linewidth = 2) +
theme(axis.text.x = element_text(angle = 90),
axis.ticks = element_blank()) +
labs(title = 'Mean Spend by Project Category for Small Projects, DIY v Pro',
y = 'Mean Spend (Real $)',
x = 'Year') +
facet_wrap(~JobCategory, scales = 'free')
Mini %>%
ggplot(aes(AHSYEAR, MeanSpend, fill = JOBDIY)) +
geom_col(position = 'dodge') +
scale_y_continuous(labels = scales::comma) +
scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
scale_fill_jbrec() +
theme_jbrec(basesize = 40, linewidth = 2) +
theme(axis.text.x = element_text(angle = 90),
axis.ticks = element_blank()) +
labs(title = 'Mean Spend by Project Category for Mini Projects, DIY v Pro',
y = 'Mean Spend (Real $)',
x = 'Year') +
facet_wrap(~JobCategory, scales = 'free')
Summary <- DataTest %>%
filter(!is.na(JOBDIY)) %>%
group_by(AHSYEAR, JOBDIY, CostClass, JobCategory) %>%
summarize(MeanSpend = mean(JOBCOST),
MedianSpend = median(JOBCOST),
Projects = round(sum(WEIGHT)))
Mini <- Summary %>%
filter(CostClass == 'Mini')
Mini <- Mini %>%
filter(JobCategory %in% c('Exterior', 'Interior', 'LotYardOther', 'Systems')) %>%
select(AHSYEAR, JOBDIY, JobCategory, MedianSpend:Projects) %>%
ungroup() %>%
select(-CostClass)
Mini <- Mini %>%
pivot_wider(id_cols = c(AHSYEAR, JobCategory), names_from = JOBDIY, values_from = c(MedianSpend, Projects))
Mini <- Mini %>%
mutate(ProDifference = (`MedianSpend_Not DIY` - MedianSpend_DIY) / `MedianSpend_Not DIY`) %>%
group_by(AHSYEAR) %>%
mutate(ProjectProp = `Projects_Not DIY`/sum(`Projects_Not DIY`)) %>%
ungroup() %>%
mutate(LaborPropFinal = ProDifference*ProjectProp)
Mini <- Mini %>%
group_by(AHSYEAR) %>%
summarize(LaborProportionofProSpend = sum(LaborPropFinal))
Mini %>%
ggplot(aes(AHSYEAR, LaborProportionofProSpend)) +
geom_line(size = 2) +
scale_y_continuous(labels = scales::percent, breaks = c(seq(.32, .52, .02)), limits = c(.32, .52)) +
scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
scale_fill_jbrec() +
theme_jbrec(basesize = 40, linewidth = 2) +
theme(axis.text.x = element_text(angle = 90),
axis.ticks = element_blank()) +
labs(title = 'Labor Costs as a Percentage of Pro Spend',
y = 'Labor Costs as a Percentage of Pro Spend',
x = 'Year')
Summary <- DataTest %>%
filter(!is.na(JOBDIY) & JOBCOST >= 50) %>%
group_by(AHSYEAR, JOBDIY, CostClass, JobCategory) %>%
summarize(MeanSpend = mean(JOBCOST),
MedianSpend = median(JOBCOST),
Projects = round(sum(WEIGHT)))
Mini <- Summary %>%
filter(CostClass == 'Mini')
Mini <- Mini %>%
filter(JobCategory %in% c('Exterior', 'Interior', 'LotYardOther', 'Systems')) %>%
select(AHSYEAR, JOBDIY, JobCategory, MedianSpend:Projects) %>%
ungroup() %>%
select(-CostClass)
Mini <- Mini %>%
pivot_wider(id_cols = c(AHSYEAR, JobCategory), names_from = JOBDIY, values_from = c(MedianSpend, Projects))
Mini <- Mini %>%
mutate(ProDifference = (`MedianSpend_Not DIY` - MedianSpend_DIY) / `MedianSpend_Not DIY`) %>%
group_by(AHSYEAR) %>%
mutate(ProjectProp = `Projects_Not DIY`/sum(`Projects_Not DIY`)) %>%
ungroup() %>%
mutate(LaborPropFinal = ProDifference*ProjectProp)
Mini <- Mini %>%
group_by(AHSYEAR) %>%
summarize(LaborProportionofProSpend = sum(LaborPropFinal))
Mini %>%
ggplot(aes(AHSYEAR, LaborProportionofProSpend)) +
geom_line(size = 2) +
scale_y_continuous(labels = scales::percent, breaks = c(seq(.32, .52, .02)), limits = c(.32, .52)) +
scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
scale_fill_jbrec() +
theme_jbrec(basesize = 40, linewidth = 2) +
theme(axis.text.x = element_text(angle = 90),
axis.ticks = element_blank()) +
labs(title = 'Labor Costs as a Percentage of Pro Spend',
y = 'Labor Costs as a Percentage of Pro Spend',
x = 'Year')
Summary <- DataTest %>%
filter(!is.na(JOBDIY) & JOBCOST >= 100) %>%
group_by(AHSYEAR, JOBDIY, CostClass, JobCategory) %>%
summarize(MeanSpend = mean(JOBCOST),
MedianSpend = median(JOBCOST),
Projects = round(sum(WEIGHT)))
Mini <- Summary %>%
filter(CostClass == 'Mini')
Mini <- Mini %>%
filter(JobCategory %in% c('Exterior', 'Interior', 'LotYardOther', 'Systems')) %>%
select(AHSYEAR, JOBDIY, JobCategory, MedianSpend:Projects) %>%
ungroup() %>%
select(-CostClass)
Mini <- Mini %>%
pivot_wider(id_cols = c(AHSYEAR, JobCategory), names_from = JOBDIY, values_from = c(MedianSpend, Projects))
Mini <- Mini %>%
mutate(ProDifference = (`MedianSpend_Not DIY` - MedianSpend_DIY) / `MedianSpend_Not DIY`) %>%
group_by(AHSYEAR) %>%
mutate(ProjectProp = `Projects_Not DIY`/sum(`Projects_Not DIY`)) %>%
ungroup() %>%
mutate(LaborPropFinal = ProDifference*ProjectProp)
Mini <- Mini %>%
group_by(AHSYEAR) %>%
summarize(LaborProportionofProSpend = sum(LaborPropFinal))
Mini %>%
ggplot(aes(AHSYEAR, LaborProportionofProSpend)) +
geom_line(size = 2) +
scale_y_continuous(labels = scales::percent, breaks = c(seq(.32, .52, .02)), limits = c(.32, .52)) +
scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
scale_fill_jbrec() +
theme_jbrec(basesize = 40, linewidth = 2) +
theme(axis.text.x = element_text(angle = 90),
axis.ticks = element_blank()) +
labs(title = 'Labor Costs as a Percentage of Pro Spend',
y = 'Labor Costs as a Percentage of Pro Spend',
x = 'Year')
Summary <- DataTest %>%
filter(!is.na(JOBDIY)) %>%
group_by(AHSYEAR, JOBDIY, CostClass, JobCategory) %>%
summarize(MeanSpend = mean(JOBCOST),
MedianSpend = median(JOBCOST),
Projects = round(sum(WEIGHT)))
Mini <- Summary %>%
filter(CostClass == 'Mini')
Mini <- Mini %>%
select(AHSYEAR, JOBDIY, JobCategory, MedianSpend:Projects) %>%
ungroup() %>%
select(-CostClass)
Mini <- Mini %>%
pivot_wider(id_cols = c(AHSYEAR, JobCategory), names_from = JOBDIY, values_from = c(MedianSpend, Projects))
Mini <- Mini %>%
mutate(ProDifference = (`MedianSpend_Not DIY` - MedianSpend_DIY) / `MedianSpend_Not DIY`) %>%
group_by(AHSYEAR) %>%
mutate(ProjectProp = `Projects_Not DIY`/sum(`Projects_Not DIY`)) %>%
ungroup() %>%
mutate(LaborPropFinal = ProDifference*ProjectProp)
Mini <- Mini %>%
group_by(AHSYEAR) %>%
summarize(LaborProportionofProSpend = sum(LaborPropFinal))
Mini %>%
ggplot(aes(AHSYEAR, LaborProportionofProSpend)) +
geom_line(size = 2) +
scale_y_continuous(labels = scales::percent, breaks = c(seq(.32, .58, .02)), limits = c(.32, .58)) +
scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
scale_fill_jbrec() +
theme_jbrec(basesize = 40, linewidth = 2) +
theme(axis.text.x = element_text(angle = 90),
axis.ticks = element_blank()) +
labs(title = 'Labor Costs as a Percentage of Pro Spend',
y = 'Labor Costs as a Percentage of Pro Spend',
x = 'Year')
Summary <- DataTest %>%
filter(!is.na(JOBDIY) & JOBCOST >= 50) %>%
group_by(AHSYEAR, JOBDIY, CostClass, JobCategory) %>%
summarize(MeanSpend = mean(JOBCOST),
MedianSpend = median(JOBCOST),
Projects = round(sum(WEIGHT)))
Mini <- Summary %>%
filter(CostClass == 'Mini')
Mini <- Mini %>%
select(AHSYEAR, JOBDIY, JobCategory, MedianSpend:Projects) %>%
ungroup() %>%
select(-CostClass)
Mini <- Mini %>%
pivot_wider(id_cols = c(AHSYEAR, JobCategory), names_from = JOBDIY, values_from = c(MedianSpend, Projects))
Mini <- Mini %>%
mutate(ProDifference = (`MedianSpend_Not DIY` - MedianSpend_DIY) / `MedianSpend_Not DIY`) %>%
group_by(AHSYEAR) %>%
mutate(ProjectProp = `Projects_Not DIY`/sum(`Projects_Not DIY`)) %>%
ungroup() %>%
mutate(LaborPropFinal = ProDifference*ProjectProp)
Mini <- Mini %>%
group_by(AHSYEAR) %>%
summarize(LaborProportionofProSpend = sum(LaborPropFinal))
Mini %>%
ggplot(aes(AHSYEAR, LaborProportionofProSpend)) +
geom_line(size = 2) +
scale_y_continuous(labels = scales::percent, breaks = c(seq(.32, .52, .02)), limits = c(.32, .52)) +
scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
scale_fill_jbrec() +
theme_jbrec(basesize = 40, linewidth = 2) +
theme(axis.text.x = element_text(angle = 90),
axis.ticks = element_blank()) +
labs(title = 'Labor Costs as a Percentage of Pro Spend',
y = 'Labor Costs as a Percentage of Pro Spend',
x = 'Year')
Summary <- DataTest %>%
filter(!is.na(JOBDIY) & JOBCOST >= 100) %>%
group_by(AHSYEAR, JOBDIY, CostClass, JobCategory) %>%
summarize(MeanSpend = mean(JOBCOST),
MedianSpend = median(JOBCOST),
Projects = round(sum(WEIGHT)))
Mini <- Summary %>%
filter(CostClass == 'Mini')
Mini <- Mini %>%
select(AHSYEAR, JOBDIY, JobCategory, MedianSpend:Projects) %>%
ungroup() %>%
select(-CostClass)
Mini <- Mini %>%
pivot_wider(id_cols = c(AHSYEAR, JobCategory), names_from = JOBDIY, values_from = c(MedianSpend, Projects))
Mini <- Mini %>%
mutate(ProDifference = (`MedianSpend_Not DIY` - MedianSpend_DIY) / `MedianSpend_Not DIY`) %>%
group_by(AHSYEAR) %>%
mutate(ProjectProp = `Projects_Not DIY`/sum(`Projects_Not DIY`)) %>%
ungroup() %>%
mutate(LaborPropFinal = ProDifference*ProjectProp)
Mini <- Mini %>%
group_by(AHSYEAR) %>%
summarize(LaborProportionofProSpend = sum(LaborPropFinal))
Mini %>%
ggplot(aes(AHSYEAR, LaborProportionofProSpend)) +
geom_line(size = 2) +
scale_y_continuous(labels = scales::percent, breaks = c(seq(.32, .52, .02)), limits = c(.32, .52)) +
scale_x_continuous(breaks = c(seq(1995, 2021, 2))) +
scale_fill_jbrec() +
theme_jbrec(basesize = 40, linewidth = 2) +
theme(axis.text.x = element_text(angle = 90),
axis.ticks = element_blank()) +
labs(title = 'Labor Costs as a Percentage of Pro Spend',
y = 'Labor Costs as a Percentage of Pro Spend',
x = 'Year')