TPO Heat Map Analysis

TPO Heat Map Analysis

Ian Kennedy
2023-03-11

1 Distribution Visualizations

Show code
TPOSampleJoined %>%
  ggplot(aes(log10(TOT_MCF))) +
  geom_density() +
  scale_x_continuous(limits = c(-2,5), breaks = c(seq(-2,5,1))) +
  scale_y_continuous(limits = c, breaks = c(seq(0,.7,.1))) +
  theme_fivethirtyeight(base_size = 20, base_family = "serif") +
  theme(axis.title = element_text(family = 'serif', size = 30)) +
  labs(title = "Logged Volume Distribution - Unfiltered", x = "Log(10) MCF", y = "Density") 
Show code
TPOSampleJoined %>%
  filter(TOT_MCF < 16000 & Pro_Rad < 200 & !is.na(Pro_Rad)) %>%
  ggplot(aes(log10(TOT_MCF))) +
  geom_density() +
  scale_x_continuous(limits = c(-2,5), breaks = c(seq(-2,5,1))) +
  scale_y_continuous(limits = c, breaks = c(seq(0,.7,.1))) +
  theme_fivethirtyeight(base_size = 20, base_family = "serif") +
  theme(axis.title = element_text(family = 'serif', size = 30)) +
  labs(title = "Logged Volume Distribution - Model Obs. Only", x = "Log(10) MCF", y = "Density") 
Show code
TPOSampleJoined %>%
  ggplot(aes(sqrt(Pro_Rad)))+
  geom_density() + 
  scale_x_continuous(limits = c(0, 20), breaks = c(seq(0,20,2))) +
  scale_y_continuous(limits = c(0,.25), breaks = c(seq(0,.25,.05))) +
  theme_fivethirtyeight(base_size = 20, base_family = "serif") +
  theme(axis.title = element_text(family = 'serif', size = 30)) +
  labs(title = "SqRt Radius Distribution", x = "SqRt Procurement Radius", y = "Density")

2 Normal-QQ Plots

Show code
Agg <- TPOSampleJoined %>%
  filter(!is.na(Pro_Rad)) %>%
ggplot(aes(sample = log10(TOT_MCF))) +
  stat_qq(geom = "point", size = 2) +
  stat_qq_line() +
  scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,4)) +
  scale_y_continuous(breaks = c(seq(-2,5,1)), limits = c(-2,5.2)) +
  theme_fivethirtyeight(base_size = 30, base_family = "serif") +
  theme(legend.position = "none", title = element_text(size = 20, family = "serif"), axis.text = element_text(size = 30))


L <- TPOSampleJoined %>%
  filter(!is.na(Pro_Rad) & Region == "Lake") %>%
ggplot(aes(sample = log10(TOT_MCF))) +
  stat_qq(geom = "point", size = 2, color = "blue") +
  stat_qq_line(color = "blue") +
  scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,4)) +
  scale_y_continuous(breaks = c(seq(-2,5,1)), limits = c(-2,5.2)) +
  theme_fivethirtyeight(base_size = 30, base_family = "serif") +
  theme(legend.position = "none", title = element_text(size = 20, family = "serif"), axis.text = element_text(size = 30))

NE <- TPOSampleJoined %>%
  filter(!is.na(Pro_Rad) & Region == "NewEngland") %>%
ggplot(aes(sample = log10(TOT_MCF))) +
  stat_qq(geom = "point", size = 2, color = "red") +
  stat_qq_line(color = "red") +
  scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,4)) +
  scale_y_continuous(breaks = c(seq(-2,5,1)), limits = c(-2,5.2)) +
  theme_fivethirtyeight(base_size = 30, base_family = "serif") +
  theme(legend.position = "none", title = element_text(size = 20, family = "serif"), axis.text = element_text(size = 30))

MA <- TPOSampleJoined %>%
  filter(!is.na(Pro_Rad) & Region == "MidAtl") %>%
ggplot(aes(sample = log10(TOT_MCF))) +
  stat_qq(geom = "point", size = 2, color = "purple") +
  stat_qq_line(color = "purple") +
  scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,4)) +
  scale_y_continuous(breaks = c(seq(-2,5,1)), limits = c(-2,5.2)) +
  theme_fivethirtyeight(base_size = 30, base_family = "serif") +
  theme(legend.position = "none", title = element_text(size = 20, family = "serif"), axis.text = element_text(size = 30))

P <- TPOSampleJoined %>%
  filter(!is.na(Pro_Rad) & Region == "Plains") %>%
ggplot(aes(sample = log10(TOT_MCF))) +
  stat_qq(geom = "point", size = 2, color = "brown") +
  stat_qq_line(color = "brown") +
  scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,4)) +
  scale_y_continuous(breaks = c(seq(-2,5,1)), limits = c(-2,5.2)) +
  theme_fivethirtyeight(base_size = 30, base_family = "serif") +
  theme(legend.position = "none", title = element_text(size = 20, family = "serif"), axis.text = element_text(size = 30))

C <- TPOSampleJoined %>%
  filter(!is.na(Pro_Rad) & Region == "Central") %>%
ggplot(aes(sample = log10(TOT_MCF))) +
  stat_qq(geom = "point", size = 2, color = "darkgreen") +
  stat_qq_line(color = "darkgreen") +
  scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,4)) +
  scale_y_continuous(breaks = c(seq(-2,5,1)), limits = c(-2,5.2)) +
  theme_fivethirtyeight(base_size = 30, base_family = "serif") +
  theme(legend.position = "none", title = element_text(size = 20, family = "serif"), axis.text = element_text(size = 30))

Test <- ggarrange(plotlist = list(Agg,C,L,MA,NE,P), labels = list("Agg.", "Central", "Lake", "MidAtl", "NewEng", "Plains"), font.label = list(size = 50, face = "plain", color ="black", family = 'serif')) 

annotate_figure(Test,
                left = textGrob("Log10(MCF)", rot = 90, vjust = .4, gp = gpar(cex = 4, fontfamily = "serif")),
                bottom = textGrob("Theoretical Quantiles",gp = gpar(cex = 4, fontfamily = "serif")),
                top = textGrob("Normal-QQ Plots - Log10(MCF)", gp = gpar(cex = 6, fontfamily = "serif")))
Show code
rm(Agg,C,L,MA,NE,P)


Agg <- TPOSampleJoined %>%
  filter(!is.na(Pro_Rad) & Pro_Rad < 200) %>%
ggplot(aes(sample = sqrt(Pro_Rad))) +
  stat_qq(geom = "point", size = 2) +
  stat_qq_line() +
  scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,5)) +
  scale_y_continuous(breaks = c(seq(0,15,3)), limits = c(-.1,15)) +
  theme_fivethirtyeight(base_size = 30, base_family = "serif") +
  theme(legend.position = "none", title = element_text(size = 20, family = "serif"), axis.text = element_text(size = 30))

L <- TPOSampleJoined %>%
  filter(!is.na(Pro_Rad) & Region == "Lake") %>%
ggplot(aes(sample = sqrt(Pro_Rad))) +
  stat_qq(geom = "point", size = 2, color = "blue") +
  stat_qq_line(color = "blue") +
  scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,4)) +
  scale_y_continuous(breaks = c(seq(0,15,3)), limits = c(-.4,15)) +
  theme_fivethirtyeight(base_size = 30, base_family = "serif") +
  theme(legend.position = "none", title = element_text(size = 20, family = "serif"), axis.text = element_text(size = 30))

NE <- TPOSampleJoined %>%
  filter(!is.na(Pro_Rad) & Region == "NewEngland") %>%
ggplot(aes(sample = sqrt(Pro_Rad))) +
  stat_qq(geom = "point", size = 2, color = "red") +
  stat_qq_line(color = "red") +
  scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,4)) +
  scale_y_continuous(breaks = c(seq(0,15,3)), limits = c(-.4,15)) +
  theme_fivethirtyeight(base_size = 30, base_family = "serif") +
  theme(legend.position = "none", title = element_text(size = 20, family = "serif"), axis.text = element_text(size = 30))

MA <- TPOSampleJoined %>%
  filter(!is.na(Pro_Rad) & Region == "MidAtl") %>%
ggplot(aes(sample = sqrt(Pro_Rad))) +
  stat_qq(geom = "point", size = 2, color = "purple") +
  stat_qq_line(color = "purple") +
  scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,4)) +
  scale_y_continuous(breaks = c(seq(0,15,3)), limits = c(-.4,15)) +
  theme_fivethirtyeight(base_size = 30, base_family = "serif") +
  theme(legend.position = "none", title = element_text(size = 20, family = "serif"), axis.text = element_text(size = 30))

P <- TPOSampleJoined %>%
  filter(!is.na(Pro_Rad) & Region == "Plains") %>%
ggplot(aes(sample = sqrt(Pro_Rad))) +
  stat_qq(geom = "point", size = 2, color = "brown") +
  stat_qq_line(color = "brown") +
  scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,4)) +
  scale_y_continuous(breaks = c(seq(0,15,3)), limits = c(-.4,15)) +
  theme_fivethirtyeight(base_size = 30, base_family = "serif") +
  theme(legend.position = "none", title = element_text(size = 20, family = "serif"), axis.text = element_text(size = 30))

C <- TPOSampleJoined %>%
  filter(!is.na(Pro_Rad) & Region == "Central") %>%
ggplot(aes(sample = sqrt(Pro_Rad))) +
  stat_qq(geom = "point", size = 2, color = "darkgreen") +
  stat_qq_line(color = "darkgreen") +
  scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,4)) +
  scale_y_continuous(breaks = c(seq(0,15,3)), limits = c(-.4,15)) +
  theme_fivethirtyeight(base_size = 30, base_family = "serif") +
  theme(legend.position = "none", title = element_text(size = 20, family = "serif"), axis.text = element_text(size = 30))

Test <- ggarrange(plotlist = list(Agg,C,L,MA,NE,P), labels = list("Agg.", "Central", "Lake", "MidAtl", "NewEng", "Plains"), font.label = list(size = 50, face = "plain", color ="black", family = 'serif')) 

annotate_figure(Test,
                left = textGrob("Sqrt(Procurement Radius)", rot = 90, gp = gpar(cex = 4, fontfamily = "serif")),
                bottom = textGrob("Theoretical Quantiles",gp = gpar(cex = 4, fontfamily = "serif")),
                top = textGrob("Normal-QQ Plots - Procurement Radius", gp = gpar(cex = 6, fontfamily = "serif")))
Show code
rm(Agg,C,L,MA,NE,P)
Show code
# ggplot(TPOSampleJoined, aes(LOG_TOT_MCF, Pro_Rad_SqRt)) +
#     geom_point() +
#     geom_smooth(se = FALSE, method = "lm", fullrange = TRUE) +
#     scale_x_continuous(limits = c(-2, 4.5), breaks = c(seq(-2,4.5,.5))) +
#     scale_y_continuous(limits = c(0, 14), breaks = c(seq(0,14,2))) +
#     facet_wrap(~Region, ncol = 3) +
#     theme_fivethirtyeight(base_size = 20, base_family = "serif") +
#     theme(legend.position = "none", title = element_text(size = 20, family = "serif")) +
#     labs(title = "Regression Observations") +
#     ggpubr::stat_regline_equation(label.x = 1, label.y = 14, aes(label = ..rr.label..))


Agg <- TPOSampleJoined %>%
    ggplot(aes(LOG_TOT_MCF, Pro_Rad_SqRt)) +
    geom_point(size = 3) +
    geom_smooth(se = FALSE, method = "lm", fullrange = TRUE, color = "black") +
    scale_x_continuous(limits = c(-2, 4.5), breaks = c(seq(-2,4.5,.5))) +
    scale_y_continuous(limits = c(0, 14), breaks = c(seq(0,14,2))) +
    theme_fivethirtyeight(base_size = 30, base_family = "serif") +
    theme(axis.title = element_text(family = 'serif', size = 30), axis.title.x = element_blank(), axis.title.y = element_blank(), , axis.text = element_text(size = 30)) +
    ggpubr::stat_regline_equation(label.x = 1, label.y = 14, size = 10, aes(label = ..rr.label..))

C <- TPOSampleJoined %>%
  filter(Region == "Central") %>%
  ggplot(aes(LOG_TOT_MCF, Pro_Rad_SqRt)) +
    geom_point(size = 3, color = "darkgreen") +
    geom_smooth(se = FALSE, method = "lm", fullrange = TRUE, color = "darkgreen") +
    scale_x_continuous(limits = c(-2, 4.5), breaks = c(seq(-2,4.5,.5))) +
    scale_y_continuous(limits = c(0, 14), breaks = c(seq(0,14,2))) +
    theme_fivethirtyeight(base_size = 30, base_family = "serif") +
    theme(axis.title = element_text(family = 'serif', size = 30), axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text = element_text(size = 30)) +
    ggpubr::stat_regline_equation(label.x = 1, label.y = 14, size = 10, aes(label = ..rr.label..))

L <- TPOSampleJoined %>%
  filter(Region == "Lake") %>%
  ggplot(aes(LOG_TOT_MCF, Pro_Rad_SqRt)) +
    geom_point(size = 3, color = "blue") +
    geom_smooth(se = FALSE, method = "lm", fullrange = TRUE, color = "blue") +
    scale_x_continuous(limits = c(-2, 4.5), breaks = c(seq(-2,4.5,.5))) +
    scale_y_continuous(limits = c(0, 14), breaks = c(seq(0,14,2))) +
    theme_fivethirtyeight(base_size = 30, base_family = "serif") +
    theme(axis.title = element_text(family = 'serif', size = 30), axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text = element_text(size = 30)) +
    ggpubr::stat_regline_equation(label.x = 1, label.y = 14, size = 10, aes(label = ..rr.label..))

MA <- TPOSampleJoined %>%
  filter(Region == "MidAtl") %>%
  ggplot(aes(LOG_TOT_MCF, Pro_Rad_SqRt)) +
    geom_point(size = 3, color = "purple") +
    geom_smooth(se = FALSE, method = "lm", fullrange = TRUE, color = "purple") +
    scale_x_continuous(limits = c(-2, 4.5), breaks = c(seq(-2,4.5,.5))) +
    scale_y_continuous(limits = c(0, 14), breaks = c(seq(0,14,2))) +
    theme_fivethirtyeight(base_size = 30, base_family = "serif") +
    theme(axis.title = element_text(family = 'serif', size = 30), axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text = element_text(size = 30)) +
    ggpubr::stat_regline_equation(label.x = 1, label.y = 14, size = 10, aes(label = ..rr.label..))

NE <- TPOSampleJoined %>%
  filter(Region == "NewEngland") %>%
  ggplot(aes(LOG_TOT_MCF, Pro_Rad_SqRt)) +
    geom_point(size = 3, color = "red") +
    geom_smooth(se = FALSE, method = "lm", fullrange = TRUE, color = "red") +
    scale_x_continuous(limits = c(-2, 4.5), breaks = c(seq(-2,4.5,.5))) +
    scale_y_continuous(limits = c(0, 14), breaks = c(seq(0,14,2))) +
    theme_fivethirtyeight(base_size = 30, base_family = "serif") +
    theme(axis.title = element_text(family = 'serif', size = 30), axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text = element_text(size = 30)) +
    ggpubr::stat_regline_equation(label.x = 1, label.y = 14, size = 10, aes(label = ..rr.label..))

P <- TPOSampleJoined %>%
  filter(Region == "Plains") %>%
  ggplot(aes(LOG_TOT_MCF, Pro_Rad_SqRt)) +
    geom_point(size = 3, color = "brown") +
    geom_smooth(se = FALSE, method = "lm", fullrange = TRUE, color = "brown") +
    scale_x_continuous(limits = c(-2, 4.5), breaks = c(seq(-2,4.5,.5))) +
    scale_y_continuous(limits = c(0, 14), breaks = c(seq(0,14,2))) +
    theme_fivethirtyeight(base_size = 30, base_family = "serif") +
    theme(axis.title = element_text(family = 'serif', size = 30), axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text = element_text(size = 30)) +
    ggpubr::stat_regline_equation(label.x = 1, label.y = 14, size = 10, aes(label = ..rr.label..))


Test <- ggarrange(plotlist = list(Agg,C,L,MA,NE,P), labels = list("Agg.", "Central", "Lake", "MidAtl", "NewEng", "Plains"), font.label = list(size = 50, face = "plain", color ="black", family = 'serif')) 

annotate_figure(Test,
                left = textGrob("Sqrt(Procurement Radius)", rot = 90, gp = gpar(cex = 4, fontfamily = "serif")),
                bottom = textGrob("Log(10) MCF",gp = gpar(cex = 4, fontfamily = "serif")),
                top = textGrob("Regression Observations", gp = gpar(cex = 6, fontfamily = "serif")))
Show code
rm(Agg,C,L,MA,NE,P)

ModelStats <- read_xlsx(path = "C:\\Users\\ikenn\\Downloads\\ModelStats.xlsx")

knitr::kable(ModelStats, digits = 3, align = "cccc", col.names = c("Model", "RMSE", "MAE", "R-Squared"), caption = "Model Fit Statistics for Assessed Models. 
      All statistics calculated using test data.") %>%
  kable_styling(font_size = 16)
Table 1: Model Fit Statistics for Assessed Models. All statistics calculated using test data.
Model RMSE MAE R-Squared
Linear 2.144 1.662 0.135
Additive 2.155 1.669 0.126
Decision Tree 2.205 1.709 0.084
Random Forest 2.165 1.667 0.117
Gradient Boosted 2.159 1.672 0.122
Machine Learning (h2o) 2.028 1.557 0.149