TPO Heat Map Analysis
TPOSampleJoined %>%
ggplot(aes(LOG_TOT_MCF)) +
geom_density() +
scale_x_continuous(limits = c(-2,5), breaks = c(seq(-2,5,1))) +
scale_y_continuous(limits = c, breaks = c(seq(0,.7,.1))) +
theme_fivethirtyeight(base_size = 12, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 14)) +
labs(title = "Logged Volume Distribution", x= "Log(10) MCF",y = "Density")
TPOSampleJoined %>%
ggplot(aes(Pro_Rad_SqRt))+
geom_density() +
scale_x_continuous(limits = c(0, 15), breaks = c(seq(0,15,1))) +
scale_y_continuous(limits = c(0,.25), breaks = c(seq(0,.25,.05))) +
theme_fivethirtyeight(base_size = 12, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 14)) +
labs(title = "SqRt Radius Distribution", x= "SqRt Procurement Radius",y = "Density")
TPOSampleJoined <- TPOSampleJoined %>%
filter(TOT_MCF < 16000 & !is.na(TOT_MCF)) %>%
mutate(MILL_COUNTYCD = as.character(MILL_COUNTYCD),
MILL_STATECD = as.character(MILL_STATECD),
MTC = as.character(MTC))
#Remove Outliers from Joined Sample, store as 'UnusedFinal'...These mills provided a Pro Rad during 2019-2021 and can be joined back following model creation
Unused1 <- TPOSampleJoined %>%
filter((Pro_Rad >= 200 | Pro_Rad <= 1))
Unused2 <- TPOSampleJoined %>%
filter(MILL_STATE == "OH" & volt == "low")
Unused3 <- TPOSampleJoined %>%
filter(MILL_STATE == "MI" & Pro_Rad > 150) %>%
filter(!(MILL_NAME %in% Unused1$MILL_NAME))
Unused4 <- TPOSampleJoined %>%
filter(MILL_NAME == "ANTHONY & CO., INC.")
UnusedFinal <- Unused1 %>%
bind_rows(Unused2, Unused3, Unused4)
rm(Unused1, Unused2, Unused3, Unused4)
TPOSampleJoined <- TPOSampleJoined %>%
filter(!UnusedCheck %in% UnusedFinal$UnusedCheck)
TPOSampleJoined <- TPOSampleJoined %>%
mutate(MTC = case_when(MTC == "10" ~ "Sawmill",
MILL_STATE == "OH" & MTC != "10" ~ "Sawmill",
MILL_STATE %in% c("MO", "SD") ~ "Sawmill",
MTC != "10" ~ "Others"))
TPOSampleJoined %>%
filter(!is.na(Pro_Rad_SqRt) & !is.na(volt) & !is.na(MTC) & !is.na(MILL_STATE) & !is.na(LOG_TOT_MCF)) %>%
ggplot(aes(LOG_TOT_MCF)) +
geom_density() +
scale_x_continuous(limits = c(-2, 5), breaks = c(seq(-2,5,1))) +
scale_y_continuous(limits = c, breaks = c(seq(0,.7,.1))) +
theme_fivethirtyeight(base_size = 12, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 14)) +
labs(title = "Logged Volume Distribution", x= "Log(10) MCF",y = "Density")
TPOSampleJoined %>%
filter(!is.na(Pro_Rad_SqRt) & !is.na(volt) & !is.na(MTC) & !is.na(MILL_STATE) & !is.na(LOG_TOT_MCF)) %>%
ggplot(aes(Pro_Rad_SqRt)) +
geom_density() +
scale_x_continuous(limits = c(0, 15), breaks = c(seq(0,15,1))) +
scale_y_continuous(limits = c(0,.25), breaks = c(seq(0,.25,.05))) +
theme_fivethirtyeight(base_size = 12, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 14)) +
labs(title = "SqRt Radius Distribution", x= "SqRt Procurement Radius",y = "Density")
# RegionTest <- TPOSampleJoined %>%
# mutate(Region = case_when(MILL_STATECD %in% c("25", "23", "44", "33", "50", "9") ~ "NewEngland",
# MILL_STATECD %in% c("36", "34", "24", "10") ~ "MidAtl",
# MILL_STATECD %in% c("54", "39", "17", "18", "19") ~ "Central",
# MILL_STATECD %in% c("26", "27", "55") ~ "Lake",
# MILL_STATECD %in% c("38", "46", "31", "20", "29") ~ "Plains",
# MILL_STATECD == "42" ~ "PA"))
#
# TPOSampleJoined %>%
# dplyr::count(MILL_STATECD, volt, sort = TRUE)
Best_Model <- lm(TPOSampleJoined$Pro_Rad ~ TPOSampleJoined$TOT_MCF + TPOSampleJoined$MILL_STATE + TPOSampleJoined$volt + 0)
summary(Best_Model)
Call:
lm(formula = TPOSampleJoined$Pro_Rad ~ TPOSampleJoined$TOT_MCF +
TPOSampleJoined$MILL_STATE + TPOSampleJoined$volt + 0)
Residuals:
Min 1Q Median 3Q Max
-54.567 -20.960 -8.422 16.168 115.347
Coefficients:
Estimate Std. Error t value
TPOSampleJoined$TOT_MCF 0.0019813 0.0009188 2.156
TPOSampleJoined$MILL_STATEIA 83.5345708 7.3284715 11.399
TPOSampleJoined$MILL_STATEIL 51.5768161 7.2353945 7.128
TPOSampleJoined$MILL_STATEIN 57.1340456 6.3606360 8.982
TPOSampleJoined$MILL_STATEMI 70.2488211 3.8198170 18.391
TPOSampleJoined$MILL_STATEMO 54.5661445 4.6418215 11.755
TPOSampleJoined$MILL_STATEOH 76.1253880 5.5276194 13.772
TPOSampleJoined$MILL_STATEOthers 57.6047963 2.7165284 21.205
TPOSampleJoined$MILL_STATESD 44.2959245 12.9745960 3.414
TPOSampleJoined$voltlow -14.4914337 3.2123067 -4.511
Pr(>|t|)
TPOSampleJoined$TOT_MCF 0.031515 *
TPOSampleJoined$MILL_STATEIA < 0.0000000000000002 ***
TPOSampleJoined$MILL_STATEIL 0.00000000000347 ***
TPOSampleJoined$MILL_STATEIN < 0.0000000000000002 ***
TPOSampleJoined$MILL_STATEMI < 0.0000000000000002 ***
TPOSampleJoined$MILL_STATEMO < 0.0000000000000002 ***
TPOSampleJoined$MILL_STATEOH < 0.0000000000000002 ***
TPOSampleJoined$MILL_STATEOthers < 0.0000000000000002 ***
TPOSampleJoined$MILL_STATESD 0.000691 ***
TPOSampleJoined$voltlow 0.00000800083270 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 31.01 on 512 degrees of freedom
(2390 observations deleted due to missingness)
Multiple R-squared: 0.7849, Adjusted R-squared: 0.7807
F-statistic: 186.8 on 10 and 512 DF, p-value: < 0.00000000000000022
Call:
lm(formula = TPOSampleJoined$Pro_Rad ~ TPOSampleJoined$TOT_MCF +
TPOSampleJoined$Region + TPOSampleJoined$volt + 0)
Residuals:
Min 1Q Median 3Q Max
-59.156 -21.938 -7.141 12.928 112.833
Coefficients:
Estimate Std. Error t value
TPOSampleJoined$TOT_MCF 0.0020795 0.0009503 2.188
TPOSampleJoined$RegionCentral 66.7768826 3.2582164 20.495
TPOSampleJoined$RegionLake 71.2798762 3.1258612 22.803
TPOSampleJoined$RegionMidAtl 54.6043875 3.2321220 16.894
TPOSampleJoined$RegionNewEngland 54.1118219 4.4552893 12.146
TPOSampleJoined$RegionPlains 51.6677796 4.0039339 12.904
TPOSampleJoined$voltlow -14.6645805 3.1588927 -4.642
Pr(>|t|)
TPOSampleJoined$TOT_MCF 0.0291 *
TPOSampleJoined$RegionCentral < 0.0000000000000002 ***
TPOSampleJoined$RegionLake < 0.0000000000000002 ***
TPOSampleJoined$RegionMidAtl < 0.0000000000000002 ***
TPOSampleJoined$RegionNewEngland < 0.0000000000000002 ***
TPOSampleJoined$RegionPlains < 0.0000000000000002 ***
TPOSampleJoined$voltlow 0.00000438 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 30.91 on 515 degrees of freedom
(2390 observations deleted due to missingness)
Multiple R-squared: 0.7851, Adjusted R-squared: 0.7821
F-statistic: 268.7 on 7 and 515 DF, p-value: < 0.00000000000000022
Test_Res <- mean(abs(residuals(Test_Model)))
# TPOSampleJoined$MILL_STATE <- as.character(TPOSampleJoined$MILL_STATE)
# GAM <- gam(data = TPOSampleJoined, formula = Pro_Rad ~ te(TOT_MCF + MILL_STATE + volt, sp = .2, k = 9))
# summary(GAM)
# gam.check(GAM)
#
# plot.gam(GAM, residuals = TRUE, shade = TRUE, seWithMean = TRUE)
#
# GAM_Res <- mean(abs(residuals.gam(GAM)))
TPOSampleJoined %>%
filter(!is.na(Pro_Rad) & !is.na(volt) & !is.na(MTC)) %>%
ggplot(aes(TOT_MCF, Pro_Rad)) +
geom_point() +
geom_smooth(se = FALSE, method = "lm") +
# scale_x_continuous(limits = c(0, 16000), breaks = c(seq(0,16000,2000))) +
scale_y_continuous(limits = c(0, 200), breaks = c(seq(0,200,25))) +
facet_wrap(facets = vars(MILL_STATE, volt), scales = "free_x", nrow = 3) +
theme_fivethirtyeight(base_size = 12, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 14)) +
labs(title = "Linear Regression Test Plot", x = "MCF",y = "Procurement Radius") +
stat_regline_equation(label.x = 1, label.y = 180, aes(label = ..rr.label..))
VIFScores <- vif(Best_Model) %>%
as.data.frame(row.names = NULL)
VIFScores <- VIFScores %>%
dplyr::rename("VIF" = `GVIF^(1/(2*Df))`)
VIFs <- as.data.frame(VIFScores$VIF)
VIFs <- cbind(VIFs, Variable = c("MCF", "State", "Volume Classificaion"))
VIFs <- VIFs %>%
dplyr::rename('VIF' =`VIFScores$VIF`)
ggplot(VIFs, aes(Variable, VIF)) +
geom_col(position = "identity") +
geom_hline(yintercept = 4) +
geom_text(aes(label = round(VIF, digits = 2)), nudge_y = .12) +
scale_y_continuous(limits = c(0,5), breaks = c(seq(0,5,1))) +
theme_fivethirtyeight(base_size = 12, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 14)) +
labs(title = "Linear Regression Model - VIF Scores", y = "GVIF Value", x = "Variable")
rm(VIFScores, VIFs)
TPOSampleJoined %>%
filter(!is.na(Pro_Rad) & !is.na(volt) & !is.na(MTC)) %>%
ggplot(aes(TOT_MCF, Pro_Rad)) +
geom_point() +
geom_smooth(se = FALSE, method = "lm") +
# scale_x_continuous(limits = c(0, 16000), breaks = c(seq(0,16000,2000))) +
scale_y_continuous(limits = c(0, 200), breaks = c(seq(0,200,25))) +
facet_wrap(facets = vars(Region, volt), scales = "free_x", nrow = 3) +
theme_fivethirtyeight(base_size = 12, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 14)) +
labs(title = "Linear Regression Test Plot", x = "MCF",y = "Procurement Radius") +
stat_regline_equation(label.x = 1, label.y = 180, aes(label = ..rr.label..))
VIFScores <- vif(Test_Model) %>%
as.data.frame(row.names = NULL)
VIFScores <- VIFScores %>%
dplyr::rename("VIF" = `GVIF^(1/(2*Df))`)
VIFs <- as.data.frame(VIFScores$VIF)
VIFs <- cbind(VIFs, Variable = c("MCF", "Region", "Volume Classificaion"))
VIFs <- VIFs %>%
dplyr::rename('VIF' =`VIFScores$VIF`)
ggplot(VIFs, aes(Variable, VIF)) +
geom_col(position = "identity") +
geom_hline(yintercept = 4) +
geom_text(aes(label = round(VIF, digits = 2)), nudge_y = .12) +
scale_y_continuous(limits = c(0,5), breaks = c(seq(0,5,1))) +
theme_fivethirtyeight(base_size = 12, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 14)) +
labs(title = "Linear Regression Model - VIF Scores", y = "GVIF Value", x = "Variable")
# TPOSampleJoined <- TPOSampleJoined %>%
# select(-c(BestPredictions, BestResiduals))
TPOSampleJoined_Chi <- TPOSampleJoined %>%
filter(!is.na(Pro_Rad_SqRt) & !is.na(volt) & !is.na(MILL_STATE) & !is.na(MTC) & !is.na(LOG_TOT_MCF))
# Chi Tests
Chi_Rad_Vol <- chisq.test(table(TPOSampleJoined_Chi$Pro_Rad, TPOSampleJoined_Chi$TOT_MCF))
Chi_Rad_State <- chisq.test(table(TPOSampleJoined_Chi$Pro_Rad, TPOSampleJoined_Chi$MILL_STATE))
Chi_Rad_Volt <- chisq.test(table(TPOSampleJoined_Chi$Pro_Rad, TPOSampleJoined_Chi$volt))
Chi_Rad_Region <- chisq.test(table(TPOSampleJoined_Chi$Pro_Rad, TPOSampleJoined_Chi$Region))
ChiStats <- c(Chi_Rad_Vol$statistic, Chi_Rad_State$statistic, Chi_Rad_Volt$statistic, Chi_Rad_Region$statistic)
ChiParams <- c(Chi_Rad_Vol$parameter, Chi_Rad_State$parameter, Chi_Rad_Volt$parameter, Chi_Rad_Region$parameter)
ChiPVals <- c(Chi_Rad_Vol$p.value, Chi_Rad_State$p.value, Chi_Rad_Volt$p.value, Chi_Rad_Region$p.value)
ChiJoined <- as.data.frame(cbind(Stats = ChiStats, Parameters = ChiParams, PValues = ChiPVals), row.names = c("Volume", "State", "Volume Classification", "Region")) %>%
rownames_to_column(var = "Variable")
ggplot(ChiJoined, aes(PValues, Variable)) +
geom_col(position = "identity") +
geom_hline(yintercept = .05) +
geom_text(aes(label = round(PValues, digits = 10)), nudge_x = .005) +
scale_x_continuous(limits = c(0,.2), breaks = c(seq(0,.2,.02))) +
theme_fivethirtyeight(base_size = 12, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 14)) +
labs(title = "P-Value of Model Variables in Relation to Procurement Radius", y = "Variable", x = "P-Value") +
coord_flip()
Chi_Rad_Vol
Pearson's Chi-squared test
data: table(TPOSampleJoined_Chi$Pro_Rad, TPOSampleJoined_Chi$TOT_MCF)
X-squared = 14754, df = 14250, p-value = 0.00155
[1] "Correlation: 0.1697"
Chi_Rad_State
Pearson's Chi-squared test
data: table(TPOSampleJoined_Chi$Pro_Rad, TPOSampleJoined_Chi$MILL_STATE)
X-squared = 293.39, df = 266, p-value = 0.1195
Chi_Rad_Region
Pearson's Chi-squared test
data: table(TPOSampleJoined_Chi$Pro_Rad, TPOSampleJoined_Chi$Region)
X-squared = 175.04, df = 152, p-value = 0.09717
Chi_Rad_Volt
Pearson's Chi-squared test
data: table(TPOSampleJoined_Chi$Pro_Rad, TPOSampleJoined_Chi$volt)
X-squared = 113.55, df = 38, p-value = 0.000000001874