1 Distribution Plots

1.1 Full Sample

Show code

TPOSampleJoined %>%
  ggplot(aes(LOG_TOT_MCF)) +
  geom_density() +
  scale_x_continuous(limits = c(-2,5), breaks = c(seq(-2,5,1))) +
  scale_y_continuous(limits = c, breaks = c(seq(0,.7,.1))) +
  theme_fivethirtyeight(base_size = 20, base_family = "serif") +
  theme(axis.title = element_text(family = 'serif', size = 20)) +
  labs(title = "Logged Volume Distribution", x= "Log(10) MCF",y = "Density")

Show code

TPOSampleJoined %>%
  ggplot(aes(Pro_Rad_SqRt))+
  geom_density() + 
  scale_x_continuous(limits = c(0, 15), breaks = c(seq(0,15,1))) +
  scale_y_continuous(limits = c(0,.25), breaks = c(seq(0,.25,.05))) +
  theme_fivethirtyeight(base_size = 20, base_family = "serif") +
  theme(axis.title = element_text(family = 'serif', size = 20)) +
  labs(title = "SqRt Radius Distribution", x= "SqRt Procurement Radius",y = "Density")

Show code

TPOSampleJoined <- TPOSampleJoined %>%
  filter(TOT_MCF < 16000 & !is.na(TOT_MCF)) %>%
  mutate(MILL_COUNTYCD = as.character(MILL_COUNTYCD),
         MILL_STATECD = as.character(MILL_STATECD),
         MTC = as.character(MTC))

#Remove Outliers from Joined Sample, store as 'UnusedFinal'...These mills provided a Pro Rad during 2019-2021 and can be joined back following model creation
Unused1 <- TPOSampleJoined %>%
  filter((Pro_Rad >= 200 | Pro_Rad <= 1))  

Unused2 <- TPOSampleJoined %>%            
  filter(MILL_STATE == "OH" & volt == "low")

Unused3 <- TPOSampleJoined %>%              
  filter(MILL_STATE == "MI" & Pro_Rad > 150) %>%
  filter(!(MILL_NAME %in% Unused1$MILL_NAME))

Unused4 <- TPOSampleJoined %>%
  filter(MILL_NAME == "ANTHONY & CO., INC.")

Unused5 <- TPOSampleJoined %>%
  filter((Region == "MidAtl" & LOG_TOT_MCF < 1 & Pro_Rad_SqRt > 10) | (Region == "MI" & LOG_TOT_MCF < -1 & Pro_Rad_SqRt >= 10) | (Region == "PA" & LOG_TOT_MCF < 1))

UnusedFinal <- Unused1 %>%
  bind_rows(Unused2, Unused3, Unused4, Unused5)

rm(Unused1, Unused2, Unused3, Unused4, Unused5)
  
TPOSampleJoined <- TPOSampleJoined %>%
  filter(!UnusedCheck %in% UnusedFinal$UnusedCheck)

1.2 Model-Obervations Only

Show code

TPOSampleJoined %>%
  filter(!is.na(Pro_Rad_SqRt) & !is.na(volt) & !is.na(Region) & !is.na(LOG_TOT_MCF)) %>%
  ggplot(aes(LOG_TOT_MCF)) +
  geom_density() +
  scale_x_continuous(limits = c(-2, 5), breaks = c(seq(-2,5,1))) +
  scale_y_continuous(limits = c, breaks = c(seq(0,.7,.1))) +
  theme_fivethirtyeight(base_size = 20, base_family = "serif") +
  theme(axis.title = element_text(family = 'serif', size = 20)) +
  labs(title = "Logged Volume Distribution", x= "Log(10) MCF",y = "Density")

Show code

TPOSampleJoined %>%
  filter(!is.na(Pro_Rad_SqRt) & !is.na(volt) & !is.na(Region) & !is.na(LOG_TOT_MCF)) %>%
  ggplot(aes(Pro_Rad_SqRt)) +
  geom_density() + 
  scale_x_continuous(limits = c(0, 15), breaks = c(seq(0,15,1))) +
  scale_y_continuous(limits = c(0,.25), breaks = c(seq(0,.25,.05))) +
  theme_fivethirtyeight(base_size = 20, base_family = "serif") +
  theme(axis.title = element_text(family = 'serif', size = 20)) +
  labs(title = "SqRt Radius Distribution", x= "SqRt Procurement Radius",y = "Density")

2 Model Creation & Summary

Show code

Best_Model <- lm(TPOSampleJoined$Pro_Rad_SqRt ~ TPOSampleJoined$LOG_TOT_MCF * TPOSampleJoined$Region)
summary(Best_Model)


Call:
lm(formula = TPOSampleJoined$Pro_Rad_SqRt ~ TPOSampleJoined$LOG_TOT_MCF * 
    TPOSampleJoined$Region)

Residuals:
    Min      1Q  Median      3Q     Max 
-5.0971 -1.2219 -0.2919  1.2442  6.7138 

Coefficients:
                                                             Estimate
(Intercept)                                                    5.1673
TPOSampleJoined$LOG_TOT_MCF                                    1.0110
TPOSampleJoined$RegionLake                                     1.8902
TPOSampleJoined$RegionMidAtl                                  -0.1994
TPOSampleJoined$RegionNewEngland                              -1.4989
TPOSampleJoined$RegionPlains                                  -0.4106
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionLake        -0.6245
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionMidAtl      -0.2587
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionNewEngland   0.2328
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionPlains      -0.2247
                                                             Std. Error
(Intercept)                                                      0.6690
TPOSampleJoined$LOG_TOT_MCF                                      0.2575
TPOSampleJoined$RegionLake                                       0.8133
TPOSampleJoined$RegionMidAtl                                     0.8785
TPOSampleJoined$RegionNewEngland                                 0.8727
TPOSampleJoined$RegionPlains                                     0.8260
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionLake           0.3130
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionMidAtl         0.3366
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionNewEngland     0.3300
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionPlains         0.3242
                                                             t value
(Intercept)                                                    7.723
TPOSampleJoined$LOG_TOT_MCF                                    3.926
TPOSampleJoined$RegionLake                                     2.324
TPOSampleJoined$RegionMidAtl                                  -0.227
TPOSampleJoined$RegionNewEngland                              -1.718
TPOSampleJoined$RegionPlains                                  -0.497
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionLake        -1.995
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionMidAtl      -0.768
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionNewEngland   0.705
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionPlains      -0.693
                                                                       Pr(>|t|)
(Intercept)                                                  0.0000000000000602
TPOSampleJoined$LOG_TOT_MCF                                  0.0000982166651530
TPOSampleJoined$RegionLake                                               0.0205
TPOSampleJoined$RegionMidAtl                                             0.8206
TPOSampleJoined$RegionNewEngland                                         0.0865
TPOSampleJoined$RegionPlains                                             0.6193
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionLake                   0.0466
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionMidAtl                 0.4426
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionNewEngland             0.4809
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionPlains                 0.4885
                                                                
(Intercept)                                                  ***
TPOSampleJoined$LOG_TOT_MCF                                  ***
TPOSampleJoined$RegionLake                                   *  
TPOSampleJoined$RegionMidAtl                                    
TPOSampleJoined$RegionNewEngland                             .  
TPOSampleJoined$RegionPlains                                    
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionLake       *  
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionMidAtl        
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionNewEngland    
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionPlains        
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.955 on 511 degrees of freedom
  (2390 observations deleted due to missingness)
Multiple R-squared:  0.2041,    Adjusted R-squared:  0.1901 
F-statistic: 14.56 on 9 and 511 DF,  p-value: < 0.00000000000000022

Show code

Test_Model <- lm(TPOSampleJoined$Pro_Rad_SqRt ~ TPOSampleJoined$LOG_TOT_MCF + TPOSampleJoined$MILL_STATE + 0)
# summary(Test_Model)
# Test_Res <- mean(abs(residuals(Test_Model)^2))


Test_NE <- TPOSampleJoined %>%
  filter(Region == "NewEngland")

NE_Model = lm(Test_NE$Pro_Rad_SqRt ~ Test_NE$LOG_TOT_MCF)
summary(NE_Model)


Call:
lm(formula = Test_NE$Pro_Rad_SqRt ~ Test_NE$LOG_TOT_MCF)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.6144 -1.2219 -0.3449  1.2442  6.7138 

Coefficients:
                    Estimate Std. Error t value      Pr(>|t|)    
(Intercept)           3.6683     0.5487   6.685 0.00000000297 ***
Test_NE$LOG_TOT_MCF   1.2438     0.2021   6.154 0.00000002935 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.914 on 79 degrees of freedom
  (319 observations deleted due to missingness)
Multiple R-squared:  0.324, Adjusted R-squared:  0.3155 
F-statistic: 37.87 on 1 and 79 DF,  p-value: 0.00000002935

Show code

Test_MidAtl <- TPOSampleJoined %>%
  filter(Region == "MidAtl") 
  
MidAtl_Model = lm(Test_MidAtl$Pro_Rad_SqRt ~ Test_MidAtl$LOG_TOT_MCF)
summary(MidAtl_Model)


Call:
lm(formula = Test_MidAtl$Pro_Rad_SqRt ~ Test_MidAtl$LOG_TOT_MCF)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.4555 -1.0194 -0.2379  0.9882  4.8135 

Coefficients:
                        Estimate Std. Error t value
(Intercept)               4.9679     0.4879  10.182
Test_MidAtl$LOG_TOT_MCF   0.7523     0.1858   4.049
                                    Pr(>|t|)    
(Intercept)             < 0.0000000000000002 ***
Test_MidAtl$LOG_TOT_MCF            0.0000908 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.676 on 122 degrees of freedom
  (499 observations deleted due to missingness)
Multiple R-squared:  0.1185,    Adjusted R-squared:  0.1112 
F-statistic: 16.39 on 1 and 122 DF,  p-value: 0.00009079

Show code

Test_Central <- TPOSampleJoined %>%
  filter(Region == "Central") 
  
Central_Model = lm(Test_Central$Pro_Rad_SqRt ~ Test_Central$LOG_TOT_MCF)
summary(Central_Model)


Call:
lm(formula = Test_Central$Pro_Rad_SqRt ~ Test_Central$LOG_TOT_MCF)

Residuals:
    Min      1Q  Median      3Q     Max 
-5.0971 -1.4679 -0.1721  1.4140  5.0554 

Coefficients:
                         Estimate Std. Error t value       Pr(>|t|)
(Intercept)                5.1673     0.7316   7.063 0.000000000191
Test_Central$LOG_TOT_MCF   1.0110     0.2816   3.590       0.000506
                            
(Intercept)              ***
Test_Central$LOG_TOT_MCF ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.138 on 104 degrees of freedom
  (396 observations deleted due to missingness)
Multiple R-squared:  0.1103,    Adjusted R-squared:  0.1017 
F-statistic: 12.89 on 1 and 104 DF,  p-value: 0.000506

Show code

Test_Lake <- TPOSampleJoined %>%
  filter(Region == "Lake") 
  
Lake_Model = lm(Test_Lake$Pro_Rad_SqRt ~ Test_Lake$LOG_TOT_MCF)
summary(Lake_Model)


Call:
lm(formula = Test_Lake$Pro_Rad_SqRt ~ Test_Lake$LOG_TOT_MCF)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.5303 -1.1671 -0.3769  1.6801  5.0317 

Coefficients:
                      Estimate Std. Error t value            Pr(>|t|)
(Intercept)             7.0574     0.4779  14.768 <0.0000000000000002
Test_Lake$LOG_TOT_MCF   0.3865     0.1839   2.102              0.0375
                         
(Intercept)           ***
Test_Lake$LOG_TOT_MCF *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.02 on 132 degrees of freedom
  (690 observations deleted due to missingness)
Multiple R-squared:  0.03239,   Adjusted R-squared:  0.02506 
F-statistic: 4.418 on 1 and 132 DF,  p-value: 0.03745

Show code

Test_Plains <- TPOSampleJoined %>%
  filter(Region == "Plains") 
  
Plains_Model = lm(Test_Plains$Pro_Rad_SqRt ~ Test_Plains$LOG_TOT_MCF)
summary(Plains_Model)


Call:
lm(formula = Test_Plains$Pro_Rad_SqRt ~ Test_Plains$LOG_TOT_MCF)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.5742 -1.4026 -0.2425  1.0928  5.9987 

Coefficients:
                        Estimate Std. Error t value
(Intercept)               4.7566     0.5046   9.426
Test_Plains$LOG_TOT_MCF   0.7863     0.2052   3.832
                                  Pr(>|t|)    
(Intercept)             0.0000000000000262 ***
Test_Plains$LOG_TOT_MCF           0.000264 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.037 on 74 degrees of freedom
  (486 observations deleted due to missingness)
Multiple R-squared:  0.1656,    Adjusted R-squared:  0.1543 
F-statistic: 14.68 on 1 and 74 DF,  p-value: 0.0002644

Show code

# Test_MI <- TPOSampleJoined %>%
#   filter(Region == "MI") 
#   
# MI_Model = lm(Test_MI$Pro_Rad_SqRt ~ Test_MI$LOG_TOT_MCF)
# summary(MI_Model)
# 
# Test_PA <- TPOSampleJoined %>%
#   filter(Region == "PA") 
#   
# PA_Model = lm(Test_PA$Pro_Rad_SqRt ~ Test_PA$LOG_TOT_MCF)
# summary(PA_Model)


Intercepts <- c(summary(Central_Model)$coefficients[,4][1],
           summary(Lake_Model)$coefficients[,4][1],
           summary(MidAtl_Model)$coefficients[,4][1],
           summary(NE_Model)$coefficients[,4][1],
           summary(Plains_Model)$coefficients[,4][1])

Regions = c("Central", "Lake", "MidAtl", "NewEngland", "Plains")

PVals <- c(summary(Central_Model)$coefficients[,4][2],
           summary(Lake_Model)$coefficients[,4][2],
           summary(MidAtl_Model)$coefficients[,4][2],
           summary(NE_Model)$coefficients[,4][2],
           summary(Plains_Model)$coefficients[,4][2])

ModelStats <- data.frame(Region = Regions,Int = Intercepts, PVals = PVals)


# GAM <- gam(data = TPOSampleJoined, formula = Pro_Rad_SqRt ~ te(LOG_TOT_MCF, by = as.factor(Region), sp = .35, k = 3))
# summary(GAM)
# gam.check(GAM)
# plot.gam(GAM, residuals = TRUE, shade = TRUE, seWithMean = TRUE)

# GAM_Res <- residuals.gam(GAM)
# GAM_Res <- GAM_Res^2
# mean(GAM_Res)

# Central=function(x){coef(Best_Model)[2]*x+coef(Best_Model)[1]}
# Lake=function(x){coef(Best_Model)[2]*x+coef(Best_Model)[1]+coef(Best_Model)[3]}
# MI=function(x){coef(Best_Model)[2]*x+coef(Best_Model)[1]+coef(Best_Model)[4]}
# MidAtl=function(x){coef(Best_Model)[2]*x+coef(Best_Model)[1]+coef(Best_Model)[5]}
# NE=function(x){coef(Best_Model)[2]*x+coef(Best_Model)[1]+coef(Best_Model)[6]}
# PA=function(x){coef(Best_Model)[2]*x+coef(Best_Model)[1]+coef(Best_Model)[7]}
# Plains=function(x){coef(Best_Model)[2]*x+coef(Best_Model)[1]+coef(Best_Model)[8]}
# 
# ggplot(TPOSampleJoined,aes(y=Pro_Rad_SqRt,x=LOG_TOT_MCF,color=Region))+
#   geom_point()+
#         stat_function(fun=Central,geom="line",color='yellow') +
#         stat_function(fun=Lake,geom="line",color='blue') +
#         stat_function(fun=MI,geom="line",color='red') +
#         stat_function(fun=MidAtl,geom="line",color='black') +
#         stat_function(fun=NE,geom="line",color='green') +
#         stat_function(fun=PA,geom="line",color='orange') +
#         stat_function(fun=Plains,geom="line",color='purple')

# TPOSampleJoined %>%
#   filter(!is.na(Pro_Rad_SqRt)) %>%
# ggplot(aes(sample = Pro_Rad_SqRt)) +
#   stat_qq_line() +
#   stat_qq_point(size = 2) +
#   theme_fivethirtyeight(base_size = 20, base_family = "serif") +
#   theme(axis.title = element_text(family = 'serif', size = 20)) +
#   labs(title = "Normal-QQ Plot", x= "Theoretical Quantiles",y = "Residuals")

3 Model/VIF Visualization

3.1 Preferred Model (Volume & Radius)

Show code

TPOSampleJoined <- TPOSampleJoined %>% 
  left_join(RegionCounts, by = "Region") %>%
  left_join(ModelStats, by = "Region")

TPOSampleJoined %>%
  filter(!is.na(Pro_Rad)) %>%
  ggplot(aes(LOG_TOT_MCF, Pro_Rad_SqRt)) +
    geom_point() +
    geom_smooth(se = FALSE, method = "lm") +
    scale_x_continuous(limits = c(-.5, 4.5), breaks = c(seq(-.5,4.5,.5))) +
    scale_y_continuous(limits = c(0, 15), breaks = c(seq(0,14,2))) +
    facet_wrap(~Region, nrow = 3) +
    theme_fivethirtyeight(base_size = 20, base_family = "serif") +
    theme(axis.title = element_text(family = 'serif', size = 20)) +
    labs(title = "Linear Regression Test Plot", x = "Log(10) MCF",y = "Square Root of Procurement Radius") +
    stat_regline_equation(label.x = -.5, label.y = 14, aes(label = ..rr.label..)) +
    geom_text(x = -.25, y = 12, aes(label = paste0("n = ", n))) +
    geom_text(x = -.2, y = 0, aes(label = paste0("P-Val = ", round(PVals, 8))))

Show code

ggplot(ModelStats, aes(Region, PVals)) +
    geom_col() +
    geom_text(aes(label = round(PVals, digits = 8)), size = 6 ,nudge_y = .001) + 
    scale_y_continuous(limits = c(0, .05), breaks = c(seq(0,.05,.01))) +
    theme_fivethirtyeight(base_size = 20, base_family = "serif") +
    theme(axis.title = element_text(family = 'serif', size = 20)) +
    labs(title = "P-Values by Region", x = "Region", y = "P-Value")

Show code

VIFScores <- vif(Best_Model) %>%
  as.data.frame(row.names = NULL)

VIFScores <- VIFScores %>%
  dplyr::rename("VIF" = `GVIF^(1/(2*Df))`)
VIFs <- as.data.frame(VIFScores$VIF)
VIFs <- cbind(VIFs, Variable = c("Log(10) MCF", "Region", "Interaction"))
VIFs <- VIFs %>%
  dplyr::rename('VIF' =`VIFScores$VIF`)

ggplot(VIFs, aes(Variable, VIF)) +
  geom_col(position = "identity") + 
  geom_hline(yintercept = 4) +
  geom_text(aes(label = round(VIF, digits = 2)), nudge_y = .12) +
  scale_y_continuous(limits = c(0,5), breaks = c(seq(0,5,1))) +
  theme_fivethirtyeight(base_size = 20, base_family = "serif") +
  theme(axis.title = element_text(family = 'serif', size = 20)) +
  labs(title = "Linear Regression Model - VIF Scores", y = "VIF Score", x = "Variable")

Show code

rm(VIFScores, VIFs)

Intercepts

                           (Intercept) 
0.000000000190791855684621138398299189 
                           (Intercept) 
0.000000000000000000000000000009640239 
                           (Intercept) 
0.000000000000000005352235469197340064 
                           (Intercept) 
0.000000002969597000132149999980513133 
                           (Intercept) 
0.000000000000026187370596178335012058

3.2 Fall-Back Model (Volume & State)

Show code

TPOSampleJoined %>%
  filter(!is.na(Pro_Rad)) %>%
  ggplot(aes(LOG_TOT_MCF, Pro_Rad_SqRt)) +
    geom_point() +
    geom_smooth(se = FALSE, method = "lm") +
    # scale_x_continuous(limits = c(0, 16000), breaks = c(seq(0,16000,2000))) +
    scale_y_continuous(limits = c(0, 14), breaks = c(seq(0,14,2))) +
    facet_wrap(~MILL_STATE, scales = "free_x", nrow = 3) +
    theme_fivethirtyeight(base_size = 20, base_family = "serif") +
    theme(axis.title = element_text(family = 'serif', size = 20)) +
    labs(title = "Linear Regression Test Plot", x = "MCF",y = "Procurement Radius") +
    stat_regline_equation(label.x = 1, label.y = 14, aes(label = ..rr.label..))

Show code

VIFScores <- vif(Test_Model) %>%
  as.data.frame(row.names = NULL)

VIFScores <- VIFScores %>%
  dplyr::rename("VIF" = `GVIF^(1/(2*Df))`)
VIFs <- as.data.frame(VIFScores$VIF)
VIFs <- cbind(VIFs, Variable = c("Log(10) MCF", "State"))
VIFs <- VIFs %>%
  dplyr::rename('VIF' =`VIFScores$VIF`)

ggplot(VIFs, aes(Variable, VIF)) +
  geom_col(position = "identity") + 
  geom_hline(yintercept = 4) +
  geom_text(aes(label = round(VIF, digits = 2)), nudge_y = .12) +
  scale_y_continuous(limits = c(0,5), breaks = c(seq(0,5,1))) +
  theme_fivethirtyeight(base_size = 20, base_family = "serif") +
  theme(axis.title = element_text(family = 'serif', size = 20)) +
  labs(title = "Linear Regression Model - VIF Scores", y = "GVIF Value", x = "Variable")

Show code

rm(VIFScores, VIFs)
# TPOSampleJoined <- TPOSampleJoined %>%
#   select(-c(BestPredictions, BestResiduals))

4 Chi-Squared Tests

Show code

TPOSampleJoined_Chi <- TPOSampleJoined %>%
  filter(!is.na(Pro_Rad_SqRt) & !is.na(MILL_STATE) & !is.na(LOG_TOT_MCF)) 

# Chi Tests

Chi_Rad_Vol <- chisq.test(table(TPOSampleJoined_Chi$Pro_Rad_SqRt, TPOSampleJoined_Chi$LOG_TOT_MCF))
Chi_Rad_State <- chisq.test(table(TPOSampleJoined_Chi$Pro_Rad_SqRt, TPOSampleJoined_Chi$MILL_STATE))
Chi_Rad_Volt <- chisq.test(table(TPOSampleJoined_Chi$Pro_Rad_SqRt, TPOSampleJoined_Chi$volt))
Chi_Rad_Region <- chisq.test(table(TPOSampleJoined_Chi$Pro_Rad_SqRt, TPOSampleJoined_Chi$Region))

ChiStats <- c(Chi_Rad_Vol$statistic, Chi_Rad_State$statistic, Chi_Rad_Volt$statistic, Chi_Rad_Region$statistic)
ChiParams <- c(Chi_Rad_Vol$parameter, Chi_Rad_State$parameter, Chi_Rad_Volt$parameter, Chi_Rad_Region$parameter)
ChiPVals <- c(Chi_Rad_Vol$p.value, Chi_Rad_State$p.value, Chi_Rad_Volt$p.value, Chi_Rad_Region$p.value)

ChiJoined <- as.data.frame(cbind(Stats = ChiStats, Parameters = ChiParams, PValues = ChiPVals), row.names = c("Log(10) MCF", "State", "Volume Classification", "Region")) %>%
  rownames_to_column(var = "Variable")

ggplot(ChiJoined, aes(PValues, Variable)) +
  geom_col(position = "identity") + 
  geom_hline(yintercept = .05) +
  geom_text(aes(label = round(PValues, digits = 10)), nudge_x = .005) +
  scale_x_continuous(limits = c(0,.2), breaks = c(seq(0,.2,.02))) +
  theme_fivethirtyeight(base_size = 20, base_family = "serif") +
  theme(axis.title = element_text(family = 'serif', size = 20)) +
  labs(title = "P-Value of Model Variables in Relation to SqRt Procurement Radius", y = "Variable", x = "P-Value") +
  coord_flip()

4.1 Sqrt(Radius) & Log(10) MCF

Show code

Chi_Rad_Vol


    Pearson's Chi-squared test

data:  table(TPOSampleJoined_Chi$Pro_Rad_SqRt, TPOSampleJoined_Chi$LOG_TOT_MCF)
X-squared = 14796, df = 14250, p-value = 0.000693

Show code

Correlation <- cor(TPOSampleJoined_Chi$Pro_Rad_SqRt, TPOSampleJoined_Chi$TOT_MCF)

print(paste0("Correlation: ", round(Correlation, digits = 4)))

[1] "Correlation: 0.1965"

4.2 Sqrt(Radius) & State

Show code

Chi_Rad_State


    Pearson's Chi-squared test

data:  table(TPOSampleJoined_Chi$Pro_Rad_SqRt, TPOSampleJoined_Chi$MILL_STATE)
X-squared = 296.51, df = 266, p-value = 0.09615

4.3 Sqrt(Radius) & Region

Show code

Chi_Rad_Region


    Pearson's Chi-squared test

data:  table(TPOSampleJoined_Chi$Pro_Rad_SqRt, TPOSampleJoined_Chi$Region)
X-squared = 176.64, df = 152, p-value = 0.0836

4.4 Sqrt(Radius) & Volume Classification (High/Low)

Show code

Chi_Rad_Volt


    Pearson's Chi-squared test

data:  table(TPOSampleJoined_Chi$Pro_Rad_SqRt, TPOSampleJoined_Chi$volt)
X-squared = 115.44, df = 38, p-value = 0.0000000009739

Show code

rm(Chi_Rad_Region, Chi_Rad_State, Chi_Rad_Vol, Chi_Rad_Volt, ChiJoined, ChiParams, ChiPVals, ChiStats)

TPO Heat Map Analysis

1 Distribution Plots

1.1 Full Sample

1.2 Model-Obervations Only

2 Model Creation & Summary

3 Model/VIF Visualization

3.1 Preferred Model (Volume & Radius)

3.2 Fall-Back Model (Volume & State)

4 Chi-Squared Tests

4.1 Sqrt(Radius) & Log(10) MCF

4.2 Sqrt(Radius) & State

4.3 Sqrt(Radius) & Region

4.4 Sqrt(Radius) & Volume Classification (High/Low)