1 Distribution Plots

1.1 Full Sample

Show code

TPOSampleJoined %>%
  ggplot(aes(LOG_TOT_MCF)) +
  geom_density() +
  scale_x_continuous(limits = c(-2,5), breaks = c(seq(-2,5,1))) +
  scale_y_continuous(limits = c, breaks = c(seq(0,.7,.1))) +
  theme_fivethirtyeight(base_size = 20, base_family = "serif") +
  theme(axis.title = element_text(family = 'serif', size = 20)) +
  labs(title = "Logged Volume Distribution", x= "Log(10) MCF",y = "Density")

Show code

TPOSampleJoined %>%
  ggplot(aes(Pro_Rad_SqRt))+
  geom_density() + 
  scale_x_continuous(limits = c(0, 15), breaks = c(seq(0,15,1))) +
  scale_y_continuous(limits = c(0,.25), breaks = c(seq(0,.25,.05))) +
  theme_fivethirtyeight(base_size = 20, base_family = "serif") +
  theme(axis.title = element_text(family = 'serif', size = 20)) +
  labs(title = "SqRt Radius Distribution", x= "SqRt Procurement Radius",y = "Density")

Show code

TPOSampleJoined <- TPOSampleJoined %>%
  filter(TOT_MCF < 16000 & !is.na(TOT_MCF)) %>%
  mutate(MILL_COUNTYCD = as.character(MILL_COUNTYCD),
         MILL_STATECD = as.character(MILL_STATECD),
         MTC = as.character(MTC))

#Remove Outliers from Joined Sample, store as 'UnusedFinal'...These mills provided a Pro Rad during 2019-2021 and can be joined back following model creation
##Remove those with radii >= 200 and <= 1
Unused1 <- TPOSampleJoined %>%
  filter(Pro_Rad >= 200 | Pro_Rad <= 1 | Pro_Rad >= 150 & LOG_TOT_MCF < 0.5 & Region %in% c("Plains", "Lake"))  

##Remove Lake entries with radii >= 100 & Log(10) MCF <= 0.5
# Unused2 <- TPOSampleJoined %>%            
#   filter(Region == "Lake" & LOG_TOT_MCF < 0.5 & Pro_Rad >= 100)
# 
# Unused3 <- TPOSampleJoined %>%              
#   filter(MILL_STATE == "MI" & Pro_Rad > 150) %>%
#   filter(!(MILL_NAME %in% Unused1$MILL_NAME))
# 
# Unused4 <- TPOSampleJoined %>%
#   filter(MILL_NAME == "ANTHONY & CO., INC.")
# 
# Unused5 <- TPOSampleJoined %>%
#   filter((Region == "MidAtl" & LOG_TOT_MCF < 1 & Pro_Rad >= 100))
# 
# UnusedFinal <- Unused1 %>%
#   bind_rows(Unused2, Unused5)
# 
# rm(Unused1, Unused2, Unused5)
  
TPOSampleJoined <- TPOSampleJoined %>%
  filter(!UnusedCheck %in% Unused1$UnusedCheck)

RegionCounts <- TPOSampleJoined %>%
  filter(!is.na(Pro_Rad_SqRt)) %>%
  dplyr::count(Region)

1.2 Model-Obervations Only

Show code

TPOSampleJoined %>%
  filter(!is.na(Pro_Rad_SqRt) & !is.na(Region) & !is.na(LOG_TOT_MCF)) %>%
  ggplot(aes(LOG_TOT_MCF)) +
  geom_density() +
  scale_x_continuous(limits = c(-2, 5), breaks = c(seq(-2,5,1))) +
  scale_y_continuous(limits = c, breaks = c(seq(0,.7,.1))) +
  theme_fivethirtyeight(base_size = 20, base_family = "serif") +
  theme(axis.title = element_text(family = 'serif', size = 20)) +
  labs(title = "Logged Volume Distribution", x= "Log(10) MCF",y = "Density")

Show code

TPOSampleJoined %>%
  filter(!is.na(Pro_Rad_SqRt) & !is.na(Region) & !is.na(LOG_TOT_MCF)) %>%
  ggplot(aes(Pro_Rad_SqRt)) +
  geom_density() + 
  scale_x_continuous(limits = c(0, 15), breaks = c(seq(0,15,1))) +
  scale_y_continuous(limits = c(0,.25), breaks = c(seq(0,.25,.05))) +
  theme_fivethirtyeight(base_size = 20, base_family = "serif") +
  theme(axis.title = element_text(family = 'serif', size = 20)) +
  labs(title = "SqRt Radius Distribution", x= "SqRt Procurement Radius",y = "Density")

2 Model Creation & Summary

Show code

Best_Model <- lm(TPOSampleJoined$Pro_Rad_SqRt ~ TPOSampleJoined$LOG_TOT_MCF * TPOSampleJoined$Region)
summary(Best_Model)


Call:
lm(formula = TPOSampleJoined$Pro_Rad_SqRt ~ TPOSampleJoined$LOG_TOT_MCF * 
    TPOSampleJoined$Region)

Residuals:
    Min      1Q  Median      3Q     Max 
-5.0677 -1.2180 -0.2464  1.2087  6.7138 

Coefficients:
                                                             Estimate
(Intercept)                                                    5.2810
TPOSampleJoined$LOG_TOT_MCF                                    0.9642
TPOSampleJoined$RegionLake                                     1.5126
TPOSampleJoined$RegionMidAtl                                   0.1150
TPOSampleJoined$RegionNewEngland                              -1.6127
TPOSampleJoined$RegionPlains                                  -0.5243
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionLake        -0.4986
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionMidAtl      -0.3657
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionNewEngland   0.2796
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionPlains      -0.1779
                                                             Std. Error
(Intercept)                                                      0.5430
TPOSampleJoined$LOG_TOT_MCF                                      0.2145
TPOSampleJoined$RegionLake                                       0.7937
TPOSampleJoined$RegionMidAtl                                     0.7732
TPOSampleJoined$RegionNewEngland                                 0.7810
TPOSampleJoined$RegionPlains                                     0.7282
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionLake           0.3036
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionMidAtl         0.3005
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionNewEngland     0.2979
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionPlains         0.2914
                                                             t value
(Intercept)                                                    9.725
TPOSampleJoined$LOG_TOT_MCF                                    4.494
TPOSampleJoined$RegionLake                                     1.906
TPOSampleJoined$RegionMidAtl                                   0.149
TPOSampleJoined$RegionNewEngland                              -2.065
TPOSampleJoined$RegionPlains                                  -0.720
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionLake        -1.642
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionMidAtl      -1.217
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionNewEngland   0.939
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionPlains      -0.610
                                                                         Pr(>|t|)
(Intercept)                                                  < 0.0000000000000002
TPOSampleJoined$LOG_TOT_MCF                                            0.00000866
TPOSampleJoined$RegionLake                                                 0.0572
TPOSampleJoined$RegionMidAtl                                               0.8819
TPOSampleJoined$RegionNewEngland                                           0.0394
TPOSampleJoined$RegionPlains                                               0.4718
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionLake                     0.1012
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionMidAtl                   0.2243
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionNewEngland               0.3484
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionPlains                   0.5418
                                                                
(Intercept)                                                  ***
TPOSampleJoined$LOG_TOT_MCF                                  ***
TPOSampleJoined$RegionLake                                   .  
TPOSampleJoined$RegionMidAtl                                    
TPOSampleJoined$RegionNewEngland                             *  
TPOSampleJoined$RegionPlains                                    
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionLake          
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionMidAtl        
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionNewEngland    
TPOSampleJoined$LOG_TOT_MCF:TPOSampleJoined$RegionPlains        
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.958 on 506 degrees of freedom
  (2540 observations deleted due to missingness)
Multiple R-squared:  0.1988,    Adjusted R-squared:  0.1845 
F-statistic: 13.95 on 9 and 506 DF,  p-value: < 0.00000000000000022

Show code

Test_Model <- lm(TPOSampleJoined$Pro_Rad_SqRt ~ TPOSampleJoined$LOG_TOT_MCF + TPOSampleJoined$MILL_STATE + 0)
# summary(Test_Model)
# Test_Res <- mean(abs(residuals(Test_Model)^2))


Test_NE <- TPOSampleJoined %>%
  filter(Region == "NewEngland")

NE_Model = lm(Test_NE$Pro_Rad_SqRt ~ Test_NE$LOG_TOT_MCF)
summary(NE_Model)


Call:
lm(formula = Test_NE$Pro_Rad_SqRt ~ Test_NE$LOG_TOT_MCF)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.6144 -1.2219 -0.3449  1.2442  6.7138 

Coefficients:
                    Estimate Std. Error t value      Pr(>|t|)    
(Intercept)           3.6683     0.5487   6.685 0.00000000297 ***
Test_NE$LOG_TOT_MCF   1.2438     0.2021   6.154 0.00000002935 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.914 on 79 degrees of freedom
  (319 observations deleted due to missingness)
Multiple R-squared:  0.324, Adjusted R-squared:  0.3155 
F-statistic: 37.87 on 1 and 79 DF,  p-value: 0.00000002935

Show code

Test_MidAtl <- TPOSampleJoined %>%
  filter(Region == "MidAtl") 
  
MidAtl_Model = lm(Test_MidAtl$Pro_Rad_SqRt ~ Test_MidAtl$LOG_TOT_MCF)
summary(MidAtl_Model)


Call:
lm(formula = Test_MidAtl$Pro_Rad_SqRt ~ Test_MidAtl$LOG_TOT_MCF)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.6992 -1.0704 -0.2317  0.9577  5.4396 

Coefficients:
                        Estimate Std. Error t value
(Intercept)               5.3959     0.4904  11.004
Test_MidAtl$LOG_TOT_MCF   0.5985     0.1875   3.192
                                    Pr(>|t|)    
(Intercept)             < 0.0000000000000002 ***
Test_MidAtl$LOG_TOT_MCF              0.00179 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.744 on 123 degrees of freedom
  (499 observations deleted due to missingness)
Multiple R-squared:  0.07651,   Adjusted R-squared:  0.069 
F-statistic: 10.19 on 1 and 123 DF,  p-value: 0.001792

Show code

Test_Central <- TPOSampleJoined %>%
  filter(Region == "Central") 
  
Central_Model = lm(Test_Central$Pro_Rad_SqRt ~ Test_Central$LOG_TOT_MCF)
summary(Central_Model)


Call:
lm(formula = Test_Central$Pro_Rad_SqRt ~ Test_Central$LOG_TOT_MCF)

Residuals:
    Min      1Q  Median      3Q     Max 
-5.0677 -1.4601 -0.1734  1.5167  5.0355 

Coefficients:
                         Estimate Std. Error t value
(Intercept)                5.2810     0.5871   8.996
Test_Central$LOG_TOT_MCF   0.9642     0.2319   4.157
                                    Pr(>|t|)    
(Intercept)              0.00000000000000472 ***
Test_Central$LOG_TOT_MCF 0.00006132827517507 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.117 on 118 degrees of freedom
  (524 observations deleted due to missingness)
Multiple R-squared:  0.1278,    Adjusted R-squared:  0.1204 
F-statistic: 17.28 on 1 and 118 DF,  p-value: 0.00006133

Show code

Test_Lake <- TPOSampleJoined %>%
  filter(Region == "Lake") 
  
Lake_Model = lm(Test_Lake$Pro_Rad_SqRt ~ Test_Lake$LOG_TOT_MCF)
summary(Lake_Model)


Call:
lm(formula = Test_Lake$Pro_Rad_SqRt ~ Test_Lake$LOG_TOT_MCF)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.3963 -1.1448 -0.3122  1.5763  5.6386 

Coefficients:
                      Estimate Std. Error t value            Pr(>|t|)
(Intercept)             6.7935     0.5863  11.587 <0.0000000000000002
Test_Lake$LOG_TOT_MCF   0.4656     0.2176   2.139              0.0346
                         
(Intercept)           ***
Test_Lake$LOG_TOT_MCF *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.983 on 112 degrees of freedom
  (712 observations deleted due to missingness)
Multiple R-squared:  0.03926,   Adjusted R-squared:  0.03068 
F-statistic: 4.576 on 1 and 112 DF,  p-value: 0.03459

Show code

Test_Plains <- TPOSampleJoined %>%
  filter(Region == "Plains") 
  
Plains_Model = lm(Test_Plains$Pro_Rad_SqRt ~ Test_Plains$LOG_TOT_MCF)
summary(Plains_Model)


Call:
lm(formula = Test_Plains$Pro_Rad_SqRt ~ Test_Plains$LOG_TOT_MCF)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.5742 -1.4026 -0.2425  1.0928  5.9987 

Coefficients:
                        Estimate Std. Error t value
(Intercept)               4.7566     0.5046   9.426
Test_Plains$LOG_TOT_MCF   0.7863     0.2052   3.832
                                  Pr(>|t|)    
(Intercept)             0.0000000000000262 ***
Test_Plains$LOG_TOT_MCF           0.000264 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.037 on 74 degrees of freedom
  (486 observations deleted due to missingness)
Multiple R-squared:  0.1656,    Adjusted R-squared:  0.1543 
F-statistic: 14.68 on 1 and 74 DF,  p-value: 0.0002644

Show code

Intercepts <- c(summary(Central_Model)$coefficients[,4][1],
           summary(Lake_Model)$coefficients[,4][1],
           summary(MidAtl_Model)$coefficients[,4][1],
           summary(NE_Model)$coefficients[,4][1],
           summary(Plains_Model)$coefficients[,4][1])

Regions = c("Central", "Lake", "MidAtl", "NewEngland", "Plains")

PVals <- c(summary(Central_Model)$coefficients[,4][2],
           summary(Lake_Model)$coefficients[,4][2],
           summary(MidAtl_Model)$coefficients[,4][2],
           summary(NE_Model)$coefficients[,4][2],
           summary(Plains_Model)$coefficients[,4][2])

ModelStats <- data.frame(Region = Regions,Int = Intercepts, PVals = PVals)


# GAM <- gam(data = TPOSampleJoined, formula = Pro_Rad_SqRt ~ te(LOG_TOT_MCF, by = as.factor(Region), sp = .35, k = 3))
# summary(GAM)
# gam.check(GAM)
# plot.gam(GAM, residuals = TRUE, shade = TRUE, seWithMean = TRUE)

# GAM_Res <- residuals.gam(GAM)
# GAM_Res <- GAM_Res^2
# mean(GAM_Res)

# Central=function(x){coef(Best_Model)[2]*x+coef(Best_Model)[1]}
# Lake=function(x){coef(Best_Model)[2]*x+coef(Best_Model)[1]+coef(Best_Model)[3]}
# MI=function(x){coef(Best_Model)[2]*x+coef(Best_Model)[1]+coef(Best_Model)[4]}
# MidAtl=function(x){coef(Best_Model)[2]*x+coef(Best_Model)[1]+coef(Best_Model)[5]}
# NE=function(x){coef(Best_Model)[2]*x+coef(Best_Model)[1]+coef(Best_Model)[6]}
# PA=function(x){coef(Best_Model)[2]*x+coef(Best_Model)[1]+coef(Best_Model)[7]}
# Plains=function(x){coef(Best_Model)[2]*x+coef(Best_Model)[1]+coef(Best_Model)[8]}
# 
# ggplot(TPOSampleJoined,aes(y=Pro_Rad_SqRt,x=LOG_TOT_MCF,color=Region))+
#   geom_point()+
#         stat_function(fun=Central,geom="line",color='yellow') +
#         stat_function(fun=Lake,geom="line",color='blue') +
#         stat_function(fun=MI,geom="line",color='red') +
#         stat_function(fun=MidAtl,geom="line",color='black') +
#         stat_function(fun=NE,geom="line",color='green') +
#         stat_function(fun=PA,geom="line",color='orange') +
#         stat_function(fun=Plains,geom="line",color='purple')

3 Model/VIF Visualization

3.1 Preferred Model (Volume & Radius)

Show code

TPOSampleJoined <- TPOSampleJoined %>% 
  left_join(RegionCounts, by = "Region") %>%
  left_join(ModelStats, by = "Region")

TPOSampleJoined %>%
  filter(!is.na(Pro_Rad)) %>%
  ggplot(aes(LOG_TOT_MCF, Pro_Rad_SqRt)) +
    geom_point() +
    geom_smooth(se = FALSE, method = "lm") +
    scale_x_continuous(limits = c(-.5, 4.5), breaks = c(seq(-.5,4.5,.5))) +
    scale_y_continuous(limits = c(0, 15), breaks = c(seq(0,14,2))) +
    facet_wrap(~Region, nrow = 3) +
    theme_fivethirtyeight(base_size = 20, base_family = "serif") +
    theme(axis.title = element_text(family = 'serif', size = 20)) +
    labs(title = "Linear Regression Test Plot", x = "Log(10) MCF",y = "Square Root of Procurement Radius") +
    stat_regline_equation(label.x = -.5, label.y = 14, aes(label = ..rr.label..)) +
    geom_text(x = -.25, y = 12, aes(label = paste0("n = ", n))) +
    geom_text(x = -.2, y = 0, aes(label = paste0("P-Val = ", round(PVals, 8))))

Show code

ggplot(ModelStats, aes(Region, PVals)) +
    geom_col() +
    geom_text(aes(label = round(PVals, digits = 8)), size = 6 ,nudge_y = .001) + 
    scale_y_continuous(limits = c(0, .05), breaks = c(seq(0,.05,.01))) +
    theme_fivethirtyeight(base_size = 20, base_family = "serif") +
    theme(axis.title = element_text(family = 'serif', size = 20)) +
    labs(title = "P-Values by Region", x = "Region", y = "P-Value")

Show code

VIFScores <- vif(Best_Model) %>%
  as.data.frame(row.names = NULL)

VIFScores <- VIFScores %>%
  dplyr::rename("VIF" = `GVIF^(1/(2*Df))`)
VIFs <- as.data.frame(VIFScores$VIF)
VIFs <- cbind(VIFs, Variable = c("Log(10) MCF", "Region", "Interaction"))
VIFs <- VIFs %>%
  dplyr::rename('VIF' =`VIFScores$VIF`)

ggplot(VIFs, aes(Variable, VIF)) +
  geom_col(position = "identity") + 
  geom_hline(yintercept = 4) +
  geom_text(aes(label = round(VIF, digits = 2)), nudge_y = .12) +
  scale_y_continuous(limits = c(0,5), breaks = c(seq(0,5,1))) +
  theme_fivethirtyeight(base_size = 20, base_family = "serif") +
  theme(axis.title = element_text(family = 'serif', size = 20)) +
  labs(title = "Linear Regression Model - VIF Scores", y = "VIF Score", x = "Variable")

Show code

rm(VIFScores, VIFs)

Intercepts

                  (Intercept)                   (Intercept) 
0.000000000000004719958607295 0.000000000000000000006974468 
                  (Intercept)                   (Intercept) 
0.000000000000000000050105485 0.000000002969597000132150000 
                  (Intercept) 
0.000000000000026187370596178

3.2 Fall-Back Model (Volume & State)

Show code

TPOSampleJoined %>%
  filter(!is.na(Pro_Rad)) %>%
  ggplot(aes(LOG_TOT_MCF, Pro_Rad_SqRt)) +
    geom_point() +
    geom_smooth(se = FALSE, method = "lm") +
    # scale_x_continuous(limits = c(0, 16000), breaks = c(seq(0,16000,2000))) +
    scale_y_continuous(limits = c(0, 14), breaks = c(seq(0,14,2))) +
    facet_wrap(~MILL_STATE, scales = "free_x", nrow = 3) +
    theme_fivethirtyeight(base_size = 20, base_family = "serif") +
    theme(axis.title = element_text(family = 'serif', size = 20)) +
    labs(title = "Linear Regression Test Plot", x = "MCF",y = "Procurement Radius") +
    stat_regline_equation(label.x = 1, label.y = 14, aes(label = ..rr.label..))

Show code

VIFScores <- vif(Test_Model) %>%
  as.data.frame(row.names = NULL)

VIFScores <- VIFScores %>%
  dplyr::rename("VIF" = `GVIF^(1/(2*Df))`)
VIFs <- as.data.frame(VIFScores$VIF)
VIFs <- cbind(VIFs, Variable = c("Log(10) MCF", "State"))
VIFs <- VIFs %>%
  dplyr::rename('VIF' =`VIFScores$VIF`)

ggplot(VIFs, aes(Variable, VIF)) +
  geom_col(position = "identity") + 
  geom_hline(yintercept = 4) +
  geom_text(aes(label = round(VIF, digits = 2)), nudge_y = .12) +
  scale_y_continuous(limits = c(0,5), breaks = c(seq(0,5,1))) +
  theme_fivethirtyeight(base_size = 20, base_family = "serif") +
  theme(axis.title = element_text(family = 'serif', size = 20)) +
  labs(title = "Linear Regression Model - VIF Scores", y = "GVIF Value", x = "Variable")

Show code

rm(VIFScores, VIFs)
# TPOSampleJoined <- TPOSampleJoined %>%
#   select(-c(BestPredictions, BestResiduals))

4 Chi-Squared Tests

Show code

TPOSampleJoined_Chi <- TPOSampleJoined %>%
  filter(!is.na(Pro_Rad_SqRt) & !is.na(MILL_STATE) & !is.na(LOG_TOT_MCF)) 

# Chi Tests

Chi_Rad_Vol <- chisq.test(table(TPOSampleJoined_Chi$Pro_Rad_SqRt, TPOSampleJoined_Chi$LOG_TOT_MCF))
#Chi_Rad_State <- chisq.test(table(TPOSampleJoined_Chi$Pro_Rad_SqRt, TPOSampleJoined_Chi$MILL_STATE))
#Chi_Rad_Volt <- chisq.test(table(TPOSampleJoined_Chi$Pro_Rad_SqRt, TPOSampleJoined_Chi$volt))
Chi_Rad_Region <- chisq.test(table(TPOSampleJoined_Chi$Pro_Rad_SqRt, TPOSampleJoined_Chi$Region))

ChiStats <- c(Chi_Rad_Vol$statistic, Chi_Rad_Region$statistic)
ChiParams <- c(Chi_Rad_Vol$parameter, Chi_Rad_Region$parameter)
ChiPVals <- c(Chi_Rad_Vol$p.value, Chi_Rad_Region$p.value)

ChiJoined <- as.data.frame(cbind(Stats = ChiStats, Parameters = ChiParams, PValues = ChiPVals), row.names = c("Log(10) MCF", "Region")) %>%
  rownames_to_column(var = "Variable")

ggplot(ChiJoined, aes(PValues, Variable)) +
  geom_col(position = "identity") + 
  geom_hline(yintercept = .05) +
  geom_text(aes(label = round(PValues, digits = 10)), nudge_x = .005) +
  scale_x_continuous(limits = c(0,.2), breaks = c(seq(0,.2,.02))) +
  theme_fivethirtyeight(base_size = 20, base_family = "serif") +
  theme(axis.title = element_text(family = 'serif', size = 20)) +
  labs(title = "P-Value of Model Variables in Relation to SqRt Procurement Radius", y = "Variable", x = "P-Value") +
  coord_flip()

4.1 Sqrt(Radius) & Log(10) MCF

Show code

Chi_Rad_Vol


    Pearson's Chi-squared test

data:  table(TPOSampleJoined_Chi$Pro_Rad_SqRt, TPOSampleJoined_Chi$LOG_TOT_MCF)
X-squared = 13873, df = 13542, p-value = 0.02265

Show code

Correlation <- cor(TPOSampleJoined_Chi$Pro_Rad_SqRt, TPOSampleJoined_Chi$TOT_MCF)

print(paste0("Correlation: ", round(Correlation, digits = 4)))

[1] "Correlation: 0.1995"

4.2 Sqrt(Radius) & Region

Show code

Chi_Rad_Region


    Pearson's Chi-squared test

data:  table(TPOSampleJoined_Chi$Pro_Rad_SqRt, TPOSampleJoined_Chi$Region)
X-squared = 168.51, df = 148, p-value = 0.1191

5 Model Residuals

Show code

ggplot(Residuals, aes(Regions, Mean_Residuals)) +
  geom_col(position = "identity") +
  geom_text(aes(label = round(Mean_Residuals, digits = 1)), nudge_y = .5) +
  scale_y_continuous(limits = c(0,30), breaks = c(seq(0,30,5))) +
  theme_fivethirtyeight(base_size = 20, base_family = "serif") +
  theme(axis.title = element_text(family = 'serif', size = 20)) +
  labs(title = "Mean Residuals by Region", y = "Mean Residuals (Unstransformed Radius)", x = "Region")

TPO Heat Map Analysis

1 Distribution Plots

1.1 Full Sample

1.2 Model-Obervations Only

2 Model Creation & Summary

3 Model/VIF Visualization

3.1 Preferred Model (Volume & Radius)

3.2 Fall-Back Model (Volume & State)

4 Chi-Squared Tests

4.1 Sqrt(Radius) & Log(10) MCF

4.2 Sqrt(Radius) & Region

5 Model Residuals