TPO Mill Size Analysis

Early findings from the Mill-Size Analysis.

Ian Kennedy
2023-06-12
Show code
# Function to convert plot listings to 1,000 instead of 1000 (i.e. allow commas for thousands places)
knit_hooks$set(inline = function(x) {
  prettyNum(x, big.mark=",")
})

# Read in the 2021 Sample-Frame (Updated with NR Calls Spreadsheet)
TPO_Data <- read_excel(path = "C:\\Users\\ikenn\\OneDrive - University of Massachusetts\\Documents - FFRC_TPO\\2021 Mill Data (SY2022)\\IMPLEMENTATION\\NR_CALLS\\SampleFrame_2021_Updated.xlsx")

# Function to convert StateCodes to State Abbr
State_Code_Conversion <- function(Data){
   Data %>%
    mutate(MILL_STATE = case_when(MILL_STATECD == 9 ~ "CT",
                                MILL_STATECD == 10 ~ "DE",
                                MILL_STATECD == 17 ~ "IL", 
                                MILL_STATECD == 18 ~ "IN",
                                MILL_STATECD == 19 ~ "IA",
                                MILL_STATECD == 20 ~ "KS",
                                MILL_STATECD == 23 ~ "ME",
                                MILL_STATECD == 24 ~ "MD",
                                MILL_STATECD == 25 ~ "MA",
                                MILL_STATECD == 26 ~ "MI",
                                MILL_STATECD == 27 ~ "MN",
                                MILL_STATECD == 29 ~ "MO",
                                MILL_STATECD == 31 ~ "NE",
                                MILL_STATECD == 33 ~ "NH",
                                MILL_STATECD == 34 ~ "NJ",
                                MILL_STATECD == 36 ~ "NY",
                                MILL_STATECD == 38 ~ "ND",
                                MILL_STATECD == 39 ~ "OH",
                                MILL_STATECD == 42 ~ "PA",
                                MILL_STATECD == 44 ~ "RI",
                                MILL_STATECD == 46 ~ "SD",
                                MILL_STATECD == 50 ~ "VT",
                                MILL_STATECD == 54 ~ "WV",
                                MILL_STATECD == 55 ~ "WI"))
}

# Correct a StateCD for one incorrect mill listing
TPO_Data <- TPO_Data %>%
  mutate(MILL_STATECD = ifelse(MILL_NAME == "James (Jim) Arron", 25, MILL_STATECD))
#Convert State Codes to State Abbr (this adds a column named "MILL_STATE" to the df)
TPO_Data <- State_Code_Conversion(TPO_Data)

# Filter out Idle, OOB, and Dismantled mills, Convert Address listings to the most up to date listings (from NR-Calls), and correct a state listing for "VA" (which should be "OH"),
TPO_Data <- TPO_Data %>%
  # Filter out Idle, OOB, and Dismantled mills
  filter(!IDLE_OOB_DISMANTLED %in% c("IDLE", "OOB", "DISMANTLED")) %>%
  # Convert Address listings to the most up to date listings (from NR-Calls)
  mutate(MILL_NAME = ifelse(!is.na(MILL_NAME_2021), MILL_NAME_2021, MILL_NAME),
         MILL_STREET1 = ifelse(!is.na(MILL_MAILING_ADDRESS), MILL_MAILING_ADDRESS, MILL_STREET1),
         MILL_CITY = ifelse(!is.na(MILL_MAILING_CITY), MILL_MAILING_CITY, MILL_CITY),
         MILL_ZIP_CODE = ifelse(!is.na(MILL_MAILING_ZIP), MILL_MAILING_ZIP, MILL_ZIP_CODE),
         MILL_STATE = ifelse(!is.na(MILL_MAILING_STATE), MILL_MAILING_STATE, MILL_STATE),
         # Convert TPOID to the correct value...This is a 2021-specific problem (a few mills had missing Mill #s so we had to create some which were amended by MN later)
         TPOID = ifelse(!is.na(UNIQUE_ID_CORRECTED), UNIQUE_ID_CORRECTED, TPOID),
         # Correct a state listing for an OH mill
         MILL_STATE = ifelse(MILL_STATE == "VA", "OH", MILL_STATE)) %>% 
  # Drop unnecessary columns
  select(-c(SURVEY_YEAR, MEANING, HS, MILL_STATUS_CD:note, UNIQUE_ID_CORRECTED:MILL_MAILING_ZIP, CONTACT_FIRST_NAME:STATUS_NOTES_2021))

# Filter out duplicate listings
Dups <- TPO_Data %>%
  filter(MILL_NBR != "NA" & !is.na(MILL_NBR))
TPO_Data <- TPO_Data %>%
  filter(MILL_NBR == "NA" | is.na(MILL_NBR))
Dups <- Dups %>%
  distinct(TPOID, .keep_all = TRUE)
TPO_Data <- TPO_Data %>%
  rbind(Dups)
rm(Dups)
Show code
# All volumes reported here are PROCUREMENT VOLUMES

# Convert volume (MCF) listings to log10(MCF), BF, and log10(BF)
TPO_Data <- TPO_Data %>%
  mutate(TOT_MCF_LOG = log10(TOT_MCF),
         TOT_BF = (TOT_MCF/.158) * 1000,
         TOT_BF_LOG = log10(TOT_BF))

# Calculate total volume of Non-Pulp mills/Export Yard 
TOT_Volume_NonPulp <- TPO_Data %>%
  filter(!MILL_TYPE_CD %in% c(30, 40, 117)) %>%
  select(TOT_MCF)%>%
  sum(na.rm = FALSE)

# Calculate total number of Non-Pulp mills/Export Yard
MillCount_NonPulp <- TPO_Data %>%
  filter(!MILL_TYPE_CD %in% c(30, 40, 117)) %>%
  nrow() %>%
  as.numeric()

# Calculate total volume of Pulp mills/Export Yard 
TOT_Volume_Pulp <- TPO_Data %>%
  filter(MILL_TYPE_CD %in% c(30, 40, 117)) %>%
  select(TOT_MCF)%>%
  sum(na.rm = FALSE)

# Calculate total number of Pulp mills/Export Yard 
MillCount_Pulp <- TPO_Data %>%
  filter(MILL_TYPE_CD %in% c(30, 40, 117)) %>%
  nrow() %>%
  as.numeric()

# Calculate total volume of all NRS mills
TOT_Volume <- TPO_Data %>%
  select(TOT_MCF)%>%
  sum(na.rm = FALSE)

#Calculate Total # of mills
TPO_Data %>%
    nrow() -> TOT_Mills
TOT_Mills <- as.numeric(unlist(TOT_Mills))

# Calculate number of mills with volume > 100,000,000 BF
TPO_Data %>%
  filter(TOT_BF >= 100000000) %>%
  nrow -> TOT_Mills_High_Vol
TOT_Mills_High_Vol <- as.numeric(unlist(TOT_Mills_High_Vol))

# The portion below ('Initial Results') prints the values calculated above within the provided sentences

1 Initial Results

2 Histograms of Log(10) Volume

Show code
# Create variable for breaks
Hist_Breaks_MCF <- c(seq(-2, 5, by = .5))

#Create a histogram of Mill Procurement Volumes - Log10(MCF)
ggplot(TPO_Data, aes(TOT_MCF_LOG, label = ..count..), ylim = c(0, 600)) + 
  geom_histogram(breaks = Hist_Breaks_MCF, bins = 14) + 
  scale_x_continuous(breaks = Hist_Breaks_MCF) +
  labs(title = "Histogram of Log(10) MCF") + 
  # Print 'count' labels for each bin
  geom_text(stat = "bin", nudge_y = 10, size = 15, breaks = Hist_Breaks_MCF) +
  theme(text = element_text(family = "serif")) +
  theme_fivethirtyeight(base_size = 60, 
                        base_family = 'serif') +
theme(axis.title = element_text(family = 'serif', size = 60), 
      axis.text = element_text(size = 50),
      legend.text = element_text(size = 50), 
      legend.title = element_text(size = 60)) +   
  ylab('Mill Count') + xlab('Log(10) MCF')
Show code
rm(Hist_Breaks_MCF)

3 State Level Plots

3.1 Log(10) Int’l BF Volume Box & Scatter Plots

Show code
#Create State-by-State boxplots and scatter plots for Logged MCF Volume

# State-by State Jittered Scatter Plot for log10(MCF)
ggplot(TPO_Data, aes(x = MILL_STATE, y = TOT_MCF_LOG, color = MILL_STATE)) + 
  # Width controls 'width' of jittered points, maximum of .5 (i.e. .5 in each direction = 1)
  geom_jitter(size = 2.5, width = .15) +
  scale_y_continuous(breaks = c(seq(-2,5,.5)), limits = c(-2,5)) + 
  labs(title = "Log(10) MCF by State", y = "Log(10) MCF", x = "State") +
  theme_fivethirtyeight(base_size = 60, base_family = 'serif') +
  theme(axis.title = element_text(family = 'serif', size = 60), 
        axis.text = element_text(size = 50), legend.position = "none") +
  ylab("Log(10) MCF") + xlab('State')
Show code
# This is a duplicate plot as the one above, though includes horizontal lines at three of the 'thresholds' analyzed later
# To change which threshold(s) are shown,  change the '10', '15', and/or '45' to a different value...10 refers to 10,000 BF, 15 to 15,000 BF, etc.
# Thresholds analyzed later include 1000 (1), 5000 (5), 10000 (10), 15000 (15), 20000 (20), 25000 (25), 30000 (30), 35000 (35), 40000 (40), 45000 (45), 50000 (50)
ggplot(TPO_Data, aes(x = MILL_STATE, y = TOT_MCF_LOG, color = MILL_STATE)) +
  # Width controls 'width' of jittered points, maximum of .5 (i.e. .5 in each direction = 1)
  geom_jitter(size = 2.5, width = .15) +
  # Draw horizontal lines at certain thresholds...For a line drawn at threshold = 35000 BF, Use the following:
  # geom_hline(yintercept = log10(35*.158), size = .6, color ="red")
  geom_hline(yintercept = log10(10*.158), size = .6, color ="red") +
  geom_hline(yintercept = log10(15*.158), size = .6, color ="red") +
  geom_hline(yintercept = log10(45*.158), size = .6, color ="red") +
  scale_y_continuous(breaks = c(seq(-2,5,.5)), limits = c(-2,5)) + 
  labs(title = "Log(10) MCF by State", y = "Log(10) MCF", x = "State") +
  theme_fivethirtyeight(base_size = 60, base_family = 'serif') +
  theme(axis.title = element_text(family = 'serif', size = 60), 
        axis.text = element_text(size = 50), legend.position = "none") +
  ylab("Log(10) MCF") + xlab('State')
Show code
# State-by State Box Plot for log10(MCF)
ggplot(TPO_Data, aes(x = MILL_STATE, y = TOT_MCF_LOG, color = MILL_STATE)) + 
  geom_boxplot(outlier.size = 2.5, lwd = 1.2) +
  scale_y_continuous(breaks = c(seq(-2,5,.5)), limits = c(-2,5)) + 
  labs(title = "Log(10) MCF by State", y = "Log(10) MCF", x = "State") +
  theme_fivethirtyeight(base_size = 60, base_family = 'serif') +
  theme(axis.title = element_text(family = 'serif', size = 60), 
        axis.text = element_text(size = 50), 
        legend.position = "none") +
  ylab("Log(10) MCF") + xlab('State')
Show code
# State-by Violin & Jittered Scatter Plot for log10(MCF)
ggplot(TPO_Data, aes(0, y = TOT_MCF_LOG, color = MILL_STATE)) + 
  geom_violin(width = 1, scale = 'count') + 
  # Width controls 'width' of jittered points, maximum of .5 (i.e. .5 in each direction = 1)
  geom_jitter(width = 0.5, size = 2.5) +
  scale_y_continuous(breaks = c(seq(-2,5,1)), limits = c(-2,5)) + 
  labs(title = "Log(10) of MCF by State") +  
  theme_fivethirtyeight(base_size = 60, base_family = 'serif') + 
  theme(axis.title = element_text(family = 'serif', size = 60), 
        legend.position = "none",
        axis.text.x = element_blank(), 
        panel.grid.major.x = element_blank(), 
        axis.text.y = element_text(size = 40)) + 
  ylab("Log(10) MCF") + xlab('State') + 
  facet_wrap(facets = vars(MILL_STATE), ncol = 8)

3.2 Volume-Outlier Plot

Show code
#Create Bar Chart for High Volume Mills (> 1,000,000 BF) by State
TPO_Data %>%
  group_by(MILL_STATE) %>%
  # Filter for mills procuring > 1,000,000 BF
  filter(TOT_BF_LOG >= 8) %>%
  summarize("High_Volume_Mills" = n()) %>%
  # Order states by number of outlier mills
  ggplot(aes(x = fct_reorder(MILL_STATE, High_Volume_Mills, .desc = TRUE), y = High_Volume_Mills)) + 
    geom_col() + 
    # Place a 'count' label for each state
    geom_text(aes(label = High_Volume_Mills), size = 14, nudge_y = .2) + 
    scale_y_continuous(breaks = c(seq(0, 10, 1))) + 
    theme_fivethirtyeight(base_size = 60, base_family = 'serif') +
    theme(axis.title = element_text(family = 'serif', size = 60), 
          axis.text = element_text(family = 'serif', size = 50), 
          panel.grid.major.x = element_blank()) + 
    ylab('High Volume Mills') + 
    xlab('State') +
    labs(title = "High Volume Mills (Log(10) Int'l BF > 8) by State")
Show code
# Create High Volume Mill Table 

#High_Vol_by_State_Table <- TPO_Data %>% filter(TOT_BF_LOG >= 8) %>% select(MILL_NBR:MTC,  TOT_MCF:TOT_BF_LOG, MILL_STATUS_CD,MILL_STATE) %>% arrange(MILL_STATECD, desc(TOT_BF_LOG))

#kable(High_Vol_by_State_Table, digits = 3, format.args = list(scientific = FALSE), col.names = c("Mill #", "State Code", "Mill Name", "Mailing Address", "City", "Zip Code", "Mill Type Code", "Volume (MCF)", "Volume (Log10 MCF)", "Volume (BF)", "Volume (Log10 BF)", "Status", "State"), align = "ccccccccccccc", caption = "High Volume Mills") %>% kable_styling(font_size = 16) %>% pack_rows(index = table(High_Vol_by_State_Table$MILL_STATE))

3.3 Mill Type Plot

Show code
#Create MTC_Summary, a df of Mill Type Counts by State (MTC = Mill Type Code)
MTC_summary <- TPO_Data %>%
  rename('MTC' = 'MILL_TYPE_CD') %>%
  mutate(MTC_Tidy = case_when(MTC %in% c(10,21,22,23,24,25,26) ~ "Sawmill",
                         MTC == 20 ~ "Veneer Mill",
                         MTC %in% c(30,40) ~ "Pulp/Composite Mill",
                         MTC == 50 ~ "Bioenergy Mill",
                         MTC == 60 ~ "House/Cabin Mill",
                         MTC %in% c(70,80,90) ~ "Post/Pole/Piling Mill",
                         MTC == 100 ~ "Residential Firewood Mill",
                         MTC %in% c(110,111) ~ "Other/Misc Mill",
                         MTC == 117 ~ "Concentration Yard",
                         is.na(MTC) ~ "Not Provided"))%>%
  group_by(MILL_STATE, MTC_Tidy) %>%
  arrange(as.factor(MTC_Tidy)) %>%
  summarize("Mill_Type_Count" = n())

#Create bar chart of Mill Type Counts by State
MTC_Tidy_Factor_Levels <- c("Sawmill", "Veneer Mill", "Pulp/Composite Mill", "Bioenergy Mill", "House/Cabin Mill", "Post/Pole/Piling Mill",
                            "Residential Firewood Mill", "Other/Misc Mill", "Concentration Yard", "Not Provided")

# x variable orders states by TOTAL number of mills (regardless of Mill_Type_Count)
ggplot(MTC_summary, aes(x = fct_infreq(MILL_STATE, Mill_Type_Count), 
                        y = Mill_Type_Count, 
                        # fill variable orders within-state Mill_Type_Count (sawmills will always be the first group shown in the bar plot as they dominate every state)
                        fill = fct_rev(fct_infreq(factor(MTC_Tidy, levels = MTC_Tidy_Factor_Levels))))) +
  geom_col(position = 'stack', width = .8) + 
  labs(title = "Mill Type Counts by State") +
  scale_y_continuous(limits = c(0, 425), breaks = c(seq(from = 0, to = 425, by = 25))) +
  # Custom color scale for different Mill Types (Sawmills will show as red)
  scale_fill_manual(name = "Mill Type", values = turbo(10)) +
  theme_fivethirtyeight(base_size = 60, base_family = 'serif') +
  theme(axis.title = element_text(family = 'serif', size = 60), axis.text = element_text(size = 50), 
        legend.text = element_text(size = 40), legend.title = element_text(size = 50)) +
  ylab('Mill Count') + xlab('State') +
  # Create a legend for Mill Types using the color guide - turbo(10)
  guides(fill = guide_legend(title = "Mill Type", reverse = T))
Show code
rm(MTC_summary)

# The next portion creates a seperate df for plotting Mill Types by volume/count (Sample-Frame wide)
MTC_DF <- TPO_Data %>%
  mutate(MTC_Tidy = case_when(MILL_TYPE_CD %in% c(10,21,22,23,24,25,26) ~ "Sawmill",
                         MILL_TYPE_CD == 20 ~ "Veneer Mill",
                         MILL_TYPE_CD %in% c(30,40) ~ "Pulp Mill",
                         MILL_TYPE_CD == 50 ~ "Bioenergy Mill",
                         MILL_TYPE_CD == 60 ~ "House/Cabin Mill",
                         MILL_TYPE_CD %in% c(70,80,90) ~ "Post/Pole/Piling Mill",
                         MILL_TYPE_CD == 100 ~ "Residential Firewood Mill",
                         MILL_TYPE_CD %in% c(110,111) ~ "Other/Misc Mill",
                         MILL_TYPE_CD == 117 ~ "Concentration Yard",
                         is.na(MILL_TYPE_CD) ~ "Not Provided")) %>%
  mutate(MTC_Tidy = as.factor(MTC_Tidy),
         MTC_Tidy = fct_reorder(MTC_Tidy, TOT_MCF_LOG, .desc = TRUE))

# MTCCounts will contain a df of all Mill Types by Mill count from the Sample-Frame
MTCCounts <- MTC_DF%>%
  group_by(MTC_Tidy) %>%
  summarise(Count = n())

# Create a Violin Plot of Mill Type by Volume
ggplot(MTC_DF, aes(x = MTC_Tidy, y = TOT_MCF_LOG, fill = MTC_Tidy)) + 
  # Draw 25, 50, and 75% quantiles, alpha = .5 for semi-transparent plot
  geom_violin(draw_quantiles = c(.25, .5, .75), color = 'transparent') +
  scale_y_continuous(breaks = seq(-2, 5, .5), limits = c(-2, 5)) + 
  scale_fill_viridis(discrete = TRUE) +  # Use viridis color scale
  theme_fivethirtyeight(base_family = 'Agency FB') +  
  theme(axis.title = element_text(family = 'Agency FB', size = 50), 
        axis.text.x = element_blank(), 
        axis.text.y = element_text(size = 50), 
        legend.text = element_text(size = 40), 
        legend.title = element_text(size = 50),
        axis.title.x = element_blank(), 
        panel.grid.major.x = element_blank(),
        title = element_text(size = 60)) +
  labs(title = "Log(10) MCF by Mill Type", y = "Log(10) MCF", 
       subtitle = "Lines within plots refer to 25th, 50th, & 75th quantiles.", 
       x = "Mill Type") +
  # Create a legend for Mill Types
  guides(fill = guide_legend(title = "Mill Type"))
Show code
# Create a Bar Plot of Mill Types by Count (Sample-Frame wide)
ggplot(MTCCounts, aes(x = MTC_Tidy, y = Count, fill = MTC_Tidy)) + 
  geom_col() +
  scale_fill_viridis(discrete = TRUE) +
  scale_y_continuous(breaks = c(seq(0,2500,250)), limits = c(0,2700)) + 
  labs(title = "Mill Count by Mill Type", y = "Mill Count", x = "Mill Type") +
  theme_fivethirtyeight(base_size = 60, base_family = 'Agency FB') + 
  theme(axis.title = element_text(family = 'Agency FB', size = 60), axis.text.y = element_text(size = 50),
        axis.text.x = element_blank(), axis.title.x = element_blank(), panel.grid.major.x = element_blank(), 
        legend.text = element_text(size = 40), legend.title = element_text(size = 50)) +
  geom_text(aes(label = Count), size = 14 , nudge_y = 75) +
  ylab('Mill Count') + 
  # Create a legend for Mill Type
  guides(fill = guide_legend(title = "Mill Type"))
Show code
rm(MTCDF, MTCCounts, MTC_Tidy_Factor_Levels)

4 State Level Means, Medians, & St. Deviations

Show code
#Calculate means, medians, and standard deviations of log10(MCF) volumes by state
TPO_summary <- TPO_Data %>%
  rename(MTC = MILL_TYPE_CD) %>%
  select(MILL_STATECD, MILL_STATE, MTC, TOT_MCF_LOG, TOT_BF_LOG) %>%
  group_by(MILL_STATE) %>%
  summarize(meanBF = mean(TOT_BF_LOG, na.rm = TRUE), 
            medianBF = median(TOT_BF_LOG, na.rm = TRUE), 
            meanMCF = mean(TOT_MCF_LOG, na.rm = TRUE), 
            medianMCF = median(TOT_MCF_LOG, na.rm = TRUE), 
            St_Dev = sd(TOT_BF_LOG, na.rm = TRUE), 
            NumberofMills = n()) 

#Create table for means, medians, and standard deviations by State
# digits controls number of digits shown for numeric values, align controls the alignment for each column's entries ("ccccccc" for seven columns, all centered)...For 3 centered columns use "ccc"), col.names must be entered for each column IN ORDER!

knitr::kable(TPO_summary, digits = 4, align = "ccccccc", 
col.names = c("State", "Mean Log(10) Int'l BF", "Median Log(10) Int'l BF", "Mean Log(10) MCF", "Median Log(10) MCF", "Standard Deviation", "Mill Count"), 
caption = "Volume-based means, medians, and standard deviations for all mills (omissions not removed) by State. States highlighted in yellow contain at least one outlier mill") %>%
  # Edit the font size
  kable_styling(font_size = 16) %>%
  # Highlight states (rows) with High-Volume mills in yellow
  row_spec(c(6,8,9:11,18,19,23), background = "yellow")
Table 1: Volume-based means, medians, and standard deviations for all mills (omissions not removed) by State. States highlighted in yellow contain at least one outlier mill
State Mean Log(10) Int’l BF Median Log(10) Int’l BF Mean Log(10) MCF Median Log(10) MCF Standard Deviation Mill Count
CT 5.6984 5.5756 1.8971 1.7742 0.7604 24
DE 5.1220 5.3188 1.3206 1.5175 0.9013 17
IA 4.7282 4.1019 0.9269 0.3005 1.0711 91
IL 4.6356 4.0854 0.8342 0.2841 1.0360 142
IN 5.7767 6.1036 1.9754 2.3022 1.1514 144
KS 4.3063 4.3054 0.5050 0.5041 0.5303 41
MA 4.5291 4.0000 0.7278 0.1987 0.9123 97
MD 5.2602 5.0000 1.4588 1.1987 1.3550 50
ME 5.5914 5.1036 1.7900 1.3023 1.2847 164
MI 5.6891 5.6830 1.8878 1.8817 1.0770 353
MN 5.1510 5.0000 1.3497 1.1987 1.1290 260
MO 5.5730 5.7958 1.7717 1.9944 1.0009 403
ND 4.2331 4.1004 0.4318 0.2990 1.0891 4
NE 4.4955 4.3945 0.6942 0.5932 0.8884 48
NH 5.5217 5.4007 1.7204 1.5993 1.1281 44
NJ 4.3523 4.2138 0.5509 0.4124 1.0186 15
NY 5.8210 5.8891 2.0197 2.0877 0.8734 126
OH 5.5482 5.6990 1.7468 1.8976 1.1493 181
PA 5.8743 6.0000 2.0730 2.1987 0.7479 405
RI 4.5644 5.0334 0.7630 1.2321 1.9369 4
SD 5.6997 5.6930 1.8984 1.8917 1.0343 14
VT 5.4912 5.2717 1.6899 1.4704 1.0270 61
WI 6.0059 6.1330 2.2046 2.3316 1.0375 212
WV 6.2482 6.3602 2.4469 2.5588 1.0049 72
Show code
rm(TPO_summary)

5 Omission Threshold Plot and Table

Show code
# Custom function for numbers with commas, Ignore this!
knit_hooks$set(inline = function(x) {
  prettyNum(x, big.mark=",")
})

#Create Omission Thresholds
# If changing thresholds, make sure to update Function_Volumes.
# If new thresholds are not 1/10000th of top-bounding threshold, make sure to update for-in loop.

# This mutate call will create a new column (Volume_Code) that will represent the group the mill falls into based on volume. 
# A mill that procures a volume between 10,000 - 15,000 BF will receive a '1.5', or 15,000/10,000 (1/10,000th of top-bounding threshold)
# A mill that procures a volume between 35,000 - 40,000 BF will receive a '4', or 40,000/10,000 (1/10,000th of top-bounding threshold)
# A mill that procures a volume over 50,000 BF will receive a '99', while a mill with a procurement volume of 0 will receive a '0'
TPO_Data <- TPO_Data %>%
  mutate(Volume_Code = case_when(TOT_MCF >= 7.90 ~ 99,
                                 TOT_MCF < 7.90 & TOT_MCF >= 7.11 ~ 5,
                                 TOT_MCF < 7.11 & TOT_MCF >= 6.32 ~ 4.5,
                                 TOT_MCF < 6.32 & TOT_MCF >= 5.53 ~ 4,
                                 TOT_MCF < 5.53 & TOT_MCF >= 4.74 ~ 3.5,
                                 TOT_MCF < 4.74 & TOT_MCF >= 3.95 ~ 3,
                                 TOT_MCF < 3.95 & TOT_MCF >= 3.16 ~ 2.5,
                                 TOT_MCF < 3.16 & TOT_MCF >= 2.37 ~ 2,
                                 TOT_MCF < 2.37 & TOT_MCF >= 1.58 ~ 1.5,
                                 TOT_MCF < 1.58 & TOT_MCF >= 0.79 ~ 1,
                                 TOT_MCF < 0.79 & TOT_MCF > 0.16 ~ 0.5,
                                 TOT_MCF < 0.16 & TOT_MCF > 0 ~ 0.1,
                                 TOT_MCF == 0 ~ 0))

# Remove mills that are above the 50,000 BF threshold (Volume_Code == 99) or are have a volume listing of 0 (Volume_Code == 0)
Threshold_Data <- TPO_Data %>%
  filter(Volume_Code != 99 & Volume_Code !=0)

#Create Function for Aggregating Mill Count 
Threshold_Mills <- function(Data, Volumes){
  Data %>% 
    # Filter for Volumes_Code(s) that are in 'Volumes' (i.e. no 0s/99s)
    filter(Volume_Code %in% Volumes) %>%
    # Find the number of mills within a given range
    select(TOT_MCF) %>%
    nrow -> placeholder 
  placeholder <- as.numeric(unlist(placeholder))
}

#Create Function for Aggregating Volume Omission Data
Threshold_Volume <- function(Data, Volumes){
  Data %>%
    # Filter for Volumes_Code(s) that are in 'Volumes' (i.e. no 0s/99s)
    filter(Volume_Code %in% Volumes) %>%
    # Find the total volume for mills within a given range
    select(TOT_MCF) %>%
    sum(na.rm = TRUE)
}

#For-Loop for Mill & Volume Omission Data

# Function Volumes will contain all of the 'Volume_Codes' (i.e. 0.1, 0.5, 1, 1.5,...,5)
Function_Volumes <- Threshold_Data$Volume_Code %>%
  unique() %>%
  sort()

# For each 'Volume_Code', Assign values to two variables:

# "TOT_MILL_Omit_5000" will contain a count of mills with a procurement volume between 1,000 - 5,000 BF
# "TOT_MILL_Omit_10000" will contain a count of mills with a procurement volume between 5,000 - 10,000 BF...etc.

# "TOT_Volume_Omit_5000" will contain the summed volume (log10(MCF)) of all mills with a procurement volume between 1,000 - 5,000 BF
# "TOT_Volume_Omit_5000" will contain the summed volume (log10(MCF)) of all mills with a procurement volume between 5,000 - 10,000 BF...etc.
for (i in 1:length(Function_Volumes)){
  assign(paste0("TOT_Mill_Omit_", Function_Volumes[i]*10000), value = Threshold_Mills(Threshold_Data, Function_Volumes[1:i]))
  assign(paste0("TOT_Volume_Omit_", Function_Volumes[i]*10000), value = Threshold_Volume(Threshold_Data, Function_Volumes[1:i]))
}


# Create Vectors for Omitted Mill Counts and Volumes
# Need to change these vectors if changing omission thresholds

# Join together all of the "TOT_Mill_Omit_" variables into a single vector
Omit_Mill_Count_Vector <- c(TOT_Mill_Omit_1000, TOT_Mill_Omit_5000, TOT_Mill_Omit_10000, TOT_Mill_Omit_15000, 
                            TOT_Mill_Omit_20000, TOT_Mill_Omit_25000, TOT_Mill_Omit_30000, TOT_Mill_Omit_35000, 
                            TOT_Mill_Omit_40000, TOT_Mill_Omit_45000, TOT_Mill_Omit_50000)

# Join together all of the "TOT_Volume_Omit_" variables into a single vector
Omit_Volume_Vector <- c(TOT_Volume_Omit_1000,TOT_Volume_Omit_5000, TOT_Volume_Omit_10000, TOT_Volume_Omit_15000, 
                        TOT_Volume_Omit_20000, TOT_Volume_Omit_25000, TOT_Volume_Omit_30000, TOT_Volume_Omit_35000, 
                        TOT_Volume_Omit_40000, TOT_Volume_Omit_45000, TOT_Volume_Omit_50000)

# Remove all of the intermediate variables
rm(TOT_Mill_Omit_1000,TOT_Mill_Omit_5000, TOT_Mill_Omit_10000, TOT_Mill_Omit_15000, TOT_Mill_Omit_20000,
   TOT_Mill_Omit_25000, TOT_Mill_Omit_30000, TOT_Mill_Omit_35000, 
   TOT_Mill_Omit_40000, TOT_Mill_Omit_45000, TOT_Mill_Omit_50000)
rm(TOT_Volume_Omit_1000, TOT_Volume_Omit_5000, TOT_Volume_Omit_10000, TOT_Volume_Omit_15000, TOT_Volume_Omit_20000,
   TOT_Volume_Omit_25000, TOT_Volume_Omit_30000, TOT_Volume_Omit_35000, 
   TOT_Volume_Omit_40000, TOT_Volume_Omit_45000, TOT_Volume_Omit_50000)

# Create a vector of the total volume of all mills within the sample-frame (same length as the "Omit_Mill_Count_Vector" & "Omit_Volume_Vector" created above)
TOT_Volume_Vector <- c(TOT_Volume) %>%
  rep.int(times = length(Omit_Volume_Vector))
# Create a vector of sample-wide mill count (same length as the "Omit_Mill_Count_Vector" & "Omit_Volume_Vector" created above)
TOT_Mill_Vector <- c(TOT_Mills) %>%
  rep.int(times = length(Omit_Volume_Vector))


# Convert Vectors to Dataframe
Omit_DF <- data.frame(Omit_Mill_Count_Vector, Omit_Volume_Vector, TOT_Volume_Vector, TOT_Mill_Vector, 
                      row.names = c('0.16 MCF','0.79 MCF', '1.58 MCF', '2.37 MCF','3.16 MCF', '3.85 MCF', '4.74 MCF', '5.53 MCF', '6.32 MCF', '7.11 MCF', '7.90 MCF')) %>%
  # Use the 'TOT_Mill_Vector' to convert omitted mill count to percentage of the entire sample mill count 
  mutate(Percent_Omitted = Omit_Mill_Count_Vector/TOT_Mill_Vector*100)
# Use the 'TOT_Volume_Vector' to convert omitted volume to percentage of the entire sample volume
Omit_DF_Kable <- Omit_DF %>%
  mutate(Omit_Volume_Vector = as.integer(Omit_Volume_Vector),
         AggPercent = Omit_Volume_Vector/TOT_Volume*100) 

# Produce Omission Table and Plot

# digits controls number of digits shown for numeric values, align controls the alignment for each column's entries ("ccccc" for five columns, all centered)...For 3 centered columns use "ccc"), col.names must be entered for each column IN ORDER!
kable(Omit_DF_Kable, digits = 3, align = "cccccc", 
      col.names = c("Omitted Mill Count", "Omitted Volume (MCF)", "Agg. Volume (MCF) in Sample", 
                    "Agg. Mill Count (No Omissions)", "% of Mill Count Omitted", "% of Agg. Volume (MCF) Omitted"), 
      caption = "Omissions by Varying Volume Baselines", 
      # Converts thousand-place numbers to incoporate commas
      format.args = list(big.mark = ",", scientific = FALSE)) %>%
  # Change the font size
  kable_styling(font_size = 16) %>%
  # Bolden the first column (i.e. top-bounding volume thresholds)...These will be converted to measures of MCF when plotted!
  column_spec(column = 1, bold = TRUE) %>%
  # Remove the 4th/5th columns (These contain sample-wide mill counts/volumes and are not needed)
  remove_column(columns = c(4,5))
Table 2: Omissions by Varying Volume Baselines
Omitted Mill Count Omitted Volume (MCF) % of Mill Count Omitted % of Agg. Volume (MCF) Omitted
0.16 MCF 10 0 0.336 0.000
0.79 MCF 71 26 2.389 0.001
1.58 MCF 280 238 9.421 0.010
2.37 MCF 489 590 16.454 0.024
3.16 MCF 531 703 17.867 0.029
3.85 MCF 593 914 19.953 0.038
4.74 MCF 663 1,206 22.308 0.050
5.53 MCF 721 1,500 24.260 0.062
6.32 MCF 734 1,578 24.697 0.065
7.11 MCF 778 1,866 26.178 0.077
7.90 MCF 798 2,017 26.851 0.083
Show code
rm(Omit_DF_Kable)

# Create a vector with thresholds converted to MCF (from BF)
Omit_Vec <- Function_Volumes*10*.158
# Bind 'Omit_Vec' to 'Omit_DF'
Omit_DF <- cbind(Omit_DF, Omit_Vec)

Omit_DF %>%
  ggplot(aes(Percent_Omitted, Omit_Volume_Vector, 
             # Change color of points by omission threshold
             color = as.character(round(Omit_Vec, 2)))) +
  geom_point(size = 6) +
  # Create a smooth line for connecting all points...No error bars of course!
  geom_smooth(size = .8, se = FALSE, color = 'black') +
  # Limit x-scale (Percent of Mill Count lost) to 0-28%, this may need to be changed in the future!
  scale_x_continuous(breaks = c(seq(0,28,2)), limits = c(0,28)) +
  # Limit y-scale (Volume Omitted - MCF) to 0-2,100 MCF, this may need to be changed in the future!
  scale_y_continuous(labels = scales::comma, limits = c(0,2100), breaks = c(seq(0,2100,100)),
                     # Create a secondary y-axis (right side) that displays volume lost as a percentage of the aggregate sample volume
                     sec.axis =  sec_axis(~.*100/TOT_Volume_Vector, breaks = c(seq(0,.16,.01)), name = "% of Aggregate Volume (MCF) Omitted")) +
  # Create a threshold-volume based scale for the legend, thresholds will be ordered from smallest to largest in the legend.
  scale_color_discrete(name = "Omission Threshold (MCF)", labels = function(x) format(sort(as.numeric(x)), big.mark = ",", scientific = FALSE)) +
  labs(title = "Omitted Volumes (MCF) & Mill Counts at Varying Thresholds", subtitle = "Colored #'s refer to the # of mills dropped at each threshold") +
  # Label each point by the count of mills dropped at each threshold, nudge_y = y-adjustment from point for the label, nudge_x = x-adjustment from point for the label
  geom_text(aes(label = Omit_Mill_Count_Vector), size =  16, nudge_y = 70, nudge_x = -.2, show.legend = FALSE) +
  theme_fivethirtyeight(base_size = 40, base_family = 'serif') +
  # plot.margin allows for additional space outside the plot for placement of the secondary y-axis (volume lost as a percentage of the aggregate sample volume)
  theme(axis.title = element_text(family = 'serif', size = 50), plot.margin = unit(c(1,1,1,1), "cm"), 
        axis.title.y.right = element_text(vjust=unit(1.5, "cm")), axis.text = element_text(size = 40),
        legend.text = element_text(size = 60), legend.title = element_text(size = 60)) + 
  ylab("Aggregate Volume (MCF) Omitted") + xlab('% of Mills Omitted')
Show code
# Remove Unneeded Vectors
#rm(Omit_Mill_Count_Vector, Omit_Volume_Vector, TOT_Volume_Vector, TOT_Mill_Vector, i, Function_Volumes)

5.1 State-Level Omission Plot and Table

Omission Threshold Interactive Web Map

Show code
# Find State-by-State Omitted Volume Totals
# At this point, Threshold_Data contains all mills within 0-50,000 BF, with Volume_Code representing 1/10,000th of the upper threshold

# Find the State-Level omitted mill counts at each threshold
State_Omit_Count_Levels <- Threshold_Data %>%
  rename(MTC = MILL_TYPE_CD) %>%
  select(MILL_STATECD, MTC:TOT_BF_LOG, Volume_Code) %>%
  # Group by State and threshold
  group_by(MILL_STATECD, Volume_Code) %>%
  # Find mill count at each threshold
  summarize(Mill_Count = n()) %>%
  # Convert Volume_Code into the associated upper threshold
  mutate(Volume_Code = Volume_Code*10000)
#Convert State Codes to State Abbrs
State_Omit_Count_Levels <- State_Code_Conversion(State_Omit_Count_Levels)

# Function for summing Omitted Volume by Threshold/State

#Specify Column Order 
col_order <- c("State", "Omission Threshold (BF)", "Volume Omitted (BF)",
               "Total State Volume (BF)", "% of Volume Omitted")

# Find Total State Volumes
State_Volumes <- TPO_Data %>%
  select('MILL_STATE', 'TOT_MCF', 'TOT_MCF_LOG') %>%
  group_by(MILL_STATE) %>%
  summarize(State_Volume = sum(TOT_MCF))

# Create Omitted Volume by State DF
# Omission_Data will contain mill count/volume-based omission data for each threshold relevant to the state
## Ex. The only mills in CT within 0-50,000 BF are within the 25,000-30,000 BF range...Thus only one row with the top-bounding threshold's (30,000 BF) data will be present for CT
Omission_Data <- Threshold_Data %>%
  # Group by State and threshold
  group_by(MILL_STATE, Volume_Code) %>%
  # Find volume omitted at each threshold
  summarize('Volume Omitted (BF)' = sum(TOT_MCF)) %>%
  # Convert Volume_Code into the associated upper threshold
  mutate('Omission Threshold (BF)' = Volume_Code*10000) %>%
  select(-Volume_Code) %>%
  # Join the State-level Aggregate Volumes and rename a few fields
  left_join(State_Volumes, by = 'MILL_STATE') %>%
  rename('Total State Volume (BF)' = 'State_Volume', 'State' = 'MILL_STATE') %>%
  ungroup()

# Custom Function for summing within-state volumes
Sum_Threshold_Volumes_by_State <- function(Data){
  # For each Omission Threshold
  for (v in 1:length(Data$`Omission Threshold (BF)`)){
    # If this is first threshold in a state (1000 BF), skip!
    if (v == 1){
      next
      # If the previous listing is not in the same state as the current listing, skip! (See the Omission_Data df to understand why these are skipped)
    } else if (identical(Data$State[v], Data$State[v-1]) == FALSE){
      next
      # If the previous listing is in the same state & the Omission Threshold (i.e. 1000, 5000, 10000, etc.) is greater than the previous threshold, add the volumes for the previous/current listing together...This new volume will overtake the old entry and will be summed for thresholds up-until the current threshold
    } else if (Data$`Omission Threshold (BF)`[v] > Data$`Omission Threshold (BF)`[v-1]){
      Data$`Volume Omitted (BF)`[v] + Data$`Volume Omitted (BF)`[v-1] -> Data$`Volume Omitted (BF)`[v]
      # Else break!
    } else{
      break
    }
    }
  Data
  }

# STOPPED HERE!!!

# Sum Volumes by State/Threshold & Create % field comparing Omitted Threshold Volume to Total State Volume
Omission_Data <- Omission_Data %>%
  Sum_Threshold_Volumes_by_State() %>%
  mutate('% of Volume Omitted' = `Volume Omitted (BF)`/`Total State Volume (BF)` * 100)
  

# Reorder columns
Omission_Data <- Omission_Data[, col_order]
rm(col_order)

# Join Omission Volume Data with Omitted Mill Counts
Omission_Data <- Omission_Data %>%
  left_join(State_Omit_Count_Levels, by = c('State' = 'MILL_STATE', 'Omission Threshold (BF)' = 'Volume_Code')) 

# Sum Omitted Mill Counts by State
Omission_Data <- Omission_Data %>%
  group_by(State) %>%
  mutate(state_Count = sum(Mill_Count)) %>%
  select(-MILL_STATECD)


#Create an Omission Template, with NAs for all empty fields.
States <- unique(TPO_Data$MILL_STATE) 
State_Length = as.numeric(length(States))
Thresholds <- unique(Threshold_Data$Volume_Code) %>%
  sort(decreasing = FALSE)*10000
States <- rep.int(States, times = length(Thresholds))
States <- States %>%
  sort(States, decreasing = FALSE)
Thresholds <- Thresholds %>%
  rep.int(times = State_Length)

Columns <- c("State", "Omission Threshold (BF)")
#Create Omission Template, named TestDF
TestDF <- data.frame("State" = States, "Omission Threshold (BF)" = Thresholds)%>%
  group_by(State)
colnames(TestDF) <- Columns

#Join Omission_Data to the empty template --> TestDF, save as Omission_Data
Omission_Data <- TestDF %>%
  left_join(Omission_Data, by = c('State', "Omission Threshold (BF)")) 

# Fill in all missing values for the empty template...See Omission_Data before/after running this line to see what is happening here!
Omission_Data <- Omission_Data %>%
  group_by(State) %>%
  fill(c("Volume Omitted (BF)", "Total State Volume (BF)", "% of Volume Omitted", "Mill_Count", "state_Count"), .direction = "down") 

#Remove unneeded vectors/DFs
rm(Columns, States, Thresholds, State_Length, TestDF)

#Change NA values to 0...These are thresholds in which the state did not lose any mills/volume
Omission_Data[is.na(Omission_Data)] = 0



SumTest <- function(Data){
  for (v in 1:length(Data$Mill_Count)){
    # Pass the first row
    if (v == 1){
    next
    # Pass to the next row if passing to new state
    } else if (Data$State[v] != Data$State[v-1]){
      next
    # If current row's mill count is zero & previous row is not zero, pull the mill count value from the previous row
    } else if (Data$Mill_Count[v] == 0 & Data$Mill_Count[v-1] != 0){
      Data$Mill_Count[v-1] -> Data$Mill_Count[v]
      next
    # If current row's mill count is not zero, previous row is not zero, & volume omitted is greater than the previous row (threshold), add mill count from previous row       to current row's mill count
    } else if (Data$Mill_Count[v] != 0 & Data$Mill_Count[v-1] != 0 & Data$`Volume Omitted (BF)`[v] > Data$`Volume Omitted (BF)`[v-1]){
      Data$Mill_Count[v] + Data$Mill_Count[v-1] -> Data$Mill_Count[v]
      next
    # If current row's mill count is not zero, & volume omitted is equal to the previous row (threshold), pull mill count from previous row
    } else if (Data$Mill_Count[v] != 0 & Data$Mill_Count[v-1] != 0 & Data$`Volume Omitted (BF)`[v] == Data$`Volume Omitted (BF)`[v-1]){
      Data$Mill_Count[v-1] -> Data$Mill_Count[v]
      next
    # Else pass to next row
    } else{
      next
    }
  }
  Data
}

# Run the SumTest function to finalize the state-level omission data!
Omission_Data <- Omission_Data %>%
  SumTest()

# write_xlsx(Omission_Data, path = "C:\\Users\\ikenn\\OneDrive - University of Massachusetts\\Documents\\R_Workspace\\Omission_Data.xlsx")
Show code
# Custom color palette for usage in the following omission plot
pal <- brewer.pal(11, name = "Spectral")
pal <- rev(pal)

# Old Plot
# Omission_Data %>%
#   ggplot(aes(State,`Volume Omitted (BF)`, color  = factor(as.numeric(round(`Omission Threshold (BF)`/1000*.158, 2))))) + 
#   geom_point(size = 4) +
#   scale_y_continuous(labels = scales::comma, limits = c(0,300), breaks = c(seq(0,300,25))) +
#   scale_color_manual(values = pal, name = "Omission Threshold (MCF)") +
#   labs(title = "Omitted Volumes (MCF) at Varying Thresholds by State", 
#        y = "Omitted Volume (MCF)", x = "State") +
#   theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
#   theme(axis.title = element_text(family = 'serif', size = 50), axis.text = element_text(size = 36)) + 
#   ylab("Volume (MCF) Omitted") + xlab('State')

# Create State-Level Omission Plot (Agg. Volume Omitted at each threshold)
Omission_Data %>%
  # fill variable converts the BF omission threshold to MCF for plotting...(BF/1000)*.158 = MCF
  ggplot(aes(0, `Volume Omitted (BF)`, fill = factor(as.numeric(round(`Omission Threshold (BF)`/1000*.158, 2))))) +
  # Allow within-state bars to be placed side-by-side rather than one on top of the other
  geom_col(position = 'dodge') +
  # Allow for thousands place seperators
  scale_y_continuous(labels = scales::comma) +
  # Use the custom color palette for the legend/plotting points
  scale_fill_manual(values = pal, name = "Omission Threshold (MCF)") +
  # Create plots for each state, 'free_y' allows for independent y-axes for each state
  facet_wrap(facets = vars(State), scales = 'free_y', ncol = 6) +
  labs(title = "Omitted Volumes (MCF) at Varying Thresholds by State", y = "Omitted Volume (MCF)", x = "State") +
  theme_fivethirtyeight(base_size = 50, 
                        base_family = 'serif') +
  theme(axis.text.x=element_blank(), 
        axis.ticks.x=element_blank(), 
        axis.title = element_text(family = 'serif', size = 50),
        panel.grid.major.x = element_blank(), 
        axis.text = element_text(size = 36),
        legend.text = element_text(size = 50), 
        legend.title = element_text(size = 50)) +
  ylab("Volume (MCF) Omitted") 
Show code
# Create State-Level Omission Plot (% of Agg. Volume Omitted at each threshold)
Omission_Data %>%
  # fill variable converts the BF omission threshold to MCF for plotting...(BF/1000)*.158 = MCF
  ggplot(aes(0, `% of Volume Omitted`/100, fill = factor(as.numeric(round(`Omission Threshold (BF)`/1000*.158, 2))))) +
  # Allow within-state bars to be placed side-by-side rather than one on top of the other
  geom_col(position = 'dodge') +
  # Allow for thousands place seperators
  scale_y_continuous(labels = scales::percent) +
  # Use the custom color palette for the legend/plotting points
  scale_fill_manual(values = pal, name = "Omission Threshold (MCF)") +
  # Create plots for each state, 'free_y' allows for independent y-axes for each state
  facet_wrap(facets = vars(State), scales = 'free_y', ncol = 6) +
  labs(title = "% of Statewide Volume Omitted at Varying Thresholds by State", 
       x = "State") +
  theme_fivethirtyeight(base_size = 50, 
                        base_family = 'serif') +
  theme(axis.text.x=element_blank(), 
        axis.ticks.x=element_blank(), 
        axis.title = element_text(family = 'serif', size = 50),
        panel.grid.major.x = element_blank(), 
        axis.text = element_text(size = 36),
        legend.text = element_text(size = 50), 
        legend.title = element_text(size = 50)) + 
  ylab('% of Statewide Volume Omitted') 
Show code
# Modify the df for usage in a table...Converts BF thresholds to MCF thresholds
Omission_Data <- Omission_Data %>%
  rename(`Omission Threshold (MCF)`= `Omission Threshold (BF)`,
         `Volume Omitted (MCF)` = `Volume Omitted (BF)`,
         `Total State Volume (MCF)` = `Total State Volume (BF)`) %>%
  mutate(`Omission Threshold (MCF)` = case_when(`Omission Threshold (MCF)` == 1000 ~ 0.16,
                                                `Omission Threshold (MCF)` == 5000 ~ 0.79,
                                                `Omission Threshold (MCF)` == 10000 ~ 1.58,
                                                `Omission Threshold (MCF)` == 15000 ~ 2.37,
                                                `Omission Threshold (MCF)` == 20000 ~ 3.16,
                                                `Omission Threshold (MCF)` == 25000 ~ 3.95,
                                                `Omission Threshold (MCF)` == 30000 ~ 4.74,
                                                `Omission Threshold (MCF)` == 35000 ~ 5.53,
                                                `Omission Threshold (MCF)` == 40000 ~ 6.32,
                                                `Omission Threshold (MCF)` == 45000 ~ 7.11,
                                                `Omission Threshold (MCF)` == 50000 ~ 7.90,))


# Create the Omission Threshold Table...This contains all state level omission data
# Some of this code is in Javascript (rowstyle)...If not interested in highlighting states with volume-outliers, feel free to comment the 'rowstyle' portion out!
Omission_Table <- reactable(Omission_Data[,c(1:3,5:6)], groupBy = "State", defaultPageSize = 24, outlined = TRUE, resizable = TRUE, wrap = TRUE, highlight = TRUE, 
                           theme = reactableTheme(
                           cellStyle = list(display = "flex", flexDirection = "column", justifyContent = "center"), 
                           headerStyle = list(display = "flex", flexDirection = "column", justifyContent = "center")),
                           # Color all states with high-volume outliers (> 100,000,000 BF in yellow)
                           rowStyle = JS("function(rowInfo) {if (rowInfo.row['State'] == 'KS'|| rowInfo.row['State'] == 'ME'||
                           rowInfo.row['State'] == 'MD'|| rowInfo.row['State'] == 'MI'|| rowInfo.row['State'] == 'MN'|| 
                           rowInfo.row['State'] == 'OH'|| rowInfo.row['State'] == 'PA'|| rowInfo.row['State'] == 'WI') {
                           return { backgroundColor: 'yellow' }
                           }
                           }"),
                           # Locate the correct columns and change the 'format' if necessary.
                           columns = list(
                            Mill_Count = colDef(name = "# of Mills Omitted", align = "center"),
                            State = colDef(align = "center"), 
                            'Omission Threshold (MCF)' = colDef(align = "center", format = colFormat(separators = TRUE)),
                            'Volume Omitted (MCF)' = colDef(align = "center", format = colFormat(separators = TRUE, digits = 2)),
                            '% of Volume Omitted' = colDef(align = "center", format = colFormat(digits = 3))))

# Append a title and caption to the table created above
Omission_Table_Titled <- htmlwidgets::prependContent(Omission_Table, 
h2(class = "title", "Omitted Volumes (MCF) at Varying Thresholds by State", 
   style = "text-align: center"), 
h3(class = "caption", "States highlighted in yellow contain at least one volume-outlier mill", 
   style = "text-align: center; font-size: 85%; font-weight: normal"))

# Output the table
Omission_Table_Titled

Omitted Volumes (MCF) at Varying Thresholds by State

States highlighted in yellow contain at least one volume-outlier mill