Response Rate Analysis

Early findings from the Mill-Size Analysis.

Ian Kennedy
2023-04-12

1 Response Rate Analysis

Show code
State_Code_Conversion <- function(Data){
  Data %>%
    mutate(MILL_STATE = case_when(MILL_STATECD == 9 ~ "CT",
                                  MILL_STATECD == 10 ~ "DE",
                                  MILL_STATECD == 17 ~ "IL", 
                                  MILL_STATECD == 18 ~ "IN",
                                  MILL_STATECD == 19 ~ "IA",
                                  MILL_STATECD == 20 ~ "KS",
                                  MILL_STATECD == 23 ~ "ME",
                                  MILL_STATECD == 24 ~ "MD",
                                  MILL_STATECD == 25 ~ "MA",
                                  MILL_STATECD == 26 ~ "MI",
                                  MILL_STATECD == 27 ~ "MN",
                                  MILL_STATECD == 29 ~ "MO",
                                  MILL_STATECD == 31 ~ "NE",
                                  MILL_STATECD == 33 ~ "NH",
                                  MILL_STATECD == 34 ~ "NJ",
                                  MILL_STATECD == 36 ~ "NY",
                                  MILL_STATECD == 38 ~ "ND",
                                  MILL_STATECD == 39 ~ "OH",
                                  MILL_STATECD == 42 ~ "PA",
                                  MILL_STATECD == 44 ~ "RI",
                                  MILL_STATECD == 46 ~ "SD",
                                  MILL_STATECD == 50 ~ "VT",
                                  MILL_STATECD == 54 ~ "WV",
                                  MILL_STATECD == 55 ~ "WI"))
}

RL <- read_xlsx("C:\\Users\\ikenn\\OneDrive - University of Massachusetts\\Documents - FFRC_TPO\\2021 Mill Data (SY2022)\\SURVEY OUTPUT\\TPO_ReturnLogs_2021.xlsx", sheet = 2)
Sample <- read_xlsx("C:\\Users\\ikenn\\OneDrive - University of Massachusetts\\Documents - FFRC_TPO\\2021 Mill Data (SY2022)\\IMPLEMENTATION\\NR_CALLS\\SampleFrame_2021_Updated.xlsx")
TPO2021 <- read_csv("C:\\Users\\ikenn\\OneDrive - University of Massachusetts\\Documents - FFRC_TPO\\2021 Mill Data (SY2022)\\SURVEY OUTPUT\\TELEFORM CSV AND OVERFLOW SHEETS\\2021_TPO_Mail_Phone_3.28.23.CSV")

RL <- RL %>%
  State_Code_Conversion() %>%
  distinct(TPOID, .keep_all = TRUE)

RLTest <- RL %>%
  rename(Status = "RESPONSE", Returned = "Return Date") %>%
  filter(!Status %in% c("DISMANTLED", "OOB", "IDLE", "REFUSAL")) %>%
  filter(Administrator %in% c("FFRC", "WI_DNR", "VT State", "MN State")) %>%
  mutate(Returned = if_else(is.na(Returned), "No", "Yes")) %>%
  select(c(TPOID, Administrator, Status, FULL_NAME, MILL_STATE, Returned)) %>%
  filter(MILL_STATE != "MI")

RLFFRC <- RLTest 

Sample <- Sample %>%
  select(c(TPOID, TOT_MCF, MILL_TYPE_CD)) %>%
  mutate(TPOID = ifelse(startsWith(TPOID, "9"), paste0("0", TPOID), TPOID)) %>%
  filter(!is.na(TPOID)) %>%
  distinct(TPOID, .keep_all = TRUE) 

RLTest <- RLTest %>%
  left_join(Sample, by = 'TPOID') 
RLTest$Returned <- as.factor(RLTest$Returned)
RLTest$Status <- as.factor(RLTest$Status)

RLSummary <- RLTest %>%
  group_by(Returned, MILL_STATE) %>%
  summarize(AvgVol = mean(log10(TOT_MCF), na.rm = TRUE),
            MedVol = median(log10(TOT_MCF), na.rm = TRUE),
            Count = n()) 

RLTest %>%
  filter(MILL_STATE != "MI") %>%
  ggplot(aes(Returned, log10(TOT_MCF))) + 
  geom_boxplot(outlier.shape = NA) +
  #geom_violin(draw_quantiles = .5) +
  geom_jitter(width = .25,size = 2, alpha = .5) +
  scale_y_continuous(breaks = c(seq(-2,5,.25)), limits = c(-2,5)) + 
  labs(title = "Mean log10(MCF) by Return Status", y = "log10(MCF)", 
       x = "Returned?") +
  theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
  theme(axis.title = element_text(family = 'serif', size = 45), 
        axis.text = element_text(family = 'serif', size = 40),
        legend.position = "none",
        panel.grid.major.x = element_blank())
Show code
RLSummary %>%
  filter(MILL_STATE != "MI") %>%
  ggplot(aes(Returned, AvgVol, fill = MILL_STATE)) +
  geom_col() +
  scale_y_continuous(limits = c(-1,3), breaks = seq(-1,3,1)) +
  theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
  geom_text(aes(label = as.character(Count)), size = 10, nudge_y = .1) + 
  facet_wrap(~MILL_STATE, nrow = 4) +
  theme(panel.grid.major.x = element_blank(), legend.position="none", 
        axis.title = element_text(family = 'serif', size = 45), 
        axis.text = element_text(family = 'serif', size = 40)) +
  labs(title = "Mean log10(MCF) by Return Status & State") +
  ylab("Mean log10(MCF)") +
  xlab("State")
Show code
RLTest %>%
  filter(MILL_STATE != "MI") %>%
  filter(MILL_TYPE_CD != 70) %>%
  filter(!is.na(MILL_TYPE_CD)) %>%
  ggplot(aes(Returned, log10(TOT_MCF))) + 
  geom_boxplot(outlier.shape = NA) +
  #geom_violin(draw_quantiles = .5) +
  geom_jitter(width = .25,size = 1.75, alpha = .5) +
  scale_y_continuous(breaks = c(seq(-2,5,1)), limits = c(-2,5)) + 
  labs(title = "Mean log10(MCF) Return Status & Mill Type", y = "log10(MCF)", 
       x = "Returned?") +
  theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
  theme(axis.title = element_text(family = 'serif', size = 45),
        axis.text = element_text(family = 'serif', size = 40),
        legend.position = "none",
        panel.grid.major.x = element_blank()) + 
  facet_wrap(~MILL_TYPE_CD, nrow = 2)
Show code
RLTest %>%
  filter(MILL_STATE != "MI") %>%
  filter(MILL_TYPE_CD != 70) %>%
  filter(!is.na(MILL_TYPE_CD)) %>%
  ggplot(aes(Returned, log10(TOT_MCF))) + 
  geom_boxplot(outlier.shape = NA) +
  #geom_violin(draw_quantiles = .5) +
  geom_jitter(width = .25,size = 1.75, alpha = .5) +
  scale_y_continuous(breaks = c(seq(-2,5,1)), limits = c(-2,5)) + 
  labs(title = "Mean log10(MCF) by Return Status & State", y = "log10(MCF)", 
       x = "Returned?") +
  theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
  theme(axis.title = element_text(family = 'serif', size = 45),
        axis.text = element_text(family = 'serif', size = 40),
        legend.position = "none",
        panel.grid.major.x = element_blank()) + 
  facet_wrap(~MILL_STATE, nrow = 2) 
Show code
RLTest %>%
  filter(MILL_STATE != "MI") %>%
  ggplot(aes(Returned, log10(TOT_MCF))) + 
  geom_boxplot(outlier.shape = NA) +
  #geom_violin(draw_quantiles = .5) +
  geom_jitter(width = .25,size = 2, alpha = .5) +
  scale_y_continuous(breaks = c(seq(-2,5,.25)), limits = c(-2,5)) + 
  labs(title = "Mean log10(MCF) by Return Status", y = "log10(MCF)", 
       x = "Returned?") +
  theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
  theme(axis.title = element_text(family = 'serif', size = 45), 
        axis.text = element_text(family = 'serif', size = 40),
        legend.position = "none",
        panel.grid.major.x = element_blank())
Show code
RLTotals <- RLTest %>%
  group_by(Returned) %>%
  summarize(MeanVol = mean(TOT_MCF, na.rm = TRUE))

RLTotals %>%
  ggplot(aes(Returned, MeanVol)) +
  geom_point(size = 12) +
  scale_y_continuous(breaks = c(seq(600,1400, 100)), limits = c(600,1400)) + 
  labs(title = "Mean MCF by Return Status", y = "MCF", 
       x = "Returned?") +
  theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
  theme(axis.title = element_text(family = 'serif', size = 45),
        axis.text = element_text(family = 'serif', size = 40),
        legend.position = "none",
        panel.grid.major.x = element_blank())
Show code
# RLTest %>%
#   filter(MILL_STATE != "MI") %>%
#   ggplot(aes(Returned, log10(TOT_MCF))) + 
#   geom_violin(alpha = .5, draw_quantiles = c(.25, .5, .75)) +
#   scale_y_continuous(breaks = c(seq(-2,5,.5)), limits = c(-2,5)) + 
#   labs(title = "Log(10) MCF by State & Return Status", y = "Log(10) MCF", 
#        subtitle = "Lines within plots refer to 25th, 50th, & 75th quantiles.", 
#        x = "Returned?") +
#   theme_fivethirtyeight(base_size = 10, base_family = 'serif') +
#   theme(axis.title = element_text(family = 'serif', size = 10), 
#         legend.position = "none",
#         panel.grid.major.x = element_blank()) + 
#   labs(title = "Log(10) MCF by Mill Type", y = "Log(10) MCF", 
#        subtitle = "Lines within plots refer to 25th, 50th, & 75th quantiles.", 
#        x = "Returned?") +
#   facet_wrap(~MILL_STATE)

# TestPlot %>%
#   annotate_figure(fig.lab = paste0("Median - Log10(MCF): ", round(RLSummary$MedVol[1])), fig.lab.pos = "bottom.left", fig.lab.size = 8) %>%
#   annotate_figure(fig.lab = paste0("Median - Log10(MCF): ", round(RLSummary$MedVol[2])), fig.lab.pos = "bottom.right", fig.lab.size = 8) %>%
#   annotate_figure(fig.lab = paste0("Mean - Log10(MCF): ", round(RLSummary$MedVol[1])), fig.lab.pos = "top.left", fig.lab.size = 8) %>%
#   annotate_figure(fig.lab = paste0("Mean - Log10(MCF): ", round(RLSummary$MedVol[2])), fig.lab.pos = "top.right", fig.lab.size = 8) 

RL2021 <- RLTest

RROverall <- as.numeric(sum(RL2021$Returned == "Yes"))/nrow(RL2021)

RRbyState <- RL2021 %>%
  group_by(MILL_STATE) %>%
  summarize(Yes = sum(Returned == "Yes"),
            No = sum(Returned == "No"),
            Total = Yes+No,
            Response_Rate = Yes/(Yes + No)) %>%
  rename(State = MILL_STATE)

ggplot(RRbyState, aes(reorder(State, -Response_Rate), Response_Rate)) + 
  geom_col() + 
  geom_text(aes(label = paste0("SS: ", Total)), size = 10, nudge_y = .02) + 
  scale_y_continuous(limits = c(0,1.05), breaks = c(seq(0,1,.1))) + 
  theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
  theme(axis.title = element_text(family = 'serif', size = 45), 
        axis.text = element_text(family = 'serif', size = 40), 
        legend.key = element_blank()) + 
  ylab('Response Rate') + 
  xlab('State') +
  labs(title = "Qualified Response Rate by State", subtitle = "Idle, OOB, & Dismantled Mills Not Included, SS = Sample Size")
Show code
RRbyState <- RRbyState %>%
    arrange(-Response_Rate) 

print(RRbyState, n = 23)
# A tibble: 23 × 5
   State   Yes    No Total Response_Rate
   <chr> <int> <int> <int>         <dbl>
 1 ME       54     9    63         0.857
 2 MN       74    28   102         0.725
 3 VT       16     7    23         0.696
 4 MD       15     7    22         0.682
 5 WV       21    10    31         0.677
 6 ND        2     1     3         0.667
 7 SD        9     5    14         0.643
 8 WI       57    36    93         0.613
 9 NY       27    23    50         0.54 
10 PA       83    71   154         0.539
11 MO       59    57   116         0.509
12 KS        8     8    16         0.5  
13 NH        9     9    18         0.5  
14 RI        2     2     4         0.5  
15 NE       10    11    21         0.476
16 MA       16    18    34         0.471
17 IN       25    32    57         0.439
18 DE        7     9    16         0.438
19 CT        8    11    19         0.421
20 OH       28    40    68         0.412
21 IA       15    22    37         0.405
22 IL       14    38    52         0.269
23 NJ        3    12    15         0.2  
Show code
# Rest <- RLFFRC %>%
#   filter(!MILL_STATE %in% c("VT", "MN")) 
#   group_by('Status') %>%
#   summarize(Yes = sum(Returned == "Yes"),
#             No = sum(Returned == "No"))

2 Annual Sample Prop. Capture

Show code
TPO2021 <- read.xlsx("C:\\Users\\ikenn\\OneDrive - University of Massachusetts\\Documents - FFRC_TPO\\2021 Mill Data (SY2022)\\SURVEY OUTPUT\\TPO_ReturnLogs_2021.xlsx", sheet = 2)
TPO2022 <- read.xlsx("C:\\Users\\ikenn\\OneDrive - University of Massachusetts\\Documents - FFRC_TPO\\2022 Mill Data (SY2023)\\TPO2022_ReturnLogs.xlsx", sheet = 2)
Sample <- read_xlsx("C:\\Users\\ikenn\\OneDrive - University of Massachusetts\\Documents - FFRC_TPO\\2021 Mill Data (SY2022)\\IMPLEMENTATION\\NR_CALLS\\SampleFrame_2021_Updated.xlsx")

Sample <- Sample %>%
  select(TPOID, TOT_MCF)

TPO2021 <- TPO2021 %>%
  filter(!is.na(TPOID)) %>%
  left_join(Sample, by = 'TPOID') 

TPO2022 <- TPO2022 %>%
  filter(!is.na(TPOID)) %>%
  distinct(TPOID, .keep_all = TRUE) %>%
  mutate(TPOID = str_replace(TPOID, "2022", "2021")) %>%
  left_join(Sample, by = 'TPOID') %>%
  mutate(IN_2021 = ifelse(TPOID %in% TPO2021$TPOID, 1, 0))

TPO2022 <- TPO2022[-884,]

In2021_2022 <- TPO2022 %>%
  filter(IN_2021 == 1)

NotIn2021 <- TPO2022 %>%
  filter(IN_2021 == 0)

Percent_BothYears <- sum(TPO2022$IN_2021)/nrow(TPO2022)
Vol_BothYears <- mean(In2021_2022$TOT_MCF, na.rm = TRUE)
Vol_2022Only <- mean(NotIn2021$TOT_MCF, na.rm = TRUE)

TPO2022 <- TPO2022 %>%
  State_Code_Conversion() 

TPO2022$MILL_STATE <- as.factor(TPO2022$MILL_STATE)

Test <- TPO2022 %>%
  group_by(MILL_STATE) %>%
  summarize(InBoth = sum(IN_2021),
            Count = n(),
            Percent = (InBoth/Count)*100)

Test2 <- TPO2022 %>%
  filter(IN_2021 == 1) %>%
  group_by(MILL_STATE) %>%
  summarize(MeanVol = mean(log10(TOT_MCF), na.rm = TRUE))

Test3 <- TPO2022 %>%
  filter(IN_2021 == 0) %>%
  group_by(MILL_STATE) %>%
  summarize(MeanVol = mean(log10(TOT_MCF), na.rm = TRUE))   

Test <- Test %>% 
  left_join(Test2, by = 'MILL_STATE') %>%
  left_join(Test3, by = 'MILL_STATE')

Test <- Test %>%
  rename(Both = MeanVol.x, Only2022 = MeanVol.y) %>%
  mutate(Diff = Both - Only2022) 

ggplot(Test, aes(fct_reorder(MILL_STATE, -Percent), Percent)) +
  geom_col() +
  scale_y_continuous(limits = c(0,102), breaks = seq(0,100,10)) +
  theme_fivethirtyeight(base_size = 30, base_family = 'serif') +
  geom_text(aes(label = paste0("SS: ", as.character(Count))), size = 10, nudge_y = 1) + 
  theme(panel.grid.major.x = element_blank(), legend.position = "none", 
        axis.title = element_text(family = 'serif', size = 45),
        axis.text = element_text(family = 'serif', size = 40)) +
  labs(title = "Sample Prop 2021/2022") +
  ylab("Percent in both 2021/2022") +
  xlab("State")
Show code
Test %>%
  filter(!is.na(Diff)) %>%
  ggplot(aes(fct_reorder(MILL_STATE, -Diff), Diff)) +
  geom_col() +
  scale_y_continuous(limits = c(0,4), breaks = seq(0,4,.25)) +
  theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
  theme(panel.grid.major.x = element_blank(), legend.position = "none", 
        axis.title = element_text(family = 'serif', size = 45), 
        axis.text = element_text(family = 'serif', size = 40)) +
   geom_text(aes(label = as.character(round(Diff, 2))), size = 10, nudge_y = .04) + 
  labs(title = "Diff. in Mean log10(MCF) for Mills In-Sample for 2021 & 2022 vs Mills In-Sample only for 2022") +
  ylab("Mean log10(MCF)") +
  xlab("State")
Show code
Test %>%
  filter(!is.na(Diff)) %>%
  ggplot(aes(fct_reorder(MILL_STATE, -Diff), Both)) +
  geom_col() +
  scale_y_continuous(limits = c(0,4), breaks = seq(0,4,.25)) +
  theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
  theme(panel.grid.major.x = element_blank(), legend.position = "none", 
        axis.title = element_text(family = 'serif', size = 45), 
        axis.text = element_text(family = 'serif', size = 40)) +
  geom_text(aes(label = as.character(round(Both, 2))), size = 12, nudge_y = .04) + 
  labs(title = "Mean log10(MCF) - Mills In-Sample for 2022 & 2021") +
  ylab("Mean log10(MCF)") +
  xlab("State")
Show code
Test %>%
  filter(!is.na(Diff)) %>%
  ggplot(aes(fct_reorder(MILL_STATE, -Diff), Only2022)) +
  geom_col() +
  scale_y_continuous(limits = c(0,4), breaks = seq(0,4,.25)) +
  theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
  theme(panel.grid.major.x = element_blank(), legend.position = "none", 
        axis.title = element_text(family = 'serif', size = 45), 
        axis.text = element_text(family = 'serif', size = 40)) +
    geom_text(aes(label = as.character(round(Only2022, 2))), size = 12, nudge_y = .04) + 
  labs(title = "Mean log10(MCF) - Mills In-Sample for only 2022") +
  ylab("Mean log10(MCF)") +
  xlab("State")
Show code
sum(sqrt(Sample$TOT_MCF), na.rm = TRUE)
[1] 47053.46
Show code
VolumeComparison <- data.frame(Volume = c(Vol_BothYears, Vol_2022Only), 
                               SampleInclusion = c("2021 & 2022", "2022 Only"))
VolumeComparison %>%
  ggplot(aes(SampleInclusion, Volume)) +
  geom_col() +
  scale_y_continuous(limits = c(0, 1200), breaks = seq(0, 1200, 200)) +
  theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
  theme(panel.grid.major.x = element_blank(), legend.position="none", axis.title = element_text(family = 'serif', size = 45)) +
  geom_text(aes(label = as.character(round(VolumeComparison$Volume, 0))), size = 16, nudge_y = 15) + 
  labs(title = "Mean MCF by Sample Inclusion") +
  ylab("Mean MCF") +
  xlab("Sampled in?")

Show code
TPO2021 <- read_csv("C:\\Users\\ikenn\\OneDrive - University of Massachusetts\\Documents - FFRC_TPO\\2021 Mill Data (SY2022)\\SURVEY OUTPUT\\TELEFORM CSV AND OVERFLOW SHEETS\\2021_TPO_Mail_Phone_3.28.23.CSV")

EmployeeInfo <- TPO2021 %>%
  select(TPOID, NBR_EMPLOYEES_ALL)

EmployeeInfo <- EmployeeInfo %>%
  distinct(TPOID, .keep_all = TRUE) %>%
  left_join(RLTest, by = 'TPOID')

EmployeeInfo %>%
  ggplot(aes(NBR_EMPLOYEES_ALL, log10(TOT_MCF))) +
  geom_point() +
  geom_smooth(se = FALSE) +
  labs(title = "Log10(MCF) by # of Employees for 2021 Respondents", y = "log10(MCF)", 
       x = "# of Employees") +
  scale_y_continuous(limits = c(-1, 5), breaks = seq(-1, 5, .5)) +
  scale_x_continuous(limits = c(0, 300), breaks = seq(0,300, 25)) +
  theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
  theme(axis.title = element_text(family = 'serif', size = 45),
        axis.text = element_text(family = 'serif', size = 40),
        legend.position = "none")
Show code
Sample <- read_xlsx("C:\\Users\\ikenn\\OneDrive - University of Massachusetts\\Documents - FFRC_TPO\\2021 Mill Data (SY2022)\\IMPLEMENTATION\\NR_CALLS\\SampleFrame_2021_Updated.xlsx")

Sample <- Sample %>%
  group_by(sample) %>%
  summarize(MeanMCF = mean(TOT_MCF),
            MedianMCF = median(TOT_MCF)) %>%
  mutate(sample = ifelse(sample == 1, "In Sample", "Not In Sample")) %>%
  rename('Sample Status - 2022' = 'sample')

Sample %>%
  ggplot(aes(`Sample Status - 2022`, MeanMCF)) +
  geom_col() +
  scale_y_continuous(limits = c(0, 1600), breaks = seq(0, 1600, 200)) +
  theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
  theme(axis.title = element_text(family = 'serif', size = 45),
        axis.text = element_text(family = 'serif', size = 40),
        legend.position = "none") +
  geom_text(aes(label = as.character(round(MeanMCF, 0))), size = 12, nudge_y = 50) + 
  labs(title = "Mean MCF by Sample Inclusion") +
  ylab("Mean MCF") 
Show code
Sample %>%
  ggplot(aes(`Sample Status - 2022`, MedianMCF)) +
  geom_col() +
  scale_y_continuous(limits = c(0, 350), breaks = seq(0, 350, 50)) +
  theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
  theme(axis.title = element_text(family = 'serif', size = 45),
        axis.text = element_text(family = 'serif', size = 40),
        legend.position = "none") +
  geom_text(aes(label = as.character(round(MedianMCF, 0))), size = 12, nudge_y = 20) + 
  labs(title = "Median MCF by Sample Inclusion") +
  ylab("Median MCF")