Early findings from the Mill-Size Analysis.
State_Code_Conversion <- function(Data){
Data %>%
mutate(MILL_STATE = case_when(MILL_STATECD == 9 ~ "CT",
MILL_STATECD == 10 ~ "DE",
MILL_STATECD == 17 ~ "IL",
MILL_STATECD == 18 ~ "IN",
MILL_STATECD == 19 ~ "IA",
MILL_STATECD == 20 ~ "KS",
MILL_STATECD == 23 ~ "ME",
MILL_STATECD == 24 ~ "MD",
MILL_STATECD == 25 ~ "MA",
MILL_STATECD == 26 ~ "MI",
MILL_STATECD == 27 ~ "MN",
MILL_STATECD == 29 ~ "MO",
MILL_STATECD == 31 ~ "NE",
MILL_STATECD == 33 ~ "NH",
MILL_STATECD == 34 ~ "NJ",
MILL_STATECD == 36 ~ "NY",
MILL_STATECD == 38 ~ "ND",
MILL_STATECD == 39 ~ "OH",
MILL_STATECD == 42 ~ "PA",
MILL_STATECD == 44 ~ "RI",
MILL_STATECD == 46 ~ "SD",
MILL_STATECD == 50 ~ "VT",
MILL_STATECD == 54 ~ "WV",
MILL_STATECD == 55 ~ "WI"))
}
RL <- read_xlsx("C:\\Users\\ikenn\\OneDrive - University of Massachusetts\\Documents - FFRC_TPO\\2021 Mill Data (SY2022)\\SURVEY OUTPUT\\TPO_ReturnLogs_2021.xlsx", sheet = 2)
Sample <- read_xlsx("C:\\Users\\ikenn\\OneDrive - University of Massachusetts\\Documents - FFRC_TPO\\2021 Mill Data (SY2022)\\IMPLEMENTATION\\NR_CALLS\\SampleFrame_2021_Updated.xlsx")
TPO2021 <- read_csv("C:\\Users\\ikenn\\OneDrive - University of Massachusetts\\Documents - FFRC_TPO\\2021 Mill Data (SY2022)\\SURVEY OUTPUT\\TELEFORM CSV AND OVERFLOW SHEETS\\2021_TPO_Mail_Phone_3.28.23.CSV")
RL <- RL %>%
State_Code_Conversion() %>%
distinct(TPOID, .keep_all = TRUE)
RLTest <- RL %>%
rename(Status = "RESPONSE", Returned = "Return Date") %>%
filter(!Status %in% c("DISMANTLED", "OOB", "IDLE", "REFUSAL")) %>%
filter(Administrator %in% c("FFRC", "WI_DNR", "VT State", "MN State")) %>%
mutate(Returned = if_else(is.na(Returned), "No", "Yes")) %>%
select(c(TPOID, Administrator, Status, FULL_NAME, MILL_STATE, Returned)) %>%
filter(MILL_STATE != "MI")
RLFFRC <- RLTest
Sample <- Sample %>%
select(c(TPOID, TOT_MCF, MILL_TYPE_CD)) %>%
mutate(TPOID = ifelse(startsWith(TPOID, "9"), paste0("0", TPOID), TPOID)) %>%
filter(!is.na(TPOID)) %>%
distinct(TPOID, .keep_all = TRUE)
RLTest <- RLTest %>%
left_join(Sample, by = 'TPOID')
RLTest$Returned <- as.factor(RLTest$Returned)
RLTest$Status <- as.factor(RLTest$Status)
RLSummary <- RLTest %>%
group_by(Returned, MILL_STATE) %>%
summarize(AvgVol = mean(log10(TOT_MCF), na.rm = TRUE),
MedVol = median(log10(TOT_MCF), na.rm = TRUE),
Count = n())
RLTest %>%
filter(MILL_STATE != "MI") %>%
ggplot(aes(Returned, log10(TOT_MCF))) +
geom_boxplot(outlier.shape = NA) +
#geom_violin(draw_quantiles = .5) +
geom_jitter(width = .25,size = 2, alpha = .5) +
scale_y_continuous(breaks = c(seq(-2,5,.25)), limits = c(-2,5)) +
labs(title = "Mean log10(MCF) by Return Status", y = "log10(MCF)",
x = "Returned?") +
theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
theme(axis.title = element_text(family = 'serif', size = 45),
axis.text = element_text(family = 'serif', size = 40),
legend.position = "none",
panel.grid.major.x = element_blank())
RLSummary %>%
filter(MILL_STATE != "MI") %>%
ggplot(aes(Returned, AvgVol, fill = MILL_STATE)) +
geom_col() +
scale_y_continuous(limits = c(-1,3), breaks = seq(-1,3,1)) +
theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
geom_text(aes(label = as.character(Count)), size = 10, nudge_y = .1) +
facet_wrap(~MILL_STATE, nrow = 4) +
theme(panel.grid.major.x = element_blank(), legend.position="none",
axis.title = element_text(family = 'serif', size = 45),
axis.text = element_text(family = 'serif', size = 40)) +
labs(title = "Mean log10(MCF) by Return Status & State") +
ylab("Mean log10(MCF)") +
xlab("State")
RLTest %>%
filter(MILL_STATE != "MI") %>%
filter(MILL_TYPE_CD != 70) %>%
filter(!is.na(MILL_TYPE_CD)) %>%
ggplot(aes(Returned, log10(TOT_MCF))) +
geom_boxplot(outlier.shape = NA) +
#geom_violin(draw_quantiles = .5) +
geom_jitter(width = .25,size = 1.75, alpha = .5) +
scale_y_continuous(breaks = c(seq(-2,5,1)), limits = c(-2,5)) +
labs(title = "Mean log10(MCF) Return Status & Mill Type", y = "log10(MCF)",
x = "Returned?") +
theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
theme(axis.title = element_text(family = 'serif', size = 45),
axis.text = element_text(family = 'serif', size = 40),
legend.position = "none",
panel.grid.major.x = element_blank()) +
facet_wrap(~MILL_TYPE_CD, nrow = 2)
RLTest %>%
filter(MILL_STATE != "MI") %>%
filter(MILL_TYPE_CD != 70) %>%
filter(!is.na(MILL_TYPE_CD)) %>%
ggplot(aes(Returned, log10(TOT_MCF))) +
geom_boxplot(outlier.shape = NA) +
#geom_violin(draw_quantiles = .5) +
geom_jitter(width = .25,size = 1.75, alpha = .5) +
scale_y_continuous(breaks = c(seq(-2,5,1)), limits = c(-2,5)) +
labs(title = "Mean log10(MCF) by Return Status & State", y = "log10(MCF)",
x = "Returned?") +
theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
theme(axis.title = element_text(family = 'serif', size = 45),
axis.text = element_text(family = 'serif', size = 40),
legend.position = "none",
panel.grid.major.x = element_blank()) +
facet_wrap(~MILL_STATE, nrow = 2)
RLTest %>%
filter(MILL_STATE != "MI") %>%
ggplot(aes(Returned, log10(TOT_MCF))) +
geom_boxplot(outlier.shape = NA) +
#geom_violin(draw_quantiles = .5) +
geom_jitter(width = .25,size = 2, alpha = .5) +
scale_y_continuous(breaks = c(seq(-2,5,.25)), limits = c(-2,5)) +
labs(title = "Mean log10(MCF) by Return Status", y = "log10(MCF)",
x = "Returned?") +
theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
theme(axis.title = element_text(family = 'serif', size = 45),
axis.text = element_text(family = 'serif', size = 40),
legend.position = "none",
panel.grid.major.x = element_blank())
RLTotals <- RLTest %>%
group_by(Returned) %>%
summarize(MeanVol = mean(TOT_MCF, na.rm = TRUE))
RLTotals %>%
ggplot(aes(Returned, MeanVol)) +
geom_point(size = 12) +
scale_y_continuous(breaks = c(seq(600,1400, 100)), limits = c(600,1400)) +
labs(title = "Mean MCF by Return Status", y = "MCF",
x = "Returned?") +
theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
theme(axis.title = element_text(family = 'serif', size = 45),
axis.text = element_text(family = 'serif', size = 40),
legend.position = "none",
panel.grid.major.x = element_blank())
# RLTest %>%
# filter(MILL_STATE != "MI") %>%
# ggplot(aes(Returned, log10(TOT_MCF))) +
# geom_violin(alpha = .5, draw_quantiles = c(.25, .5, .75)) +
# scale_y_continuous(breaks = c(seq(-2,5,.5)), limits = c(-2,5)) +
# labs(title = "Log(10) MCF by State & Return Status", y = "Log(10) MCF",
# subtitle = "Lines within plots refer to 25th, 50th, & 75th quantiles.",
# x = "Returned?") +
# theme_fivethirtyeight(base_size = 10, base_family = 'serif') +
# theme(axis.title = element_text(family = 'serif', size = 10),
# legend.position = "none",
# panel.grid.major.x = element_blank()) +
# labs(title = "Log(10) MCF by Mill Type", y = "Log(10) MCF",
# subtitle = "Lines within plots refer to 25th, 50th, & 75th quantiles.",
# x = "Returned?") +
# facet_wrap(~MILL_STATE)
# TestPlot %>%
# annotate_figure(fig.lab = paste0("Median - Log10(MCF): ", round(RLSummary$MedVol[1])), fig.lab.pos = "bottom.left", fig.lab.size = 8) %>%
# annotate_figure(fig.lab = paste0("Median - Log10(MCF): ", round(RLSummary$MedVol[2])), fig.lab.pos = "bottom.right", fig.lab.size = 8) %>%
# annotate_figure(fig.lab = paste0("Mean - Log10(MCF): ", round(RLSummary$MedVol[1])), fig.lab.pos = "top.left", fig.lab.size = 8) %>%
# annotate_figure(fig.lab = paste0("Mean - Log10(MCF): ", round(RLSummary$MedVol[2])), fig.lab.pos = "top.right", fig.lab.size = 8)
RL2021 <- RLTest
RROverall <- as.numeric(sum(RL2021$Returned == "Yes"))/nrow(RL2021)
RRbyState <- RL2021 %>%
group_by(MILL_STATE) %>%
summarize(Yes = sum(Returned == "Yes"),
No = sum(Returned == "No"),
Total = Yes+No,
Response_Rate = Yes/(Yes + No)) %>%
rename(State = MILL_STATE)
ggplot(RRbyState, aes(reorder(State, -Response_Rate), Response_Rate)) +
geom_col() +
geom_text(aes(label = paste0("SS: ", Total)), size = 10, nudge_y = .02) +
scale_y_continuous(limits = c(0,1.05), breaks = c(seq(0,1,.1))) +
theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
theme(axis.title = element_text(family = 'serif', size = 45),
axis.text = element_text(family = 'serif', size = 40),
legend.key = element_blank()) +
ylab('Response Rate') +
xlab('State') +
labs(title = "Qualified Response Rate by State", subtitle = "Idle, OOB, & Dismantled Mills Not Included, SS = Sample Size")
# A tibble: 23 × 5
State Yes No Total Response_Rate
<chr> <int> <int> <int> <dbl>
1 ME 54 9 63 0.857
2 MN 74 28 102 0.725
3 VT 16 7 23 0.696
4 MD 15 7 22 0.682
5 WV 21 10 31 0.677
6 ND 2 1 3 0.667
7 SD 9 5 14 0.643
8 WI 57 36 93 0.613
9 NY 27 23 50 0.54
10 PA 83 71 154 0.539
11 MO 59 57 116 0.509
12 KS 8 8 16 0.5
13 NH 9 9 18 0.5
14 RI 2 2 4 0.5
15 NE 10 11 21 0.476
16 MA 16 18 34 0.471
17 IN 25 32 57 0.439
18 DE 7 9 16 0.438
19 CT 8 11 19 0.421
20 OH 28 40 68 0.412
21 IA 15 22 37 0.405
22 IL 14 38 52 0.269
23 NJ 3 12 15 0.2
# Rest <- RLFFRC %>%
# filter(!MILL_STATE %in% c("VT", "MN"))
# group_by('Status') %>%
# summarize(Yes = sum(Returned == "Yes"),
# No = sum(Returned == "No"))
TPO2021 <- read.xlsx("C:\\Users\\ikenn\\OneDrive - University of Massachusetts\\Documents - FFRC_TPO\\2021 Mill Data (SY2022)\\SURVEY OUTPUT\\TPO_ReturnLogs_2021.xlsx", sheet = 2)
TPO2022 <- read.xlsx("C:\\Users\\ikenn\\OneDrive - University of Massachusetts\\Documents - FFRC_TPO\\2022 Mill Data (SY2023)\\TPO2022_ReturnLogs.xlsx", sheet = 2)
Sample <- read_xlsx("C:\\Users\\ikenn\\OneDrive - University of Massachusetts\\Documents - FFRC_TPO\\2021 Mill Data (SY2022)\\IMPLEMENTATION\\NR_CALLS\\SampleFrame_2021_Updated.xlsx")
Sample <- Sample %>%
select(TPOID, TOT_MCF)
TPO2021 <- TPO2021 %>%
filter(!is.na(TPOID)) %>%
left_join(Sample, by = 'TPOID')
TPO2022 <- TPO2022 %>%
filter(!is.na(TPOID)) %>%
distinct(TPOID, .keep_all = TRUE) %>%
mutate(TPOID = str_replace(TPOID, "2022", "2021")) %>%
left_join(Sample, by = 'TPOID') %>%
mutate(IN_2021 = ifelse(TPOID %in% TPO2021$TPOID, 1, 0))
TPO2022 <- TPO2022[-884,]
In2021_2022 <- TPO2022 %>%
filter(IN_2021 == 1)
NotIn2021 <- TPO2022 %>%
filter(IN_2021 == 0)
Percent_BothYears <- sum(TPO2022$IN_2021)/nrow(TPO2022)
Vol_BothYears <- mean(In2021_2022$TOT_MCF, na.rm = TRUE)
Vol_2022Only <- mean(NotIn2021$TOT_MCF, na.rm = TRUE)
TPO2022 <- TPO2022 %>%
State_Code_Conversion()
TPO2022$MILL_STATE <- as.factor(TPO2022$MILL_STATE)
Test <- TPO2022 %>%
group_by(MILL_STATE) %>%
summarize(InBoth = sum(IN_2021),
Count = n(),
Percent = (InBoth/Count)*100)
Test2 <- TPO2022 %>%
filter(IN_2021 == 1) %>%
group_by(MILL_STATE) %>%
summarize(MeanVol = mean(log10(TOT_MCF), na.rm = TRUE))
Test3 <- TPO2022 %>%
filter(IN_2021 == 0) %>%
group_by(MILL_STATE) %>%
summarize(MeanVol = mean(log10(TOT_MCF), na.rm = TRUE))
Test <- Test %>%
left_join(Test2, by = 'MILL_STATE') %>%
left_join(Test3, by = 'MILL_STATE')
Test <- Test %>%
rename(Both = MeanVol.x, Only2022 = MeanVol.y) %>%
mutate(Diff = Both - Only2022)
ggplot(Test, aes(fct_reorder(MILL_STATE, -Percent), Percent)) +
geom_col() +
scale_y_continuous(limits = c(0,102), breaks = seq(0,100,10)) +
theme_fivethirtyeight(base_size = 30, base_family = 'serif') +
geom_text(aes(label = paste0("SS: ", as.character(Count))), size = 10, nudge_y = 1) +
theme(panel.grid.major.x = element_blank(), legend.position = "none",
axis.title = element_text(family = 'serif', size = 45),
axis.text = element_text(family = 'serif', size = 40)) +
labs(title = "Sample Prop 2021/2022") +
ylab("Percent in both 2021/2022") +
xlab("State")
Test %>%
filter(!is.na(Diff)) %>%
ggplot(aes(fct_reorder(MILL_STATE, -Diff), Diff)) +
geom_col() +
scale_y_continuous(limits = c(0,4), breaks = seq(0,4,.25)) +
theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
theme(panel.grid.major.x = element_blank(), legend.position = "none",
axis.title = element_text(family = 'serif', size = 45),
axis.text = element_text(family = 'serif', size = 40)) +
geom_text(aes(label = as.character(round(Diff, 2))), size = 10, nudge_y = .04) +
labs(title = "Diff. in Mean log10(MCF) for Mills In-Sample for 2021 & 2022 vs Mills In-Sample only for 2022") +
ylab("Mean log10(MCF)") +
xlab("State")
Test %>%
filter(!is.na(Diff)) %>%
ggplot(aes(fct_reorder(MILL_STATE, -Diff), Both)) +
geom_col() +
scale_y_continuous(limits = c(0,4), breaks = seq(0,4,.25)) +
theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
theme(panel.grid.major.x = element_blank(), legend.position = "none",
axis.title = element_text(family = 'serif', size = 45),
axis.text = element_text(family = 'serif', size = 40)) +
geom_text(aes(label = as.character(round(Both, 2))), size = 12, nudge_y = .04) +
labs(title = "Mean log10(MCF) - Mills In-Sample for 2022 & 2021") +
ylab("Mean log10(MCF)") +
xlab("State")
Test %>%
filter(!is.na(Diff)) %>%
ggplot(aes(fct_reorder(MILL_STATE, -Diff), Only2022)) +
geom_col() +
scale_y_continuous(limits = c(0,4), breaks = seq(0,4,.25)) +
theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
theme(panel.grid.major.x = element_blank(), legend.position = "none",
axis.title = element_text(family = 'serif', size = 45),
axis.text = element_text(family = 'serif', size = 40)) +
geom_text(aes(label = as.character(round(Only2022, 2))), size = 12, nudge_y = .04) +
labs(title = "Mean log10(MCF) - Mills In-Sample for only 2022") +
ylab("Mean log10(MCF)") +
xlab("State")
[1] 47053.46
VolumeComparison <- data.frame(Volume = c(Vol_BothYears, Vol_2022Only),
SampleInclusion = c("2021 & 2022", "2022 Only"))
VolumeComparison %>%
ggplot(aes(SampleInclusion, Volume)) +
geom_col() +
scale_y_continuous(limits = c(0, 1200), breaks = seq(0, 1200, 200)) +
theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
theme(panel.grid.major.x = element_blank(), legend.position="none", axis.title = element_text(family = 'serif', size = 45)) +
geom_text(aes(label = as.character(round(VolumeComparison$Volume, 0))), size = 16, nudge_y = 15) +
labs(title = "Mean MCF by Sample Inclusion") +
ylab("Mean MCF") +
xlab("Sampled in?")
TPO2021 <- read_csv("C:\\Users\\ikenn\\OneDrive - University of Massachusetts\\Documents - FFRC_TPO\\2021 Mill Data (SY2022)\\SURVEY OUTPUT\\TELEFORM CSV AND OVERFLOW SHEETS\\2021_TPO_Mail_Phone_3.28.23.CSV")
EmployeeInfo <- TPO2021 %>%
select(TPOID, NBR_EMPLOYEES_ALL)
EmployeeInfo <- EmployeeInfo %>%
distinct(TPOID, .keep_all = TRUE) %>%
left_join(RLTest, by = 'TPOID')
EmployeeInfo %>%
ggplot(aes(NBR_EMPLOYEES_ALL, log10(TOT_MCF))) +
geom_point() +
geom_smooth(se = FALSE) +
labs(title = "Log10(MCF) by # of Employees for 2021 Respondents", y = "log10(MCF)",
x = "# of Employees") +
scale_y_continuous(limits = c(-1, 5), breaks = seq(-1, 5, .5)) +
scale_x_continuous(limits = c(0, 300), breaks = seq(0,300, 25)) +
theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
theme(axis.title = element_text(family = 'serif', size = 45),
axis.text = element_text(family = 'serif', size = 40),
legend.position = "none")
Sample <- read_xlsx("C:\\Users\\ikenn\\OneDrive - University of Massachusetts\\Documents - FFRC_TPO\\2021 Mill Data (SY2022)\\IMPLEMENTATION\\NR_CALLS\\SampleFrame_2021_Updated.xlsx")
Sample <- Sample %>%
group_by(sample) %>%
summarize(MeanMCF = mean(TOT_MCF),
MedianMCF = median(TOT_MCF)) %>%
mutate(sample = ifelse(sample == 1, "In Sample", "Not In Sample")) %>%
rename('Sample Status - 2022' = 'sample')
Sample %>%
ggplot(aes(`Sample Status - 2022`, MeanMCF)) +
geom_col() +
scale_y_continuous(limits = c(0, 1600), breaks = seq(0, 1600, 200)) +
theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
theme(axis.title = element_text(family = 'serif', size = 45),
axis.text = element_text(family = 'serif', size = 40),
legend.position = "none") +
geom_text(aes(label = as.character(round(MeanMCF, 0))), size = 12, nudge_y = 50) +
labs(title = "Mean MCF by Sample Inclusion") +
ylab("Mean MCF")
Sample %>%
ggplot(aes(`Sample Status - 2022`, MedianMCF)) +
geom_col() +
scale_y_continuous(limits = c(0, 350), breaks = seq(0, 350, 50)) +
theme_fivethirtyeight(base_size = 50, base_family = 'serif') +
theme(axis.title = element_text(family = 'serif', size = 45),
axis.text = element_text(family = 'serif', size = 40),
legend.position = "none") +
geom_text(aes(label = as.character(round(MedianMCF, 0))), size = 12, nudge_y = 20) +
labs(title = "Median MCF by Sample Inclusion") +
ylab("Median MCF")