#Packages
library(tidyverse)
library(readxl)
library(dplyr)
library(ggplot2)
library(ggalluvial)
library(naniar)
library(knitr)
library(kableExtra)
library(webshot2)
#Functions for graphs
plot_demo_bar <- function(data, var, xlab) {
ggplot(data, aes(x = {{ var }},
y = after_stat(..count.. / sum(..count..)),
fill = {{ var }})) +
geom_bar(color="black") +
scale_y_continuous(expand = c(0, 0),
name = "Percent",
labels = function(x) paste0(round(100 * x / sum(x), 1),"%"))+
scale_fill_brewer(palette = "Pastel4") +
labs(x = xlab) +
theme_minimal(base_size = 12) +
theme(axis.text.x = element_text(angle = 30, hjust = 1))
}
plot_demo_pie_legend <- function(data, var, title = NULL) {
df <- data %>%
count({{ var }}) %>%
mutate(pct = round(n / sum(n) * 100, 1),
legend_label = paste0({{ var }}, " (", pct, "%)"))
ggplot(df, aes(x = "", y = n, fill = legend_label)) +
geom_col(width = 1, color = "white") +
coord_polar(theta = "y") +
theme_void() +
labs(title = title, fill = "") +
theme(
plot.title = element_text(hjust = 0.5),
legend.position = "right" )
}
#Importing the Data
Pesticide<- read_excel("Pesticide data.xlsx")
## New names:
## • `Q32...205` -> `Q32...190`
## • `Q32...207` -> `Q32...192`
#Evaluating the Missingness
dim(Pesticide)
## [1] 634 235
nrow(Pesticide)
## [1] 634
ncol(Pesticide)
## [1] 235
length(Pesticide) * nrow(Pesticide) #Total
## [1] 148990
sum(is.na(Pesticide)) # Total Missing
## [1] 0
(115364/158500)*100
## [1] 72.78486
data_summary <- data.frame(
Total = nrow(Pesticide),
Missing = colSums(is.na(Pesticide)),
NonMissing = colSums(!is.na(Pesticide))
)
data_summary
##Columns that are completely deleted due to privacy or not needed
Pesticide$IPAddress<- NULL
Pesticide$Status<- NULL
Pesticide$RecipientLastName<- NULL
Pesticide$RecipientFirstName<- NULL
Pesticide$RecipientEmail<- NULL
Pesticide$ExternalReference<- NULL
Pesticide$LocationLatitude<- NULL
Pesticide$LocationLongitude<- NULL
Pesticide$StartDate<- NULL
Pesticide$EndDate<- NULL
Pesticide$Progress<- NULL
Pesticide$`Duration (in seconds)`<- NULL
Pesticide$Finished<- NULL
Pesticide$RecordedDate<- NULL
Pesticide_Clean<- read.csv("cleaned_Pesticide.csv")
Pesticide_Clean<- Pesticide_Clean[-1,]
#Qustion 1
Pesticide_License_Type<- Pesticide_Clean$Q2
Pesticide_License_Type_NA<- ifelse(is.na(Pesticide_License_Type), "Missing", as.character(Pesticide_License_Type))
recode for bar chart
Pesticide_License_Type <- as.data.frame(Pesticide_License_Type)
Pesticide_License_Type_recode <- Pesticide_License_Type %>%
mutate(
Applicator = recode(Pesticide_License_Type,
"1" = "Private Applicator",
"2" = "Commerical Applicator",
"3" = "Commerical Applicator with Contractor License")
)
Pesticide_License_Type_recode <- Pesticide_License_Type_recode %>% drop_na()
ggplot(data = Pesticide_License_Type_recode, aes(x = Applicator, fill = Applicator)) +
geom_bar() +
geom_text(stat = "count", aes(label = after_stat(count)),
vjust = -0.3, size = 4)+
scale_fill_manual(
values = c("steelblue", "darkorchid", "deeppink2", "darkred")
) +
labs(x = "License Type", y = "Count", fill = "License Type", title = "Counts of Type of Pesticide Applicator") +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank())
Pesticide_License_Type <- Pesticide_License_Type %>% drop_na()
applicator <- plot_demo_pie_legend(data=Pesticide_License_Type_recode, Applicator, "Type of Pesticide Applicator")
ggsave("~/Desktop/UGA/Fall 2025/STAT 5010W/Project/Charts/applicator_piechart.png", applicator)
## Saving 7 x 5 in image
length(Pesticide_License_Type) #Total Responses
## [1] 1
table(Pesticide_License_Type, useNA = "ifany")
## Pesticide_License_Type
## 1 2 3
## 556 38 3
prop.table(table(Pesticide_License_Type))*100 #Without Missing Data
## Pesticide_License_Type
## 1 2 3
## 93.1323283 6.3651591 0.5025126
prop.table(table(Pesticide_License_Type, useNA = "ifany"))*100 #With Missing Data
## Pesticide_License_Type
## 1 2 3
## 93.1323283 6.3651591 0.5025126
#Question 2
counts_Q2<- colSums(Pesticide_Clean[, c("Q3_1", "Q3_2", "Q3_3", "Q3_4", "Q3_5", "Q3_6", "Q3_7", "Q3_8", "Q3_9")] == 1, na.rm = TRUE)
table_Q2<- data.frame(
option = names(counts_Q2),
count = counts_Q2,
percent = round(counts_Q2/ sum(counts_Q2) *100, 1)
)
table_Q2
These are the percentages for the respondents.
sum(rowSums(is.na(Pesticide_Clean[, c("Q3_1", "Q3_2", "Q3_3", "Q3_4", "Q3_5", "Q3_6", "Q3_7", "Q3_8", "Q3_9")])) == 9)
## [1] 36
(36/633) *100 # Percent Missing
## [1] 5.687204
#Question 3
Q2_reg<- Pesticide_Clean[, c("Q3_1", "Q3_2", "Q3_3", "Q3_4", "Q3_5", "Q3_6", "Q3_7", "Q3_8", "Q3_9")]
Q3_reg<- Pesticide_Clean[, c("Q4_1", "Q4_2", "Q4_3", "Q4_4", "Q4_5", "Q4_6", "Q4_7", "Q4_8", "Q4_9", "Q4_10", "Q4_11")]
Q2<- Pesticide_Clean[, c("Q3_1", "Q3_2", "Q3_3", "Q3_4", "Q3_5", "Q3_6", "Q3_7", "Q3_8", "Q3_9")]
Q3<- Pesticide_Clean[, c("Q4_1", "Q4_2", "Q4_3", "Q4_4", "Q4_5", "Q4_6", "Q4_7", "Q4_8", "Q4_9", "Q4_10", "Q4_11")]
Q2[is.na(Q2)] <- 0
Q3[is.na(Q3)] <- 0
skipLogic_Q3<- list(
Q4_1 = Q2$Q3_1 == 1 | Q2$Q3_2 == 1,
Q4_2 = Q2$Q3_4 == 1,
Q4_3 = Q2$Q3_5 == 1,
Q4_4 = Q2$Q3_3 == 1,
Q4_5 = Q2$Q3_2 == 1,
Q4_6 = Q2$Q3_6 == 1,
Q4_7 = Q2$Q3_7 == 1,
Q4_8 = Q2$Q3_1 == 1 | Q2$Q3_2 == 1,
Q4_9 = Q2$Q3_8 == 1,
Q4_10 = Q2$Q3_1 == 1 | Q2$Q3_2 == 1,
Q4_11 = Q2$Q3_8 == 1
)
Q4_table_title<- c(Q4_1 = "Plant Ag",
Q4_2 = "Animal Ag",
Q4_3 = "Forestry",
Q4_4 = "Ornamental/Turf",
Q4_5 = "Seed Treatment",
Q4_6 = "Aquatic",
Q4_7 = "ROW",
Q4_8 = "Aerial Equipment",
Q4_9 = "Pest Control",
Q4_10 = "Ag Commodity Fumigation",
Q4_11 = "Mosquito Control")
result_Q3<- data.frame(
Option = names(skipLogic_Q3),
Eligible = sapply(skipLogic_Q3, sum),
Selected = sapply(names(skipLogic_Q3), function(col) {
skip<- skipLogic_Q3[[col]]
sum(Q3[skip, col] == 1, na.rm = TRUE)
}
)
)
result_Q3$Percentage<- round(result_Q3$Selected / result_Q3$Eligible *100, 1)
result_Q3$Option<- Q4_table_title[result_Q3$Option]
results_Q3_table_ready<- result_Q3 %>%
rename(
"Pesticide License" = Option,
"Eligible Participants" = Eligible,
"Selected Participants" = Selected,
"Percentage Selected (%)" = Percentage
)
kable(results_Q3_table_ready,
caption = "Survey Question 3 Eligibility and Selection Summary",
row.names = FALSE)
| Pesticide License | Eligible Participants | Selected Participants | Percentage Selected (%) |
|---|---|---|---|
| Plant Ag | 360 | 331 | 91.9 |
| Animal Ag | 189 | 138 | 73.0 |
| Forestry | 136 | 101 | 74.3 |
| Ornamental/Turf | 87 | 67 | 77.0 |
| Seed Treatment | 211 | 26 | 12.3 |
| Aquatic | 24 | 14 | 58.3 |
| ROW | 26 | 15 | 57.7 |
| Aerial Equipment | 360 | 4 | 1.1 |
| Pest Control | 109 | 14 | 12.8 |
| Ag Commodity Fumigation | 360 | 18 | 5.0 |
| Mosquito Control | 109 | 17 | 15.6 |
#Q2 & Q3 Alluvial Plot:
Q2_Q3_Skip_Labels <- c(
Q3_1 = "Fruits/Vegetables",
Q3_2 = "Row Crops",
Q3_3 = "Green Industry",
Q3_4 = "Livestock",
Q3_5 = "Forestry",
Q3_6 = "Aquatic",
Q3_7 = "Utility/ROW",
Q3_8 = "Pest Management",
Q3_9 = "Other Industry",
Q4_1 = "Plant Ag",
Q4_2 = "Animal Ag",
Q4_3 = "Forestry",
Q4_4 = "Ornamental/Turf",
Q4_5 = "Seed Treatment",
Q4_6 = "Aquatic",
Q4_7 = "ROW",
Q4_8 = "Aerial Equipment",
Q4_9 = "Pest Control",
Q4_10 = "Ag Commodity Fumigation",
Q4_11 = "Mosquito Control"
)
Q2_long<- Q2 %>%
mutate(ID = 1:n()) %>%
pivot_longer(
cols = starts_with("Q3_"),
names_to = ("Q2_option"),
values_to = "selected"
) %>%
filter(selected == 1)
Q3_long<- Q3 %>%
mutate(ID = 1:n()) %>%
pivot_longer(
cols = starts_with("Q4_"),
names_to = ("Q3_option"),
values_to = "selected"
) %>%
filter(selected == 1)
long_format_merge_2_3<- merge(Q2_long[, c("ID", "Q2_option")],
Q3_long[, c("ID", "Q3_option")],
by = "ID")
long_format_merge_2_3$Q2_option<- Q2_Q3_Skip_Labels[long_format_merge_2_3$Q2_option]
long_format_merge_2_3$Q3_option<- Q2_Q3_Skip_Labels[long_format_merge_2_3$Q3_option]
Q2_colors<- c(
"Fruits/Vegetables" = "cornsilk1",
"Row Crops" = "lemonchiffon3",
"Green Industry" ="mediumspringgreen",
"Livestock" ="sienna2",
"Forestry" ="palegreen3",
"Aquatic" ="steelblue2",
"Utility/ROW" ="orchid2",
"Pest Management" ="lightpink",
"Other Industry" ="mediumpurple3"
)
alluvial <- ggplot(long_format_merge_2_3, aes(axis1 = Q2_option, axis2 = Q3_option)) +
geom_alluvium(aes(fill = Q2_option), alpha = 0.9, width = 1/10) +
geom_stratum(width = 1/10, fill= "grey95", color="black") +
geom_label(stat = "stratum", aes(label = after_stat(stratum)), size = 3) +
scale_fill_manual(values = Q2_colors,
name = "Industry Options") +
scale_x_discrete(limits = c("Survey Q2", "Survey Q3")) +
theme_void(base_size = (12))
ggsave("~/Desktop/UGA/Fall 2025/STAT 5010W/Project/Charts/alluvial.png", alluvial)
## Saving 7 x 5 in image
#Question 4 Need Info
#Question 5 Need Info
#Question 6
Q6_data <- as.data.frame(Pesticide_Clean[,186])
colnames(Q6_data) <- c("Employees")
Q6_data$Employees <- as.character(Q6_data$Employees)
Q6_data <- Q6_data %>%
mutate(
Employees = recode(as.character(Employees),
"1" = "Sole operator",
"2" = "Between 1 and 10 employees",
"3" = "Between 11 and 24 employees",
"4" = "Between 25 and 50 employees",
"5" = "Between 51 and 75 employees",
"6" = "Between 76 and 100 employees",
"7" = "More than 100 employees")) %>%
filter(!is.na(Employees))
Q6_table <- Q6_data %>%
count(Employees, name = "Count") %>%
mutate(
Percent = round(100 * Count / sum(Count), 1),
Percent = paste0(Percent, "%")
)
Q6_table %>%
kable(
caption = "Survey Question 6: Number of Employees",
col.names = c("Number of Employees", "Count", "Percent"),
align = c("l", "c", "c"),
booktabs = T
) %>%
kable_styling(
full_width = F,
bootstrap_options = c("striped", "hover", "condensed")
) %>%
save_kable("~/Desktop/UGA/Fall 2025/STAT 5010W/Project/Charts/employees.png")
## file:////private/var/folders/zy/qjf3c90x4slfz39f8vt6rz1c0000gn/T/RtmpbvvQ5b/employeesa7ea3a561b71.html screenshot completed
## save_kable will have the best result with magick installed.
#Question 7
Q7_data <- as.data.frame(Pesticide_Clean[,187])
colnames(Q7_data) <- c("Years")
Q7_data$Years <- as.character(Q7_data$Years)
Q7_data <- Q7_data %>%
mutate(
Years = recode(as.character(Years),
"1" = "Less than 1 year",
"2" = "Between 1 to 5 years",
"3" = "Between 6 to 10 years",
"4" = "Between 11 to 15 years",
"5" = "Between 16 to 20 years",
"6" = "More than 20 years")) %>%
filter(!is.na(Years))
Q7_table <- Q7_data %>%
count(Years, name = "Count") %>%
mutate(
Percent = round(100 * Count / sum(Count), 1),
Percent = paste0(Percent, "%")
)
Q7_table %>%
kable(
caption = "Survey Question 7: How long have you held a license?",
col.names = c("Years Licensed", "Count ", "Percent"),
align = c("l", "c", "c"),
booktabs = T
) %>%
kable_styling(
full_width = F,
bootstrap_options = c("striped", "hover", "condensed")
) %>%
save_kable("~/Desktop/UGA/Fall 2025/STAT 5010W/Project/Charts/licenses_year.png")
## file:////private/var/folders/zy/qjf3c90x4slfz39f8vt6rz1c0000gn/T/RtmpbvvQ5b/licenses_yeara7ea180e9d95.html screenshot completed
## save_kable will have the best result with magick installed.
#Question 8
#Demographics
demo_data <- Pesticide_Clean[,231:235]
colnames(demo_data) = c("Gender", "Race", "Ethnicity", "Age", "Education_Level")
demo_data <- demo_data %>% mutate(across(everything(), as.character))
demo_data <- demo_data %>%
mutate(
Gender = recode(as.character(Gender),
"1" = "Male",
"2" = "Female",
"3" = "Prefer not to answer"),
Race = recode(as.character(Race),
"1" = "American Indian or Alaska Native",
"2" = "Asian",
"3" = "Black/African American",
"4" = "Native Hawaiian or Other Pacific Islander",
"5" = "White",
"6" = "More than one race",
"7" = "Other"),
Ethnicity = recode(as.character(Ethnicity),
"1" = "Hispanic/Latino",
"2" = "Not Hispanic/Latino"),
Age = recode(as.character(Age),
"9" = "Under 18",
"10" = "18–24",
"11" = "25–34",
"12" = "35–44",
"13" = "45–54",
"14" = "55–64",
"15" = "65–74",
"16" = "75–84",
"17" = "85+"),
Education_Level = recode(as.character(Education_Level),
"1" = "Less than 9th grade",
"2" = "9th to 12th grade, no diploma",
"3" = "High school graduate or GED",
"4" = "Some college, no degree",
"5" = "Associate’s degree",
"6" = "Bachelor’s degree",
"7" = "Graduate or professional degree"),
)
#Filtering Data
demo_gender <- demo_data[,1, drop=F] %>%
filter(!is.na(Gender))
demo_race <- demo_data[,2, drop=F] %>%
filter(!is.na(Race))
demo_ethnicity <- demo_data[,3, drop=F] %>%
filter(!is.na(Ethnicity))
demo_age <- demo_data[,4, drop=F] %>%
filter(!is.na(Age))
demo_edu <- demo_data[,5, drop=F] %>%
filter(!is.na(Education_Level))
#Functions for graphs
plot_demo_bar <- function(data, var, xlab) {
ggplot(data, aes(x = {{ var }},
y = after_stat(..count.. / sum(..count..)),
fill = {{ var }})) +
geom_bar(color="black") +
scale_y_continuous(expand = c(0, 0),
name = "Percent",
labels = function(x) paste0(round(100 * x / sum(x), 1),"%"))+
scale_fill_brewer(palette = "Pastel4") +
labs(x = xlab) +
theme_minimal(base_size = 12) +
theme(axis.text.x = element_text(angle = 30, hjust = 1))
}
plot_demo_pie_legend <- function(data, var, title = NULL) {
df <- data %>%
count({{ var }}) %>%
mutate(pct = round(n / sum(n) * 100, 1),
legend_label = paste0({{ var }}, " (", pct, "%)"))
ggplot(df, aes(x = "", y = n, fill = legend_label)) +
geom_col(width = 1, color = "white") +
coord_polar(theta = "y") +
theme_void() +
labs(title = title, fill = "") +
theme(
plot.title = element_text(hjust = 0.5),
legend.position = "right" )
}
# Gender
plot_demo_pie_legend(demo_data, Gender, "Gender")
# Race
plot_demo_pie_legend(demo_data, Race, "Distribution of Race")
# Ethnicity
plot_demo_pie_legend(demo_data, Ethnicity, "Ethnicity")
# Age (categories)
plot_demo_pie_legend(demo_data, Age, "Distribution of Age")
# Education
plot_demo_pie_legend(demo_data, Education_Level, "Distribution of Education Level")
#Taking out NA
# Gender
gender <- plot_demo_bar(demo_gender, Gender, "Gender")
## Warning: Unknown palette: "Pastel4"
ggsave("~/Desktop/UGA/Fall 2025/STAT 5010W/Project/Charts/gender_barchart.png", gender)
## Saving 7 x 5 in image
# Race
race <- plot_demo_pie_legend(demo_race, Race, "Distribution of Race")
ggsave("~/Desktop/UGA/Fall 2025/STAT 5010W/Project/Charts/race_piechart.png", race)
## Saving 7 x 5 in image
# Ethnicity
plot_demo_bar(demo_ethnicity, Ethnicity, "Ethnicity")
## Warning: Unknown palette: "Pastel4"
# Age (categories)
age <- plot_demo_bar(demo_age, Age, "Distribution of Age")
## Warning: Unknown palette: "Pastel4"
ggsave("~/Desktop/UGA/Fall 2025/STAT 5010W/Project/Charts/age_barchart.png", age)
## Saving 7 x 5 in image
# Education
education <- plot_demo_pie_legend(demo_edu, Education_Level, "Distribution of Education Level")
ggsave("~/Desktop/UGA/Fall 2025/STAT 5010W/Project/Charts/education_piechart.png", education)
## Saving 7 x 5 in image