#Packages

library(tidyverse)
library(readxl)
library(dplyr)
library(ggplot2)
library(ggalluvial)
library(naniar)
library(knitr)
library(kableExtra)
library(webshot2)
#Functions for graphs
plot_demo_bar <- function(data, var, xlab) {
  ggplot(data, aes(x = {{ var }}, 
                   y = after_stat(..count.. / sum(..count..)), 
                   fill = {{ var }})) +
    geom_bar(color="black") +
    scale_y_continuous(expand = c(0, 0),
                       name = "Percent",
                       labels = function(x) paste0(round(100 * x / sum(x), 1),"%"))+
    scale_fill_brewer(palette = "Pastel4") +
    labs(x = xlab) +
    theme_minimal(base_size = 12) +
    theme(axis.text.x = element_text(angle = 30, hjust = 1)) 
}


plot_demo_pie_legend <- function(data, var, title = NULL) {
  df <- data %>%
    count({{ var }}) %>%
    mutate(pct = round(n / sum(n) * 100, 1),
           legend_label = paste0({{ var }}, " (", pct, "%)"))
  ggplot(df, aes(x = "", y = n, fill = legend_label)) +
    geom_col(width = 1, color = "white") +
    coord_polar(theta = "y") +
    theme_void() +
    labs(title = title, fill = "") +
    theme(
      plot.title = element_text(hjust = 0.5),
      legend.position = "right" )
}

#Importing the Data

Pesticide<- read_excel("Pesticide data.xlsx")
## New names:
## • `Q32...205` -> `Q32...190`
## • `Q32...207` -> `Q32...192`

#Evaluating the Missingness

dim(Pesticide)     
## [1] 634 235
nrow(Pesticide)
## [1] 634
ncol(Pesticide)
## [1] 235
length(Pesticide) * nrow(Pesticide) #Total
## [1] 148990
sum(is.na(Pesticide)) # Total Missing
## [1] 0
(115364/158500)*100
## [1] 72.78486
data_summary <- data.frame(
  Total = nrow(Pesticide),
  Missing = colSums(is.na(Pesticide)),
  NonMissing = colSums(!is.na(Pesticide))
)
data_summary
##Columns that are completely deleted due to privacy or not needed
Pesticide$IPAddress<- NULL
Pesticide$Status<- NULL
Pesticide$RecipientLastName<- NULL
Pesticide$RecipientFirstName<- NULL
Pesticide$RecipientEmail<- NULL
Pesticide$ExternalReference<- NULL
Pesticide$LocationLatitude<- NULL
Pesticide$LocationLongitude<- NULL
Pesticide$StartDate<- NULL
Pesticide$EndDate<- NULL
Pesticide$Progress<- NULL
Pesticide$`Duration (in seconds)`<- NULL
Pesticide$Finished<- NULL
Pesticide$RecordedDate<- NULL
Pesticide_Clean<- read.csv("cleaned_Pesticide.csv")
Pesticide_Clean<- Pesticide_Clean[-1,]

#Qustion 1

Pesticide_License_Type<- Pesticide_Clean$Q2
Pesticide_License_Type_NA<- ifelse(is.na(Pesticide_License_Type), "Missing", as.character(Pesticide_License_Type))

recode for bar chart

Pesticide_License_Type <- as.data.frame(Pesticide_License_Type)
Pesticide_License_Type_recode <- Pesticide_License_Type %>%
    mutate(
    Applicator = recode(Pesticide_License_Type,
                    "1" = "Private Applicator",
                    "2" = "Commerical Applicator",
                    "3" = "Commerical Applicator with Contractor License")
    )
Pesticide_License_Type_recode <- Pesticide_License_Type_recode %>% drop_na()
ggplot(data = Pesticide_License_Type_recode, aes(x = Applicator, fill = Applicator)) +
  geom_bar() +
  geom_text(stat = "count", aes(label = after_stat(count)), 
          vjust = -0.3, size = 4)+
  scale_fill_manual(
    values = c("steelblue", "darkorchid", "deeppink2", "darkred")
    ) + 
  labs(x = "License Type", y = "Count", fill = "License Type", title = "Counts of Type of Pesticide Applicator") +
  theme(axis.text.x = element_blank(),
      axis.ticks.x = element_blank())

Pesticide_License_Type <- Pesticide_License_Type %>% drop_na()

applicator <- plot_demo_pie_legend(data=Pesticide_License_Type_recode, Applicator, "Type of Pesticide Applicator")

ggsave("~/Desktop/UGA/Fall 2025/STAT 5010W/Project/Charts/applicator_piechart.png", applicator)
## Saving 7 x 5 in image
length(Pesticide_License_Type) #Total Responses
## [1] 1
table(Pesticide_License_Type, useNA = "ifany")
## Pesticide_License_Type
##   1   2   3 
## 556  38   3
prop.table(table(Pesticide_License_Type))*100 #Without Missing Data
## Pesticide_License_Type
##          1          2          3 
## 93.1323283  6.3651591  0.5025126
prop.table(table(Pesticide_License_Type, useNA = "ifany"))*100 #With Missing Data
## Pesticide_License_Type
##          1          2          3 
## 93.1323283  6.3651591  0.5025126

#Question 2

counts_Q2<- colSums(Pesticide_Clean[, c("Q3_1", "Q3_2", "Q3_3", "Q3_4", "Q3_5", "Q3_6", "Q3_7", "Q3_8", "Q3_9")] == 1, na.rm = TRUE)
table_Q2<- data.frame(
  option = names(counts_Q2),
  count = counts_Q2, 
  percent = round(counts_Q2/ sum(counts_Q2) *100, 1)
)
table_Q2

These are the percentages for the respondents.

sum(rowSums(is.na(Pesticide_Clean[, c("Q3_1", "Q3_2", "Q3_3", "Q3_4", "Q3_5", "Q3_6", "Q3_7", "Q3_8", "Q3_9")])) == 9)
## [1] 36
(36/633) *100 # Percent Missing
## [1] 5.687204

#Question 3

Q2_reg<- Pesticide_Clean[, c("Q3_1", "Q3_2", "Q3_3", "Q3_4", "Q3_5", "Q3_6", "Q3_7", "Q3_8", "Q3_9")]

Q3_reg<- Pesticide_Clean[, c("Q4_1", "Q4_2", "Q4_3", "Q4_4", "Q4_5", "Q4_6", "Q4_7", "Q4_8", "Q4_9", "Q4_10", "Q4_11")]

Q2<- Pesticide_Clean[, c("Q3_1", "Q3_2", "Q3_3", "Q3_4", "Q3_5", "Q3_6", "Q3_7", "Q3_8", "Q3_9")]

Q3<- Pesticide_Clean[, c("Q4_1", "Q4_2", "Q4_3", "Q4_4", "Q4_5", "Q4_6", "Q4_7", "Q4_8", "Q4_9", "Q4_10", "Q4_11")]
Q2[is.na(Q2)] <- 0

Q3[is.na(Q3)] <- 0

skipLogic_Q3<- list(
  Q4_1 = Q2$Q3_1 == 1 | Q2$Q3_2 == 1, 
  Q4_2 = Q2$Q3_4 == 1,
  Q4_3 = Q2$Q3_5 == 1,
  Q4_4 = Q2$Q3_3 == 1,
  Q4_5 = Q2$Q3_2 == 1, 
  Q4_6 = Q2$Q3_6 == 1,
  Q4_7 = Q2$Q3_7 == 1, 
  Q4_8 = Q2$Q3_1 == 1 | Q2$Q3_2 == 1,
  Q4_9 = Q2$Q3_8 == 1,
  Q4_10 = Q2$Q3_1 == 1 | Q2$Q3_2 == 1,
  Q4_11 = Q2$Q3_8 == 1
)

Q4_table_title<- c(Q4_1 = "Plant Ag",
  Q4_2 = "Animal Ag",
  Q4_3 = "Forestry",
  Q4_4 = "Ornamental/Turf",
  Q4_5 = "Seed Treatment",
  Q4_6 = "Aquatic",
  Q4_7 = "ROW",
  Q4_8 = "Aerial Equipment",
  Q4_9 = "Pest Control",
  Q4_10 = "Ag Commodity Fumigation",
  Q4_11 = "Mosquito Control")

result_Q3<- data.frame(
  Option = names(skipLogic_Q3),
  Eligible = sapply(skipLogic_Q3, sum),
  Selected = sapply(names(skipLogic_Q3), function(col) {
    skip<- skipLogic_Q3[[col]]
    sum(Q3[skip, col] == 1, na.rm = TRUE)
  }
    )
)

result_Q3$Percentage<- round(result_Q3$Selected / result_Q3$Eligible *100, 1)

result_Q3$Option<- Q4_table_title[result_Q3$Option]
results_Q3_table_ready<- result_Q3 %>%
  rename(
    "Pesticide License" = Option,
    "Eligible Participants" = Eligible,
    "Selected Participants" = Selected,
    "Percentage Selected (%)" = Percentage
  )

kable(results_Q3_table_ready, 
      caption = "Survey Question 3 Eligibility and Selection Summary",
      row.names = FALSE)
Survey Question 3 Eligibility and Selection Summary
Pesticide License Eligible Participants Selected Participants Percentage Selected (%)
Plant Ag 360 331 91.9
Animal Ag 189 138 73.0
Forestry 136 101 74.3
Ornamental/Turf 87 67 77.0
Seed Treatment 211 26 12.3
Aquatic 24 14 58.3
ROW 26 15 57.7
Aerial Equipment 360 4 1.1
Pest Control 109 14 12.8
Ag Commodity Fumigation 360 18 5.0
Mosquito Control 109 17 15.6

#Q2 & Q3 Alluvial Plot:

Q2_Q3_Skip_Labels <- c(
  Q3_1 = "Fruits/Vegetables",
  Q3_2 = "Row Crops",
  Q3_3 = "Green Industry",
  Q3_4 = "Livestock",
  Q3_5 = "Forestry",
  Q3_6 = "Aquatic",
  Q3_7 = "Utility/ROW",
  Q3_8 = "Pest Management",
  Q3_9 = "Other Industry",

  Q4_1 = "Plant Ag",
  Q4_2 = "Animal Ag",
  Q4_3 = "Forestry",
  Q4_4 = "Ornamental/Turf",
  Q4_5 = "Seed Treatment",
  Q4_6 = "Aquatic",
  Q4_7 = "ROW",
  Q4_8 = "Aerial Equipment",
  Q4_9 = "Pest Control",
  Q4_10 = "Ag Commodity Fumigation",
  Q4_11 = "Mosquito Control"
)
Q2_long<- Q2 %>%
  mutate(ID = 1:n()) %>%
  pivot_longer(
    cols = starts_with("Q3_"),
    names_to = ("Q2_option"),
    values_to = "selected"
  ) %>%
  filter(selected == 1)

Q3_long<- Q3 %>%
  mutate(ID = 1:n()) %>%
  pivot_longer(
    cols = starts_with("Q4_"),
    names_to = ("Q3_option"),
    values_to = "selected"
  ) %>%
  filter(selected == 1)
long_format_merge_2_3<- merge(Q2_long[, c("ID", "Q2_option")],
                              Q3_long[, c("ID", "Q3_option")],
                              by = "ID")
long_format_merge_2_3$Q2_option<- Q2_Q3_Skip_Labels[long_format_merge_2_3$Q2_option]
long_format_merge_2_3$Q3_option<- Q2_Q3_Skip_Labels[long_format_merge_2_3$Q3_option]
Q2_colors<- c(
  "Fruits/Vegetables" = "cornsilk1",
  "Row Crops" = "lemonchiffon3",
  "Green Industry" ="mediumspringgreen",
  "Livestock" ="sienna2",
  "Forestry" ="palegreen3",
  "Aquatic" ="steelblue2",
  "Utility/ROW" ="orchid2",
  "Pest Management" ="lightpink",
  "Other Industry" ="mediumpurple3"
)

alluvial <- ggplot(long_format_merge_2_3, aes(axis1 = Q2_option, axis2 = Q3_option)) + 
  geom_alluvium(aes(fill = Q2_option), alpha = 0.9, width = 1/10) +
  geom_stratum(width = 1/10, fill= "grey95", color="black") +
  geom_label(stat = "stratum", aes(label = after_stat(stratum)), size = 3) +
  scale_fill_manual(values = Q2_colors,
                    name = "Industry Options") +
  scale_x_discrete(limits = c("Survey Q2", "Survey Q3")) + 
  theme_void(base_size = (12)) 

ggsave("~/Desktop/UGA/Fall 2025/STAT 5010W/Project/Charts/alluvial.png", alluvial)
## Saving 7 x 5 in image

#Question 4 Need Info

#Question 5 Need Info

#Question 6

Q6_data <- as.data.frame(Pesticide_Clean[,186])

colnames(Q6_data) <- c("Employees")

Q6_data$Employees <- as.character(Q6_data$Employees)

Q6_data <- Q6_data %>%
  mutate(
    Employees = recode(as.character(Employees),
           "1" = "Sole operator",
           "2" = "Between 1 and 10 employees",
           "3" = "Between 11 and 24 employees",
           "4" = "Between 25 and 50 employees",
           "5" = "Between 51 and 75 employees",
           "6" = "Between 76 and 100 employees",
           "7" = "More than 100 employees")) %>%
  filter(!is.na(Employees))
Q6_table <- Q6_data %>%
  count(Employees, name = "Count") %>%
  mutate(
    Percent = round(100 * Count / sum(Count), 1),
    Percent = paste0(Percent, "%")
  )

Q6_table %>%
  kable(
    caption = "Survey Question 6: Number of Employees",
    col.names = c("Number of Employees", "Count", "Percent"),
    align = c("l", "c", "c"),
    booktabs = T
  ) %>%
  kable_styling(
    full_width = F,
    bootstrap_options = c("striped", "hover", "condensed")
  ) %>%
  save_kable("~/Desktop/UGA/Fall 2025/STAT 5010W/Project/Charts/employees.png")
## file:////private/var/folders/zy/qjf3c90x4slfz39f8vt6rz1c0000gn/T/RtmpbvvQ5b/employeesa7ea3a561b71.html screenshot completed
## save_kable will have the best result with magick installed.

#Question 7

Q7_data <- as.data.frame(Pesticide_Clean[,187])

colnames(Q7_data) <- c("Years")

Q7_data$Years <- as.character(Q7_data$Years)

Q7_data <- Q7_data %>%
  mutate(
    Years = recode(as.character(Years),
           "1" = "Less than 1 year",
           "2" = "Between 1 to 5 years",
           "3" = "Between 6 to 10 years",
           "4" = "Between 11 to 15 years",
           "5" = "Between 16 to 20 years",
           "6" = "More than 20 years")) %>%
  filter(!is.na(Years))
Q7_table <- Q7_data %>%
  count(Years, name = "Count") %>%
  mutate(
    Percent = round(100 * Count / sum(Count), 1),
    Percent = paste0(Percent, "%")
  )

Q7_table %>%
  kable(
    caption = "Survey Question 7: How long have you held a license?",
    col.names = c("Years Licensed", "Count     ", "Percent"),
    align = c("l", "c", "c"),
    booktabs = T
  ) %>%
  kable_styling(
    full_width = F,
    bootstrap_options = c("striped", "hover", "condensed")
  ) %>%
  save_kable("~/Desktop/UGA/Fall 2025/STAT 5010W/Project/Charts/licenses_year.png")
## file:////private/var/folders/zy/qjf3c90x4slfz39f8vt6rz1c0000gn/T/RtmpbvvQ5b/licenses_yeara7ea180e9d95.html screenshot completed
## save_kable will have the best result with magick installed.

#Question 8

#Demographics

demo_data <- Pesticide_Clean[,231:235]

colnames(demo_data) = c("Gender", "Race", "Ethnicity", "Age", "Education_Level")

demo_data <- demo_data %>% mutate(across(everything(), as.character))

demo_data <- demo_data %>% 
  mutate(
    Gender = recode(as.character(Gender),
                    "1" = "Male",
                    "2" = "Female",
                    "3" = "Prefer not to answer"),
    Race = recode(as.character(Race),
                  "1" = "American Indian or Alaska Native",
                  "2" = "Asian",
                  "3" = "Black/African American",
                  "4" = "Native Hawaiian or Other Pacific Islander",
                  "5" = "White",
                  "6" = "More than one race",
                  "7" = "Other"),
    Ethnicity = recode(as.character(Ethnicity),
                       "1" = "Hispanic/Latino",
                       "2" = "Not Hispanic/Latino"),
    Age = recode(as.character(Age),
                 "9" = "Under 18",
                 "10" = "18–24",
                 "11" = "25–34",
                 "12" = "35–44",
                 "13" = "45–54",
                 "14" = "55–64",
                 "15" = "65–74",
                 "16" = "75–84",
                 "17" = "85+"),
    Education_Level = recode(as.character(Education_Level),
                             "1" = "Less than 9th grade",
                             "2" = "9th to 12th grade, no diploma",
                             "3" = "High school graduate or GED",
                             "4" = "Some college, no degree",
                             "5" = "Associate’s degree",
                             "6" = "Bachelor’s degree",
                             "7" = "Graduate or professional degree"),
  )


#Filtering Data
demo_gender <- demo_data[,1, drop=F] %>%
  filter(!is.na(Gender))

demo_race <- demo_data[,2, drop=F] %>%
  filter(!is.na(Race))

demo_ethnicity <- demo_data[,3, drop=F] %>%
  filter(!is.na(Ethnicity))

demo_age <- demo_data[,4, drop=F] %>%
  filter(!is.na(Age))

demo_edu <- demo_data[,5, drop=F] %>%
  filter(!is.na(Education_Level))

#Functions for graphs

plot_demo_bar <- function(data, var, xlab) {
  ggplot(data, aes(x = {{ var }}, 
                   y = after_stat(..count.. / sum(..count..)), 
                   fill = {{ var }})) +
    geom_bar(color="black") +
    scale_y_continuous(expand = c(0, 0),
                       name = "Percent",
                       labels = function(x) paste0(round(100 * x / sum(x), 1),"%"))+
    scale_fill_brewer(palette = "Pastel4") +
    labs(x = xlab) +
    theme_minimal(base_size = 12) +
    theme(axis.text.x = element_text(angle = 30, hjust = 1)) 
}


plot_demo_pie_legend <- function(data, var, title = NULL) {
  df <- data %>%
    count({{ var }}) %>%
    mutate(pct = round(n / sum(n) * 100, 1),
           legend_label = paste0({{ var }}, " (", pct, "%)"))
  ggplot(df, aes(x = "", y = n, fill = legend_label)) +
    geom_col(width = 1, color = "white") +
    coord_polar(theta = "y") +
    theme_void() +
    labs(title = title, fill = "") +
    theme(
      plot.title = element_text(hjust = 0.5),
      legend.position = "right" )
}
# Gender
plot_demo_pie_legend(demo_data, Gender, "Gender")

# Race
plot_demo_pie_legend(demo_data, Race, "Distribution of Race")

# Ethnicity
plot_demo_pie_legend(demo_data, Ethnicity, "Ethnicity")

# Age (categories)
plot_demo_pie_legend(demo_data, Age, "Distribution of Age")

# Education
plot_demo_pie_legend(demo_data, Education_Level, "Distribution of Education Level")

#Taking out NA

# Gender
gender <- plot_demo_bar(demo_gender, Gender, "Gender")
## Warning: Unknown palette: "Pastel4"
ggsave("~/Desktop/UGA/Fall 2025/STAT 5010W/Project/Charts/gender_barchart.png", gender)
## Saving 7 x 5 in image
# Race
race <- plot_demo_pie_legend(demo_race, Race, "Distribution of Race")
ggsave("~/Desktop/UGA/Fall 2025/STAT 5010W/Project/Charts/race_piechart.png", race)
## Saving 7 x 5 in image
# Ethnicity
plot_demo_bar(demo_ethnicity, Ethnicity, "Ethnicity")
## Warning: Unknown palette: "Pastel4"

# Age (categories)
age <- plot_demo_bar(demo_age, Age, "Distribution of Age")
## Warning: Unknown palette: "Pastel4"
ggsave("~/Desktop/UGA/Fall 2025/STAT 5010W/Project/Charts/age_barchart.png", age)
## Saving 7 x 5 in image
# Education
education <- plot_demo_pie_legend(demo_edu, Education_Level, "Distribution of Education Level")
ggsave("~/Desktop/UGA/Fall 2025/STAT 5010W/Project/Charts/education_piechart.png", education)
## Saving 7 x 5 in image