Q4.knit

# 1. Build the `survey` data frame from CSV text embedded in the script
# (Only the header and two sample rows are included here; when running in a
# clean session, paste the full CSV text into the string in their place.)
raw_text <- "Age,Gender,Country,state,self_employed,family_history,treatment,work_interfere,remote_work,tech_company,benefits,care_options,wellness_program,seek_help,leave,phys_health_consequence,coworkers,obs_consequence,comments
37,Female,United States,IL,NA,No,Yes,Often,No,Yes,Yes,Not sure,No,Yes,Somewhat easy,No,Some of them,No,NA
44,M,United States,IN,NA,No,No,Rarely,No,No,Don't know,No,Don't know,Don't know,Don't know,No,No,No,NA" # ... (paste full data here)

# Parse the text as a comma-separated table whose first line holds the column
# names. The header and separator are spelled out here; they are the same
# values read.csv() uses by default. Literal "NA" fields are read as missing.
survey <- read.csv(
  text = raw_text,
  header = TRUE,
  sep = ","
)

# 2. Plot the share of respondents who sought treatment, per survey factor
#
# For each of four factors (family_history, work_interfere, care_options,
# benefits) and each response given to it, compute the percentage of
# respondents whose `treatment` answer is "Yes", then draw one bar-chart
# facet per factor.
library(dplyr)
library(tidyr)
library(ggplot2)

# Only run when `survey` is available as a data frame; otherwise report it
# instead of silently skipping the plot.
if (exists("survey") && is.data.frame(survey)) {
  # Cleaning: keep the outcome plus the four factors, all as plain strings.
  survey_clean <- survey %>%
    select(treatment, family_history, work_interfere, care_options, benefits) %>%
    mutate(across(everything(), as.character)) %>%
    # A missing work_interfere answer is kept as its own "N/A" category.
    mutate(work_interfere = replace_na(work_interfere, "N/A"))

  # Aggregating: one row per (factor, response).
  #   n          = respondents in that cell who answered treatment == "Yes"
  #   percentage = n as a share of every respondent in the cell
  # Summarising per cell (rather than counting per treatment value and then
  # filtering to "Yes") keeps responses where nobody sought treatment; they
  # appear as 0% instead of disappearing from the chart.
  plot_data <- survey_clean %>%
    pivot_longer(
      cols = -treatment,
      names_to = "Variable",
      values_to = "Status"
    ) %>%
    group_by(Variable, Status) %>%
    summarise(
      total = n(),
      n = sum(treatment == "Yes", na.rm = TRUE),
      percentage = n / total * 100,
      .groups = "drop"
    ) %>%
    # Keep the column layout of the previous version of this table.
    mutate(treatment = "Yes", .after = Status) %>%
    select(-total) %>%
    # The same label (e.g. "Yes", "No") occurs under several factors, so
    # ordering the bare labels would rank them by their average across
    # facets. Build a key that is unique per (response, factor) and order
    # its levels by percentage so bars are sorted within every facet.
    arrange(Variable, percentage, Status) %>%
    mutate(
      status_key = paste(Status, Variable, sep = "___"),
      status_key = factor(status_key, levels = unique(status_key))
    )

  # Plotting: bars sorted ascending inside each facet, labelled with the
  # rounded percentage. The axis shows the response only, with the factor
  # suffix of the ordering key stripped off.
  ggplot(plot_data, aes(x = status_key, y = percentage, fill = Variable)) +
    geom_bar(stat = "identity", show.legend = FALSE, alpha = 0.8) +
    geom_text(
      aes(label = paste0(round(percentage), "%")),
      vjust = -0.5,
      size = 3
    ) +
    scale_x_discrete(labels = function(key) sub("___.*$", "", key)) +
    facet_wrap(~Variable, scales = "free_x") +
    labs(
      title = "Probability of Seeking Treatment by Factor",
      x = "Response Status",
      y = "% Who Sought Treatment"
    ) +
    theme_minimal()
} else {
  warning(
    "`survey` is not available as a data frame; plot skipped.",
    call. = FALSE
  )
}

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union