library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.3
## Warning: package 'lubridate' was built under R version 4.5.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# 1. Import the raw survey responses from survey.csv (first row = column names)
Mental_Health_Survey <- utils::read.csv(file = "survey.csv", header = TRUE)

# 2. Keep only rows that are complete in every column except comments and state
# if_all() folds the per-column NA checks into the single logical vector that
# filter() expects; a row survives only when none of the checked columns is NA.
Mental_Health_Survey <- dplyr::filter(
  Mental_Health_Survey,
  if_all(!c(comments, state), \(column) !is.na(column))
)

# 3. Clean the Gender column
# Collapse the free-text answers into 'Male', 'Female', and 'Other'.
# Answers are lower-cased and trimmed once up front, so padded entries such as
# "Male " match without listing every whitespace variant separately.
Mental_Health_Survey <- Mental_Health_Survey %>%
  mutate(
    Gender = trimws(tolower(Gender)),
    Gender = case_when(
      Gender %in% c(
        "m", "male", "male-ish", "maile", "mal", "male (cis)", "make", "man",
        "msle", "mail", "malr", "cis male", "cis man"
      ) ~ "Male",
      # "female (cis)" is accepted to mirror "male (cis)" in the list above
      Gender %in% c(
        "f", "female", "trans-female", "female (trans)", "femail",
        "cis female", "female (cis)", "femake", "cis-female/femme", "woman"
      ) ~ "Female",
      # Anything unrecognised, including a missing answer, is grouped as Other
      TRUE ~ "Other"
    )
  )

# 4. Clean the Age column
# Discard implausible ages: only respondents aged 18 through 100 (inclusive)
# are retained. Rows where the comparison is NA are dropped by filter().
Mental_Health_Survey <- Mental_Health_Survey %>%
  filter(between(Age, 18, 100))

# 5. Review the cleaned data
# Print a per-column summary of the cleaned data frame (quantiles for numeric
# columns, length/class/mode for character columns).
summary(Mental_Health_Survey)
##       Age           Gender            Country             state          
##  Min.   :18.00   Length:971         Length:971         Length:971        
##  1st Qu.:27.00   Class :character   Class :character   Class :character  
##  Median :31.00   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :32.31                                                           
##  3rd Qu.:36.00                                                           
##  Max.   :72.00                                                           
##  self_employed      family_history      treatment         work_interfere    
##  Length:971         Length:971         Length:971         Length:971        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  remote_work        tech_company         benefits         care_options      
##  Length:971         Length:971         Length:971         Length:971        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  wellness_program    seek_help            leave          
##  Length:971         Length:971         Length:971        
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##  phys_health_consequence  coworkers         obs_consequence   
##  Length:971              Length:971         Length:971        
##  Class :character        Class :character   Class :character  
##  Mode  :character        Mode  :character   Mode  :character  
##                                                               
##                                                               
##                                                               
##    comments        
##  Length:971        
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
# Preview the first rows of the cleaned data frame (head() defaults to six)
head(Mental_Health_Survey)
##   Age Gender       Country state self_employed family_history treatment
## 1  46   Male United States    MD           Yes            Yes        No
## 2  29   Male United States    NY            No            Yes       Yes
## 3  31   Male United States    NC           Yes             No        No
## 4  46   Male United States    MA            No             No       Yes
## 5  41   Male United States    IA            No             No       Yes
## 6  33   Male United States    CA            No            Yes       Yes
##   work_interfere remote_work tech_company   benefits care_options
## 1      Sometimes         Yes          Yes        Yes     Not sure
## 2      Sometimes          No          Yes        Yes          Yes
## 3          Never         Yes          Yes         No           No
## 4          Often         Yes          Yes        Yes          Yes
## 5          Never          No           No Don't know           No
## 6         Rarely          No          Yes        Yes     Not sure
##   wellness_program  seek_help              leave phys_health_consequence
## 1              Yes Don't know          Very easy                      No
## 2               No         No Somewhat difficult                      No
## 3               No         No Somewhat difficult                      No
## 4               No         No         Don't know                      No
## 5               No Don't know         Don't know                      No
## 6       Don't know        Yes         Don't know                      No
##      coworkers obs_consequence                            comments
## 1          Yes             Yes                                <NA>
## 2 Some of them              No                                <NA>
## 3 Some of them              No                                <NA>
## 4 Some of them              No                                <NA>
## 5           No              No                                <NA>
## 6          Yes              No Relatively new job. Ask again later

# The Mental Health in Tech Survey (2014) provides a snapshot of how mental health was perceived and handled in the technology industry over a decade ago.

# Core Findings
# - High prevalence: Roughly 50% of respondents had sought treatment for a mental health condition, showing that it is a widespread reality in tech.
# - The comfort gap: Employees are significantly more comfortable discussing physical health issues (like a broken leg) with supervisors than mental health issues (like anxiety).
# - Benefit awareness: While many companies offer mental health benefits, a large portion of employees are either unaware of them or don't know how to access them.
# - Anonymity matters: Respondents cited anonymity as a primary factor in whether they would use employer-provided resources.
#
# Why It Matters
# This dataset is widely used in data science to practice data cleaning (due to its messy 'Gender' and 'Age' fields) and to analyze the stigma that persists in high-pressure work environments. It highlights that providing benefits isn't enough; a culture of openness is required for those benefits to be effective.