Mental health survey

library(tidyverse)


# Read the survey responses into `dat`. The location is a hard-coded absolute
# Windows path, so it must be adjusted when the script runs on another machine.
dat <- read_csv(
  file = "C:/Users/Dr Mohsen/Documents/R/coursera/data_visualization_capestone/survey.csv"
)

Relation between respondent’s age and seeking mental treatment

# Frequency tables (NA included) used to inspect the raw treatment answers
# and the raw ages before any cleaning.
table(dat$treatment, useNA = "ifany")

table(dat$Age, useNA = "ifany")

# Keep only ages strictly between 0 and 100. Rows with a missing Age are
# dropped too, because filter() discards rows whose condition is NA.
dat2 <- dat %>%
  filter(Age > 0, Age < 100)

# Box plot of age per treatment answer with every respondent overlaid as a
# point. The box plot's own outlier markers are hidden (outlier.shape = NA)
# because the jitter layer already draws every observation.
ggplot(data = dat2, aes(x = treatment, y = Age, col = treatment)) +
  geom_boxplot(outlier.shape = NA) +
  # height = 0 restricts the jitter to the horizontal direction. With the
  # defaults geom_jitter() also shifts points vertically, which would move
  # each point away from the respondent's actual age.
  geom_jitter(height = 0) +
  labs(
    title = "Relation between respondent's age and seeking mental treatment",
    x = "Seeking mental treatment",
    col = "Seeking mental treatment",
    y = "Age (years)"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5),
    axis.text = element_text(face = "bold")
  )

Relation between respondent’s country and seeking mental treatment

library(plotly)

# Number of respondents per country (NA included).
table(dat$Country, useNA = "ifany")

# Per-country percentage of "Yes" treatment answers.
#   * countries with 10 or fewer respondents in total are removed,
#   * the percentage is rounded to a whole number,
#   * Country is turned into a factor ordered by that percentage so the chart
#     is sorted.
df <- dat %>%
  group_by(Country) %>%
  count(treatment) %>%
  dplyr::filter(sum(n) > 10) %>%
  dplyr::mutate(percentage = round((n / sum(n)) * 100)) %>%
  dplyr::filter(treatment == "Yes") %>%
  ungroup() %>%
  dplyr::mutate(Country = reorder(Country, percentage))

# Lollipop chart: one point per country joined to the y axis by a segment
# that starts at 0.
p <- ggplot(data = df, mapping = aes(x = percentage, y = Country)) +
  geom_point(size = 3) +
  geom_segment(
    mapping = aes(x = 0, xend = percentage, y = Country, yend = Country)
  ) +
  scale_x_continuous(expand = c(0, 1)) +
  labs(x = "Percentage seeking treatment", y = "") +
  theme_classic()

# Convert to an interactive plotly widget. The title is set through layout()
# and uses HTML tags to add a smaller subtitle line.
ggplotly(p) %>%
  layout(
    title = list(
      text = paste0(
        "Relation between respondent's country and percentage seeking mental treatment",
        "<br>",
        "<sup>",
        "For countries with more than 10 respondents",
        "</sup>"
      ),
      font = list(size = 14)
    )
  )

Relation between respondent’s U.S. state and seeking mental treatment

library(maps)

# Lookup table from two-letter state abbreviation to full state name, built
# from the base R constants state.name / state.abb.
df <- data.frame(state = state.name, state_abb = state.abb)

# Attach the full state name to every respondent. The survey's `state`
# column is matched against the abbreviation; the lookup's own `state`
# column (full name) is referred to as `state.y` after the join.
dat2 <- dat %>%
  left_join(df, by = c("state" = "state_abb"))

# Polygon outlines of the U.S. states; `region` holds lower-case state names.
my_state_map <- map_data("state")

# Percentage of "Yes" treatment answers per state, for states with more than
# one respondent. drop_na() removes the rows without a matched state name,
# and the names are lower-cased to match `region` in the map data.
df2 <- dat2 %>%
  group_by(state.y) %>%
  count(treatment) %>%
  dplyr::filter(sum(n) > 1) %>%
  dplyr::mutate(percentage = round((n / sum(n)) * 100)) %>%
  dplyr::filter(treatment == "Yes") %>%
  drop_na() %>%
  dplyr::mutate(state.y = str_to_lower(state.y))

# Map polygons with the percentage attached; states without a row in df2
# get percentage = NA.
df3 <- left_join(my_state_map, df2, by = c("region" = "state.y"))

# Choropleth of the percentage seeking treatment.
# The constant `col = ""` aesthetic exists only to create a second legend
# entry labelled "No data": its colour scale draws nothing (values = NA) and
# the legend key is overridden to black, matching the fill's na.value.
p <- ggplot(
  data = df3,
  mapping = aes(
    x = long, y = lat,
    group = group,
    fill = percentage, col = ""
  )
) +
  geom_polygon() +
  # midpoint = 50 anchors the diverging palette at "half of the respondents".
  # scale_fill_gradient2() defaults to midpoint = 0, which lies below the
  # lower limit, so without it the `low` colour would never be used.
  # NOTE(review): percentages below 20 fall outside `limits` and are drawn
  # with na.value, i.e. they look like "No data" - confirm this is intended.
  scale_fill_gradient2(
    low = "brown3", mid = "cornsilk1", high = "turquoise4",
    midpoint = 50,
    limits = c(20, 100), na.value = "black"
  ) +
  scale_colour_manual(values = NA) +
  guides(
    colour = guide_legend("No data", override.aes = list(colour = "black"))
  ) +
  labs(
    title = "Relation between respondent's U.S. state and percentage seeking mental treatment",
    subtitle = "For states with more than 1 respondent",
    fill = "Percentage seeking mental treatment"
  ) +
  theme_void() +
  theme(
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 10, hjust = 0.5)
  )

p

Relation between self-employment and seeking mental treatment

# Counts of the self_employed answers (NA included).
table(dat$self_employed, useNA = "ifany")

# Label data: for every self_employed answer, the count of each treatment
# answer and its rounded share within that self_employed group. Respondents
# with a missing self_employed value are excluded.
df <- dat %>%
  drop_na(self_employed) %>%
  group_by(self_employed) %>%
  count(treatment) %>%
  mutate(percentage = round((n / sum(n)) * 100))

# 100 % stacked bar chart of treatment answers per self_employed group.
# The text layer uses the pre-computed counts in `df`; position_fill()
# rescales them the same way as the bars and vjust = 0.5 centres each label
# inside its segment.
ggplot(
  data = dat %>% drop_na(self_employed),
  aes(x = self_employed, fill = treatment)
) +
  geom_bar(position = "fill", width = 0.5) +
  geom_text(
    data = df,
    aes(x = self_employed, y = n, label = paste(percentage, "%")),
    position = position_fill(vjust = 0.5)
  ) +
  scale_y_continuous(labels = scales::percent_format()) +
  # Spelling fixed in the user-facing labels ("employement" -> "employment").
  labs(
    title = "Relation between self employment and seeking mental treatment",
    x = "Self employment",
    fill = "Seeking mental treatment",
    y = "Percentage"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5),
    axis.text = element_text(face = "bold")
  )

Relation between family history and seeking treatment

# Counts of the family_history answers (NA included).
table(dat$family_history, useNA = "ifany")

# Label data: count and rounded within-group share of each treatment answer
# for every family_history value, ignoring respondents with a missing value.
df <- dat %>%
  drop_na(family_history) %>%
  group_by(family_history) %>%
  count(treatment) %>%
  mutate(percentage = round((n / sum(n)) * 100))

# 100 % stacked bars of treatment answers per family_history value, with the
# percentage printed in the middle of each segment.
ggplot(
  data = dat %>% drop_na(family_history),
  mapping = aes(x = family_history, fill = treatment)
) +
  geom_bar(position = "fill", width = 0.5) +
  geom_text(
    data = df,
    mapping = aes(x = family_history, y = n, label = paste0(percentage, " %")),
    position = position_fill(vjust = 0.5)
  ) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    title = "Relation between Family history of mental illness \n and seeking mental treatment",
    x = "Family history of mental illness",
    fill = "Seeking mental treatment",
    y = "Percentage"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5),
    axis.text = element_text(face = "bold")
  )

Relation between the number of employees in the company and seeking mental treatment

# Counts of the company-size answers (NA included).
table(dat$no_employees, useNA = "ifany")

# Label data: count and rounded within-group share of each treatment answer
# for every company-size category, ignoring missing company sizes.
df <- dat %>%
  drop_na(no_employees) %>%
  group_by(no_employees) %>%
  count(treatment) %>%
  mutate(percentage = round((n / sum(n)) * 100))

# Horizontal 100 % stacked bars of treatment answers per company size.
# The bar data re-levels no_employees so the categories follow their natural
# size order instead of alphabetical order; coord_flip() turns the bars
# sideways.
ggplot(
  data = dat %>%
    drop_na(no_employees) %>%
    dplyr::mutate(
      no_employees = fct_relevel(
        no_employees,
        "1-5", "6-25", "26-100", "100-500", "500-1000", "More than 1000"
      )
    ),
  mapping = aes(x = no_employees, fill = treatment)
) +
  geom_bar(position = "fill", width = 0.5) +
  geom_text(
    data = df,
    mapping = aes(x = no_employees, y = n, label = paste0(percentage, " %")),
    position = position_fill(vjust = 0.5)
  ) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    title = "Relation between employees number in the company \n and seeking mental treatment",
    x = "Employees number",
    fill = "Seeking mental treatment",
    y = "Percentage"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5),
    axis.text = element_text(face = "bold")
  ) +
  coord_flip()

Relation between remote work and seeking treatment

# Counts of the remote_work answers (NA included).
table(dat$remote_work, useNA = "ifany")

# Label data: count and rounded within-group share of each treatment answer
# for every remote_work value, ignoring respondents with a missing value.
df <- dat %>%
  drop_na(remote_work) %>%
  group_by(remote_work) %>%
  count(treatment) %>%
  mutate(percentage = round((n / sum(n)) * 100))

# 100 % stacked bars of treatment answers per remote_work value, with the
# percentage printed in the middle of each segment.
ggplot(
  data = dat %>% drop_na(remote_work),
  mapping = aes(x = remote_work, fill = treatment)
) +
  geom_bar(position = "fill", width = 0.5) +
  geom_text(
    data = df,
    mapping = aes(x = remote_work, y = n, label = paste0(percentage, " %")),
    position = position_fill(vjust = 0.5)
  ) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    title = "Relation between working remotely and seeking mental treatment",
    x = "Remote work",
    fill = "Seeking mental treatment",
    y = "Percentage"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5),
    axis.text = element_text(face = "bold")
  )

Relation between working in a tech company and seeking mental treatment

# Counts of the tech_company answers (NA included).
table(dat$tech_company, useNA = "ifany")

# Label data: count and rounded within-group share of each treatment answer
# for every tech_company value, ignoring respondents with a missing value.
df <- dat %>%
  drop_na(tech_company) %>%
  group_by(tech_company) %>%
  count(treatment) %>%
  mutate(percentage = round((n / sum(n)) * 100))

# 100 % stacked bars of treatment answers per tech_company value, with the
# percentage printed in the middle of each segment.
ggplot(
  data = dat %>% drop_na(tech_company),
  mapping = aes(x = tech_company, fill = treatment)
) +
  geom_bar(position = "fill", width = 0.5) +
  geom_text(
    data = df,
    mapping = aes(x = tech_company, y = n, label = paste0(percentage, " %")),
    position = position_fill(vjust = 0.5)
  ) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    title = "Relation between working in a tech company and seeking mental treatment",
    x = "Working in a tech company",
    fill = "Seeking mental treatment",
    y = "Percentage"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5),
    axis.text = element_text(face = "bold")
  )

Relation between Country and ease of mental medical leave

# Counts of the medical-leave answers (NA included).
table(dat$leave, useNA = "ifany")

# Label data: per country, the count and rounded share of each leave answer.
#   * only countries with more than 10 respondents are kept (the total is
#     taken before "Don't know" is removed),
#   * "Don't know" answers are then dropped, so the percentages are shares of
#     the remaining answers,
#   * leave is re-levelled from easiest to most difficult.
df <- dat %>%
  group_by(Country) %>%
  count(leave) %>%
  filter(sum(n) > 10) %>%
  filter(leave != "Don't know") %>%
  mutate(percentage = round((n / sum(n)) * 100)) %>%
  ungroup() %>%
  dplyr::mutate(
    leave = fct_relevel(
      leave,
      "Very easy", "Somewhat easy", "Somewhat difficult", "Very difficult"
    )
  )

# Horizontal 100 % stacked bars of leave answers per country. The bar data is
# restricted to the countries present in `df` and filtered / re-levelled the
# same way, so bar segments and labels line up.
ggplot(
  data = dat %>%
    filter(Country %in% df$Country) %>%
    filter(leave != "Don't know") %>%
    dplyr::mutate(
      leave = fct_relevel(
        leave,
        "Very easy", "Somewhat easy", "Somewhat difficult", "Very difficult"
      )
    ),
  mapping = aes(x = Country, fill = leave)
) +
  geom_bar(position = "fill", width = 0.5) +
  geom_text(
    data = df,
    mapping = aes(x = Country, y = n, label = paste0(percentage, " %")),
    position = position_fill(vjust = 0.5)
  ) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    title = "Relation between medical leave for a mental health condition \n and respondent's country",
    subtitle = "Countries with more than 10 respondents",
    x = "",
    fill = "Ease of medical mental leave",
    y = "Percentage"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 10, hjust = 0.5),
    axis.text = element_text(face = "bold")
  ) +
  coord_flip()