mental=read.csv("survey.csv",header = TRUE, stringsAsFactors = TRUE)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4
library(vcdExtra)
## Warning: package 'vcdExtra' was built under R version 3.4.4
## Loading required package: vcd
## Warning: package 'vcd' was built under R version 3.4.4
## Loading required package: grid
## Loading required package: gnm
## Warning: package 'gnm' was built under R version 3.4.4
## 
## Attaching package: 'vcdExtra'
## The following object is masked from 'package:dplyr':
## 
##     summarise
head(table(mental$Gender))
## 
## A little about you            Agender                All 
##                  1                  1                  1 
##          Androgyne   cis-female/femme         Cis Female 
##                  1                  1                  1
tail(table(mental$Gender))
## 
##        queer/she/they something kinda male?          Trans-female 
##                     1                     1                     1 
##           Trans woman                 woman                 Woman 
##                     1                     1                     3
mental[mental$Gender == "male","Gender"] = "Male"
mental[mental$Gender == "Mail","Gender"] = "Male"
mental[mental$Gender == "m","Gender"] = "Male"
mental[mental$Gender == "Male-ish","Gender"] = "Male"
mental[mental$Gender == "maile","Gender"] = "Male"
mental[mental$Gender == "M","Gender"] = "Male"
mental[mental$Gender == "Cis Male","Gender"] = "Male"
mental[mental$Gender == "Male (CIS)","Gender"] = "Male"
mental[mental$Gender == "msle","Gender"] = "Male"
mental[mental$Gender == "Guy (-ish) ^_^","Gender"] = "Male"
mental[mental$Gender == "male leaning androgynous","Gender"] = "Male"
mental[mental$Gender == "Mal","Gender"] = "Male"
mental[mental$Gender == "Malr","Gender"] = "Male"
mental[mental$Gender == "ostensibly male, unsure what that really means","Gender"] = "Male"
mental[mental$Gender == "Cis Man","Gender"] = "Male"
mental[mental$Gender == "cis male","Gender"] = "Male"
mental[mental$Gender == "Man","Gender"] = "Male"
mental[mental$Gender == "Male","Gender"] = "Male"
mental[mental$Gender == "Male ","Gender"] = "Male"

mental[mental$Gender == "Trans-female","Gender"] = "Female"
mental[mental$Gender == "Cis Female","Gender"] = "Female"
mental[mental$Gender == "female","Gender"] = "Female"
mental[mental$Gender == "F","Gender"] = "Female"
mental[mental$Gender == "f","Gender"] = "Female"
mental[mental$Gender == "Woman","Gender"] = "Female"
mental[mental$Gender == "woman","Gender"] = "Female"
mental[mental$Gender == "cis-female/femme","Gender"] = "Female"
mental[mental$Gender == "Female (trans)","Gender"] = "Female"
mental[mental$Gender == "Female (cis)","Gender"] = "Female"
mental[mental$Gender == "Trans woman","Gender"] = "Female"
mental[mental$Gender == "femail","Gender"] = "Female"
mental[mental$Gender == "Female","Gender"] = "Female"
mental[mental$Gender == "Female ","Gender"] = "Female"

mental$Gender = as.character(mental$Gender)
mental[mental$Gender == "Androgyne","Gender"] = "NotSpecific"
mental[mental$Gender == "Agender","Gender"] = "NotSpecific"
mental[mental$Gender == "Genderqueer","Gender"] = "NotSpecific"
mental[mental$Gender == "Enby","Gender"] = "NotSpecific"
mental[mental$Gender == "fluid","Gender"] = "NotSpecific"
mental[mental$Gender == "non-binary","Gender"] = "NotSpecific"
mental[mental$Gender == "A little about you","Gender"] = "NotSpecific"
mental[mental$Gender == "something kinda male?","Gender"] = "NotSpecific"
mental[mental$Gender == "queer/she/they","Gender"] = "NotSpecific"
mental[mental$Gender == "All","Gender"] = "NotSpecific"
mental[mental$Gender == "Nah","Gender"] = "NotSpecific"
mental[mental$Gender == "Femake","Gender"] = "NotSpecific"
mental[mental$Gender == "Make","Gender"] = "NotSpecific"
mental[mental$Gender == "Neuter","Gender"] = "NotSpecific"
mental[mental$Gender == "queer","Gender"] = "NotSpecific"
mental[mental$Gender == "msle","Gender"] = "NotSpecific"
mental[mental$Gender == "unsure what that really means","Gender"] = "NotSpecific"
mental[mental$Gender == "p","Gender"] = "NotSpecific"
mentalnew = subset(mental[,c(2,3,4,6,7,8,11,12,13,15,17,19,20,23,24,25,26)])####Builing and subsetting a new dataset with variables that we need
glimpse(mentalnew)
## Observations: 1,259
## Variables: 17
## $ Age                       <dbl> 37, 44, 32, 31, 31, 33, 35, 39, 42, ...
## $ Gender                    <chr> "Female", "Male", "Male", "Male", "M...
## $ Country                   <fct> United States, United States, Canada...
## $ self_employed             <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ family_history            <fct> No, No, No, Yes, No, Yes, Yes, No, Y...
## $ treatment                 <fct> Yes, No, No, Yes, No, No, Yes, No, Y...
## $ remote_work               <fct> No, No, No, No, Yes, No, Yes, Yes, N...
## $ tech_company              <fct> Yes, No, Yes, Yes, Yes, Yes, Yes, Ye...
## $ benefits                  <fct> Yes, Don't know, No, No, Yes, Yes, N...
## $ wellness_program          <fct> No, Don't know, No, No, Don't know, ...
## $ anonymity                 <fct> Yes, Don't know, Don't know, No, Don...
## $ mental_health_consequence <fct> No, Maybe, No, Yes, No, No, Maybe, N...
## $ phys_health_consequence   <fct> No, No, No, Yes, No, No, Maybe, No, ...
## $ mental_health_interview   <fct> No, No, Yes, Maybe, Yes, No, No, No,...
## $ phys_health_interview     <fct> Maybe, No, Yes, Maybe, Yes, Maybe, N...
## $ mental_vs_physical        <fct> Yes, Don't know, No, No, Don't know,...
## $ obs_consequence           <fct> No, No, No, Yes, No, No, No, No, No,...
mentalnew[959,"Gender"] = "Male"
mentalnew[1214,"Gender"] = "Male"
mentalnew[611,"Gender"] = "Male"
mentalnew[1168,"Gender"] = "Female"
mentalnew[698,"Age"] = 0
mentalnew[126,"Age"] = 0
mentalnew[1110,"Age"] = 0
head(mentalnew$Gender)
## [1] "Female" "Male"   "Male"   "Male"   "Male"   "Male"
tail(mentalnew$Gender)
## [1] "Male"   "Male"   "Male"   "Male"   "Female" "Male"
colSums(is.na(mentalnew))
##                       Age                    Gender 
##                         0                         0 
##                   Country             self_employed 
##                         0                        18 
##            family_history                 treatment 
##                         0                         0 
##               remote_work              tech_company 
##                         0                         0 
##                  benefits          wellness_program 
##                         0                         0 
##                 anonymity mental_health_consequence 
##                         0                         0 
##   phys_health_consequence   mental_health_interview 
##                         0                         0 
##     phys_health_interview        mental_vs_physical 
##                         0                         0 
##           obs_consequence 
##                         0
mentalnew = mentalnew %>% filter(!is.na(self_employed)) 
## Warning: package 'bindrcpp' was built under R version 3.4.4
colSums(is.na(mentalnew))
##                       Age                    Gender 
##                         0                         0 
##                   Country             self_employed 
##                         0                         0 
##            family_history                 treatment 
##                         0                         0 
##               remote_work              tech_company 
##                         0                         0 
##                  benefits          wellness_program 
##                         0                         0 
##                 anonymity mental_health_consequence 
##                         0                         0 
##   phys_health_consequence   mental_health_interview 
##                         0                         0 
##     phys_health_interview        mental_vs_physical 
##                         0                         0 
##           obs_consequence 
##                         0
glimpse(mentalnew)
## Observations: 1,241
## Variables: 17
## $ Age                       <dbl> 46, 36, 29, 31, 46, 41, 33, 35, 33, ...
## $ Gender                    <chr> "Male", "Male", "Male", "Male", "Mal...
## $ Country                   <fct> United States, France, United States...
## $ self_employed             <fct> Yes, Yes, No, Yes, No, No, No, No, N...
## $ family_history            <fct> Yes, Yes, Yes, No, No, No, Yes, Yes,...
## $ treatment                 <fct> No, No, Yes, No, Yes, Yes, Yes, Yes,...
## $ remote_work               <fct> Yes, Yes, No, Yes, Yes, No, No, No, ...
## $ tech_company              <fct> Yes, Yes, Yes, Yes, Yes, No, Yes, No...
## $ benefits                  <fct> Yes, No, Yes, No, Yes, Don't know, Y...
## $ wellness_program          <fct> Yes, Yes, No, No, No, No, Don't know...
## $ anonymity                 <fct> Yes, Yes, No, Yes, Don't know, Don't...
## $ mental_health_consequence <fct> No, No, Maybe, No, Maybe, Maybe, No,...
## $ phys_health_consequence   <fct> No, No, No, No, No, No, No, No, Mayb...
## $ mental_health_interview   <fct> No, Maybe, No, No, No, No, No, No, N...
## $ phys_health_interview     <fct> Yes, Maybe, No, Maybe, Maybe, Yes, Y...
## $ mental_vs_physical        <fct> Yes, Don't know, No, Yes, No, Don't ...
## $ obs_consequence           <fct> Yes, No, No, No, No, No, No, No, No,...
model = glm(treatment ~ Age + Gender + Country + self_employed + family_history + remote_work + tech_company + benefits + wellness_program + anonymity + mental_health_consequence + phys_health_consequence + mental_health_interview + phys_health_interview + mental_vs_physical + obs_consequence, family = "binomial", data = mentalnew) 
model
## 
## Call:  glm(formula = treatment ~ Age + Gender + Country + self_employed + 
##     family_history + remote_work + tech_company + benefits + 
##     wellness_program + anonymity + mental_health_consequence + 
##     phys_health_consequence + mental_health_interview + phys_health_interview + 
##     mental_vs_physical + obs_consequence, family = "binomial", 
##     data = mentalnew)
## 
## Coefficients:
##                   (Intercept)                            Age  
##                    -9.165e-01                     -9.462e-04  
##                    GenderMale              GenderNotSpecific  
##                    -7.316e-01                      5.397e-01  
##                CountryAustria            CountryBahamas, The  
##                    -1.616e+01                      1.232e+01  
##                CountryBelgium  CountryBosnia and Herzegovina  
##                    -2.094e+00                     -1.738e+01  
##                 CountryBrazil                CountryBulgaria  
##                    -7.232e-01                      8.680e-01  
##                 CountryCanada                   CountryChina  
##                    -4.518e-02                     -1.547e+01  
##               CountryColombia              CountryCosta Rica  
##                    -1.602e+01                     -1.626e+01  
##                CountryCroatia          CountryCzech Republic  
##                     1.549e+01                     -1.733e+01  
##                CountryDenmark                 CountryFinland  
##                     1.621e+01                     -1.748e-01  
##                 CountryFrance                 CountryGeorgia  
##                    -1.614e+00                     -1.591e+01  
##                CountryGermany                  CountryGreece  
##                    -2.725e-03                     -1.647e+01  
##                CountryHungary                   CountryIndia  
##                    -1.869e+01                     -2.292e-01  
##                CountryIreland                  CountryIsrael  
##                    -1.594e-01                     -1.669e+01  
##                  CountryItaly                   CountryJapan  
##                    -1.597e+00                      1.511e+01  
##                 CountryLatvia                  CountryMexico  
##                    -1.576e+01                     -2.564e-01  
##                CountryMoldova             CountryNetherlands  
##                     1.638e+01                     -6.149e-01  
##            CountryNew Zealand                 CountryNigeria  
##                     4.138e-01                     -1.617e+01  
##                 CountryNorway             CountryPhilippines  
##                    -1.750e+01                     -1.608e+01  
##                 CountryPoland                CountryPortugal  
##                     3.840e-01                     -1.634e+01  
##                CountryRomania                  CountryRussia  
##                    -1.571e+01                     -1.729e+01  
##              CountrySingapore                CountrySlovenia  
##                    -1.185e+00                      1.780e+01  
##           CountrySouth Africa                   CountrySpain  
##                     1.182e-01                     -1.816e+01  
##                 CountrySweden             CountrySwitzerland  
##                    -1.150e+00                     -4.982e-01  
##               CountryThailand          CountryUnited Kingdom  
##                    -1.579e+01                      8.086e-03  
##          CountryUnited States                 CountryUruguay  
##                    -1.645e-01                     -1.552e+01  
##               CountryZimbabwe               self_employedYes  
##                     9.462e+07                      3.428e-02  
##             family_historyYes                 remote_workYes  
##                     1.525e+00                      1.651e-01  
##               tech_companyYes                     benefitsNo  
##                     1.146e-01                      4.535e-01  
##                   benefitsYes             wellness_programNo  
##                     8.730e-01                      1.692e-02  
##           wellness_programYes                    anonymityNo  
##                    -1.552e-01                      9.966e-02  
##                  anonymityYes    mental_health_consequenceNo  
##                     4.332e-01                     -3.412e-01  
##  mental_health_consequenceYes      phys_health_consequenceNo  
##                     1.434e-01                      1.458e-01  
##    phys_health_consequenceYes      mental_health_interviewNo  
##                    -1.219e-01                      3.343e-01  
##    mental_health_interviewYes        phys_health_interviewNo  
##                     8.307e-01                     -5.274e-02  
##      phys_health_interviewYes           mental_vs_physicalNo  
##                     2.084e-01                      1.837e-01  
##         mental_vs_physicalYes             obs_consequenceYes  
##                     5.154e-02                      5.239e-01  
## 
## Degrees of Freedom: 1240 Total (i.e. Null);  1169 Residual
## Null Deviance:       1720 
## Residual Deviance: 1360  AIC: 1504
  1. How do Age, Gender, Country relate to mental health issues with peers?
model1 = glm(treatment ~ Age + Gender + Country , family = "binomial", data = mentalnew) 
model1
## 
## Call:  glm(formula = treatment ~ Age + Gender + Country, family = "binomial", 
##     data = mentalnew)
## 
## Coefficients:
##                   (Intercept)                            Age  
##                     1.209e+00                     -5.141e-04  
##                    GenderMale              GenderNotSpecific  
##                    -9.013e-01                      6.620e-01  
##                CountryAustria            CountryBahamas, The  
##                    -1.686e+01                      1.470e+01  
##                CountryBelgium  CountryBosnia and Herzegovina  
##                    -2.263e+00                     -1.686e+01  
##                 CountryBrazil                CountryBulgaria  
##                    -9.868e-01                      1.325e-01  
##                 CountryCanada                   CountryChina  
##                    -2.900e-01                     -1.685e+01  
##               CountryColombia              CountryCosta Rica  
##                    -1.686e+01                     -1.685e+01  
##                CountryCroatia          CountryCzech Republic  
##                     1.628e+01                     -1.686e+01  
##                CountryDenmark                 CountryFinland  
##                     1.628e+01                     -9.858e-01  
##                 CountryFrance                 CountryGeorgia  
##                    -1.996e+00                     -1.686e+01  
##                CountryGermany                  CountryGreece  
##                    -5.074e-01                     -1.685e+01  
##                CountryHungary                   CountryIndia  
##                    -1.776e+01                     -1.346e+00  
##                CountryIreland                  CountryIsrael  
##                    -4.955e-01                     -1.706e+01  
##                  CountryItaly                   CountryJapan  
##                    -2.082e+00                      1.628e+01  
##                 CountryLatvia                  CountryMexico  
##                    -1.686e+01                     -9.832e-01  
##                CountryMoldova             CountryNetherlands  
##                     1.627e+01                     -1.094e+00  
##            CountryNew Zealand                 CountryNigeria  
##                     1.227e-01                     -1.686e+01  
##                 CountryNorway             CountryPhilippines  
##                    -1.686e+01                     -1.686e+01  
##                 CountryPoland                CountryPortugal  
##                    -2.452e-01                     -1.686e+01  
##                CountryRomania                  CountryRussia  
##                    -1.686e+01                     -1.751e+01  
##              CountrySingapore                CountrySlovenia  
##                    -1.389e+00                      1.627e+01  
##           CountrySouth Africa                   CountrySpain  
##                     2.753e-01                     -1.776e+01  
##                 CountrySweden             CountrySwitzerland  
##                    -1.500e+00                     -7.106e-01  
##               CountryThailand          CountryUnited Kingdom  
##                    -1.685e+01                     -4.615e-01  
##          CountryUnited States                 CountryUruguay  
##                    -3.352e-01                     -1.686e+01  
##               CountryZimbabwe  
##                     5.141e+07  
## 
## Degrees of Freedom: 1240 Total (i.e. Null);  1190 Residual
## Null Deviance:       1720 
## Residual Deviance: 1592  AIC: 1694
  1. Are mental health illnesses more frequent among Tech workers who have a family member with history of mental illness or not?
model2 = glm(treatment ~  family_history + tech_company, family = "binomial", data = mentalnew) 
model2
## 
## Call:  glm(formula = treatment ~ family_history + tech_company, family = "binomial", 
##     data = mentalnew)
## 
## Coefficients:
##       (Intercept)  family_historyYes    tech_companyYes  
##          -0.52543            1.65437           -0.08901  
## 
## Degrees of Freedom: 1240 Total (i.e. Null);  1238 Residual
## Null Deviance:       1720 
## Residual Deviance: 1536  AIC: 1542
  1. Do companies with susceptibility of working remotely tend to formally discuss mental health issues less or not?
model3 = glm(treatment ~ remote_work + wellness_program, family = "binomial", data = mentalnew) 
model3
## 
## Call:  glm(formula = treatment ~ remote_work + wellness_program, family = "binomial", 
##     data = mentalnew)
## 
## Coefficients:
##         (Intercept)       remote_workYes   wellness_programNo  
##             -0.3061               0.1519               0.2479  
## wellness_programYes  
##              0.6508  
## 
## Degrees of Freedom: 1240 Total (i.e. Null);  1237 Residual
## Null Deviance:       1720 
## Residual Deviance: 1708  AIC: 1716
  1. Do Benefit, Wellness-program, Anonymity have any effect on decision of treatment?
model4 = glm(treatment ~ benefits + wellness_program + anonymity, family = "binomial", data = mentalnew) 
model4
## 
## Call:  glm(formula = treatment ~ benefits + wellness_program + anonymity, 
##     family = "binomial", data = mentalnew)
## 
## Coefficients:
##         (Intercept)           benefitsNo          benefitsYes  
##            -0.75410              0.30386              1.00050  
##  wellness_programNo  wellness_programYes          anonymityNo  
##             0.24559              0.09822              0.47652  
##        anonymityYes  
##             0.35054  
## 
## Degrees of Freedom: 1240 Total (i.e. Null);  1234 Residual
## Null Deviance:       1720 
## Residual Deviance: 1647  AIC: 1661
  1. How much is the difference effect of Mental-Health-consequence on treatment?
model5 = glm(treatment ~ mental_health_consequence , family = "binomial", data = mentalnew) 
model5
## 
## Call:  glm(formula = treatment ~ mental_health_consequence, family = "binomial", 
##     data = mentalnew)
## 
## Coefficients:
##                  (Intercept)   mental_health_consequenceNo  
##                       0.1103                       -0.3834  
## mental_health_consequenceYes  
##                       0.2665  
## 
## Degrees of Freedom: 1240 Total (i.e. Null);  1238 Residual
## Null Deviance:       1720 
## Residual Deviance: 1700  AIC: 1706