Dataset: “Mental Health in Tech Survey”

https://towardsdatascience.com/data-cleaning-in-r-made-simple-1b77303b0b17

## Step 1: Familiarize yourself with the data set
# Load the raw survey responses from the CSV file into a data frame.
data <- read.csv(file = "mental-heath-in-tech-2016_20161114.csv")

# a. Check the number of rows and columns of the data frame.
dim(data)
## [1] 1433   63
# Take an initial look at the data frame: str() lists every column
# together with its type and its first few values.
str(data)
## 'data.frame':    1433 obs. of  63 variables:
##  $ Are.you.self.employed.                                                                                                                                                          : int  0 0 0 1 0 0 0 0 0 1 ...
##  $ How.many.employees.does.your.company.or.organization.have.                                                                                                                      : chr  "26-100" "6-25" "6-25" "" ...
##  $ Is.your.employer.primarily.a.tech.company.organization.                                                                                                                         : int  1 1 1 NA 0 1 1 1 0 NA ...
##  $ Is.your.primary.role.within.your.company.related.to.tech.IT.                                                                                                                    : int  NA NA NA NA 1 NA NA NA 1 NA ...
##  $ Does.your.employer.provide.mental.health.benefits.as.part.of.healthcare.coverage.                                                                                               : chr  "Not eligible for coverage / N/A" "No" "No" "" ...
##  $ Do.you.know.the.options.for.mental.health.care.available.under.your.employer.provided.coverage.                                                                                 : chr  "N/A" "Yes" "N/A" "" ...
##  $ Has.your.employer.ever.formally.discussed.mental.health..for.example..as.part.of.a.wellness.campaign.or.other.official.communication..                                          : chr  "No" "Yes" "No" "" ...
##  $ Does.your.employer.offer.resources.to.learn.more.about.mental.health.concerns.and.options.for.seeking.help.                                                                     : chr  "No" "Yes" "No" "" ...
##  $ Is.your.anonymity.protected.if.you.choose.to.take.advantage.of.mental.health.or.substance.abuse.treatment.resources.provided.by.your.employer.                                  : chr  "I don't know" "Yes" "I don't know" "" ...
##  $ If.a.mental.health.issue.prompted.you.to.request.a.medical.leave.from.work..asking.for.that.leave.would.be.                                                                     : chr  "Very easy" "Somewhat easy" "Neither easy nor difficult" "" ...
##  $ Do.you.think.that.discussing.a.mental.health.disorder.with.your.employer.would.have.negative.consequences.                                                                      : chr  "No" "No" "Maybe" "" ...
##  $ Do.you.think.that.discussing.a.physical.health.issue.with.your.employer.would.have.negative.consequences.                                                                       : chr  "No" "No" "No" "" ...
##  $ Would.you.feel.comfortable.discussing.a.mental.health.disorder.with.your.coworkers.                                                                                             : chr  "Maybe" "Maybe" "Maybe" "" ...
##  $ Would.you.feel.comfortable.discussing.a.mental.health.disorder.with.your.direct.supervisor.s..                                                                                  : chr  "Yes" "Yes" "Maybe" "" ...
##  $ Do.you.feel.that.your.employer.takes.mental.health.as.seriously.as.physical.health.                                                                                             : chr  "I don't know" "Yes" "I don't know" "" ...
##  $ Have.you.heard.of.or.observed.negative.consequences.for.co.workers.who.have.been.open.about.mental.health.issues.in.your.workplace.                                             : chr  "No" "No" "No" "" ...
##  $ Do.you.have.medical.coverage..private.insurance.or.state.provided..which.includes.treatment.of.Â.mental.health.issues.                                                          : int  NA NA NA 1 NA NA NA NA NA 1 ...
##  $ Do.you.know.local.or.online.resources.to.seek.help.for.a.mental.health.disorder.                                                                                                : chr  "" "" "" "Yes, I know several" ...
##  $ If.you.have.been.diagnosed.or.treated.for.a.mental.health.disorder..do.you.ever.reveal.this.to.clients.or.business.contacts.                                                    : chr  "" "" "" "Sometimes, if it comes up" ...
##  $ If.you.have.revealed.a.mental.health.issue.to.a.client.or.business.contact..do.you.believe.this.has.impacted.you.negatively.                                                    : chr  "" "" "" "I'm not sure" ...
##  $ If.you.have.been.diagnosed.or.treated.for.a.mental.health.disorder..do.you.ever.reveal.this.to.coworkers.or.employees.                                                          : chr  "" "" "" "Sometimes, if it comes up" ...
##  $ If.you.have.revealed.a.mental.health.issue.to.a.coworker.or.employee..do.you.believe.this.has.impacted.you.negatively.                                                          : chr  "" "" "" "I'm not sure" ...
##  $ Do.you.believe.your.productivity.is.ever.affected.by.a.mental.health.issue.                                                                                                     : chr  "" "" "" "Yes" ...
##  $ If.yes..what.percentage.of.your.work.time..time.performing.primary.or.secondary.job.functions..is.affected.by.a.mental.health.issue.                                            : chr  "" "" "" "1-25%" ...
##  $ Do.you.have.previous.employers.                                                                                                                                                 : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Have.your.previous.employers.provided.mental.health.benefits.                                                                                                                   : chr  "No, none did" "Yes, they all did" "No, none did" "Some did" ...
##  $ Were.you.aware.of.the.options.for.mental.health.care.provided.by.your.previous.employers.                                                                                       : chr  "N/A (not currently aware)" "I was aware of some" "N/A (not currently aware)" "N/A (not currently aware)" ...
##  $ Did.your.previous.employers.ever.formally.discuss.mental.health..as.part.of.a.wellness.campaign.or.other.official.communication..                                               : chr  "I don't know" "None did" "None did" "None did" ...
##  $ Did.your.previous.employers.provide.resources.to.learn.more.about.mental.health.issues.and.how.to.seek.help.                                                                    : chr  "None did" "Some did" "Some did" "None did" ...
##  $ Was.your.anonymity.protected.if.you.chose.to.take.advantage.of.mental.health.or.substance.abuse.treatment.resources.with.previous.employers.                                    : chr  "I don't know" "Yes, always" "I don't know" "I don't know" ...
##  $ Do.you.think.that.discussing.a.mental.health.disorder.with.previous.employers.would.have.negative.consequences.                                                                 : chr  "Some of them" "None of them" "I don't know" "Some of them" ...
##  $ Do.you.think.that.discussing.a.physical.health.issue.with.previous.employers.would.have.negative.consequences.                                                                  : chr  "None of them" "None of them" "Some of them" "Some of them" ...
##  $ Would.you.have.been.willing.to.discuss.a.mental.health.issue.with.your.previous.co.workers.                                                                                     : chr  "Some of my previous employers" "No, at none of my previous employers" "Some of my previous employers" "Some of my previous employers" ...
##  $ Would.you.have.been.willing.to.discuss.a.mental.health.issue.with.your.direct.supervisor.s..                                                                                    : chr  "Some of my previous employers" "Some of my previous employers" "I don't know" "Some of my previous employers" ...
##  $ Did.you.feel.that.your.previous.employers.took.mental.health.as.seriously.as.physical.health.                                                                                   : chr  "I don't know" "Some did" "I don't know" "I don't know" ...
##  $ Did.you.hear.of.or.observe.negative.consequences.for.co.workers.with.mental.health.issues.in.your.previous.workplaces.                                                          : chr  "None of them" "None of them" "Some of them" "Some of them" ...
##  $ Would.you.be.willing.to.bring.up.a.physical.health.issue.with.a.potential.employer.in.an.interview.                                                                             : chr  "Maybe" "Maybe" "Yes" "Yes" ...
##  $ Why.or.why.not.                                                                                                                                                                 : chr  "" "It would depend on the health issue. If there is a health issue that would not immediately affect my job perfor"| __truncated__ "They would provable need to know, to Judge if I can do my job or not. " "old back injury, doesn't cause me many issues but occasionally impacts my ability to work at desk " ...
##  $ Would.you.bring.up.a.mental.health.issue.with.a.potential.employer.in.an.interview.                                                                                             : chr  "Maybe" "No" "Yes" "Maybe" ...
##  $ Why.or.why.not..1                                                                                                                                                               : chr  "" "While mental health has become a more prominent issue recently, I feel like there is still a lot of stigma surr"| __truncated__ "Stigma, mainly. " "would not if I was not 100% sure that the disclosure would not affect my application" ...
##  $ Do.you.feel.that.being.identified.as.a.person.with.a.mental.health.issue.would.hurt.your.career.                                                                                : chr  "Maybe" "No, I don't think it would" "Maybe" "Yes, I think it would" ...
##  $ Do.you.think.that.team.members.co.workers.would.view.you.more.negatively.if.they.knew.you.suffered.from.a.mental.health.issue.                                                  : chr  "No, I don't think they would" "No, I don't think they would" "Maybe" "Maybe" ...
##  $ How.willing.would.you.be.to.share.with.friends.and.family.that.you.have.a.mental.illness.                                                                                       : chr  "Somewhat open" "Somewhat open" "Somewhat open" "Neutral" ...
##  $ Have.you.observed.or.experienced.an.unsupportive.or.badly.handled.response.to.a.mental.health.issue.in.your.current.or.previous.workplace.                                      : chr  "No" "No" "Maybe/Not sure" "No" ...
##  $ Have.your.observations.of.how.another.individual.who.discussed.a.mental.health.disorder.made.you.less.likely.to.reveal.a.mental.health.issue.yourself.in.your.current.workplace.: chr  "" "" "Yes" "" ...
##  $ Do.you.have.a.family.history.of.mental.illness.                                                                                                                                 : chr  "No" "Yes" "No" "No" ...
##  $ Have.you.had.a.mental.health.disorder.in.the.past.                                                                                                                              : chr  "Yes" "Yes" "Maybe" "Yes" ...
##  $ Do.you.currently.have.a.mental.health.disorder.                                                                                                                                 : chr  "No" "Yes" "No" "Yes" ...
##  $ If.yes..what.condition.s..have.you.been.diagnosed.with.                                                                                                                         : chr  "" "Anxiety Disorder (Generalized, Social, Phobia, etc)|Mood Disorder (Depression, Bipolar Disorder, etc)" "" "Anxiety Disorder (Generalized, Social, Phobia, etc)|Mood Disorder (Depression, Bipolar Disorder, etc)" ...
##  $ If.maybe..what.condition.s..do.you.believe.you.have.                                                                                                                            : chr  "" "" "" "" ...
##  $ Have.you.been.diagnosed.with.a.mental.health.condition.by.a.medical.professional.                                                                                               : chr  "Yes" "Yes" "No" "Yes" ...
##  $ If.so..what.condition.s..were.you.diagnosed.with.                                                                                                                               : chr  "Anxiety Disorder (Generalized, Social, Phobia, etc)" "Anxiety Disorder (Generalized, Social, Phobia, etc)|Mood Disorder (Depression, Bipolar Disorder, etc)" "" "Anxiety Disorder (Generalized, Social, Phobia, etc)|Mood Disorder (Depression, Bipolar Disorder, etc)" ...
##  $ Have.you.ever.sought.treatment.for.a.mental.health.issue.from.a.mental.health.professional.                                                                                     : int  0 1 1 1 1 1 0 1 1 1 ...
##  $ If.you.have.a.mental.health.issue..do.you.feel.that.it.interferes.with.your.work.when.being.treated.effectively.                                                                : chr  "Not applicable to me" "Rarely" "Not applicable to me" "Sometimes" ...
##  $ If.you.have.a.mental.health.issue..do.you.feel.that.it.interferes.with.your.work.when.NOT.being.treated.effectively.                                                            : chr  "Not applicable to me" "Sometimes" "Not applicable to me" "Sometimes" ...
##  $ What.is.your.age.                                                                                                                                                               : int  39 29 38 43 43 42 30 37 44 30 ...
##  $ What.is.your.gender.                                                                                                                                                            : chr  "Male" "male" "Male " "male" ...
##  $ What.country.do.you.live.in.                                                                                                                                                    : chr  "United Kingdom" "United States of America" "United Kingdom" "United Kingdom" ...
##  $ What.US.state.or.territory.do.you.live.in.                                                                                                                                      : chr  "" "Illinois" "" "" ...
##  $ What.country.do.you.work.in.                                                                                                                                                    : chr  "United Kingdom" "United States of America" "United Kingdom" "United Kingdom" ...
##  $ What.US.state.or.territory.do.you.work.in.                                                                                                                                      : chr  "" "Illinois" "" "" ...
##  $ Which.of.the.following.best.describes.your.work.position.                                                                                                                       : chr  "Back-end Developer" "Back-end Developer|Front-end Developer" "Back-end Developer" "Supervisor/Team Lead" ...
##  $ Do.you.work.remotely.                                                                                                                                                           : chr  "Sometimes" "Never" "Always" "Sometimes" ...
# b. View the summary statistics for all the columns of the data frame.
summary(data)

## Step 2: Check for structural errors

# c. renaming long variable names
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# List the current (long, question-style) column names so that each one
# can be mapped to a shorter name in the rename() call below.
names(data)
##  [1] "Are.you.self.employed."                                                                                                                                                          
##  [2] "How.many.employees.does.your.company.or.organization.have."                                                                                                                      
##  [3] "Is.your.employer.primarily.a.tech.company.organization."                                                                                                                         
##  [4] "Is.your.primary.role.within.your.company.related.to.tech.IT."                                                                                                                    
##  [5] "Does.your.employer.provide.mental.health.benefits.as.part.of.healthcare.coverage."                                                                                               
##  [6] "Do.you.know.the.options.for.mental.health.care.available.under.your.employer.provided.coverage."                                                                                 
##  [7] "Has.your.employer.ever.formally.discussed.mental.health..for.example..as.part.of.a.wellness.campaign.or.other.official.communication.."                                          
##  [8] "Does.your.employer.offer.resources.to.learn.more.about.mental.health.concerns.and.options.for.seeking.help."                                                                     
##  [9] "Is.your.anonymity.protected.if.you.choose.to.take.advantage.of.mental.health.or.substance.abuse.treatment.resources.provided.by.your.employer."                                  
## [10] "If.a.mental.health.issue.prompted.you.to.request.a.medical.leave.from.work..asking.for.that.leave.would.be."                                                                     
## [11] "Do.you.think.that.discussing.a.mental.health.disorder.with.your.employer.would.have.negative.consequences."                                                                      
## [12] "Do.you.think.that.discussing.a.physical.health.issue.with.your.employer.would.have.negative.consequences."                                                                       
## [13] "Would.you.feel.comfortable.discussing.a.mental.health.disorder.with.your.coworkers."                                                                                             
## [14] "Would.you.feel.comfortable.discussing.a.mental.health.disorder.with.your.direct.supervisor.s.."                                                                                  
## [15] "Do.you.feel.that.your.employer.takes.mental.health.as.seriously.as.physical.health."                                                                                             
## [16] "Have.you.heard.of.or.observed.negative.consequences.for.co.workers.who.have.been.open.about.mental.health.issues.in.your.workplace."                                             
## [17] "Do.you.have.medical.coverage..private.insurance.or.state.provided..which.includes.treatment.of.Â.mental.health.issues."                                                          
## [18] "Do.you.know.local.or.online.resources.to.seek.help.for.a.mental.health.disorder."                                                                                                
## [19] "If.you.have.been.diagnosed.or.treated.for.a.mental.health.disorder..do.you.ever.reveal.this.to.clients.or.business.contacts."                                                    
## [20] "If.you.have.revealed.a.mental.health.issue.to.a.client.or.business.contact..do.you.believe.this.has.impacted.you.negatively."                                                    
## [21] "If.you.have.been.diagnosed.or.treated.for.a.mental.health.disorder..do.you.ever.reveal.this.to.coworkers.or.employees."                                                          
## [22] "If.you.have.revealed.a.mental.health.issue.to.a.coworker.or.employee..do.you.believe.this.has.impacted.you.negatively."                                                          
## [23] "Do.you.believe.your.productivity.is.ever.affected.by.a.mental.health.issue."                                                                                                     
## [24] "If.yes..what.percentage.of.your.work.time..time.performing.primary.or.secondary.job.functions..is.affected.by.a.mental.health.issue."                                            
## [25] "Do.you.have.previous.employers."                                                                                                                                                 
## [26] "Have.your.previous.employers.provided.mental.health.benefits."                                                                                                                   
## [27] "Were.you.aware.of.the.options.for.mental.health.care.provided.by.your.previous.employers."                                                                                       
## [28] "Did.your.previous.employers.ever.formally.discuss.mental.health..as.part.of.a.wellness.campaign.or.other.official.communication.."                                               
## [29] "Did.your.previous.employers.provide.resources.to.learn.more.about.mental.health.issues.and.how.to.seek.help."                                                                    
## [30] "Was.your.anonymity.protected.if.you.chose.to.take.advantage.of.mental.health.or.substance.abuse.treatment.resources.with.previous.employers."                                    
## [31] "Do.you.think.that.discussing.a.mental.health.disorder.with.previous.employers.would.have.negative.consequences."                                                                 
## [32] "Do.you.think.that.discussing.a.physical.health.issue.with.previous.employers.would.have.negative.consequences."                                                                  
## [33] "Would.you.have.been.willing.to.discuss.a.mental.health.issue.with.your.previous.co.workers."                                                                                     
## [34] "Would.you.have.been.willing.to.discuss.a.mental.health.issue.with.your.direct.supervisor.s.."                                                                                    
## [35] "Did.you.feel.that.your.previous.employers.took.mental.health.as.seriously.as.physical.health."                                                                                   
## [36] "Did.you.hear.of.or.observe.negative.consequences.for.co.workers.with.mental.health.issues.in.your.previous.workplaces."                                                          
## [37] "Would.you.be.willing.to.bring.up.a.physical.health.issue.with.a.potential.employer.in.an.interview."                                                                             
## [38] "Why.or.why.not."                                                                                                                                                                 
## [39] "Would.you.bring.up.a.mental.health.issue.with.a.potential.employer.in.an.interview."                                                                                             
## [40] "Why.or.why.not..1"                                                                                                                                                               
## [41] "Do.you.feel.that.being.identified.as.a.person.with.a.mental.health.issue.would.hurt.your.career."                                                                                
## [42] "Do.you.think.that.team.members.co.workers.would.view.you.more.negatively.if.they.knew.you.suffered.from.a.mental.health.issue."                                                  
## [43] "How.willing.would.you.be.to.share.with.friends.and.family.that.you.have.a.mental.illness."                                                                                       
## [44] "Have.you.observed.or.experienced.an.unsupportive.or.badly.handled.response.to.a.mental.health.issue.in.your.current.or.previous.workplace."                                      
## [45] "Have.your.observations.of.how.another.individual.who.discussed.a.mental.health.disorder.made.you.less.likely.to.reveal.a.mental.health.issue.yourself.in.your.current.workplace."
## [46] "Do.you.have.a.family.history.of.mental.illness."                                                                                                                                 
## [47] "Have.you.had.a.mental.health.disorder.in.the.past."                                                                                                                              
## [48] "Do.you.currently.have.a.mental.health.disorder."                                                                                                                                 
## [49] "If.yes..what.condition.s..have.you.been.diagnosed.with."                                                                                                                         
## [50] "If.maybe..what.condition.s..do.you.believe.you.have."                                                                                                                            
## [51] "Have.you.been.diagnosed.with.a.mental.health.condition.by.a.medical.professional."                                                                                               
## [52] "If.so..what.condition.s..were.you.diagnosed.with."                                                                                                                               
## [53] "Have.you.ever.sought.treatment.for.a.mental.health.issue.from.a.mental.health.professional."                                                                                     
## [54] "If.you.have.a.mental.health.issue..do.you.feel.that.it.interferes.with.your.work.when.being.treated.effectively."                                                                
## [55] "If.you.have.a.mental.health.issue..do.you.feel.that.it.interferes.with.your.work.when.NOT.being.treated.effectively."                                                            
## [56] "What.is.your.age."                                                                                                                                                               
## [57] "What.is.your.gender."                                                                                                                                                            
## [58] "What.country.do.you.live.in."                                                                                                                                                    
## [59] "What.US.state.or.territory.do.you.live.in."                                                                                                                                      
## [60] "What.country.do.you.work.in."                                                                                                                                                    
## [61] "What.US.state.or.territory.do.you.work.in."                                                                                                                                      
## [62] "Which.of.the.following.best.describes.your.work.position."                                                                                                                       
## [63] "Do.you.work.remotely."
data <- data %>% rename(self_employed = Are.you.self.employed.,
                         employees = How.many.employees.does.your.company.or.organization.have.,
                         company_role = Is.your.employer.primarily.a.tech.company.organization.,
                         primary_role = Is.your.primary.role.within.your.company.related.to.tech.IT.,
                         mh_benefit_options = Do.you.know.the.options.for.mental.health.care.available.under.your.employer.provided.coverage.,
                         mh_benefits = Does.your.employer.provide.mental.health.benefits.as.part.of.healthcare.coverage.,
                         mh_discussion = Has.your.employer.ever.formally.discussed.mental.health..for.example..as.part.of.a.wellness.campaign.or.other.official.communication..,
                         mh_resources = Does.your.employer.offer.resources.to.learn.more.about.mental.health.concerns.and.options.for.seeking.help.,
                         mh_anonymity = Is.your.anonymity.protected.if.you.choose.to.take.advantage.of.mental.health.or.substance.abuse.treatment.resources.provided.by.your.employer.,
                         mh_medical_leave = If.a.mental.health.issue.prompted.you.to.request.a.medical.leave.from.work..asking.for.that.leave.would.be.,
                         mh_discussion_negative = Do.you.think.that.discussing.a.mental.health.disorder.with.your.employer.would.have.negative.consequences.,
                         ph_discussion_negative = Do.you.think.that.discussing.a.physical.health.issue.with.your.employer.would.have.negative.consequences.,
                         mh_discussion_coworkers = Would.you.feel.comfortable.discussing.a.mental.health.disorder.with.your.coworkers.,
                         mh_discussion_supervisor = Would.you.feel.comfortable.discussing.a.mental.health.disorder.with.your.direct.supervisor.s..,
                         mh_ph_serious = Do.you.feel.that.your.employer.takes.mental.health.as.seriously.as.physical.health.,
                         mh_coworker_consequences = Have.you.heard.of.or.observed.negative.consequences.for.co.workers.who.have.been.open.about.mental.health.issues.in.your.workplace.,
                         mh_local_online_resources = Do.you.know.local.or.online.resources.to.seek.help.for.a.mental.health.disorder.,
                         mh_dx_reveal_contacts = If.you.have.been.diagnosed.or.treated.for.a.mental.health.disorder..do.you.ever.reveal.this.to.clients.or.business.contacts.,
                         mh_dx_reveal_contacts_impact = If.you.have.revealed.a.mental.health.issue.to.a.client.or.business.contact..do.you.believe.this.has.impacted.you.negatively.,
                         mh_dx_reveal_coworkers = If.you.have.been.diagnosed.or.treated.for.a.mental.health.disorder..do.you.ever.reveal.this.to.coworkers.or.employees.,
                         mh_dx_reveal_coworkers_impact = If.you.have.revealed.a.mental.health.issue.to.a.coworker.or.employee..do.you.believe.this.has.impacted.you.negatively.,
                         mh_productivity = Do.you.believe.your.productivity.is.ever.affected.by.a.mental.health.issue.,
                         mh_productivity_percent = If.yes..what.percentage.of.your.work.time..time.performing.primary.or.secondary.job.functions..is.affected.by.a.mental.health.issue.,
                         previous_employers = Do.you.have.previous.employers.,
                         previous_employers_mhbenefits = Have.your.previous.employers.provided.mental.health.benefits.,
                         previous_employers_mhbenefits_aware = Were.you.aware.of.the.options.for.mental.health.care.provided.by.your.previous.employers.,
                         previous_employers_mhbenefits_discuss = Did.your.previous.employers.ever.formally.discuss.mental.health..as.part.of.a.wellness.campaign.or.other.official.communication..,
                         previous_employers_resources = Did.your.previous.employers.provide.resources.to.learn.more.about.mental.health.issues.and.how.to.seek.help.,
                         previous_employers_anonymtity = Was.your.anonymity.protected.if.you.chose.to.take.advantage.of.mental.health.or.substance.abuse.treatment.resources.with.previous.employers.,
                         previous_employers_mh_discuss_impact = Do.you.think.that.discussing.a.mental.health.disorder.with.previous.employers.would.have.negative.consequences.,
                         previous_employers_ph_discuss_impact = Do.you.think.that.discussing.a.physical.health.issue.with.previous.employers.would.have.negative.consequences.,
                         previous_coworkers_mh_discuss = Would.you.have.been.willing.to.discuss.a.mental.health.issue.with.your.previous.co.workers.,
                         previous_supervisor_mh_discuss = Would.you.have.been.willing.to.discuss.a.mental.health.issue.with.your.direct.supervisor.s..,
                         previous_employers_mh_ph_serious = Did.you.feel.that.your.previous.employers.took.mental.health.as.seriously.as.physical.health.,
                         previous_employers_mh_coworkers_consequences = Did.you.hear.of.or.observe.negative.consequences.for.co.workers.with.mental.health.issues.in.your.previous.workplaces.,
                         ph_interview = Would.you.be.willing.to.bring.up.a.physical.health.issue.with.a.potential.employer.in.an.interview.,
                         ph_interview_why =Why.or.why.not.,
                         mh_interview = Would.you.bring.up.a.mental.health.issue.with.a.potential.employer.in.an.interview.,
                         mh_interview_why = Why.or.why.not..1,
                         mh_id_career_impact = Do.you.feel.that.being.identified.as.a.person.with.a.mental.health.issue.would.hurt.your.career.,
                         mh_id_coworkers_impact = Do.you.think.that.team.members.co.workers.would.view.you.more.negatively.if.they.knew.you.suffered.from.a.mental.health.issue.,
                         mh_family_friends = How.willing.would.you.be.to.share.with.friends.and.family.that.you.have.a.mental.illness.,
                         mh_unsupportive_workplace = Have.you.observed.or.experienced.an.unsupportive.or.badly.handled.response.to.a.mental.health.issue.in.your.current.or.previous.workplace.,
                         mh_other_impact = Have.your.observations.of.how.another.individual.who.discussed.a.mental.health.disorder.made.you.less.likely.to.reveal.a.mental.health.issue.yourself.in.your.current.workplace.,
                         mi_family = Do.you.have.a.family.history.of.mental.illness.,
                         mh_past = Have.you.had.a.mental.health.disorder.in.the.past.,
                         mh_current = Do.you.currently.have.a.mental.health.disorder.,
                         mh_current_specify = If.yes..what.condition.s..have.you.been.diagnosed.with.,
                         mh_maybe_specify = If.maybe..what.condition.s..do.you.believe.you.have.,
                         mh_dx = Have.you.been.diagnosed.with.a.mental.health.condition.by.a.medical.professional.,
                         mh_dx_specify = If.so..what.condition.s..were.you.diagnosed.with.,
                         mh_treatment = Have.you.ever.sought.treatment.for.a.mental.health.issue.from.a.mental.health.professional.,
                         mh_interfere_treated = If.you.have.a.mental.health.issue..do.you.feel.that.it.interferes.with.your.work.when.being.treated.effectively.,
                         mh_interfere_not_treated = If.you.have.a.mental.health.issue..do.you.feel.that.it.interferes.with.your.work.when.NOT.being.treated.effectively.,
                         age = What.is.your.age.,
                         gender = What.is.your.gender.,
                         resident_country = What.country.do.you.live.in.,
                         resident_state_territory = What.US.state.or.territory.do.you.live.in.,
                         work_state_territory = What.US.state.or.territory.do.you.work.in.,
                         work_country = What.country.do.you.work.in.,
                         work_position = Which.of.the.following.best.describes.your.work.position.,
                         work_remote = Do.you.work.remotely.) 
# d. Look for duplicated rows: one logical per row, TRUE where the row repeats
#    an earlier one (fromLast = FALSE is the default scan direction)
duplicated(data, fromLast = FALSE)
##    [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##   [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##   [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##   [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##   [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##   [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##   [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##   [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##   [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [193] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [205] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [217] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [229] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [253] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [265] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [277] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [289] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [301] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [313] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [325] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [337] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [349] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [361] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [373] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [385] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [397] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [409] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [421] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [433] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [445] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [457] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [469] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [481] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [493] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [505] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [517] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [529] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [541] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [553] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [565] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [577] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [589] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [601] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [613] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [625] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [637] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [649] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [661] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [673] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [685] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [697] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [709] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [721] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [733] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [745] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [757] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [769] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [781] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [793] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [805] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [817] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [829] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [841] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [853] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [865] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [877] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [889] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [901] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [913] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [925] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [937] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [949] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [961] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [973] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [985] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [997] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1009] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1021] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1033] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1045] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1057] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1069] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1081] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1093] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1105] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1117] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1129] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1141] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1153] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1165] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1177] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1189] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1201] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1213] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1225] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1237] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1249] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1261] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1273] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1285] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1297] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1309] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1321] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1333] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1345] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1357] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1369] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1381] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1393] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1405] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1417] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [1429] FALSE FALSE FALSE FALSE FALSE
# anyDuplicated() gives the index of the first repeated row, or 0 when none
anyDuplicated(data) > 0
## [1] FALSE
# Keep only the rows that are not repeats of an earlier row
data <- data[which(!duplicated(data)), ]

# e. Clean irregular gender values: start by listing every distinct answer
unique(data[["gender"]])
##  [1] "Male"                                                                                                                                                         
##  [2] "male"                                                                                                                                                         
##  [3] "Male "                                                                                                                                                        
##  [4] "Female"                                                                                                                                                       
##  [5] "M"                                                                                                                                                            
##  [6] "female"                                                                                                                                                       
##  [7] "m"                                                                                                                                                            
##  [8] "I identify as female."                                                                                                                                        
##  [9] "female "                                                                                                                                                      
## [10] "Bigender"                                                                                                                                                     
## [11] "non-binary"                                                                                                                                                   
## [12] "Female assigned at birth "                                                                                                                                    
## [13] "F"                                                                                                                                                            
## [14] "Woman"                                                                                                                                                        
## [15] "man"                                                                                                                                                          
## [16] "fm"                                                                                                                                                           
## [17] "f"                                                                                                                                                            
## [18] "Cis female "                                                                                                                                                  
## [19] "Transitioned, M2F"                                                                                                                                            
## [20] "Genderfluid (born female)"                                                                                                                                    
## [21] "Other/Transfeminine"                                                                                                                                          
## [22] "Female or Multi-Gender Femme"                                                                                                                                 
## [23] "Female "                                                                                                                                                      
## [24] "woman"                                                                                                                                                        
## [25] "female/woman"                                                                                                                                                 
## [26] "Cis male"                                                                                                                                                     
## [27] "Male."                                                                                                                                                        
## [28] "Androgynous"                                                                                                                                                  
## [29] "male 9:1 female, roughly"                                                                                                                                     
## [30] "N/A"                                                                                                                                                          
## [31] "Male (cis)"                                                                                                                                                   
## [32] "Other"                                                                                                                                                        
## [33] "nb masculine"                                                                                                                                                 
## [34] "Cisgender Female"                                                                                                                                             
## [35] "Man"                                                                                                                                                          
## [36] "Sex is male"                                                                                                                                                  
## [37] "none of your business"                                                                                                                                        
## [38] "genderqueer"                                                                                                                                                  
## [39] "cis male"                                                                                                                                                     
## [40] "Human"                                                                                                                                                        
## [41] "Genderfluid"                                                                                                                                                  
## [42] "Enby"                                                                                                                                                         
## [43] "Malr"                                                                                                                                                         
## [44] "genderqueer woman"                                                                                                                                            
## [45] "mtf"                                                                                                                                                          
## [46] "Queer"                                                                                                                                                        
## [47] "Agender"                                                                                                                                                      
## [48] "Dude"                                                                                                                                                         
## [49] "Fluid"                                                                                                                                                        
## [50] "I'm a man why didn't you make this a drop down question. You should of asked sex? And I would of answered yes please. Seriously how much text can this take? "
## [51] "mail"                                                                                                                                                         
## [52] "M|"                                                                                                                                                           
## [53] "Male/genderqueer"                                                                                                                                             
## [54] "fem"                                                                                                                                                          
## [55] "Nonbinary"                                                                                                                                                    
## [56] "male "                                                                                                                                                        
## [57] "human"                                                                                                                                                        
## [58] "Female (props for making this a freeform field, though)"                                                                                                      
## [59] " Female"                                                                                                                                                      
## [60] "Unicorn"                                                                                                                                                      
## [61] "Cis Male"                                                                                                                                                     
## [62] "Male (trans, FtM)"                                                                                                                                            
## [63] "Cis-woman"                                                                                                                                                    
## [64] "Genderqueer"                                                                                                                                                  
## [65] "cisdude"                                                                                                                                                      
## [66] "Genderflux demi-girl"                                                                                                                                         
## [67] "female-bodied; no feelings about gender"                                                                                                                      
## [68] "cis man"                                                                                                                                                      
## [69] ""                                                                                                                                                             
## [70] "AFAB"                                                                                                                                                         
## [71] "Transgender woman"                                                                                                                                            
## [72] "MALE"
table(data$gender)

# Recode free-text gender answers to a numeric indicator:
#   1 = female-identifying, 0 = male-identifying, NA = anything else.
#
# Answers are matched as whole (trimmed, lower-cased) strings against explicit
# lists. Substituting "f"/"m" inside the text instead would turn "fm" into 10
# and leave answers such as "Woman" or "Cis male" unparseable.
#
# x: character (or factor) vector of raw answers.
# Returns a numeric vector of the same length as x containing 1, 0 or NA.
recode_gender <- function(x) {
  answer <- tolower(trimws(as.character(x)))

  female_answers <- c(
    "f", "fem", "female", "woman", "female/woman",
    "cis female", "cis-woman", "cisgender female",
    "i identify as female.",
    "female (props for making this a freeform field, though)",
    "transitioned, m2f", "mtf", "transgender woman"
  )
  male_answers <- c(
    "m", "m|", "male", "male.", "malr", "mail", "man", "dude",
    "cis male", "cis man", "cisdude", "male (cis)",
    "male (trans, ftm)"
  )

  # Blank, non-binary, ambiguous ("fm", "male/genderqueer") and
  # assigned-sex-only ("afab", "sex is male") answers stay NA.
  recoded <- rep(NA_real_, length(answer))
  recoded[answer %in% female_answers] <- 1
  # grepl() returns FALSE for NA input, so the index never contains NA. The
  # pattern catches the one long free-text answer that opens with "I'm a man".
  recoded[answer %in% male_answers | grepl("^i'm a man\\b", answer)] <- 0
  recoded
}

data$gender <- recode_gender(data$gender)
# NOTE(review): the vector recorded below predates this whole-answer recoding
# (see the 10 at position 104, presumably from "fm"); re-run to refresh it.
data$gender
##    [1]  0  0  0  0  1  0  0  1  1  0  0  0  1  0  0  0 NA  0  1  0  0  0  0  0
##   [25]  0  0  0  0  1 NA NA  0  0  0  0  0  0  0  0  0  0  0 NA  0  0  0  0  0
##   [49]  0  1  0  0  0  0  0 NA  0  0  1  0 NA  0  0  0  0  0  1  0  0  0  0  0
##   [73]  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  1  0
##   [97]  1  0  0  1  0  0  0 10  0  0  1  1  0  0  0  0  0  0  1  0  0  1  1  0
##  [121]  0  1  0 NA  1  1  1  0  0  1  0  0  0  0  0  0  0  0  1  0 NA  1  1  0
##  [145]  1  0  0  0 NA  0  0  0  1  0  1  0  0  0  0  0  0  1  1  1  0  0  1  0
##  [169]  0  0  0  0  0  0  0  0  1  1  0  0  0  0 NA  0  0  0  0  1  0  0  0  0
##  [193]  0  0  1  0  0  0  0  0  1  0  0 NA  0  0  0  0  0  0  0  0  1  0  0  0
##  [217]  0  0  0  0  0  0  0  1  0  1  0  0  0  1  1  0  0  1  0  0  0  0  0  0
##  [241]  0  0  1  0  0  1  0 NA  0  0  1  1  1  0  0  0  0  0  1  0  0  1  0  1
##  [265]  0  0  1  1  1  1  0  0  0  0  0  0  0  0  0 NA NA  0  1  0  0  0  1  0
##  [289]  0  0  0 NA  0  0  1  0  0  1  1  0 NA  0 NA  0  1  0  0  1  0  0  1  0
##  [313]  0  0  0  1  0  0  0  0  1  1  1  1  1  1  0  0  0  0  0  1  0  0  0  0
##  [337]  1  0  1  0  0  0  0  0  1  1  0  0  1  0  0  0  0  0  0  0  0  1  0  0
##  [361]  0  1  0  0  0  1  0  1 NA  1  1  0 NA  0  0  0  1  1  1  1  0  0  1  0
##  [385]  1  0  0  1  1  0  1  1  0  0  0  0  1 NA  0  1  0  0  0  1  0  0  0  0
##  [409]  0  0  0  0  1  0  0  0  1  1  0  0 NA  1  0  0  0  0  0  1  0 NA  0  0
##  [433]  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  1  0  0  0  0  0  0  1  0
##  [457]  0  1  0  0  0  1  0  0 NA  0  0  1  0  0  0  0  0  0  1  1  0 NA  0  0
##  [481]  0  0  0  0  1  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  1 NA  0
##  [505]  0  0  0  1  0  0  1  0  0  0  1  0  0  0  1  1  1  0  0  0  0  0  0  1
##  [529]  0  0  0  0  0  0  0  0  1  0  0  0  1  0  0  0  1  0  0  0  0  0  0  0
##  [553]  0  0  0  0  0  0  1  1  0 NA  0  0  0  0  1  0  0  0  0  1  0  0  1  0
##  [577]  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  1  0  0  0
##  [601]  0  0  0  0  0  0  1  0  0  0  1  0  0  0  0  0  0 NA  0  0 NA  0  0  1
##  [625] NA  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0 NA  0  0  0  1
##  [649] NA  0  0 NA  0  1  0  0  0  0  1  0  1  0  1  1  0  0  0  0  1  1  1  1
##  [673]  0 NA  0  0  1  0  1  1  1  0  0  1  0  0  1  1  0  0  0  0  0  0  0  0
##  [697]  1  1 NA  1  1  0  0  0  0  0  0  0  0  0  1 NA  1  0  0  1  0  0  1  0
##  [721]  1  0  0  0  0  0  1  0  0  0  0  0  0  0  1  1  0  0  0  0  0  1 NA  1
##  [745]  1  0  0  0  0  0 NA  0  1  1  0  0  1  1  0  0  0  1  0  1  0  0  0  0
##  [769]  0  1  0  0  0 NA  1  0  0  0  0  1  1  0  1  1  0  0 NA NA  0  0  0  1
##  [793]  0  0  0  1  0 NA  0  0  1  1  0  0  0  0  0  0  0  0  1  0  0  0  0  0
##  [817]  1  0  0  0  0  0  1  0  0  0  0  0  1  0  1  1  0  0  0  0  0  0  0  0
##  [841] NA  0  0  0  0  0 NA  0  1  0  0  0  0  0  0 NA  1  1  0  0  0  1  0  0
##  [865]  0  0  0  0  1  0  1  0  0  0  0  1  0  0  1  0  1  0  0  0  0  1  0  1
##  [889]  0  0  0  0  0  0  0  0  0  1  0  0  0  1  0  0  0  1  0  1  1  0  0  0
##  [913]  1  0  1  0  0  0  1  0  0  0  0  0  0 NA  1  1  0  0 NA  0  0  1  0  0
##  [937]  0  0  0  1  0  0  1  0  0  0  0  0  1  0  0  0  0  0  0  0  0  1  0  0
##  [961]  0  0  1  0  0  0  0  0  0  0  1  0  1  0  0  0  0  0  0  1  0  0  0  0
##  [985]  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0 NA  0  0  0
## [1009]  1  1  0  0  1  1  0  0  0  1  0  0  0  0  1  0  0  0  0  0  0  0  1  1
## [1033]  1  1  0  0  1  0  0  0  0  0  0  0  0  0  0  0  1  1  0  0  0  0  0  1
## [1057]  0  0  0  0  0  0 NA  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 NA  0
## [1081]  0  0  1  1  0  1  0 NA  0  0 NA  1  0  0  0  0  0  0  1  0  0  1 NA  0
## [1105]  0  0  0  1  0  1  0  0  0  0  0  0  0  1  0  0  0  0  0  0  1  1  0  0
## [1129]  1  0  0  0  1  0  1  0  1  1  0  0 NA  0  0  0 NA  0  0  0  0  1  0  0
## [1153]  0  0  1  0  0  1  0  0  0  0  1  0  0  1  1  1  0  0  0  0  0  0 NA  0
## [1177]  0  0  1  0  0  1  1  0  0  0  0  0  0  0  0  0  1  1  1  1  1  0  0  0
## [1201]  0  0  1  0  0  0  0  1  1  1  0  0  1  0  1  1  0  0  0  0  1  0  0  0
## [1225]  1  0  0 NA  0  0  0  0  0 NA  0 NA NA  0  0  0  0  0  0  0  0  0  0  0
## [1249]  1  0 NA  1  1  0  0  0  0  0  0  0  0  0  1  0  1  0  0  0  0  0  1  0
## [1273]  0 NA  0  0 NA  1  0  0  0  0  0  0  0  0  1  0  0  0  1  0 NA  0  1  0
## [1297]  0  0  0  0  0  0  0  1  0  0  1  0  0  0  0 NA NA  1  0  1  1  0  1  0
## [1321]  1  1 NA  1  0  0  1  1  0  0  1  1  0  0  0  0  1  0  0  0  0  1  0  0
## [1345]  1  0  1  0  0  0  0  1  0  1  0  0  0  0  0  1  0  0  0  0  1  1  0  0
## [1369]  0  0  0  0  1  0 NA  0  0  0  1  1  1  0  0  0  0  0  1  1  0  0  0  0
## [1393]  1 NA  0  0  0  0  0  0  1  0  0  0  0  0 NA  0  0  0  0  1  0  1  1  1
## [1417]  0  0  0  0  0  0  0  0  1  0  1  1  1  0  0  1 NA
#filtering self-employed and company-employed into separate data frame
df_self <- data %>% filter(self_employed == 1)
data <- data %>% filter(self_employed == 0)

# Remove columns that carry duplicate information or that only apply to the
# other respondent group (self-employed vs. company-employed).

# Company-employed frame
data <- select(
  data,
  -c(
    "mh_local_online_resources",
    "mh_dx_reveal_contacts",
    "mh_dx_reveal_contacts_impact",
    "mh_dx_reveal_coworkers",
    "mh_dx_reveal_coworkers_impact",
    "mh_productivity",
    "mh_productivity_percent",
    "mh_maybe_specify",
    "mh_dx_specify",
    "self_employed"
  )
)

# Self-employed frame
df_self <- select(
  df_self,
  -c(
    "employees",
    "mh_benefits",
    "mh_benefit_options",
    "mh_discussion",
    "mh_resources",
    "mh_anonymity",
    "mh_medical_leave",
    "mh_discussion_negative",
    "ph_discussion_negative",
    "mh_discussion_coworkers",
    "mh_discussion_supervisor",
    "mh_ph_serious",
    "mh_coworker_consequences",
    "mh_maybe_specify",
    "mh_dx_specify",
    "self_employed"
  )
)

## Step 3: Deal with missing values

# Total number of missing cells in the company-employed frame, then in the
# self-employed frame
data %>% is.na() %>% sum()
## [1] 2088
df_self %>% is.na() %>% sum()
## [1] 586
# Identify the rows with NAs (company employed).
# complete.cases() returns one flag per ROW, so the mask lines up with
# rownames(data). The previous apply(data, 2, anyNA) tested each COLUMN, and
# its per-column mask was silently recycled across the row names, listing
# rows unrelated to where the NAs are. Any output recorded from that earlier
# call is stale and should be regenerated.
rownames(data)[!complete.cases(data)]
##  [1] "3"    "16"   "30"   "47"   "56"   "69"   "83"   "100"  "109"  "122" 
## [11] "136"  "153"  "162"  "175"  "189"  "206"  "215"  "228"  "242"  "259" 
## [21] "268"  "281"  "295"  "312"  "321"  "334"  "348"  "365"  "374"  "387" 
## [31] "401"  "418"  "427"  "440"  "454"  "471"  "480"  "493"  "507"  "524" 
## [41] "533"  "546"  "560"  "577"  "586"  "599"  "613"  "630"  "639"  "652" 
## [51] "666"  "683"  "692"  "705"  "719"  "736"  "745"  "758"  "772"  "789" 
## [61] "798"  "811"  "825"  "842"  "851"  "864"  "878"  "895"  "904"  "917" 
## [71] "931"  "948"  "957"  "970"  "984"  "1001" "1010" "1023" "1037" "1054"
## [81] "1063" "1076" "1090" "1107" "1116" "1129" "1143"
# Identify the rows with NAs (self employed).
# complete.cases() returns one flag per ROW, so the mask lines up with
# rownames(df_self). The previous apply(df_self, 2, anyNA) tested each COLUMN,
# and its per-column mask was silently recycled across the row names. Any
# output recorded from that earlier call is stale and should be regenerated.
rownames(df_self)[!complete.cases(df_self)]
##  [1] "1"   "2"   "41"  "48"  "49"  "88"  "95"  "96"  "135" "142" "143" "182"
## [13] "189" "190" "229" "236" "237" "276" "283" "284"
# Use colMeans() to calculate the percentage of missing values in each column,
# then remove the columns with a high percentage of missing values using indexing.

##--data (company employed)
# Percentage of missing values in each column
missing_pct <- colMeans(is.na(data)) * 100
# Positions of the columns with more than 50% missing values
cols_to_drop <- which(missing_pct > 50)
# Keep every other column. A positive index is used on purpose: when no column
# exceeds the threshold, data[, -cols_to_drop] becomes data[, integer(0)] and
# returns a frame with NO columns. drop = FALSE keeps the result a data frame
# even if only one column survives.
data_clean <- data[, setdiff(seq_along(data), cols_to_drop), drop = FALSE]

##--df_self (self employed)
# Percentage of missing values in each column
missing_pct <- colMeans(is.na(df_self)) * 100
# Positions of the columns with more than 50% missing values
cols_to_drop <- which(missing_pct > 50)
# Keep every other column. A positive index is used on purpose: when no column
# exceeds the threshold, df_self[, -cols_to_drop] becomes df_self[, integer(0)]
# and returns a frame with NO columns. drop = FALSE keeps the result a data
# frame even if only one column survives.
df_self_clean <- df_self[, setdiff(seq_along(df_self), cols_to_drop), drop = FALSE]
# Number of missing values remaining in each column of data_clean
data_clean %>% is.na() %>% colSums()
##                                    employees 
##                                            0 
##                                 company_role 
##                                            0 
##                                  mh_benefits 
##                                            0 
##                           mh_benefit_options 
##                                            0 
##                                mh_discussion 
##                                            0 
##                                 mh_resources 
##                                            0 
##                                 mh_anonymity 
##                                            0 
##                             mh_medical_leave 
##                                            0 
##                       mh_discussion_negative 
##                                            0 
##                       ph_discussion_negative 
##                                            0 
##                      mh_discussion_coworkers 
##                                            0 
##                     mh_discussion_supervisor 
##                                            0 
##                                mh_ph_serious 
##                                            0 
##                     mh_coworker_consequences 
##                                            0 
##                           previous_employers 
##                                            0 
##                previous_employers_mhbenefits 
##                                            0 
##          previous_employers_mhbenefits_aware 
##                                            0 
##        previous_employers_mhbenefits_discuss 
##                                            0 
##                 previous_employers_resources 
##                                            0 
##                previous_employers_anonymtity 
##                                            0 
##         previous_employers_mh_discuss_impact 
##                                            0 
##         previous_employers_ph_discuss_impact 
##                                            0 
##                previous_coworkers_mh_discuss 
##                                            0 
##               previous_supervisor_mh_discuss 
##                                            0 
##             previous_employers_mh_ph_serious 
##                                            0 
## previous_employers_mh_coworkers_consequences 
##                                            0 
##                                 ph_interview 
##                                            0 
##                             ph_interview_why 
##                                            1 
##                                 mh_interview 
##                                            0 
##                             mh_interview_why 
##                                            0 
##                          mh_id_career_impact 
##                                            0 
##                       mh_id_coworkers_impact 
##                                            0 
##                            mh_family_friends 
##                                            0 
##                    mh_unsupportive_workplace 
##                                            0 
##                              mh_other_impact 
##                                            0 
##                                    mi_family 
##                                            0 
##                                      mh_past 
##                                            0 
##                                   mh_current 
##                                            0 
##                           mh_current_specify 
##                                            0 
##                                        mh_dx 
##                                            0 
##                                 mh_treatment 
##                                            0 
##                         mh_interfere_treated 
##                                            0 
##                     mh_interfere_not_treated 
##                                            0 
##                                          age 
##                                            0 
##                                       gender 
##                                           58 
##                             resident_country 
##                                            0 
##                     resident_state_territory 
##                                            0 
##                                 work_country 
##                                            0 
##                         work_state_territory 
##                                            0 
##                                work_position 
##                                            0 
##                                  work_remote 
##                                            0
# Number of missing values remaining in each column of df_self_clean
df_self_clean %>% is.na() %>% colSums()
## Do.you.have.medical.coverage..private.insurance.or.state.provided..which.includes.treatment.of.Â.mental.health.issues. 
##                                                                                                                      0 
##                                                                                              mh_local_online_resources 
##                                                                                                                      0 
##                                                                                                  mh_dx_reveal_contacts 
##                                                                                                                      0 
##                                                                                           mh_dx_reveal_contacts_impact 
##                                                                                                                      0 
##                                                                                                 mh_dx_reveal_coworkers 
##                                                                                                                      0 
##                                                                                          mh_dx_reveal_coworkers_impact 
##                                                                                                                      0 
##                                                                                                        mh_productivity 
##                                                                                                                      0 
##                                                                                                mh_productivity_percent 
##                                                                                                                      0 
##                                                                                                     previous_employers 
##                                                                                                                      0 
##                                                                                          previous_employers_mhbenefits 
##                                                                                                                      0 
##                                                                                    previous_employers_mhbenefits_aware 
##                                                                                                                      0 
##                                                                                  previous_employers_mhbenefits_discuss 
##                                                                                                                      0 
##                                                                                           previous_employers_resources 
##                                                                                                                      0 
##                                                                                          previous_employers_anonymtity 
##                                                                                                                      0 
##                                                                                   previous_employers_mh_discuss_impact 
##                                                                                                                      0 
##                                                                                   previous_employers_ph_discuss_impact 
##                                                                                                                      0 
##                                                                                          previous_coworkers_mh_discuss 
##                                                                                                                      0 
##                                                                                         previous_supervisor_mh_discuss 
##                                                                                                                      0 
##                                                                                       previous_employers_mh_ph_serious 
##                                                                                                                      0 
##                                                                           previous_employers_mh_coworkers_consequences 
##                                                                                                                      0 
##                                                                                                           ph_interview 
##                                                                                                                      0 
##                                                                                                       ph_interview_why 
##                                                                                                                      0 
##                                                                                                           mh_interview 
##                                                                                                                      0 
##                                                                                                       mh_interview_why 
##                                                                                                                      0 
##                                                                                                    mh_id_career_impact 
##                                                                                                                      0 
##                                                                                                 mh_id_coworkers_impact 
##                                                                                                                      0 
##                                                                                                      mh_family_friends 
##                                                                                                                      0 
##                                                                                              mh_unsupportive_workplace 
##                                                                                                                      0 
##                                                                                                        mh_other_impact 
##                                                                                                                      0 
##                                                                                                              mi_family 
##                                                                                                                      0 
##                                                                                                                mh_past 
##                                                                                                                      0 
##                                                                                                             mh_current 
##                                                                                                                      0 
##                                                                                                     mh_current_specify 
##                                                                                                                      0 
##                                                                                                                  mh_dx 
##                                                                                                                      0 
##                                                                                                           mh_treatment 
##                                                                                                                      0 
##                                                                                                   mh_interfere_treated 
##                                                                                                                      0 
##                                                                                               mh_interfere_not_treated 
##                                                                                                                      0 
##                                                                                                                    age 
##                                                                                                                      0 
##                                                                                                                 gender 
##                                                                                                                     12 
##                                                                                                       resident_country 
##                                                                                                                      0 
##                                                                                               resident_state_territory 
##                                                                                                                      0 
##                                                                                                           work_country 
##                                                                                                                      0 
##                                                                                                   work_state_territory 
##                                                                                                                      0 
##                                                                                                          work_position 
##                                                                                                                      0 
##                                                                                                            work_remote 
##                                                                                                                      0
# Drop every row that still contains at least one NA, in both cleaned frames
data_clean <- data_clean %>% na.omit()
df_self_clean <- df_self_clean %>% na.omit()

# Final sanity check: both cleaned frames should report zero missing cells
df_self_clean %>% is.na() %>% sum()
## [1] 0
data_clean %>% is.na() %>% sum()
## [1] 0

## Step 4: Document data versions

#writing clean dataframes to the working directory
library(readr)
## Warning: package 'readr' was built under R version 4.1.3
## Warning: replacing previous import 'ellipsis::check_dots_unnamed' by
## 'rlang::check_dots_unnamed' when loading 'hms'
## Warning: replacing previous import 'ellipsis::check_dots_used' by
## 'rlang::check_dots_used' when loading 'hms'
## Warning: replacing previous import 'ellipsis::check_dots_empty' by
## 'rlang::check_dots_empty' when loading 'hms'
# Persist both cleaned data frames as CSV files.
# NOTE(review): the target paths are absolute and specific to one machine;
# confirm the directory exists before running elsewhere.
write_csv(
  data_clean,
  "C:/Users/User/Documents/UM Sem 2/WQD 7006 ML FOR DS/mental-health-in-tech-2016-clean.csv"
)
write_csv(
  df_self_clean,
  "C:/Users/User/Documents/UM Sem 2/WQD 7006 ML FOR DS/mental-health-in-tech-2016-self-employed-clean.csv"
)