#dir= 'D:/wallpapers and photos/csv/r data'
# Load the mental health survey data from the local CSV export.
mr_data <- read_csv(
  "D:/wallpapers and photos/csv/r data/mental_health_dataset.csv"
)
## age gender employment_status
## FALSE TRUE TRUE
## work_environment mental_health_history seeks_treatment
## TRUE TRUE TRUE
## stress_level sleep_hours physical_activity_days
## FALSE FALSE FALSE
## depression_score anxiety_score social_support_score
## FALSE FALSE FALSE
## productivity_score mental_health_risk
## FALSE TRUE
## [1] "Employed" "Student" "Self-employed" "Unemployed"
#a function for nominal encoding
# Convert every character column of `x` to a factor.
#
# x: a data frame (or other named list of columns).
# Returns `x` with character columns replaced by factors; all other
# columns are left as they are.
factor_change <- function(x) {
  for (col in names(x)) {
    column <- x[[col]]
    # Leave non-character columns untouched.
    if (!is.character(column)) {
      next
    }
    x[[col]] <- as.factor(column)
  }
  x
}
# doing some ordinal encoding
# Recode the two categorical columns as factors with an explicit level order,
# so tables and plots list the categories in this order. factor() is called
# without `ordered = TRUE`, so the result is an unordered factor; values not
# listed in `levels` become NA.
mr_data <- mr_data %>%
  mutate(
    mental_health_risk = factor(
      mental_health_risk,
      levels = c("High", "Medium", "Low")
    ),
    employment_status = factor(
      employment_status,
      levels = c("Self-employed", "Employed", "Student", "Unemployed")
    )
  )
# I always try to work with a target variable and a question in mind, to keep
# the whole analysis on track. For now I have decided to work with
# mental_health_risk.
# There are no missing values in the data.
library(viridis)
library(hrbrthemes)
library(plotly)
# Anxiety score by gender, one panel per mental health risk level.
# NOTE(review): theme_minimal() is a complete theme, so adding it after
# theme() replaces the background settings given there -- confirm the
# intended order.
plot_of_anxiety <- ggplot(
  mr_data,
  aes(x = gender, y = anxiety_score, fill = mental_health_risk)
) +
  geom_bar(position = "dodge", stat = "identity") +
  facet_wrap(~mental_health_risk) +
  theme(
    panel.background = element_rect(fill = NA),
    plot.background = element_rect(fill = "white")
  ) +
  theme_minimal() +
  scale_fill_viridis(discrete = TRUE, option = "E")
plot_of_anxiety

# Anxiety score by mental health risk level, one panel per gender.
# NOTE(review): same theme ordering as above -- theme_minimal() replaces the
# preceding theme() settings.
plot_of_anxiety2 <- ggplot(
  mr_data,
  aes(x = mental_health_risk, y = anxiety_score, fill = mental_health_risk)
) +
  geom_bar(position = "dodge", stat = "identity") +
  facet_wrap(~gender) +
  theme(
    panel.background = element_rect(fill = NA),
    plot.background = element_rect(fill = "white")
  ) +
  theme_minimal() +
  scale_fill_viridis(discrete = TRUE, option = "E")
plot_of_anxiety2
## is there any association between mental health risk and gender:
# Pearson chi-squared test of independence on the contingency table of
# mental health risk by gender.
mr_data %>%
  with(table(mental_health_risk, gender)) %>%
  chisq.test()
##
## Pearson's Chi-squared test
##
## data: .
## X-squared = 3.2609, df = 6, p-value = 0.7755
# p > 0.05: no evidence of an association between mental health risk and
# gender.
#similarly employment status and mental health risk:
# Pearson chi-squared test of independence on the contingency table of
# mental health risk by employment status.
mr_data %>%
  with(table(mental_health_risk, employment_status)) %>%
  chisq.test()
##
## Pearson's Chi-squared test
##
## data: .
## X-squared = 9.2337, df = 6, p-value = 0.1609
# p > 0.05: no evidence of an association between mental health risk and
# employment status.
# Anxiety score by gender. geom_col() stacks one segment per row, so each
# bar's height is the summed anxiety score for that gender.
ggplot(mr_data) +
  aes(x = gender, y = anxiety_score, fill = gender) +
  geom_col() +
  scale_fill_hue(direction = 1) +
  theme_minimal()

# Depression score by gender, drawn the same way (summed per gender).
ggplot(mr_data) +
  aes(x = gender, y = depression_score, fill = gender) +
  geom_col() +
  scale_fill_hue(direction = 1) +
  theme_minimal()

# Distribution of physical activity days within each mental health risk level.
ggplot(mr_data) +
  aes(
    x = mental_health_risk,
    y = physical_activity_days,
    fill = mental_health_risk
  ) +
  geom_boxplot() +
  scale_fill_viridis_d(option = "plasma", direction = 1) +
  theme_minimal()
# distribution of all data:
# Draw a histogram for every numeric column of `data`.
#
# data: a data frame (or tibble); non-numeric columns are skipped.
# Prints the list of plots and returns it invisibly, named by column.
distribution <- function(data) {
  # Build each histogram inside its own function call: aes() is evaluated
  # lazily, so every plot needs its own binding of the column name rather
  # than the shared loop variable.
  histogram_for <- function(col) {
    ggplot(data, aes(x = .data[[col]])) +
      # 30 bins is the geom_histogram() default; stating it avoids the
      # "pick better value" message and matches the other histogram here.
      geom_histogram(fill = "skyblue", bins = 30L) +
      # Keep the column name as the axis title.
      labs(x = col) +
      theme_ipsum()
  }

  plots <- list()
  for (col in names(data)) {
    if (is.numeric(data[[col]])) {
      plots[[col]] <- histogram_for(col)
    }
  }
  print(plots)
  invisible(plots)
}
## $age
##
## $stress_level
##
## $sleep_hours
##
## $physical_activity_days
##
## $depression_score
##
## $anxiety_score
##
## $social_support_score
##
## $productivity_score
###mental health history and gender association:
# Pearson chi-squared test of independence on the contingency table of
# mental health history by gender.
mr_data %>%
  with(table(mental_health_history, gender)) %>%
  chisq.test()
##
## Pearson's Chi-squared test
##
## data: .
## X-squared = 2.262, df = 3, p-value = 0.5198
# p > 0.05: no evidence of an association between mental health history and
# gender.
### Is the mean number of sleep hours the same across all employment statuses?
# One-way ANOVA of sleep hours across employment status groups.
# aov() has no `alternative` argument (the F test has no one-/two-sided
# variants); passing it only produced a warning that it was disregarded,
# so it is dropped here.
mr_data %>%
  select(employment_status, sleep_hours) %>%
  aov(sleep_hours ~ employment_status, data = .) %>%
  summary()
## Df Sum Sq Mean Sq F value Pr(>F)
## employment_status 3 11 3.639 1.676 0.17
## Residuals 9996 21712 2.172
# We do not have statistically significant evidence that employment status
# has an effect on sleep hours (p > 0.05), i.e. no detectable effect.
# Histogram of sleep hours, one panel per employment status.
ggplot(mr_data, aes(x = sleep_hours, fill = employment_status)) +
  geom_histogram(bins = 30L) +
  scale_fill_viridis_d(option = "cividis", direction = 1) +
  facet_wrap(vars(employment_status)) +
  theme_minimal()