IRWMH <- read.csv("D:/R course/Practicing/Mental health vs remote work/IRWMH.csv")
summary(IRWMH)
Employee_ID Age Gender Job_Role
Length:5000 Min. :22.00 Length:5000 Length:5000
Class :character 1st Qu.:31.00 Class :character Class :character
Mode :character Median :41.00 Mode :character Mode :character
Mean :40.99
3rd Qu.:51.00
Max. :60.00
Industry Years_of_Experience Work_Location
Length:5000 Min. : 1.00 Length:5000
Class :character 1st Qu.: 9.00 Class :character
Mode :character Median :18.00 Mode :character
Mean :17.81
3rd Qu.:26.00
Max. :35.00
Hours_Worked_Per_Week Number_of_Virtual_Meetings Work_Life_Balance_Rating
Min. :20.00 Min. : 0.000 Min. :1.000
1st Qu.:29.00 1st Qu.: 4.000 1st Qu.:2.000
Median :40.00 Median : 8.000 Median :3.000
Mean :39.61 Mean : 7.559 Mean :2.984
3rd Qu.:50.00 3rd Qu.:12.000 3rd Qu.:4.000
Max. :60.00 Max. :15.000 Max. :5.000
Stress_Level Mental_Health_Condition Access_to_Mental_Health_Resources
Length:5000 Length:5000 Length:5000
Class :character Class :character Class :character
Mode :character Mode :character Mode :character
Productivity_Change Social_Isolation_Rating Satisfaction_with_Remote_Work
Length:5000 Min. :1.000 Length:5000
Class :character 1st Qu.:2.000 Class :character
Mode :character Median :3.000 Mode :character
Mean :2.994
3rd Qu.:4.000
Max. :5.000
Company_Support_for_Remote_Work Physical_Activity Sleep_Quality
Min. :1.000 Length:5000 Length:5000
1st Qu.:2.000 Class :character Class :character
Median :3.000 Mode :character Mode :character
Mean :3.008
3rd Qu.:4.000
Max. :5.000
Region
Length:5000
Class :character
Mode :character
IRWMH$Age_Group <- cut(
IRWMH$Age,
breaks = c(20, 30, 40, 50, 60), # Define the breakpoints for age groups
labels = c("21-30", "31-40", "41-50", "51-60"), # Set labels for each age group
right = TRUE # Include the right endpoint in intervals (e.g., 30 is included in 21-30)
)
age_group_counts <- table(IRWMH$Age_Group)
print(age_group_counts)
21-30 31-40 41-50 51-60
1170 1239 1321 1270
table(IRWMH$Gender)
Female Male Non-binary Prefer not to say
1274 1270 1214 1242
table(IRWMH$Job_Role)
Data Scientist Designer HR Marketing
696 723 716 683
Project Manager Sales Software Engineer
738 733 711
table(IRWMH$Industry)
Consulting Education Finance Healthcare IT
680 690 747 728 746
Manufacturing Retail
683 726
table(IRWMH$Work_Location)
Hybrid Onsite Remote
1649 1637 1714
table(IRWMH$Work_Life_Balance_Rating)
1 2 3 4 5
1023 967 1053 980 977
table(IRWMH$Stress_Level)
High Low Medium
1686 1645 1669
table(IRWMH$Mental_Health_Condition)
Anxiety Burnout Depression None
1278 1280 1246 1196
table(IRWMH$Access_to_Mental_Health_Resources)
No Yes
2553 2447
table(IRWMH$Productivity_Change)
Decrease Increase No Change
1737 1586 1677
table(IRWMH$Social_Isolation_Rating)
1 2 3 4 5
953 1066 992 1037 952
table(IRWMH$Satisfaction_with_Remote_Work)
Neutral Satisfied Unsatisfied
1648 1675 1677
table(IRWMH$Company_Support_for_Remote_Work)
1 2 3 4 5
967 985 1077 984 987
table(IRWMH$Physical_Activity)
Daily None Weekly
1616 1629 1755
table(IRWMH$Sleep_Quality)
Average Good Poor
1628 1687 1685
table(IRWMH$Region)
Africa Asia Europe North America Oceania
860 829 840 777 867
South America
827
# Experience_Group is not among the CSV columns reported by summary(IRWMH), so
# tabulating it directly gives an empty table. Derive it from
# Years_of_Experience in 10-year bands first.
IRWMH$Experience_Group <- cut(
  IRWMH$Years_of_Experience,
  breaks = c(0, 10, 20, 30, 40),
  labels = c("1-10", "11-20", "21-30", "31-40"),
  right = TRUE # intervals are (0,10], (10,20], ... so 10 falls in "1-10"
)
table(IRWMH$Experience_Group)
average_hours_by_job_function <- IRWMH %>%
group_by(Job_Role) %>% # Group data by Job_Role
summarise(Average_Hours = mean(Hours_Worked_Per_Week, na.rm = TRUE))
print(average_hours_by_job_function)
# One-way ANOVA: does mean Hours_Worked_Per_Week differ between job roles?
# The model is fitted on the survey rows in IRWMH. Substituting rnorm() draws
# centred on the group means would make the F test reflect the chosen sd and
# sample size rather than the data, and would discard the other columns.
anova_model <- aov(Hours_Worked_Per_Week ~ Job_Role, data = IRWMH)
summary(anova_model)
# NOTE: re-run to regenerate the ANOVA table for the survey data.
IRWMH <- read.csv("D:/R course/Practicing/Mental health vs remote work/IRWMH.csv")
average_wlb_by_job_function <- IRWMH %>%
group_by(Job_Role) %>% # Group data by Job_Role
summarise(Average_rating = mean(Work_Life_Balance_Rating, na.rm = TRUE))
print(average_wlb_by_job_function)
average_vm_by_job_function <- IRWMH %>%
group_by(Job_Role) %>% # Group data by Job_Role
summarise(Average_vm = mean(Number_of_Virtual_Meetings, na.rm = TRUE))
print(average_vm_by_job_function)
average_cs_by_job_function <- IRWMH %>%
group_by(Job_Role) %>% # Group data by Job_Role
summarise(Average_cs = mean(Company_Support_for_Remote_Work, na.rm = TRUE))
print(average_cs_by_job_function)
# Mean work-life balance rating per region. Stored under its own name so it
# does not overwrite the per-job-role table average_wlb_by_job_function.
average_wlb_by_region <- IRWMH %>%
  group_by(Region) %>% # Group data by Region
  summarise(Average_wlb = mean(Work_Life_Balance_Rating, na.rm = TRUE))
print(average_wlb_by_region)
# Mean weekly hours per region (despite the object name, grouping is by Region).
average_wpw_by_job_function <- IRWMH %>%
  group_by(Region) %>% # Group data by Region
  summarise(Average_wpw = mean(Hours_Worked_Per_Week, na.rm = TRUE))
print(average_wpw_by_job_function)
# Share of each sleep-quality rating within every region.
sleep_quality_percentages <- IRWMH %>%
  group_by(Region, Sleep_Quality) %>%
  # "drop_last" keeps the Region grouping, so sum(Count) is the region total
  # and the percentages add up to 100 within each region.
  summarise(Count = n(), .groups = "drop_last") %>%
  mutate(Percentage = Count / sum(Count) * 100) %>%
  ungroup() %>%
  arrange(Region, Sleep_Quality)
print(sleep_quality_percentages)
ggplot(sleep_quality_percentages, aes(x = Region, y = Percentage, fill = Sleep_Quality)) +
  geom_col(position = "dodge") +
  labs(title = "Percentage of Sleep Quality Ratings by Region",
       x = "Region",
       y = "Percentage of Sleep Quality Ratings") +
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")

# Share of each stress level within every region.
stress_level_percentages <- IRWMH %>%
  group_by(Region, Stress_Level) %>%
  # Keep the Region grouping so the denominator is the region total.
  summarise(Count = n(), .groups = "drop_last") %>%
  mutate(Percentage = Count / sum(Count) * 100) %>%
  ungroup() %>%
  arrange(Region, Stress_Level)
print(stress_level_percentages)
ggplot(stress_level_percentages, aes(x = Region, y = Percentage, fill = Stress_Level)) +
  geom_col(position = "dodge") +
  labs(title = "Percentage of Stress Level Ratings by Region",
       x = "Region",
       y = "Percentage of Stress Level Ratings") +
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")

# Share of each stress level within every job role.
stress_level_percentages <- IRWMH %>%
  group_by(Job_Role, Stress_Level) %>%
  # Keep the Job_Role grouping so the denominator is the role total.
  summarise(Count = n(), .groups = "drop_last") %>%
  mutate(Percentage = Count / sum(Count) * 100) %>%
  ungroup() %>%
  arrange(Job_Role, Stress_Level)
print(stress_level_percentages)
ggplot(stress_level_percentages, aes(x = Job_Role, y = Percentage, fill = Stress_Level)) +
  geom_col(position = "dodge") +
  labs(title = "Percentage of Stress Level Ratings by Job Role",
       x = "Job_Role",
       y = "Percentage of Stress Level Ratings") +
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")

# Count respondents per work mode from the data instead of hard-coding them;
# c() turns the table into a plain named vector.
work_modes <- c(table(IRWMH$Work_Location))
# Calculate percentages
percentages <- round((work_modes / sum(work_modes)) * 100, 1)
# Define labels with percentages
labels <- paste0(names(work_modes), "\n", percentages, "%")
# Create a pie chart with percentages
pie(work_modes, labels = labels,
    main = "Distribution of Work Modes",
    col = c("skyblue", "orange", "green")) # Optional: Customize colors

NA
NA
# Count respondents per 10-year experience band from the data instead of
# hard-coding them. Bands are (0,10], (10,20], (20,30], (30,40].
experience_counts <- c(table(cut(
  IRWMH$Years_of_Experience,
  breaks = c(0, 10, 20, 30, 40),
  labels = c("1-10", "11-20", "21-30", "31-40")
)))
# Calculate percentages
percentages <- round((experience_counts / sum(experience_counts)) * 100, 1)
# Define labels with percentages
labels <- paste0(names(experience_counts), "\n", percentages, "%")
# Create a pie chart with percentages
pie(experience_counts, labels = labels,
    main = "Distribution of Employees by Job Experience",
    col = c("skyblue", "orange", "green", "purple")) # Customize colors as desired

# Share of each work location within every job role.
work_mode_percentages <- IRWMH %>%
  group_by(Job_Role, Work_Location) %>%
  # Keep the Job_Role grouping so the denominator is the role total.
  summarise(Count = n(), .groups = "drop_last") %>%
  mutate(Percentage = Count / sum(Count) * 100) %>%
  ungroup() %>%
  arrange(Job_Role, Work_Location)
print(work_mode_percentages)
ggplot(work_mode_percentages, aes(x = Job_Role, y = Percentage, fill = Work_Location)) +
  geom_col(position = "dodge") +
  labs(title = "Percentage of Work Location Type by Job Role",
       x = "Job_Role",
       y = "Percentage of Location Types") +
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")

# Share of each stress level within every work location.
stress_level_percentages <- IRWMH %>%
  group_by(Work_Location, Stress_Level) %>%
  # Keep the Work_Location grouping so the denominator is the location total.
  summarise(Count = n(), .groups = "drop_last") %>%
  mutate(Percentage = Count / sum(Count) * 100) %>%
  ungroup() %>%
  arrange(Work_Location, Stress_Level)
print(stress_level_percentages)
ggplot(stress_level_percentages, aes(x = Work_Location, y = Percentage, fill = Stress_Level)) +
  geom_col(position = "dodge") +
  labs(title = "Percentage of Different Stress Level by Work Location",
       x = "Work Location",
       y = "Percentage of Stress Level") +
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")

virtual_meetings_summary <- IRWMH %>%
group_by(Job_Role) %>%
summarise(Total_Virtual_Meetings = sum(Number_of_Virtual_Meetings, na.rm = TRUE)) %>%
arrange(desc(Total_Virtual_Meetings))
print(virtual_meetings_summary)
# Share of each mental health condition within every job role.
mental_health_condition_percentages <- IRWMH %>%
  group_by(Job_Role, Mental_Health_Condition) %>%
  # Keep the Job_Role grouping so the denominator is the role total.
  summarise(Count = n(), .groups = "drop_last") %>%
  mutate(Percentage = Count / sum(Count) * 100) %>%
  ungroup() %>%
  arrange(Job_Role, Mental_Health_Condition)
print(mental_health_condition_percentages)
ggplot(mental_health_condition_percentages, aes(x = Job_Role, y = Percentage, fill = Mental_Health_Condition)) +
  geom_col(position = "dodge") +
  labs(title = "Percentage of Different Mental Health Condition by Job Role",
       x = "Job Role",
       y = "Percentage of Employees") + # the y axis shows Percentage, not the condition
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")

# Share of each mental health condition within every gender.
mental_health_condition_percentages <- IRWMH %>%
  group_by(Gender, Mental_Health_Condition) %>%
  # Keep the Gender grouping so the denominator is the gender total.
  summarise(Count = n(), .groups = "drop_last") %>%
  mutate(Percentage = Count / sum(Count) * 100) %>%
  ungroup() %>%
  arrange(Gender, Mental_Health_Condition)
print(mental_health_condition_percentages)
ggplot(mental_health_condition_percentages, aes(x = Gender, y = Percentage, fill = Mental_Health_Condition)) +
  geom_col(position = "dodge") +
  labs(title = "Percentage of Different Mental Health Condition by Gender",
       x = "Gender",
       y = "Percentage of Employees") + # the y axis shows Percentage, not the condition
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")

# Head count for every Gender x Job_Role combination.
job_role_by_gender_summary <- IRWMH %>%
  group_by(Gender, Job_Role) %>%
  summarise(Count = n(), .groups = "drop") %>%
  arrange(Gender, Job_Role)
print(job_role_by_gender_summary)
ggplot(job_role_by_gender_summary, aes(x = Gender, y = Count, fill = Job_Role)) +
  geom_col(position = "dodge") +
  labs(title = "Number of Different Job Roles by Gender",
       x = "Gender",
       y = "Number of Employees") + # the y axis shows Count; Job_Role is the fill
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")

# Share of each productivity outcome within every stress level.
productivity_change_percentages <- IRWMH %>%
  group_by(Stress_Level, Productivity_Change) %>%
  # Keep the Stress_Level grouping so the denominator is the stress-level total.
  summarise(Count = n(), .groups = "drop_last") %>%
  mutate(Percentage = Count / sum(Count) * 100) %>%
  ungroup() %>%
  arrange(Stress_Level, Productivity_Change)
print(productivity_change_percentages)
ggplot(productivity_change_percentages, aes(x = Stress_Level, y = Percentage, fill = Productivity_Change)) +
  geom_col(position = "dodge") +
  labs(title = "Productivity Change due to Stress Level",
       x = "Stress Level",
       y = "Percentage of Employees") + # the y axis shows Percentage
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")

# Share of each productivity outcome within every sleep-quality level.
productivity_change_percentages <- IRWMH %>%
  group_by(Sleep_Quality, Productivity_Change) %>%
  # Keep the Sleep_Quality grouping so the denominator is that level's total.
  summarise(Count = n(), .groups = "drop_last") %>%
  mutate(Percentage = Count / sum(Count) * 100) %>%
  ungroup() %>%
  arrange(Sleep_Quality, Productivity_Change)
print(productivity_change_percentages)
ggplot(productivity_change_percentages, aes(x = Sleep_Quality, y = Percentage, fill = Productivity_Change)) +
  geom_col(position = "dodge") +
  labs(title = "Productivity Change according to Sleep Quality",
       x = "Sleep Quality",
       y = "Percentage of Employees") + # the y axis shows Percentage
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")

# Create a contingency table from the respondent-level data. Cross-tabulating
# the summary table instead puts a 1 in every cell (it has one row per
# Sleep_Quality x Productivity_Change combination), which forces p = 1.
contingency_table <- table(IRWMH$Sleep_Quality, IRWMH$Productivity_Change)
# An exact test on a table larger than 2x2 with thousands of observations is
# expensive, so use the Monte Carlo p-value; the seed makes it reproducible.
set.seed(123)
fisher_test <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 10000)
# View the results
print(fisher_test)
# NOTE: re-run to regenerate the printed result for the respondent-level table.
# Share of each satisfaction level within every job role.
satisfaction_with_remote_work_percentages <- IRWMH %>%
  group_by(Job_Role, Satisfaction_with_Remote_Work) %>%
  # Keep the Job_Role grouping so the denominator is the role total.
  summarise(Count = n(), .groups = "drop_last") %>%
  mutate(Percentage = Count / sum(Count) * 100) %>%
  ungroup() %>%
  arrange(Job_Role, Satisfaction_with_Remote_Work)
print(satisfaction_with_remote_work_percentages)
ggplot(satisfaction_with_remote_work_percentages, aes(x = Job_Role, y = Percentage, fill = Satisfaction_with_Remote_Work)) +
  geom_col(position = "dodge") +
  labs(title = "Satisfaction with Remote Work by Job Role",
       x = "Job Role",
       y = "Percentage of Employees") + # the y axis shows Percentage
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")

# Violin + boxplot of Count per Job_Role, filled by satisfaction level.
# NOTE(review): satisfaction_with_remote_work_percentages comes from
# group_by(Job_Role, Satisfaction_with_Remote_Work) + summarise(), so it holds
# a single Count per Job_Role x satisfaction combination. Each violin/box is
# therefore drawn from one value; a bar chart of Count may be what is wanted.
ggplot(satisfaction_with_remote_work_percentages, aes(x = Job_Role, y = Count, fill = Satisfaction_with_Remote_Work)) +
geom_violin(trim = FALSE) + # Creates the violin plot
geom_boxplot(width = 0.1, outlier.shape = NA, alpha = 0.5) + # Adds a boxplot inside the violin
labs(
title = "satisfaction_with_remote_work",
x = "Job Role",
y = "Satisfaction_with_Remote_Work"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) + # Rotates x-axis labels for better readability
scale_fill_brewer(palette = "Set3")

# Build the per-profession averages from IRWMH instead of retyping printed
# values, so they cannot drift out of sync with the data.
data <- IRWMH %>%
  group_by(Job_Role) %>%
  summarise(
    Avg_Hours_Worked = mean(Hours_Worked_Per_Week, na.rm = TRUE),
    Work_Life_Balance_Rating = mean(Work_Life_Balance_Rating, na.rm = TRUE)
  ) %>%
  rename(Profession = Job_Role) %>%
  as.data.frame()
# Pearson correlation between the two role-level averages. It is computed over
# one point per profession, so it describes professions, not individuals.
correlation <- cor(data$Avg_Hours_Worked, data$Work_Life_Balance_Rating)
print(paste("Correlation coefficient between average hours worked and work-life balance rating:", round(correlation, 4)))
[1] "Correlation coefficient between average hours worked and work-life balance rating: 0.2232"
# Scatter plot of the per-profession averages, each point labelled with its
# profession just above the marker.
ggplot(
  data,
  aes(x = Avg_Hours_Worked, y = Work_Life_Balance_Rating, label = Profession)
) +
  geom_point(size = 3, color = "blue") +
  geom_text(vjust = -0.5, hjust = 0.5) +
  ggtitle("Relationship Between Average Hours Worked and Work-Life Balance Rating") +
  xlab("Average Hours Worked") +
  ylab("Work-Life Balance Rating") +
  theme_minimal()

# Head count for every Industry x Job_Role combination.
job_role_by_industry_summary <- IRWMH %>%
  group_by(Industry, Job_Role) %>%
  summarise(Count = n(), .groups = "drop") %>%
  arrange(Industry, Job_Role)
print(job_role_by_industry_summary)
ggplot(job_role_by_industry_summary, aes(x = Industry, y = Count, fill = Job_Role)) +
  geom_col(position = "dodge") +
  labs(title = "Number of Different Job Roles by Industry",
       x = "Industry",
       y = "Number of Employees") + # the y axis shows Count; Job_Role is the fill
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")

# Head count per industry, split by access to mental health resources.
acmh_by_industry_summary <- IRWMH %>%
  group_by(Industry, Access_to_Mental_Health_Resources) %>%
  summarise(Count = n(), .groups = "drop") %>%
  arrange(Industry, Access_to_Mental_Health_Resources)
print(acmh_by_industry_summary)
ggplot(acmh_by_industry_summary, aes(x = Industry, y = Count, fill = Access_to_Mental_Health_Resources)) +
  geom_col(position = "dodge") +
  labs(title = "Access to Mental Health by Industry",
       x = "Industry",
       y = "Number of Employees") + # the y axis shows Count; access is the fill
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")

# Age distribution per job role: violin outline with a slim boxplot overlaid.
ggplot(IRWMH, aes(x = Job_Role, y = Age, fill = Job_Role)) +
  geom_violin(trim = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA, alpha = 0.5) +
  scale_fill_brewer(palette = "Set3") +
  ggtitle("Age Distribution by Job Role") +
  xlab("Job Role") +
  ylab("Age") +
  theme_minimal() +
  # Tilt the role names so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Mean age per job role (missing ages ignored).
average_age_by_job <- summarise(
  group_by(IRWMH, Job_Role),
  Average_Age = mean(Age, na.rm = TRUE)
)
print(average_age_by_job)
# Years of experience per job role: violin outline with a slim boxplot overlaid.
ggplot(IRWMH, aes(x = Job_Role, y = Years_of_Experience, fill = Job_Role)) +
  geom_violin(trim = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA, alpha = 0.5) +
  scale_fill_brewer(palette = "Set3") +
  ggtitle("Experience Distribution by Job Role") +
  xlab("Job Role") +
  ylab("Years of Experience") +
  theme_minimal() +
  # Tilt the role names so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Treat stress as an ordered outcome: Low < Medium < High.
IRWMH$Stress_Level <- factor(
  IRWMH$Stress_Level,
  levels = c("Low", "Medium", "High"),
  ordered = TRUE
)
# Keep only rows where both the outcome and the predictor are present.
data_clean <- IRWMH %>%
  filter(!(is.na(Stress_Level) | is.na(Hours_Worked_Per_Week)))
# Ordinal logistic regression of stress level on weekly hours.
model_stress_ordinal <- polr(Stress_Level ~ Hours_Worked_Per_Week, data = data_clean, method = "logistic")
# Coefficients, cut-points and fit statistics.
summary(model_stress_ordinal)
Re-fitting to get Hessian
Call:
polr(formula = Stress_Level ~ Hours_Worked_Per_Week, data = data_clean,
method = "logistic")
Coefficients:
Value Std. Error t value
Hours_Worked_Per_Week 0.002565 0.002192 1.17
Intercepts:
Value Std. Error t value
Low|Medium -0.6113 0.0917 -6.6647
Medium|High 0.7775 0.0920 8.4525
Residual Deviance: 10984.24
AIC: 10990.24
IRWMH <- read.csv("D:/R course/Practicing/Mental health vs remote work/IRWMH.csv")
# Simple linear regression of meeting count on weekly hours worked.
model <- lm(
  formula = Number_of_Virtual_Meetings ~ Hours_Worked_Per_Week,
  data = IRWMH
)
# Coefficients, residual summary and fit statistics.
summary(model)
Call:
lm(formula = Number_of_Virtual_Meetings ~ Hours_Worked_Per_Week,
data = IRWMH)
Residuals:
Min 1Q Median 3Q Max
-7.5943 -3.5943 0.4111 4.4093 7.4777
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.630367 0.228640 33.373 <2e-16 ***
Hours_Worked_Per_Week -0.001802 0.005529 -0.326 0.745
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 4.637 on 4998 degrees of freedom
Multiple R-squared: 2.124e-05, Adjusted R-squared: -0.0001788
F-statistic: 0.1062 on 1 and 4998 DF, p-value: 0.7446
# Scatter plot of meetings against weekly hours
plot(IRWMH$Hours_Worked_Per_Week, IRWMH$Number_of_Virtual_Meetings,
     main = "Regression Analysis",
     xlab = "Hours Worked Per Week",
     ylab = "Number of Virtual Meetings",
     pch = 19, col = "blue")
# Add the fitted regression line once; a second identical abline() call only
# redraws the same line.
abline(model, col = "red", lwd = 2)

# Head count for every Work_Location x Stress_Level combination.
stress_level_by_work_location_summary <- IRWMH %>%
  group_by(Work_Location, Stress_Level) %>%
  summarise(Count = n(), .groups = "drop") %>%
  arrange(Work_Location, Stress_Level)
print(stress_level_by_work_location_summary)
ggplot(stress_level_by_work_location_summary, aes(x = Work_Location, y = Count, fill = Stress_Level)) +
  geom_col(position = "dodge") +
  labs(title = "Stress Level by Job Location",
       x = "Work Location",
       y = "Number of Employees") + # the y axis shows Count; Stress_Level is the fill
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")

# Head count for every Industry x company-support rating combination.
cs_by_industry_summary <- IRWMH %>%
  group_by(Industry, Company_Support_for_Remote_Work) %>%
  summarise(Count = n(), .groups = "drop") %>%
  arrange(Industry, Company_Support_for_Remote_Work)
print(cs_by_industry_summary)
# Mean company-support rating per industry (missing ratings ignored).
average_cs_by_industry <- summarise(
  group_by(IRWMH, Industry),
  Average_CS = mean(Company_Support_for_Remote_Work, na.rm = TRUE)
)
print(average_cs_by_industry)
# Company-support rating per industry: violin outline with a slim boxplot overlaid.
ggplot(IRWMH, aes(x = Industry, y = Company_Support_for_Remote_Work, fill = Industry)) +
  geom_violin(trim = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA, alpha = 0.5) +
  scale_fill_brewer(palette = "Set3") +
  ggtitle("Company Support for Remote Work by Industry") +
  xlab("Industry") +
  ylab("Company Support Rating") +
  theme_minimal() +
  # Tilt the industry names so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Mean company-support rating per region (missing ratings ignored).
average_csr_by_region <- summarise(
  group_by(IRWMH, Region),
  Average_CSR = mean(Company_Support_for_Remote_Work, na.rm = TRUE)
)
print(average_csr_by_region)
# Mean social-isolation rating per region (missing ratings ignored).
average_sir_by_region <- summarise(
  group_by(IRWMH, Region),
  Average_SIR = mean(Social_Isolation_Rating, na.rm = TRUE)
)
print(average_sir_by_region)
# Region-level averages computed from IRWMH instead of retyping printed
# values, so they cannot drift out of sync with the data.
data <- IRWMH %>%
  group_by(Region) %>%
  summarise(
    Company_Support = mean(Company_Support_for_Remote_Work, na.rm = TRUE),
    Social_Isolation = mean(Social_Isolation_Rating, na.rm = TRUE)
  ) %>%
  as.data.frame()
# Keep the stand-alone vectors available under their existing names.
regions <- data$Region
company_support <- data$Company_Support
social_isolation <- data$Social_Isolation
# Perform linear regression. The fit uses one point per region, so it
# describes regions rather than individual employees.
model <- lm(Social_Isolation ~ Company_Support, data = data)
# Summary of the regression model
summary(model)
Call:
lm(formula = Social_Isolation ~ Company_Support, data = data)
Residuals:
1 2 3 4 5 6
-0.009011 0.029661 -0.013406 -0.011337 0.024890 -0.020797
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.003419 0.871723 0.004 0.9971
Company_Support 0.994132 0.289800 3.430 0.0265 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.02409 on 4 degrees of freedom
Multiple R-squared: 0.7463, Adjusted R-squared: 0.6829
F-statistic: 11.77 on 1 and 4 DF, p-value: 0.02653
# Scatter of the region averages with the fitted line drawn on top.
plot(
  x = data$Company_Support,
  y = data$Social_Isolation,
  main = "Regression of Social Isolation on Company Support",
  xlab = "Average Company Support Rating",
  ylab = "Average Social Isolation Rating",
  pch = 19,
  col = "blue"
)
abline(model, col = "red", lwd = 2)

---
title: "Effect of Mental Health on Remote Work"
output: html_notebook
---
```{r}
# Attach the packages the notebook uses so it runs in a fresh session.
# MASS is attached first so that dplyr's verbs are not masked by it.
library(MASS)    # polr()
library(dplyr)   # %>%, group_by(), summarise(), mutate(), arrange(), filter()
library(ggplot2) # ggplot() and its layers

# Read the survey data (absolute path: adjust when running on another machine).
IRWMH <- read.csv("D:/R course/Practicing/Mental health vs remote work/IRWMH.csv")
```
```{r}
summary(IRWMH)
```
```{r}
# Bin ages into decade bands. Intervals are closed on the right, i.e.
# (20,30], (30,40], (40,50], (50,60], so an age of 30 lands in "21-30".
age_breaks <- seq(20, 60, by = 10)
age_labels <- c("21-30", "31-40", "41-50", "51-60")
IRWMH$Age_Group <- cut(IRWMH$Age, breaks = age_breaks, labels = age_labels, right = TRUE)
# Number of respondents in each band.
age_group_counts <- table(IRWMH$Age_Group)
print(age_group_counts)
```
```{r}
table(IRWMH$Gender)
```
```{r}
table(IRWMH$Job_Role)
table(IRWMH$Industry)
```
```{r}
table(IRWMH$Work_Location)
table(IRWMH$Work_Life_Balance_Rating)
table(IRWMH$Stress_Level)
table(IRWMH$Mental_Health_Condition)
table(IRWMH$Access_to_Mental_Health_Resources)
table(IRWMH$Productivity_Change)
table(IRWMH$Social_Isolation_Rating)
table(IRWMH$Satisfaction_with_Remote_Work)
table(IRWMH$Company_Support_for_Remote_Work)
table(IRWMH$Physical_Activity)
table(IRWMH$Sleep_Quality)
table(IRWMH$Region)
# Experience_Group is not a column of the CSV, so tabulating it directly gives
# an empty table. Derive it from Years_of_Experience in 10-year bands first.
IRWMH$Experience_Group <- cut(
  IRWMH$Years_of_Experience,
  breaks = c(0, 10, 20, 30, 40),
  labels = c("1-10", "11-20", "21-30", "31-40"),
  right = TRUE # intervals are (0,10], (10,20], ... so 10 falls in "1-10"
)
table(IRWMH$Experience_Group)
```
```{r}
# Mean weekly hours per job role (missing values ignored).
average_hours_by_job_function <- summarise(
  group_by(IRWMH, Job_Role),
  Average_Hours = mean(Hours_Worked_Per_Week, na.rm = TRUE)
)
print(average_hours_by_job_function)
```
```{r}
# One-way ANOVA: does mean Hours_Worked_Per_Week differ between job roles?
# The model is fitted on the survey rows in IRWMH. Substituting rnorm() draws
# centred on the group means would make the F test reflect the chosen sd and
# sample size rather than the data, and would discard the other columns.
anova_model <- aov(Hours_Worked_Per_Week ~ Job_Role, data = IRWMH)
summary(anova_model)
```
```{r}
IRWMH <- read.csv("D:/R course/Practicing/Mental health vs remote work/IRWMH.csv")
# Mean work-life balance rating per job role (missing values ignored).
average_wlb_by_job_function <- summarise(
  group_by(IRWMH, Job_Role),
  Average_rating = mean(Work_Life_Balance_Rating, na.rm = TRUE)
)
print(average_wlb_by_job_function)
```
```{r}
# Mean number of virtual meetings per job role (missing values ignored).
average_vm_by_job_function <- summarise(
  group_by(IRWMH, Job_Role),
  Average_vm = mean(Number_of_Virtual_Meetings, na.rm = TRUE)
)
print(average_vm_by_job_function)
```
```{r}
# Mean company-support rating per job role (missing values ignored).
average_cs_by_job_function <- summarise(
  group_by(IRWMH, Job_Role),
  Average_cs = mean(Company_Support_for_Remote_Work, na.rm = TRUE)
)
print(average_cs_by_job_function)
```
```{r}
# Mean work-life balance rating per region. Stored under its own name so it
# does not overwrite the per-job-role table average_wlb_by_job_function.
average_wlb_by_region <- IRWMH %>%
  group_by(Region) %>% # Group data by Region
  summarise(Average_wlb = mean(Work_Life_Balance_Rating, na.rm = TRUE))
print(average_wlb_by_region)
```
```{r}
# Mean weekly hours per region (despite the object name, grouping is by Region).
average_wpw_by_job_function <- summarise(
  group_by(IRWMH, Region),
  Average_wpw = mean(Hours_Worked_Per_Week, na.rm = TRUE)
)
print(average_wpw_by_job_function)
```
```{r}
# Share of each sleep-quality rating within every region.
sleep_quality_percentages <- IRWMH %>%
  group_by(Region, Sleep_Quality) %>%
  # "drop_last" keeps the Region grouping, so sum(Count) is the region total
  # and the percentages add up to 100 within each region.
  summarise(Count = n(), .groups = "drop_last") %>%
  mutate(Percentage = Count / sum(Count) * 100) %>%
  ungroup() %>%
  arrange(Region, Sleep_Quality)
print(sleep_quality_percentages)
```

```{r}
# Dodged bars: one cluster per region, one bar per sleep-quality rating.
ggplot(
  sleep_quality_percentages,
  aes(x = Region, y = Percentage, fill = Sleep_Quality)
) +
  geom_col(position = "dodge") +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Percentage of Sleep Quality Ratings by Region",
    x = "Region",
    y = "Percentage of Sleep Quality Ratings"
  ) +
  theme_minimal()
```
```{r}
# Share of each stress level within every region.
stress_level_percentages <- IRWMH %>%
  group_by(Region, Stress_Level) %>%
  # Keep the Region grouping so the denominator is the region total.
  summarise(Count = n(), .groups = "drop_last") %>%
  mutate(Percentage = Count / sum(Count) * 100) %>%
  ungroup() %>%
  arrange(Region, Stress_Level)
print(stress_level_percentages)
```
```{r}
# Dodged bars: one cluster per region, one bar per stress level.
ggplot(
  stress_level_percentages,
  aes(x = Region, y = Percentage, fill = Stress_Level)
) +
  geom_col(position = "dodge") +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Percentage of Stress Level Ratings by Region",
    x = "Region",
    y = "Percentage of Stress Level Ratings"
  ) +
  theme_minimal()
```
```{r}
# Share of each stress level within every job role.
stress_level_percentages <- IRWMH %>%
  group_by(Job_Role, Stress_Level) %>%
  # Keep the Job_Role grouping so the denominator is the role total.
  summarise(Count = n(), .groups = "drop_last") %>%
  mutate(Percentage = Count / sum(Count) * 100) %>%
  ungroup() %>%
  arrange(Job_Role, Stress_Level)
print(stress_level_percentages)
```
```{r}
# Dodged bars: one cluster per job role, one bar per stress level.
ggplot(
  stress_level_percentages,
  aes(x = Job_Role, y = Percentage, fill = Stress_Level)
) +
  geom_col(position = "dodge") +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Percentage of Stress Level Ratings by Job Role",
    x = "Job_Role",
    y = "Percentage of Stress Level Ratings"
  ) +
  theme_minimal()
```
```{r}
# Count respondents per work mode from the data instead of hard-coding them;
# c() turns the table into a plain named vector.
work_modes <- c(table(IRWMH$Work_Location))

# Calculate percentages
percentages <- round((work_modes / sum(work_modes)) * 100, 1)

# Define labels with percentages
labels <- paste0(names(work_modes), "\n", percentages, "%")

# Create a pie chart with percentages
pie(work_modes, labels = labels,
    main = "Distribution of Work Modes",
    col = c("skyblue", "orange", "green"))  # Optional: Customize colors


```
```{r}
# Count respondents per 10-year experience band from the data instead of
# hard-coding them. Bands are (0,10], (10,20], (20,30], (30,40].
experience_counts <- c(table(cut(
  IRWMH$Years_of_Experience,
  breaks = c(0, 10, 20, 30, 40),
  labels = c("1-10", "11-20", "21-30", "31-40")
)))

# Calculate percentages
percentages <- round((experience_counts / sum(experience_counts)) * 100, 1)

# Define labels with percentages
labels <- paste0(names(experience_counts), "\n", percentages, "%")

# Create a pie chart with percentages
pie(experience_counts, labels = labels,
    main = "Distribution of Employees by Job Experience",
    col = c("skyblue", "orange", "green", "purple"))  # Customize colors as desired

```
```{r}
# Share of each work location within every job role.
work_mode_percentages <- IRWMH %>%
  group_by(Job_Role, Work_Location) %>%
  # Keep the Job_Role grouping so the denominator is the role total.
  summarise(Count = n(), .groups = "drop_last") %>%
  mutate(Percentage = Count / sum(Count) * 100) %>%
  ungroup() %>%
  arrange(Job_Role, Work_Location)
print(work_mode_percentages)
```
```{r}
# Dodged bars: one cluster per job role, one bar per work location.
ggplot(
  work_mode_percentages,
  aes(x = Job_Role, y = Percentage, fill = Work_Location)
) +
  geom_col(position = "dodge") +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Percentage of Work Location Type by Job Role",
    x = "Job_Role",
    y = "Percentage of Location Types"
  ) +
  theme_minimal()
```
```{r}
# Share of each stress level within every work location.
stress_level_percentages <- IRWMH %>%
  group_by(Work_Location, Stress_Level) %>%
  # Keep the Work_Location grouping so the denominator is the location total.
  summarise(Count = n(), .groups = "drop_last") %>%
  mutate(Percentage = Count / sum(Count) * 100) %>%
  ungroup() %>%
  arrange(Work_Location, Stress_Level)
print(stress_level_percentages)
```
```{r}
# Dodged bars: one cluster per work location, one bar per stress level.
ggplot(
  stress_level_percentages,
  aes(x = Work_Location, y = Percentage, fill = Stress_Level)
) +
  geom_col(position = "dodge") +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Percentage of Different Stress Level by Work Location",
    x = "Work Location",
    y = "Percentage of Stress Level"
  ) +
  theme_minimal()
```
```{r}
# Total virtual meetings per job role, largest total first.
virtual_meetings_summary <- arrange(
  summarise(
    group_by(IRWMH, Job_Role),
    Total_Virtual_Meetings = sum(Number_of_Virtual_Meetings, na.rm = TRUE)
  ),
  desc(Total_Virtual_Meetings)
)
print(virtual_meetings_summary)
```
```{r}
# Share of each mental health condition within every job role.
mental_health_condition_percentages <- IRWMH %>%
  group_by(Job_Role, Mental_Health_Condition) %>%
  # Keep the Job_Role grouping so the denominator is the role total.
  summarise(Count = n(), .groups = "drop_last") %>%
  mutate(Percentage = Count / sum(Count) * 100) %>%
  ungroup() %>%
  arrange(Job_Role, Mental_Health_Condition)
print(mental_health_condition_percentages)
```
```{r}
# Dodged bars: one cluster per job role, one bar per mental health condition.
ggplot(mental_health_condition_percentages, aes(x = Job_Role, y = Percentage, fill = Mental_Health_Condition)) +
  geom_col(position = "dodge") +
  labs(title = "Percentage of Different Mental Health Condition by Job Role",
       x = "Job Role",
       y = "Percentage of Employees") + # the y axis shows Percentage, not the condition
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")
```
```{r}
# Share of each mental health condition within every gender.
mental_health_condition_percentages <- IRWMH %>%
  group_by(Gender, Mental_Health_Condition) %>%
  # Keep the Gender grouping so the denominator is the gender total.
  summarise(Count = n(), .groups = "drop_last") %>%
  mutate(Percentage = Count / sum(Count) * 100) %>%
  ungroup() %>%
  arrange(Gender, Mental_Health_Condition)
print(mental_health_condition_percentages)
```
```{r}
# Dodged bars: one cluster per gender, one bar per mental health condition.
ggplot(mental_health_condition_percentages, aes(x = Gender, y = Percentage, fill = Mental_Health_Condition)) +
  geom_col(position = "dodge") +
  labs(title = "Percentage of Different Mental Health Condition by Gender",
       x = "Gender",
       y = "Percentage of Employees") + # the y axis shows Percentage, not the condition
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")
```
```{r}
# Head count for every Gender x Job_Role combination, sorted by both keys.
job_role_by_gender_summary <- arrange(
  summarise(group_by(IRWMH, Gender, Job_Role), Count = n(), .groups = "drop"),
  Gender,
  Job_Role
)
print(job_role_by_gender_summary)
```
```{r}
# Dodged bars: one cluster per gender, one bar per job role.
ggplot(job_role_by_gender_summary, aes(x = Gender, y = Count, fill = Job_Role)) +
  geom_col(position = "dodge") +
  labs(title = "Number of Different Job Roles by Gender",
       x = "Gender",
       y = "Number of Employees") + # the y axis shows Count; Job_Role is the fill
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")
```
```{r}
# Share of each productivity outcome within every stress level.
productivity_change_percentages <- IRWMH %>%
  group_by(Stress_Level, Productivity_Change) %>%
  # Keep the Stress_Level grouping so the denominator is the stress-level total.
  summarise(Count = n(), .groups = "drop_last") %>%
  mutate(Percentage = Count / sum(Count) * 100) %>%
  ungroup() %>%
  arrange(Stress_Level, Productivity_Change)
print(productivity_change_percentages)
```
```{r}
# Grouped bar chart of the Percentage column, one cluster per Stress_Level
# and one bar per Productivity_Change category.
# The y axis carries the percentage; the category is shown in the legend.
ggplot(
  productivity_change_percentages,
  aes(x = Stress_Level, y = Percentage, fill = Productivity_Change)
) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Productivity Change due to Stress Level",
    x = "Stress Level",
    y = "Percentage (%)",
    fill = "Productivity Change"
  ) +
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")
```
```{r}
# Count employees for every Sleep_Quality x Productivity_Change combination
# and express each count as a percentage *within its Sleep_Quality*.
# `.groups = "drop_last"` leaves the table grouped by Sleep_Quality, so
# sum(Count) is the per-sleep-quality total instead of the dataset total.
# Note: this overwrites the productivity_change_percentages object created
# for the Stress_Level breakdown.
productivity_change_percentages <- IRWMH %>%
  group_by(Sleep_Quality, Productivity_Change) %>%
  summarise(Count = n(), .groups = "drop_last") %>%
  mutate(Percentage = Count / sum(Count) * 100) %>%
  ungroup() %>%
  arrange(Sleep_Quality, Productivity_Change)
print(productivity_change_percentages)
```
```{r}
# Grouped bar chart of the Percentage column, one cluster per Sleep_Quality
# and one bar per Productivity_Change category.
# The y axis carries the percentage; the category is shown in the legend.
ggplot(
  productivity_change_percentages,
  aes(x = Sleep_Quality, y = Percentage, fill = Productivity_Change)
) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Productivity Change according to Sleep Quality",
    x = "Sleep Quality",
    y = "Percentage (%)",
    fill = "Productivity Change"
  ) +
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")
```
```{r}
# Test of association between Sleep_Quality and Productivity_Change.
#
# The contingency table must be built from the raw observations. Tabulating
# the summary table instead yields a 1 in every cell (each combination
# appears exactly once there), which makes any test on it meaningless.
contingency_table <- table(
  Sleep_Quality = IRWMH$Sleep_Quality,
  Productivity_Change = IRWMH$Productivity_Change
)

# For a table larger than 2 x 2 with thousands of observations the exact
# network algorithm can exhaust its workspace, so the p-value is obtained by
# Monte Carlo simulation instead (B replicates).
fisher_test <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 10000)
# View the results
print(fisher_test)

```
```{r}
# Count employees for every Job_Role x Satisfaction_with_Remote_Work
# combination and express each count as a percentage *within its Job_Role*.
# `.groups = "drop_last"` leaves the table grouped by Job_Role, so
# sum(Count) is the per-role total instead of the dataset total.
satisfaction_with_remote_work_percentages <- IRWMH %>%
  group_by(Job_Role, Satisfaction_with_Remote_Work) %>%
  summarise(Count = n(), .groups = "drop_last") %>%
  mutate(Percentage = Count / sum(Count) * 100) %>%
  ungroup() %>%
  arrange(Job_Role, Satisfaction_with_Remote_Work)
print(satisfaction_with_remote_work_percentages)
```
```{r}
# Grouped bar chart of the Percentage column, one cluster per Job_Role and
# one bar per Satisfaction_with_Remote_Work level.
# The y axis carries the percentage; the level is shown in the legend.
ggplot(
  satisfaction_with_remote_work_percentages,
  aes(x = Job_Role, y = Percentage, fill = Satisfaction_with_Remote_Work)
) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Satisfaction with Remote Work by Job Role",
    x = "Job Role",
    y = "Percentage (%)",
    fill = "Satisfaction Level"
  ) +
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")
```
```{r}
# Composition of Satisfaction_with_Remote_Work levels inside each Job_Role.
#
# The summary table holds exactly one Count per Job_Role x satisfaction
# level, so a violin/box plot of Count has a single observation per group and
# no distribution to draw. A 100% stacked bar shows the same information in a
# readable form: position = "fill" rescales the counts of each job role so
# the segments add up to 1.
ggplot(
  satisfaction_with_remote_work_percentages,
  aes(x = Job_Role, y = Count, fill = Satisfaction_with_Remote_Work)
) +
  geom_bar(stat = "identity", position = "fill") +
  labs(
    title = "Satisfaction with Remote Work by Job Role (share within role)",
    x = "Job Role",
    y = "Share of employees",
    fill = "Satisfaction with Remote Work"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +  # Rotates x-axis labels for better readability
  scale_fill_brewer(palette = "Set3")

```
```{r}
# Seven professions with a hand-entered average of weekly hours worked and
# of the work-life balance rating.
# NOTE(review): the figures look like per-Job_Role means of IRWMH pasted in
# by hand - confirm and re-derive them if the dataset changes.
data <- data.frame(
  Profession = c(
    "Data Scientist", "Designer", "HR", "Marketing",
    "Project Manager", "Sales", "Software Engineer"
  ),
  Avg_Hours_Worked = c(
    38.95402, 38.88105, 39.66061, 39.73499,
    39.92276, 39.86085, 40.27145
  ),
  Work_Life_Balance_Rating = c(
    3.007184, 2.966805, 2.930168,
    2.945827, 2.960705, 3.043656, 3.033755
  )
)

# Pearson correlation (cor()'s default method) across the seven rows,
# reported rounded to four decimals.
correlation <- cor(
  x = data$Avg_Hours_Worked,
  y = data$Work_Life_Balance_Rating
)
print(paste("Correlation coefficient between average hours worked and work-life balance rating:", round(correlation, 4)))

# Labelled scatter plot: one blue point per profession, with the profession
# name drawn just above it.
ggplot(
  data = data,
  mapping = aes(
    x = Avg_Hours_Worked,
    y = Work_Life_Balance_Rating,
    label = Profession
  )
) +
  geom_point(color = "blue", size = 3) +
  geom_text(hjust = 0.5, vjust = -0.5) +
  labs(
    title = "Relationship Between Average Hours Worked and Work-Life Balance Rating",
    x = "Average Hours Worked",
    y = "Work-Life Balance Rating"
  ) +
  theme_minimal()
```
```{r}
# Number of rows in IRWMH for every Industry x Job_Role combination,
# returned ungrouped and ordered by Industry, then Job_Role.
job_role_by_industry_summary <- arrange(
  summarise(
    group_by(IRWMH, Industry, Job_Role),
    Count = n(),
    .groups = "drop"
  ),
  Industry,
  Job_Role
)
print(job_role_by_industry_summary)
```
```{r}
# Grouped bar chart of head-counts: one cluster per Industry, one bar per
# Job_Role. The bar height is the Count column, so the y axis is labelled
# "Count"; the job role is identified by the fill legend.
ggplot(
  job_role_by_industry_summary,
  aes(x = Industry, y = Count, fill = Job_Role)
) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Number of Different Job Roles by Industry",
    x = "Industry",
    y = "Count",
    fill = "Job Role"
  ) +
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")
```
```{r}
# Number of rows in IRWMH for every Industry x
# Access_to_Mental_Health_Resources combination, returned ungrouped and
# ordered by both keys.
acmh_by_industry_summary <- arrange(
  summarise(
    group_by(IRWMH, Industry, Access_to_Mental_Health_Resources),
    Count = n(),
    .groups = "drop"
  ),
  Industry,
  Access_to_Mental_Health_Resources
)
print(acmh_by_industry_summary)
```

```{r}
# Grouped bar chart of head-counts: one cluster per Industry, one bar per
# Access_to_Mental_Health_Resources value. The bar height is the Count
# column, so the y axis is labelled "Count"; the access value is identified
# by the fill legend.
ggplot(
  acmh_by_industry_summary,
  aes(x = Industry, y = Count, fill = Access_to_Mental_Health_Resources)
) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Access to Mental Health by Industry",
    x = "Industry",
    y = "Count",
    fill = "Access to Mental Health"
  ) +
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")
```
```{r}
# Distribution of Age for each Job_Role: violin outline with a slim box
# plot drawn on top of it.
ggplot(data = IRWMH, mapping = aes(x = Job_Role, y = Age, fill = Job_Role)) +
  # Untrimmed density outline, one violin per job role
  geom_violin(trim = FALSE) +
  # Narrow, semi-transparent box plot; outlier points are not drawn
  geom_boxplot(width = 0.1, alpha = 0.5, outlier.shape = NA) +
  scale_fill_brewer(palette = "Set3") +
  labs(
    title = "Age Distribution by Job Role",
    x = "Job Role",
    y = "Age"
  ) +
  theme_minimal() +
  # Tilt the x tick labels so the job-role names stay legible
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
```{r}
# Mean Age per Job_Role; missing ages are ignored by the mean.
average_age_by_job <- summarise(
  group_by(IRWMH, Job_Role),
  Average_Age = mean(Age, na.rm = TRUE)
)

# Show the per-role averages
print(average_age_by_job)
```
```{r}
# Distribution of Years_of_Experience for each Job_Role: violin outline with
# a slim box plot drawn on top of it.
ggplot(
  data = IRWMH,
  mapping = aes(x = Job_Role, y = Years_of_Experience, fill = Job_Role)
) +
  # Untrimmed density outline, one violin per job role
  geom_violin(trim = FALSE) +
  # Narrow, semi-transparent box plot; outlier points are not drawn
  geom_boxplot(width = 0.1, alpha = 0.5, outlier.shape = NA) +
  scale_fill_brewer(palette = "Set3") +
  labs(
    title = "Experience Distribution by Job Role",
    x = "Job Role",
    y = "Years of Experience"
  ) +
  theme_minimal() +
  # Tilt the x tick labels so the job-role names stay legible
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
```{r}
# Encode Stress_Level as an ordered factor (Low < Medium < High), which is
# the response type polr() requires. Values outside these three levels
# become NA and are removed by the filter below.
IRWMH$Stress_Level <- factor(IRWMH$Stress_Level, ordered = TRUE, levels = c("Low", "Medium", "High"))

# Remove rows with missing values in either model variable
data_clean <- IRWMH %>%
  filter(!is.na(Stress_Level), !is.na(Hours_Worked_Per_Week))

# Perform ordinal logistic regression of stress level on weekly hours.
# Hess = TRUE stores the Hessian in the fit so that summary() can compute
# standard errors directly instead of re-fitting the model.
model_stress_ordinal <- polr(
  Stress_Level ~ Hours_Worked_Per_Week,
  data = data_clean,
  method = "logistic",
  Hess = TRUE
)

# View the summary of the model
summary(model_stress_ordinal)

```
```{r}

# Re-read the raw CSV, replacing the in-memory IRWMH object. Anything added
# to or changed in IRWMH since it was first loaded is discarded by this
# assignment.
# NOTE(review): presumably done to undo the ordered-factor conversion of
# Stress_Level made for the polr() model - confirm this is the intent.
IRWMH <- read.csv("D:/R course/Practicing/Mental health vs remote work/IRWMH.csv")

```

```{r}
# Simple linear regression of the number of virtual meetings on weekly hours
# worked, fitted on the full IRWMH data.
model <- lm(Number_of_Virtual_Meetings ~ Hours_Worked_Per_Week, data = IRWMH)

# Check the model summary
summary(model)

# Scatter plot of the raw observations
plot(IRWMH$Hours_Worked_Per_Week, IRWMH$Number_of_Virtual_Meetings,
     main = "Regression Analysis",
     xlab = "Hours Worked Per Week",
     ylab = "Number of Virtual Meetings",
     pch = 19, col = "blue")

# Add the fitted regression line (drawn once; a second identical abline()
# call would only overplot the same line)
abline(model, col = "red", lwd = 2)

```
```{r}
# Number of rows in IRWMH for every Work_Location x Stress_Level
# combination, returned ungrouped and ordered by both keys.
stress_level_by_work_location_summary <- arrange(
  summarise(
    group_by(IRWMH, Work_Location, Stress_Level),
    Count = n(),
    .groups = "drop"
  ),
  Work_Location,
  Stress_Level
)
print(stress_level_by_work_location_summary)
```
```{r}
# Grouped bar chart of head-counts: one cluster per Work_Location, one bar
# per Stress_Level. The bar height is the Count column, so the y axis is
# labelled "Count"; the stress level is identified by the fill legend.
ggplot(
  stress_level_by_work_location_summary,
  aes(x = Work_Location, y = Count, fill = Stress_Level)
) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Stress Level by Job Location",
    x = "Work Location",
    y = "Count",
    fill = "Stress Level"
  ) +
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")
```
```{r}
# Number of rows in IRWMH for every Industry x
# Company_Support_for_Remote_Work rating, returned ungrouped and ordered by
# both keys.
cs_by_industry_summary <- arrange(
  summarise(
    group_by(IRWMH, Industry, Company_Support_for_Remote_Work),
    Count = n(),
    .groups = "drop"
  ),
  Industry,
  Company_Support_for_Remote_Work
)
print(cs_by_industry_summary)
```
```{r}
# Mean Company_Support_for_Remote_Work rating per Industry; missing ratings
# are ignored by the mean.
average_cs_by_industry <- summarise(
  group_by(IRWMH, Industry),
  Average_CS = mean(Company_Support_for_Remote_Work, na.rm = TRUE)
)

# Show the per-industry averages
print(average_cs_by_industry)
```
```{r}
# Distribution of the Company_Support_for_Remote_Work rating for each
# Industry: violin outline with a slim box plot drawn on top of it.
ggplot(
  data = IRWMH,
  mapping = aes(
    x = Industry,
    y = Company_Support_for_Remote_Work,
    fill = Industry
  )
) +
  # Untrimmed density outline, one violin per industry
  geom_violin(trim = FALSE) +
  # Narrow, semi-transparent box plot; outlier points are not drawn
  geom_boxplot(width = 0.1, alpha = 0.5, outlier.shape = NA) +
  scale_fill_brewer(palette = "Set3") +
  labs(
    title = "Company Support for Remote Work by Industry",
    x = "Industry",
    y = "Company Support Rating"
  ) +
  theme_minimal() +
  # Tilt the x tick labels so the industry names stay legible
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
```{r}
# Mean Company_Support_for_Remote_Work rating per Region; missing ratings
# are ignored by the mean.
average_csr_by_region <- summarise(
  group_by(IRWMH, Region),
  Average_CSR = mean(Company_Support_for_Remote_Work, na.rm = TRUE)
)

# Show the per-region averages
print(average_csr_by_region)
```
```{r}
# Mean Social_Isolation_Rating per Region; missing ratings are ignored by
# the mean.
average_sir_by_region <- summarise(
  group_by(IRWMH, Region),
  Average_SIR = mean(Social_Isolation_Rating, na.rm = TRUE)
)

# Show the per-region averages
print(average_sir_by_region)
```
```{r}
# Region-level regression of average social isolation on average company
# support, using six hand-entered regional means.
# NOTE(review): the figures look like the per-region means printed by the
# preceding summaries - confirm and re-derive them if the dataset changes.

# Region names, in the same order as the two rating vectors below
regions <- c(
  "Africa", "Asia", "Europe",
  "North America", "Oceania", "South America"
)

# Average company support rating, one value per region
company_support <- c(
  3.010465, 2.965018, 2.964286,
  3.037323, 3.055363, 3.014510
)

# Average social isolation rating, one value per region
social_isolation <- c(
  2.987209, 2.980700, 2.936905,
  3.011583, 3.065744, 2.979444
)

# One row per region
data <- data.frame(
  Region = regions,
  Company_Support = company_support,
  Social_Isolation = social_isolation
)

# Ordinary least squares fit: Social_Isolation explained by Company_Support
model <- lm(Social_Isolation ~ Company_Support, data = data)

# Coefficients, standard errors and fit statistics
summary(model)
```
```{r}
# Scatter plot of the regional averages: company support on the x axis,
# social isolation on the y axis, drawn as solid blue points.
plot(
  x = data$Company_Support,
  y = data$Social_Isolation,
  pch = 19,
  col = "blue",
  xlab = "Average Company Support Rating",
  ylab = "Average Social Isolation Rating",
  main = "Regression of Social Isolation on Company Support"
)

# Overlay the fitted line from `model` in red
abline(model, lwd = 2, col = "red")
```

