# Load packages ----
# Install tidyverse only when it is not already available, so re-running the
# script does not reinstall the package (and hit the network) every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load dataset ----
# Read the mental health and technology usage CSV into a data frame.
mental_tech <- read.csv(
  file = "/cloud/project/Technology/mental_health_and_technology_usage_2024.csv"
)
# Show the column names that are available for the analysis below.
names(mental_tech)
## [1] "User_ID" "Age"
## [3] "Gender" "Technology_Usage_Hours"
## [5] "Social_Media_Usage_Hours" "Gaming_Hours"
## [7] "Screen_Time_Hours" "Mental_Health_Status"
## [9] "Stress_Level" "Sleep_Hours"
## [11] "Physical_Activity_Hours" "Support_Systems_Access"
## [13] "Work_Environment_Impact" "Online_Support_Usage"
# What is the correlation between age and total technology usage hours?
# Uses cor()'s default method. use = "complete.obs" drops rows with a missing
# value in either column, so a single NA does not turn the whole result into
# NA; this matches the na.rm = TRUE used by the mean() summaries in this script.
age_usagehrs_cor <- cor(
  mental_tech$Age,
  mental_tech$Technology_Usage_Hours,
  use = "complete.obs"
)
print(age_usagehrs_cor)
## [1] 0.01745498
# How does mental health status vary across different age groups?
# Bin ages into four brackets; include.lowest = TRUE keeps age 15 itself in
# the first bracket instead of turning it into NA.
age_groups <- cut(
  mental_tech$Age,
  breaks = c(15, 21, 36, 54, 70),
  labels = c("15-21", "22-36", "37-54", "55-70"),
  include.lowest = TRUE
)
# Attach the brackets to the data as a new Age_Groups column.
mental_tech <- mutate(mental_tech, Age_Groups = age_groups)
# Count the rows for every age bracket / mental health status pair.
age_status <- group_by(mental_tech, Age_Groups, Mental_Health_Status) %>%
  summarise(
    mental_status_count = n(),
    .groups = "drop"
  )
# Print results
print(age_status)
## # A tibble: 16 × 3
## Age_Groups Mental_Health_Status mental_status_count
## <fct> <chr> <int>
## 1 15-21 Excellent 209
## 2 15-21 Fair 188
## 3 15-21 Good 230
## 4 15-21 Poor 201
## 5 22-36 Excellent 787
## 6 22-36 Fair 777
## 7 22-36 Good 765
## 8 22-36 Poor 800
## 9 37-54 Excellent 960
## 10 37-54 Fair 906
## 11 37-54 Good 954
## 12 37-54 Poor 910
## 13 55-70 Excellent 562
## 14 55-70 Fair 619
## 15 55-70 Good 559
## 16 55-70 Poor 573
# How does stress level vary by gender?
# Count the rows for each gender / stress level combination.
stress_gender <- group_by(mental_tech, Gender, Stress_Level) %>%
  summarise(
    stress_count = n(),
    .groups = "drop"
  )
# Print results
print(stress_gender)
## # A tibble: 9 × 3
## Gender Stress_Level stress_count
## <chr> <chr> <int>
## 1 Female High 1107
## 2 Female Low 1099
## 3 Female Medium 1080
## 4 Male High 1110
## 5 Male Low 1120
## 6 Male Medium 1120
## 7 Other High 1113
## 8 Other Low 1113
## 9 Other Medium 1138
# Relationship between technology usage hours and stress levels.
# Mean technology usage hours per stress level, ignoring missing values.
tech_hrs_stress <- group_by(mental_tech, Stress_Level) %>%
  summarise(
    avg_hours = mean(Technology_Usage_Hours, na.rm = TRUE),
    .groups = "drop"
  )
# Print results
print(tech_hrs_stress)
## # A tibble: 3 × 2
## Stress_Level avg_hours
## <chr> <dbl>
## 1 High 6.46
## 2 Low 6.53
## 3 Medium 6.43
# Is there a correlation between screen time hours and number of sleep hours?
# Uses cor()'s default method. use = "complete.obs" drops rows with a missing
# value in either column, so a single NA does not turn the whole result into
# NA; this matches the na.rm = TRUE used by the mean() summaries in this script.
screen_sleep_cor <- cor(
  mental_tech$Screen_Time_Hours,
  mental_tech$Sleep_Hours,
  use = "complete.obs"
)
print(screen_sleep_cor)
## [1] -0.01118056
# Relationship between physical activity and mental health status?
# Mean physical activity hours per mental health status, ignoring missing
# values.
physical_status <- group_by(mental_tech, Mental_Health_Status) %>%
  summarise(
    Avg_Physical_Hrs = mean(Physical_Activity_Hours, na.rm = TRUE),
    .groups = "drop"
  )
# Print results
print(physical_status)
## # A tibble: 4 × 2
## Mental_Health_Status Avg_Physical_Hrs
## <chr> <dbl>
## 1 Excellent 5.05
## 2 Fair 4.98
## 3 Good 4.96
## 4 Poor 5.02
# Visualization
# Box plot of the per-row physical activity hours for each mental health
# status. physical_status holds only one mean per status, so a box plot built
# from it collapses every box to a single flat line; the distribution has to
# come from the rows of mental_tech itself.
ggplot(
  data = mental_tech,
  mapping = aes(
    x = Mental_Health_Status,
    y = Physical_Activity_Hours,
    color = Mental_Health_Status
  )
) +
  geom_boxplot() +
  # Show the statuses in their natural order instead of alphabetically. The
  # limits list the four status values seen in the data; rows with any other
  # value would be dropped from the plot.
  scale_x_discrete(limits = c("Poor", "Fair", "Good", "Excellent")) +
  labs(title = "Physical Activity and Mental Health Status")

# How does the work environment impact stress levels?
# Count the rows for each work environment impact / stress level combination.
work_stress <- group_by(mental_tech, Work_Environment_Impact, Stress_Level) %>%
  summarise(
    stress_count = n(),
    .groups = "drop"
  )
# Print results
print(work_stress)
## # A tibble: 9 × 3
## Work_Environment_Impact Stress_Level stress_count
## <chr> <chr> <int>
## 1 Negative High 1126
## 2 Negative Low 1145
## 3 Negative Medium 1107
## 4 Neutral High 1116
## 5 Neutral Low 1103
## 6 Neutral Medium 1093
## 7 Positive High 1088
## 8 Positive Low 1084
## 9 Positive Medium 1138
# Relationship between social media usage hours & support systems
# Bin social media usage hours into four bands; include.lowest = TRUE keeps a
# value of exactly 0 in the first band instead of turning it into NA.
hour_classification <- cut(
  mental_tech$Social_Media_Usage_Hours,
  breaks = c(0, 2, 4, 6, 8),
  labels = c("0-2", "2.1-4", "4.1-6", "6.1-8"),
  include.lowest = TRUE
)
# Attach the bands to the data as a new Hour_Classification column.
mental_tech <- mutate(mental_tech, Hour_Classification = hour_classification)
# Count the rows for every support access / usage band pair.
social_media_support <- group_by(
  mental_tech,
  Support_Systems_Access,
  Hour_Classification
) %>%
  summarise(
    support_count = n(),
    .groups = "drop"
  )
# Print results
print(social_media_support)
## # A tibble: 8 × 3
## Support_Systems_Access Hour_Classification support_count
## <chr> <fct> <int>
## 1 No 0-2 1244
## 2 No 2.1-4 1253
## 3 No 4.1-6 1246
## 4 No 6.1-8 1263
## 5 Yes 0-2 1296
## 6 Yes 2.1-4 1273
## 7 Yes 4.1-6 1204
## 8 Yes 6.1-8 1221
# Visualization
# Side-by-side bars of the row counts per social media usage band, filled by
# Support_Systems_Access.
ggplot(
  data = social_media_support,
  mapping = aes(
    x = Hour_Classification,
    y = support_count,
    fill = Support_Systems_Access
  )
) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "social media usage hours and support systems") +
  theme_minimal()

# What impact does gaming time have on mental health status?
# Mean gaming hours per mental health status, ignoring missing values.
gaming_status <- group_by(mental_tech, Mental_Health_Status) %>%
  summarise(
    avg_gaming_time = mean(Gaming_Hours, na.rm = TRUE),
    .groups = "drop"
  )
# Print results
print(gaming_status)
## # A tibble: 4 × 2
## Mental_Health_Status avg_gaming_time
## <chr> <dbl>
## 1 Excellent 2.54
## 2 Fair 2.49
## 3 Good 2.50
## 4 Poor 2.52
# How does the use of online support systems affect stress?
# Count the rows for each stress level / online support usage combination.
online_stress <- group_by(mental_tech, Stress_Level, Online_Support_Usage) %>%
  summarise(
    support_count = n(),
    .groups = "drop"
  )
# Print results
print(online_stress)
## # A tibble: 6 × 3
## Stress_Level Online_Support_Usage support_count
## <chr> <chr> <int>
## 1 High No 1693
## 2 High Yes 1637
## 3 Low No 1640
## 4 Low Yes 1692
## 5 Medium No 1680
## 6 Medium Yes 1658
# Visualization
# Side-by-side bars of the row counts per stress level, filled by
# Online_Support_Usage. geom_col() plots the precomputed counts as bar heights
# (the same as geom_bar(stat = "identity")), and the stray trailing comma that
# passed an empty argument to the geom has been removed.
ggplot(
  data = online_stress,
  mapping = aes(
    x = Stress_Level,
    y = support_count,
    fill = Online_Support_Usage
  )
) +
  geom_col(position = "dodge") +
  # Show the stress levels in their natural order instead of alphabetically
  # (High, Low, Medium). The limits list the three levels seen in the data;
  # rows with any other value would be dropped from the plot.
  scale_x_discrete(limits = c("Low", "Medium", "High")) +
  labs(title = "Online support systems and stress")
