# load packages
# Install tidyverse only when it is not already available, so re-running the
# script does not download and reinstall the package every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Import the mental health and technology usage CSV into a data frame
mental_tech <- read.csv(
  file = "/cloud/project/Technology/mental_health_and_technology_usage_2024.csv"
)
names(mental_tech) # list the available columns
##  [1] "User_ID"                  "Age"                     
##  [3] "Gender"                   "Technology_Usage_Hours"  
##  [5] "Social_Media_Usage_Hours" "Gaming_Hours"            
##  [7] "Screen_Time_Hours"        "Mental_Health_Status"    
##  [9] "Stress_Level"             "Sleep_Hours"             
## [11] "Physical_Activity_Hours"  "Support_Systems_Access"  
## [13] "Work_Environment_Impact"  "Online_Support_Usage"
# What is the correlation between age and total technology usage hours?
#
# use = "complete.obs" drops rows where either value is missing, so a single NA
# does not turn the whole coefficient into NA. This mirrors the na.rm = TRUE
# passed to mean() in the grouped averages of this script. With no missing
# values the result is unchanged.

age_usagehrs_cor <- cor(
  mental_tech$Age,
  mental_tech$Technology_Usage_Hours,
  use = "complete.obs"
)
print(age_usagehrs_cor)
## [1] 0.01745498
# How does mental health status vary across different age groups?

# Bin ages into four bands with edges at 15, 21, 36, 54 and 70. Intervals are
# closed on the right, and include.lowest makes the first band contain 15 too.
age_groups <- cut(
  mental_tech$Age,
  breaks = c(15, 21, 36, 54, 70),
  labels = c("15-21", "22-36", "37-54", "55-70"),
  include.lowest = TRUE
)

# Attach the bands to the data frame as the Age_Groups column
mental_tech <- mutate(mental_tech, Age_Groups = age_groups)

# Count respondents for every age band / mental health status combination;
# .groups = "drop" returns an ungrouped result.
age_status <- mental_tech |>
  group_by(Age_Groups, Mental_Health_Status) |>
  summarise(
    mental_status_count = n(),
    .groups = "drop"
  )

# Show the counts so the age bands can be compared
print(age_status)
## # A tibble: 16 × 3
##    Age_Groups Mental_Health_Status mental_status_count
##    <fct>      <chr>                              <int>
##  1 15-21      Excellent                            209
##  2 15-21      Fair                                 188
##  3 15-21      Good                                 230
##  4 15-21      Poor                                 201
##  5 22-36      Excellent                            787
##  6 22-36      Fair                                 777
##  7 22-36      Good                                 765
##  8 22-36      Poor                                 800
##  9 37-54      Excellent                            960
## 10 37-54      Fair                                 906
## 11 37-54      Good                                 954
## 12 37-54      Poor                                 910
## 13 55-70      Excellent                            562
## 14 55-70      Fair                                 619
## 15 55-70      Good                                 559
## 16 55-70      Poor                                 573
# How does stress level vary by gender?

# Number of respondents in each gender / stress level pairing
stress_gender <- mental_tech |>
  group_by(Gender, Stress_Level) |>
  summarise(
    stress_count = n(),
    .groups = "drop"
  )

# Show the counts
print(stress_gender)
## # A tibble: 9 × 3
##   Gender Stress_Level stress_count
##   <chr>  <chr>               <int>
## 1 Female High                 1107
## 2 Female Low                  1099
## 3 Female Medium               1080
## 4 Male   High                 1110
## 5 Male   Low                  1120
## 6 Male   Medium               1120
## 7 Other  High                 1113
## 8 Other  Low                  1113
## 9 Other  Medium               1138
# Relationship between technology usage hours and stress levels

# Mean technology usage hours per stress level, ignoring missing values
tech_hrs_stress <- mental_tech |>
  group_by(Stress_Level) |>
  summarise(
    avg_hours = mean(Technology_Usage_Hours, na.rm = TRUE),
    .groups = "drop"
  )

# Show the averages
print(tech_hrs_stress)
## # A tibble: 3 × 2
##   Stress_Level avg_hours
##   <chr>            <dbl>
## 1 High              6.46
## 2 Low               6.53
## 3 Medium            6.43
# Is there a correlation between screen time hours and number of sleep hours?
#
# use = "complete.obs" drops rows where either value is missing, so a single NA
# does not turn the whole coefficient into NA. This mirrors the na.rm = TRUE
# passed to mean() in the grouped averages of this script. With no missing
# values the result is unchanged.

screen_sleep_cor <- cor(
  mental_tech$Screen_Time_Hours,
  mental_tech$Sleep_Hours,
  use = "complete.obs"
)
print(screen_sleep_cor)
## [1] -0.01118056
# Relationship between physical activity and mental health status

# Mean physical activity hours per mental health status, ignoring missing
# values
physical_status <- mental_tech |>
  group_by(Mental_Health_Status) |>
  summarise(
    Avg_Physical_Hrs = mean(Physical_Activity_Hours, na.rm = TRUE),
    .groups = "drop"
  )

# Show the averages
print(physical_status)
## # A tibble: 4 × 2
##   Mental_Health_Status Avg_Physical_Hrs
##   <chr>                           <dbl>
## 1 Excellent                        5.05
## 2 Fair                             4.98
## 3 Good                             4.96
## 4 Poor                             5.02
# Visualization
# A boxplot summarises a distribution, so it is drawn from the respondent-level
# data in mental_tech. The physical_status table holds a single average per
# status, which would collapse every box into one flat line.
ggplot(
  data = mental_tech,
  mapping = aes(
    x = Mental_Health_Status,
    y = Physical_Activity_Hours,
    color = Mental_Health_Status
  )
) +
  geom_boxplot() +
  labs(title = "Physical Activity and Mental Health Status")

# How does the work environment impact stress levels?

# Number of respondents in each work environment impact / stress level pairing
work_stress <- mental_tech |>
  group_by(Work_Environment_Impact, Stress_Level) |>
  summarise(
    stress_count = n(),
    .groups = "drop"
  )

# Show the counts
print(work_stress)
## # A tibble: 9 × 3
##   Work_Environment_Impact Stress_Level stress_count
##   <chr>                   <chr>               <int>
## 1 Negative                High                 1126
## 2 Negative                Low                  1145
## 3 Negative                Medium               1107
## 4 Neutral                 High                 1116
## 5 Neutral                 Low                  1103
## 6 Neutral                 Medium               1093
## 7 Positive                High                 1088
## 8 Positive                Low                  1084
## 9 Positive                Medium               1138
# Relationship between social media usage hours & support systems

# Bucket social media usage hours into four bands with edges at 0, 2, 4, 6 and
# 8. Intervals are closed on the right, and include.lowest makes the first band
# contain 0 as well.
hour_classification <- cut(
  mental_tech$Social_Media_Usage_Hours,
  breaks = c(0, 2, 4, 6, 8),
  labels = c("0-2", "2.1-4", "4.1-6", "6.1-8"),
  include.lowest = TRUE
)

# Attach the bands to the data frame as the Hour_Classification column
mental_tech <- mutate(mental_tech, Hour_Classification = hour_classification)

# Count respondents for every support-access answer / usage band combination
social_media_support <- mental_tech |>
  group_by(Support_Systems_Access, Hour_Classification) |>
  summarise(
    support_count = n(),
    .groups = "drop"
  )

# Show the counts
print(social_media_support)
## # A tibble: 8 × 3
##   Support_Systems_Access Hour_Classification support_count
##   <chr>                  <fct>                       <int>
## 1 No                     0-2                          1244
## 2 No                     2.1-4                        1253
## 3 No                     4.1-6                        1246
## 4 No                     6.1-8                        1263
## 5 Yes                    0-2                          1296
## 6 Yes                    2.1-4                        1273
## 7 Yes                    4.1-6                        1204
## 8 Yes                    6.1-8                        1221
# Visualization
# Dodged bars: within each usage band, one bar per support-access answer, with
# bar height taken directly from the precomputed support_count column.
ggplot(
  data = social_media_support,
  mapping = aes(
    x = Hour_Classification,
    y = support_count,
    fill = Support_Systems_Access
  )
) +
  geom_col(position = "dodge") +
  theme_minimal() +
  labs(title = "social media usage hours and support systems")

# What impact does gaming time have on mental health status?

# Mean gaming hours per mental health status, ignoring missing values
gaming_status <- mental_tech |>
  group_by(Mental_Health_Status) |>
  summarise(
    avg_gaming_time = mean(Gaming_Hours, na.rm = TRUE),
    .groups = "drop"
  )

# Show the averages
print(gaming_status)
## # A tibble: 4 × 2
##   Mental_Health_Status avg_gaming_time
##   <chr>                          <dbl>
## 1 Excellent                       2.54
## 2 Fair                            2.49
## 3 Good                            2.50
## 4 Poor                            2.52
# How does the use of online support systems affect stress?

# Number of respondents in each stress level / online support usage pairing
online_stress <- mental_tech |>
  group_by(Stress_Level, Online_Support_Usage) |>
  summarise(
    support_count = n(),
    .groups = "drop"
  )

# Show the counts
print(online_stress)
## # A tibble: 6 × 3
##   Stress_Level Online_Support_Usage support_count
##   <chr>        <chr>                        <int>
## 1 High         No                            1693
## 2 High         Yes                           1637
## 3 Low          No                            1640
## 4 Low          Yes                           1692
## 5 Medium       No                            1680
## 6 Medium       Yes                           1658
# Visualization
# Dodged bars: within each stress level, one bar per online-support answer,
# with bar height taken directly from the precomputed support_count column.
# The stray trailing comma after `position` was removed; it passed an empty
# extra argument to geom_bar().
ggplot(
  data = online_stress,
  mapping = aes(
    x = Stress_Level,
    y = support_count,
    fill = Online_Support_Usage
  )
) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Online support systems and stress")