R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Per-column summary of the built-in `cars` data set
summary(object = cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# Load required libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(corrplot)
## corrplot 0.95 loaded
library(summarytools)
## 
## Attaching package: 'summarytools'
## 
## The following object is masked from 'package:tibble':
## 
##     view
library(knitr)
library(dplyr)
# Import the survey CSV with readr; column types are guessed from the file.
data <- readr::read_csv(file = "Tech_Use_Stress_Wellness.csv")
## Rows: 5000 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): gender, location_type
## dbl (21): user_id, age, daily_screen_time_hours, phone_usage_hours, laptop_u...
## lgl  (2): uses_wellness_apps, eats_healthy
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Data structure: column names, storage types and the first few values
utils::str(object = data)
## spc_tbl_ [5,000 × 25] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ user_id                         : num [1:5000] 1 2 3 4 5 6 7 8 9 10 ...
##  $ age                             : num [1:5000] 53 66 43 29 57 22 35 53 72 33 ...
##  $ gender                          : chr [1:5000] "Male" "Female" "Male" "Female" ...
##  $ daily_screen_time_hours         : num [1:5000] 6.8 4.1 4.7 6 6.7 8.6 5.9 7.3 4.2 5.6 ...
##  $ phone_usage_hours               : num [1:5000] 2.9 2.1 3.6 4.5 3.4 2.4 4.1 2.5 1.2 2 ...
##  $ laptop_usage_hours              : num [1:5000] 2.1 1.6 0 0 2 3.4 1.8 2.2 0.5 2.6 ...
##  $ tablet_usage_hours              : num [1:5000] 0.5 0.4 0.5 0.2 0.8 1.4 0 0.9 1.2 0.3 ...
##  $ tv_usage_hours                  : num [1:5000] 2 0.8 1.4 1.3 1.2 1.1 0.4 2.4 2.3 1.4 ...
##  $ social_media_hours              : num [1:5000] 2.3 2.8 3.7 4.5 3.8 4.5 3.8 4.5 0.3 2.6 ...
##  $ work_related_hours              : num [1:5000] 3 3.5 3.6 4 3.5 3.8 3.9 3.8 1.1 3.2 ...
##  $ entertainment_hours             : num [1:5000] 2.3 2 1 0.8 1.2 1 1.9 1.2 2.3 1.6 ...
##  $ gaming_hours                    : num [1:5000] 2.2 1.6 0.9 0.3 2 0.7 1.2 0.9 2.6 1.4 ...
##  $ sleep_duration_hours            : num [1:5000] 7.1 6.8 6 7.3 7 6 6.6 6.9 7.2 8 ...
##  $ sleep_quality                   : num [1:5000] 4 4 4 4 4 3 4 4 4 4 ...
##  $ mood_rating                     : num [1:5000] 5.6 6 4.3 1.3 3.6 1.2 3.1 1.4 9.1 7.7 ...
##  $ stress_level                    : num [1:5000] 3 4 6 10 6 9 6 10 1 4 ...
##  $ physical_activity_hours_per_week: num [1:5000] 4.4 4.1 1.4 0 2.8 1.4 1.9 0 5.1 3.6 ...
##  $ location_type                   : chr [1:5000] "Rural" "Rural" "Urban" "Rural" ...
##  $ mental_health_score             : num [1:5000] 79 71 67 55 62 51 62 45 77 78 ...
##  $ uses_wellness_apps              : logi [1:5000] TRUE TRUE TRUE TRUE FALSE FALSE ...
##  $ eats_healthy                    : logi [1:5000] TRUE TRUE TRUE TRUE FALSE FALSE ...
##  $ caffeine_intake_mg_per_day      : num [1:5000] 150 124 218 134 193 ...
##  $ weekly_anxiety_score            : num [1:5000] 7 7 11 17 5 14 7 21 2 11 ...
##  $ weekly_depression_score         : num [1:5000] 3 10 7 14 8 13 5 12 2 8 ...
##  $ mindfulness_minutes_per_day     : num [1:5000] 14 15.5 19.5 13.9 26.7 9.2 21.4 13.5 19.6 21.4 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   user_id = col_double(),
##   ..   age = col_double(),
##   ..   gender = col_character(),
##   ..   daily_screen_time_hours = col_double(),
##   ..   phone_usage_hours = col_double(),
##   ..   laptop_usage_hours = col_double(),
##   ..   tablet_usage_hours = col_double(),
##   ..   tv_usage_hours = col_double(),
##   ..   social_media_hours = col_double(),
##   ..   work_related_hours = col_double(),
##   ..   entertainment_hours = col_double(),
##   ..   gaming_hours = col_double(),
##   ..   sleep_duration_hours = col_double(),
##   ..   sleep_quality = col_double(),
##   ..   mood_rating = col_double(),
##   ..   stress_level = col_double(),
##   ..   physical_activity_hours_per_week = col_double(),
##   ..   location_type = col_character(),
##   ..   mental_health_score = col_double(),
##   ..   uses_wellness_apps = col_logical(),
##   ..   eats_healthy = col_logical(),
##   ..   caffeine_intake_mg_per_day = col_double(),
##   ..   weekly_anxiety_score = col_double(),
##   ..   weekly_depression_score = col_double(),
##   ..   mindfulness_minutes_per_day = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Per-column summary of the imported data
summary(object = data)
##     user_id          age          gender          daily_screen_time_hours
##  Min.   :   1   Min.   :15.0   Length:5000        Min.   : 1.000         
##  1st Qu.:1251   1st Qu.:30.0   Class :character   1st Qu.: 3.700         
##  Median :2500   Median :45.0   Mode  :character   Median : 5.000         
##  Mean   :2500   Mean   :44.7                      Mean   : 5.038         
##  3rd Qu.:3750   3rd Qu.:60.0                      3rd Qu.: 6.300         
##  Max.   :5000   Max.   :74.0                      Max.   :10.000         
##  phone_usage_hours laptop_usage_hours tablet_usage_hours tv_usage_hours 
##  Min.   :0.200     Min.   :0.000      Min.   :0.0000     Min.   :0.000  
##  1st Qu.:1.400     1st Qu.:0.700      1st Qu.:0.2000     1st Qu.:0.700  
##  Median :2.000     Median :1.500      Median :0.6000     Median :1.400  
##  Mean   :1.994     Mean   :1.555      Mean   :0.6631     Mean   :1.433  
##  3rd Qu.:2.600     3rd Qu.:2.300      3rd Qu.:1.0000     3rd Qu.:2.100  
##  Max.   :5.000     Max.   :5.000      Max.   :2.9000     Max.   :4.000  
##  social_media_hours work_related_hours entertainment_hours  gaming_hours  
##  Min.   :0.300      Min.   :0.70       Min.   :0.000       Min.   :0.000  
##  1st Qu.:2.400      1st Qu.:3.10       1st Qu.:1.200       1st Qu.:1.100  
##  Median :3.600      Median :3.70       Median :1.600       Median :1.400  
##  Mean   :3.277      Mean   :3.36       Mean   :1.662       Mean   :1.561  
##  3rd Qu.:4.500      3rd Qu.:3.90       3rd Qu.:2.100       3rd Qu.:2.000  
##  Max.   :4.500      Max.   :4.50       Max.   :3.900       Max.   :3.800  
##  sleep_duration_hours sleep_quality    mood_rating      stress_level   
##  Min.   :5.40         Min.   :1.000   Min.   : 1.000   Min.   : 1.000  
##  1st Qu.:7.00         1st Qu.:4.000   1st Qu.: 1.700   1st Qu.: 3.000  
##  Median :7.40         Median :4.000   Median : 4.300   Median : 6.000  
##  Mean   :7.37         Mean   :4.009   Mean   : 4.448   Mean   : 5.718  
##  3rd Qu.:7.70         3rd Qu.:4.000   3rd Qu.: 6.700   3rd Qu.: 8.000  
##  Max.   :9.00         Max.   :5.000   Max.   :10.000   Max.   :10.000  
##  physical_activity_hours_per_week location_type      mental_health_score
##  Min.   : 0.000                   Length:5000        Min.   : 31.00     
##  1st Qu.: 0.600                   Class :character   1st Qu.: 54.00     
##  Median : 2.300                   Mode  :character   Median : 65.00     
##  Mean   : 2.659                                      Mean   : 64.77     
##  3rd Qu.: 4.200                                      3rd Qu.: 75.00     
##  Max.   :11.800                                      Max.   :100.00     
##  uses_wellness_apps eats_healthy    caffeine_intake_mg_per_day
##  Mode :logical      Mode :logical   Min.   :  0.0             
##  FALSE:2927         FALSE:2486      1st Qu.:108.5             
##  TRUE :2073         TRUE :2514      Median :141.7             
##                                     Mean   :142.3             
##                                     3rd Qu.:176.4             
##                                     Max.   :341.2             
##  weekly_anxiety_score weekly_depression_score mindfulness_minutes_per_day
##  Min.   : 0.000       Min.   : 0.00           Min.   : 5.00              
##  1st Qu.: 5.000       1st Qu.: 4.00           1st Qu.:12.40              
##  Median : 8.000       Median : 7.00           Median :17.20              
##  Mean   : 8.632       Mean   : 7.52           Mean   :18.55              
##  3rd Qu.:12.000       3rd Qu.:11.00           3rd Qu.:23.80              
##  Max.   :21.000       Max.   :21.00           Max.   :42.00
# Data Preprocessing ----
# Encode only the genuinely categorical columns as factors. The hour, score,
# rating, intake and minute columns are measurements and stay numeric:
# turning them into factors makes descr() drop them with a
# "Non-numerical variable(s) ignored" message and rules out means,
# correlations and linear fits on them.
categorical_cols <- c(
  "gender", "location_type", "uses_wellness_apps", "eats_healthy"
)
data[categorical_cols] <- lapply(data[categorical_cols], as.factor)

# user_id is a row identifier, not a measurement. Storing it as text keeps it
# out of numeric summaries without building a factor with one level per row.
data$user_id <- as.character(data$user_id)

# sleep_quality, stress_level and mood_rating are numeric scales; keep them
# numeric here and wrap them in factor() only where a discrete axis is needed.
# Descriptive Statistics
# Compute the summarytools descr() table for `data` and print it.
# descr() reports numeric columns only; every other column is listed in its
# "Non-numerical variable(s) ignored" message.
desc_stats <- descr(data)
print(desc_stats)
## Non-numerical variable(s) ignored: user_id, gender, daily_screen_time_hours, phone_usage_hours, tablet_usage_hours, tv_usage_hours, social_media_hours, work_related_hours, entertainment_hours, gaming_hours, sleep_duration_hours, sleep_quality, mood_rating, stress_level, physical_activity_hours_per_week, location_type, mental_health_score, uses_wellness_apps, eats_healthy, caffeine_intake_mg_per_day, weekly_anxiety_score, weekly_depression_score, mindfulness_minutes_per_day
## Descriptive Statistics  
## data  
## N: 5000  
## 
##                         age   laptop_usage_hours
## ----------------- --------- --------------------
##              Mean     44.70                 1.56
##           Std.Dev     17.27                 1.03
##               Min     15.00                 0.00
##                Q1     30.00                 0.70
##            Median     45.00                 1.50
##                Q3     60.00                 2.30
##               Max     74.00                 5.00
##               MAD     22.24                 1.19
##               IQR     30.00                 1.60
##                CV      0.39                 0.66
##          Skewness     -0.01                 0.30
##       SE.Skewness      0.03                 0.03
##          Kurtosis     -1.18                -0.45
##           N.Valid   5000.00              5000.00
##                 N   5000.00              5000.00
##         Pct.Valid    100.00               100.00
# Re-display the descriptive statistics computed above. `data` has not
# changed since then, so the stored table is reused instead of recomputed.
print(desc_stats)
## Non-numerical variable(s) ignored: user_id, gender, daily_screen_time_hours, phone_usage_hours, tablet_usage_hours, tv_usage_hours, social_media_hours, work_related_hours, entertainment_hours, gaming_hours, sleep_duration_hours, sleep_quality, mood_rating, stress_level, physical_activity_hours_per_week, location_type, mental_health_score, uses_wellness_apps, eats_healthy, caffeine_intake_mg_per_day, weekly_anxiety_score, weekly_depression_score, mindfulness_minutes_per_day
## Descriptive Statistics  
## data  
## N: 5000  
## 
##                         age   laptop_usage_hours
## ----------------- --------- --------------------
##              Mean     44.70                 1.56
##           Std.Dev     17.27                 1.03
##               Min     15.00                 0.00
##                Q1     30.00                 0.70
##            Median     45.00                 1.50
##                Q3     60.00                 2.30
##               Max     74.00                 5.00
##               MAD     22.24                 1.19
##               IQR     30.00                 1.60
##                CV      0.39                 0.66
##          Skewness     -0.01                 0.30
##       SE.Skewness      0.03                 0.03
##          Kurtosis     -1.18                -0.45
##           N.Valid   5000.00              5000.00
##                 N   5000.00              5000.00
##         Pct.Valid    100.00               100.00
# Load required packages
library(ggplot2)
library(dplyr)
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
# Reload the raw CSV with base R. This overwrites `data`, so the plots below
# work on the columns as stored in the file rather than on the
# factor-converted copy built earlier.
data <- read.csv(file = "Tech_Use_Stress_Wellness.csv")

# 1. Distribution plots
# Daily screen time is recorded in fractional hours, so a generic 30-bin
# histogram is adequate.
p1 <- ggplot(data, aes(x = daily_screen_time_hours)) +
  geom_histogram(bins = 30, fill = "yellow", alpha = 0.7) +
  labs(title = "Distribution of Daily Screen Time", x = "Hours", y = "Frequency")

# mental_health_score appears to hold whole-number scores (see the data
# preview). An arbitrary bin count then puts a different number of integer
# values in neighbouring bins and produces a jagged outline. Bins of width 2
# starting at a half-integer boundary each cover exactly two scores.
p2 <- ggplot(data, aes(x = mental_health_score)) +
  geom_histogram(binwidth = 2, boundary = 0.5, fill = "white", alpha = 0.7) +
  labs(title = "Distribution of Mental Health Score", x = "Score", y = "Frequency")

# stress_level appears to be a whole-number 1-10 scale, so use one bin per
# level; 30 bins would leave empty gaps between the bars.
p3 <- ggplot(data, aes(x = stress_level)) +
  geom_histogram(binwidth = 1, boundary = 0.5, fill = "red", alpha = 0.7) +
  labs(title = "Distribution of Stress Level", x = "Stress Level", y = "Frequency")

# 2. Technology usage by gender
# One box per gender, filled by gender, showing daily screen time.
p4 <- ggplot(
  data = data,
  mapping = aes(x = gender, y = daily_screen_time_hours, fill = gender)
) +
  geom_boxplot() +
  labs(x = "Gender", y = "Hours", title = "Daily Screen Time by Gender")

# 3. Mental health by location type
# One box per location type, filled by location type.
p5 <- ggplot(
  data = data,
  mapping = aes(
    x = location_type,
    y = mental_health_score,
    fill = location_type
  )
) +
  geom_boxplot() +
  labs(
    x = "Location",
    y = "Mental Health Score",
    title = "Mental Health Score by Location Type"
  )

# 4. Sleep patterns analysis
# Scatter of mental health score against sleep duration with a linear fit.
p6 <- ggplot(
  data = data,
  mapping = aes(x = sleep_duration_hours, y = mental_health_score)
) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", colour = "red") +
  labs(
    x = "Sleep Duration (hours)",
    y = "Mental Health Score",
    title = "Mental Health vs Sleep Duration"
  )

# sleep_quality is wrapped in factor() so each quality level gets its own box.
p7 <- ggplot(
  data = data,
  mapping = aes(x = factor(sleep_quality), y = stress_level)
) +
  geom_boxplot(fill = "lightblue") +
  labs(
    x = "Sleep Quality",
    y = "Stress Level",
    title = "Stress Level by Sleep Quality"
  )

# 5. Social media impact
# Scatter of mental health score against social media hours with a linear fit.
p8 <- ggplot(
  data = data,
  mapping = aes(x = social_media_hours, y = mental_health_score)
) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", colour = "purple") +
  labs(
    x = "Social Media Hours",
    y = "Mental Health Score",
    title = "Mental Health vs Social Media Usage"
  )

# 6. Wellness apps effectiveness
# One box per value of uses_wellness_apps, filled by the same variable.
p9 <- ggplot(
  data = data,
  mapping = aes(
    x = uses_wellness_apps,
    y = mental_health_score,
    fill = uses_wellness_apps
  )
) +
  geom_boxplot() +
  labs(
    x = "Uses Wellness Apps",
    y = "Mental Health Score",
    title = "Mental Health Score: Wellness App Users vs Non-Users"
  )

# 7. Physical activity impact
# Scatter of stress level against weekly activity hours with a linear fit.
p10 <- ggplot(
  data = data,
  mapping = aes(x = physical_activity_hours_per_week, y = stress_level)
) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", colour = "darkgreen") +
  labs(
    x = "Physical Activity (hours/week)",
    y = "Stress Level",
    title = "Stress Level vs Physical Activity"
  )

# 8. Caffeine intake analysis
# Scatter of stress level against daily caffeine intake with a linear fit.
p11 <- ggplot(
  data = data,
  mapping = aes(x = caffeine_intake_mg_per_day, y = stress_level)
) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", colour = "brown") +
  labs(
    x = "Caffeine Intake (mg/day)",
    y = "Stress Level",
    title = "Stress Level vs Caffeine Intake"
  )

# 9. Mindfulness practice
# Scatter of mental health score against mindfulness minutes with a linear fit.
p12 <- ggplot(
  data = data,
  mapping = aes(x = mindfulness_minutes_per_day, y = mental_health_score)
) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", colour = "orange") +
  labs(
    x = "Mindfulness Minutes/Day",
    y = "Mental Health Score",
    title = "Mental Health vs Mindfulness Practice"
  )

# 10. Healthy eating impact
# One box per value of eats_healthy, filled by the same variable.
p13 <- ggplot(
  data = data,
  mapping = aes(
    x = eats_healthy,
    y = mental_health_score,
    fill = eats_healthy
  )
) +
  geom_boxplot() +
  labs(
    x = "Eats Healthy",
    y = "Mental Health Score",
    title = "Mental Health Score: Healthy Eaters vs Non-Healthy Eaters"
  )

# 11. Age distribution and mental health
# Scatter of mental health score against age with a linear fit.
p14 <- ggplot(
  data = data,
  mapping = aes(x = age, y = mental_health_score)
) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", colour = "darkblue") +
  labs(
    x = "Age",
    y = "Mental Health Score",
    title = "Mental Health Score by Age"
  )

# 12. Correlation heatmap of key variables
# Pick the columns to correlate, then compute pairwise correlations with
# cor() using only rows that are complete across all selected columns.
cor_vars <- data %>%
  select(
    mental_health_score, stress_level, daily_screen_time_hours,
    social_media_hours, sleep_duration_hours,
    physical_activity_hours_per_week, mindfulness_minutes_per_day
  )
cor_matrix <- cor(cor_vars, use = "complete.obs")

# melt() reshapes the square matrix into long form: one row per
# (Var1, Var2) pair with the coefficient in `value`, as geom_tile() expects.
p15 <- ggplot(melt(cor_matrix), aes(Var1, Var2, fill = value)) +
  geom_tile() +
  # Fix the colour scale to the full correlation range and centre it on 0.
  # The argument name `limits` is written out in full; an abbreviation such
  # as `limit` would rely on partial argument matching.
  scale_fill_gradient2(
    low = "blue", mid = "white", high = "red",
    midpoint = 0, limits = c(-1, 1)
  ) +
  labs(
    title = "Correlation Heatmap of Key Variables",
    x = "", y = "", fill = "Correlation"
  ) +
  # Tilt the x-axis labels so the long variable names do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Print all plots
# Render p1 through p15 in order, one print() call per plot. The `##` lines
# below are console messages captured with the output: geom_smooth() reports
# the formula it used for each plot that has a fitted line.
print(p1)

print(p2)

print(p3)

print(p4)

print(p5)

print(p6)
## `geom_smooth()` using formula = 'y ~ x'

print(p7)

print(p8)
## `geom_smooth()` using formula = 'y ~ x'

print(p9)

print(p10)
## `geom_smooth()` using formula = 'y ~ x'

print(p11)
## `geom_smooth()` using formula = 'y ~ x'

print(p12)
## `geom_smooth()` using formula = 'y ~ x'

print(p13)

print(p14)
## `geom_smooth()` using formula = 'y ~ x'

print(p15)