2025/11/14

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see <http://rmarkdown.rstudio.com>.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Summarise each column of the built-in `cars` data set (speed and dist).
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
# Load required libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(corrplot)
## corrplot 0.95 loaded
library(summarytools)
##
## Attaching package: 'summarytools'
##
## The following object is masked from 'package:tibble':
##
## view
library(knitr)
library(dplyr)
# Import the CSV (path is relative to the working directory) as a tibble;
# readr guesses the column types and reports them in a message.
data <- readr::read_csv(file = "Tech_Use_Stress_Wellness.csv")
## Rows: 5000 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): gender, location_type
## dbl (21): user_id, age, daily_screen_time_hours, phone_usage_hours, laptop_u...
## lgl (2): uses_wellness_apps, eats_healthy
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Data Structure and Basic Information
# List every column with its type and first few values to check how the file
# was parsed before any conversion is applied.
str(data)
## spc_tbl_ [5,000 × 25] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ user_id : num [1:5000] 1 2 3 4 5 6 7 8 9 10 ...
## $ age : num [1:5000] 53 66 43 29 57 22 35 53 72 33 ...
## $ gender : chr [1:5000] "Male" "Female" "Male" "Female" ...
## $ daily_screen_time_hours : num [1:5000] 6.8 4.1 4.7 6 6.7 8.6 5.9 7.3 4.2 5.6 ...
## $ phone_usage_hours : num [1:5000] 2.9 2.1 3.6 4.5 3.4 2.4 4.1 2.5 1.2 2 ...
## $ laptop_usage_hours : num [1:5000] 2.1 1.6 0 0 2 3.4 1.8 2.2 0.5 2.6 ...
## $ tablet_usage_hours : num [1:5000] 0.5 0.4 0.5 0.2 0.8 1.4 0 0.9 1.2 0.3 ...
## $ tv_usage_hours : num [1:5000] 2 0.8 1.4 1.3 1.2 1.1 0.4 2.4 2.3 1.4 ...
## $ social_media_hours : num [1:5000] 2.3 2.8 3.7 4.5 3.8 4.5 3.8 4.5 0.3 2.6 ...
## $ work_related_hours : num [1:5000] 3 3.5 3.6 4 3.5 3.8 3.9 3.8 1.1 3.2 ...
## $ entertainment_hours : num [1:5000] 2.3 2 1 0.8 1.2 1 1.9 1.2 2.3 1.6 ...
## $ gaming_hours : num [1:5000] 2.2 1.6 0.9 0.3 2 0.7 1.2 0.9 2.6 1.4 ...
## $ sleep_duration_hours : num [1:5000] 7.1 6.8 6 7.3 7 6 6.6 6.9 7.2 8 ...
## $ sleep_quality : num [1:5000] 4 4 4 4 4 3 4 4 4 4 ...
## $ mood_rating : num [1:5000] 5.6 6 4.3 1.3 3.6 1.2 3.1 1.4 9.1 7.7 ...
## $ stress_level : num [1:5000] 3 4 6 10 6 9 6 10 1 4 ...
## $ physical_activity_hours_per_week: num [1:5000] 4.4 4.1 1.4 0 2.8 1.4 1.9 0 5.1 3.6 ...
## $ location_type : chr [1:5000] "Rural" "Rural" "Urban" "Rural" ...
## $ mental_health_score : num [1:5000] 79 71 67 55 62 51 62 45 77 78 ...
## $ uses_wellness_apps : logi [1:5000] TRUE TRUE TRUE TRUE FALSE FALSE ...
## $ eats_healthy : logi [1:5000] TRUE TRUE TRUE TRUE FALSE FALSE ...
## $ caffeine_intake_mg_per_day : num [1:5000] 150 124 218 134 193 ...
## $ weekly_anxiety_score : num [1:5000] 7 7 11 17 5 14 7 21 2 11 ...
## $ weekly_depression_score : num [1:5000] 3 10 7 14 8 13 5 12 2 8 ...
## $ mindfulness_minutes_per_day : num [1:5000] 14 15.5 19.5 13.9 26.7 9.2 21.4 13.5 19.6 21.4 ...
## - attr(*, "spec")=
## .. cols(
## .. user_id = col_double(),
## .. age = col_double(),
## .. gender = col_character(),
## .. daily_screen_time_hours = col_double(),
## .. phone_usage_hours = col_double(),
## .. laptop_usage_hours = col_double(),
## .. tablet_usage_hours = col_double(),
## .. tv_usage_hours = col_double(),
## .. social_media_hours = col_double(),
## .. work_related_hours = col_double(),
## .. entertainment_hours = col_double(),
## .. gaming_hours = col_double(),
## .. sleep_duration_hours = col_double(),
## .. sleep_quality = col_double(),
## .. mood_rating = col_double(),
## .. stress_level = col_double(),
## .. physical_activity_hours_per_week = col_double(),
## .. location_type = col_character(),
## .. mental_health_score = col_double(),
## .. uses_wellness_apps = col_logical(),
## .. eats_healthy = col_logical(),
## .. caffeine_intake_mg_per_day = col_double(),
## .. weekly_anxiety_score = col_double(),
## .. weekly_depression_score = col_double(),
## .. mindfulness_minutes_per_day = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Per-column overview: min/quartiles/mean/max for numeric columns, TRUE/FALSE
# counts for logical columns, and length/class only for character columns.
summary(data)
## user_id age gender daily_screen_time_hours
## Min. : 1 Min. :15.0 Length:5000 Min. : 1.000
## 1st Qu.:1251 1st Qu.:30.0 Class :character 1st Qu.: 3.700
## Median :2500 Median :45.0 Mode :character Median : 5.000
## Mean :2500 Mean :44.7 Mean : 5.038
## 3rd Qu.:3750 3rd Qu.:60.0 3rd Qu.: 6.300
## Max. :5000 Max. :74.0 Max. :10.000
## phone_usage_hours laptop_usage_hours tablet_usage_hours tv_usage_hours
## Min. :0.200 Min. :0.000 Min. :0.0000 Min. :0.000
## 1st Qu.:1.400 1st Qu.:0.700 1st Qu.:0.2000 1st Qu.:0.700
## Median :2.000 Median :1.500 Median :0.6000 Median :1.400
## Mean :1.994 Mean :1.555 Mean :0.6631 Mean :1.433
## 3rd Qu.:2.600 3rd Qu.:2.300 3rd Qu.:1.0000 3rd Qu.:2.100
## Max. :5.000 Max. :5.000 Max. :2.9000 Max. :4.000
## social_media_hours work_related_hours entertainment_hours gaming_hours
## Min. :0.300 Min. :0.70 Min. :0.000 Min. :0.000
## 1st Qu.:2.400 1st Qu.:3.10 1st Qu.:1.200 1st Qu.:1.100
## Median :3.600 Median :3.70 Median :1.600 Median :1.400
## Mean :3.277 Mean :3.36 Mean :1.662 Mean :1.561
## 3rd Qu.:4.500 3rd Qu.:3.90 3rd Qu.:2.100 3rd Qu.:2.000
## Max. :4.500 Max. :4.50 Max. :3.900 Max. :3.800
## sleep_duration_hours sleep_quality mood_rating stress_level
## Min. :5.40 Min. :1.000 Min. : 1.000 Min. : 1.000
## 1st Qu.:7.00 1st Qu.:4.000 1st Qu.: 1.700 1st Qu.: 3.000
## Median :7.40 Median :4.000 Median : 4.300 Median : 6.000
## Mean :7.37 Mean :4.009 Mean : 4.448 Mean : 5.718
## 3rd Qu.:7.70 3rd Qu.:4.000 3rd Qu.: 6.700 3rd Qu.: 8.000
## Max. :9.00 Max. :5.000 Max. :10.000 Max. :10.000
## physical_activity_hours_per_week location_type mental_health_score
## Min. : 0.000 Length:5000 Min. : 31.00
## 1st Qu.: 0.600 Class :character 1st Qu.: 54.00
## Median : 2.300 Mode :character Median : 65.00
## Mean : 2.659 Mean : 64.77
## 3rd Qu.: 4.200 3rd Qu.: 75.00
## Max. :11.800 Max. :100.00
## uses_wellness_apps eats_healthy caffeine_intake_mg_per_day
## Mode :logical Mode :logical Min. : 0.0
## FALSE:2927 FALSE:2486 1st Qu.:108.5
## TRUE :2073 TRUE :2514 Median :141.7
## Mean :142.3
## 3rd Qu.:176.4
## Max. :341.2
## weekly_anxiety_score weekly_depression_score mindfulness_minutes_per_day
## Min. : 0.000 Min. : 0.00 Min. : 5.00
## 1st Qu.: 5.000 1st Qu.: 4.00 1st Qu.:12.40
## Median : 8.000 Median : 7.00 Median :17.20
## Mean : 8.632 Mean : 7.52 Mean :18.55
## 3rd Qu.:12.000 3rd Qu.:11.00 3rd Qu.:23.80
## Max. :21.000 Max. :21.00 Max. :42.00
# Data Preprocessing
# Convert only the genuinely categorical columns to factors. user_id is
# included because it is a row identifier, not a quantity to be averaged.
#
# Hours, scores, ratings and intake values are deliberately left numeric:
# descr() ignores non-numeric columns, and cor(), histograms and lm smoothers
# all need numbers. An ordinal score such as sleep_quality can still be wrapped
# in factor() at the point of use when it should act as a grouping variable.
categorical_cols <- c(
  "user_id", "gender", "location_type", "uses_wellness_apps", "eats_healthy"
)
data[categorical_cols] <- lapply(data[categorical_cols], as.factor)
# Descriptive Statistics
# descr() summarises numeric columns only and reports the columns it ignored,
# so the table covers whatever is still numeric after the preprocessing step.
desc_stats <- descr(data)
print(desc_stats)
## Non-numerical variable(s) ignored: user_id, gender, daily_screen_time_hours, phone_usage_hours, tablet_usage_hours, tv_usage_hours, social_media_hours, work_related_hours, entertainment_hours, gaming_hours, sleep_duration_hours, sleep_quality, mood_rating, stress_level, physical_activity_hours_per_week, location_type, mental_health_score, uses_wellness_apps, eats_healthy, caffeine_intake_mg_per_day, weekly_anxiety_score, weekly_depression_score, mindfulness_minutes_per_day
## Descriptive Statistics
## data
## N: 5000
##
## age laptop_usage_hours
## ----------------- --------- --------------------
## Mean 44.70 1.56
## Std.Dev 17.27 1.03
## Min 15.00 0.00
## Q1 30.00 0.70
## Median 45.00 1.50
## Q3 60.00 2.30
## Max 74.00 5.00
## MAD 22.24 1.19
## IQR 30.00 1.60
## CV 0.39 0.66
## Skewness -0.01 0.30
## SE.Skewness 0.03 0.03
## Kurtosis -1.18 -0.45
## N.Valid 5000.00 5000.00
## N 5000.00 5000.00
## Pct.Valid 100.00 100.00
# Reprint the descriptive statistics computed above. `data` has not changed
# since then, so there is no need to call descr() a second time.
print(desc_stats)
## Non-numerical variable(s) ignored: user_id, gender, daily_screen_time_hours, phone_usage_hours, tablet_usage_hours, tv_usage_hours, social_media_hours, work_related_hours, entertainment_hours, gaming_hours, sleep_duration_hours, sleep_quality, mood_rating, stress_level, physical_activity_hours_per_week, location_type, mental_health_score, uses_wellness_apps, eats_healthy, caffeine_intake_mg_per_day, weekly_anxiety_score, weekly_depression_score, mindfulness_minutes_per_day
## Descriptive Statistics
## data
## N: 5000
##
## age laptop_usage_hours
## ----------------- --------- --------------------
## Mean 44.70 1.56
## Std.Dev 17.27 1.03
## Min 15.00 0.00
## Q1 30.00 0.70
## Median 45.00 1.50
## Q3 60.00 2.30
## Max 74.00 5.00
## MAD 22.24 1.19
## IQR 30.00 1.60
## CV 0.39 0.66
## Skewness -0.01 0.30
## SE.Skewness 0.03 0.03
## Kurtosis -1.18 -0.45
## N.Valid 5000.00 5000.00
## N 5000.00 5000.00
## Pct.Valid 100.00 100.00
# Load required packages
library(ggplot2)
library(dplyr)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Load the data
# Re-import the raw file with base R. This replaces the `data` object whose
# columns were converted to factors earlier, so the plots below work on the
# columns as parsed from the CSV.
data <- read.csv(file = "Tech_Use_Stress_Wellness.csv")
# 1. Distribution plots
# Histogram of daily screen time in hours.
p1 <- ggplot(data, aes(x = daily_screen_time_hours)) +
  geom_histogram(bins = 30, fill = "yellow", alpha = 0.7) +
  labs(
    title = "Distribution of Daily Screen Time",
    x = "Hours", y = "Frequency"
  )

# Histogram of the mental health score. The white bars get a dark outline so
# they remain visible against the light default panel background.
p2 <- ggplot(data, aes(x = mental_health_score)) +
  geom_histogram(bins = 30, fill = "white", colour = "grey30", alpha = 0.7) +
  labs(
    title = "Distribution of Mental Health Score",
    x = "Score", y = "Frequency"
  )

# Histogram of stress level. In this data the values look like whole numbers
# from 1 to 10 (see summary(data)); TODO confirm. With 30 bins most bins would
# be empty, so use one unit-wide bar per level instead.
p3 <- ggplot(data, aes(x = stress_level)) +
  geom_histogram(binwidth = 1, fill = "red", alpha = 0.7) +
  labs(
    title = "Distribution of Stress Level",
    x = "Stress Level", y = "Frequency"
  )
# 2. Technology usage by gender
# One box of daily screen-time hours per gender, filled by gender.
p4 <- data %>%
  ggplot(
    mapping = aes(x = gender, y = daily_screen_time_hours, fill = gender)
  ) +
  geom_boxplot() +
  labs(
    x = "Gender",
    y = "Hours",
    title = "Daily Screen Time by Gender"
  )

# 3. Mental health by location type
# One box of mental health scores per location type, filled by location type.
p5 <- data %>%
  ggplot(
    mapping = aes(
      x = location_type,
      y = mental_health_score,
      fill = location_type
    )
  ) +
  geom_boxplot() +
  labs(
    x = "Location",
    y = "Mental Health Score",
    title = "Mental Health Score by Location Type"
  )
# 4. Sleep patterns analysis
# Mental health score against sleep duration, with a red linear-model trend.
p6 <- data %>%
  ggplot(mapping = aes(x = sleep_duration_hours, y = mental_health_score)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", colour = "red") +
  labs(
    x = "Sleep Duration (hours)",
    y = "Mental Health Score",
    title = "Mental Health vs Sleep Duration"
  )

# Stress level per sleep-quality rating; the rating is wrapped in factor() so
# each distinct value gets its own box.
p7 <- data %>%
  ggplot(mapping = aes(x = factor(sleep_quality), y = stress_level)) +
  geom_boxplot(fill = "lightblue") +
  labs(
    x = "Sleep Quality",
    y = "Stress Level",
    title = "Stress Level by Sleep Quality"
  )
# 5. Social media impact
# Mental health score against social media hours, with a purple linear-model
# trend.
p8 <- data %>%
  ggplot(mapping = aes(x = social_media_hours, y = mental_health_score)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", colour = "purple") +
  labs(
    x = "Social Media Hours",
    y = "Mental Health Score",
    title = "Mental Health vs Social Media Usage"
  )

# 6. Wellness apps effectiveness
# Mental health score split by the uses_wellness_apps flag, one box per value.
p9 <- data %>%
  ggplot(
    mapping = aes(
      x = uses_wellness_apps,
      y = mental_health_score,
      fill = uses_wellness_apps
    )
  ) +
  geom_boxplot() +
  labs(
    x = "Uses Wellness Apps",
    y = "Mental Health Score",
    title = "Mental Health Score: Wellness App Users vs Non-Users"
  )
# 7. Physical activity impact
# Stress level against weekly physical-activity hours, with a dark green
# linear-model trend.
p10 <- data %>%
  ggplot(
    mapping = aes(x = physical_activity_hours_per_week, y = stress_level)
  ) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", colour = "darkgreen") +
  labs(
    x = "Physical Activity (hours/week)",
    y = "Stress Level",
    title = "Stress Level vs Physical Activity"
  )

# 8. Caffeine intake analysis
# Stress level against daily caffeine intake, with a brown linear-model trend.
p11 <- data %>%
  ggplot(mapping = aes(x = caffeine_intake_mg_per_day, y = stress_level)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", colour = "brown") +
  labs(
    x = "Caffeine Intake (mg/day)",
    y = "Stress Level",
    title = "Stress Level vs Caffeine Intake"
  )

# 9. Mindfulness practice
# Mental health score against daily mindfulness minutes, with an orange
# linear-model trend.
p12 <- data %>%
  ggplot(
    mapping = aes(x = mindfulness_minutes_per_day, y = mental_health_score)
  ) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", colour = "orange") +
  labs(
    x = "Mindfulness Minutes/Day",
    y = "Mental Health Score",
    title = "Mental Health vs Mindfulness Practice"
  )
# 10. Healthy eating impact
# Mental health score split by the eats_healthy flag, one box per value.
p13 <- data %>%
  ggplot(
    mapping = aes(
      x = eats_healthy,
      y = mental_health_score,
      fill = eats_healthy
    )
  ) +
  geom_boxplot() +
  labs(
    x = "Eats Healthy",
    y = "Mental Health Score",
    title = "Mental Health Score: Healthy Eaters vs Non-Healthy Eaters"
  )

# 11. Age distribution and mental health
# Mental health score against age, with a dark blue linear-model trend.
p14 <- data %>%
  ggplot(mapping = aes(x = age, y = mental_health_score)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", colour = "darkblue") +
  labs(
    x = "Age",
    y = "Mental Health Score",
    title = "Mental Health Score by Age"
  )
# 12. Correlation heatmap of key variables
# Keep only the seven numeric columns that enter the correlation matrix.
cor_vars <- data %>%
  select(
    mental_health_score, stress_level, daily_screen_time_hours,
    social_media_hours, sleep_duration_hours,
    physical_activity_hours_per_week, mindfulness_minutes_per_day
  )

# Pairwise correlations (cor()'s default method), computed on rows that have
# no missing value in any of the selected columns.
cor_matrix <- cor(cor_vars, use = "complete.obs")

# melt() reshapes the square matrix into long form (Var1, Var2, value) so each
# cell can be drawn as a tile. The fill scale is pinned to the full [-1, 1]
# range and centred on 0 so colours are comparable across data sets.
# `limits` is spelled out in full rather than relying on partial argument
# matching of `limit`.
p15 <- ggplot(melt(cor_matrix), aes(Var1, Var2, fill = value)) +
  geom_tile() +
  scale_fill_gradient2(
    low = "blue", mid = "white", high = "red",
    midpoint = 0, limits = c(-1, 1)
  ) +
  labs(
    title = "Correlation Heatmap of Key Variables",
    x = "", y = "", fill = "Correlation"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Print all plots
# Render the fifteen figures in order. print() is applied explicitly because
# values produced inside lapply() are not auto-printed; invisible() hides the
# list that lapply() returns.
invisible(lapply(
  list(
    p1, p2, p3, p4, p5,
    p6, p7, p8, p9, p10,
    p11, p12, p13, p14, p15
  ),
  print
))
# Each plot with a geom_smooth() layer (p6, p8, p10, p11, p12, p14) emits:
## `geom_smooth()` using formula = 'y ~ x'