2025/11/14This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
# Load required libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(corrplot)
## corrplot 0.95 loaded
library(summarytools)
##
## Attaching package: 'summarytools'
##
## The following object is masked from 'package:tibble':
##
## view
library(knitr)
# Install if needed
install.packages("dplyr")
## # Downloading packages -------------------------------------------------------
## - Downloading dplyr from CRAN ... OK [1.5 Mb in 4.9s]
## Successfully downloaded 1 package in 7.5 seconds.
##
## The following package(s) will be installed:
## - dplyr [1.1.4]
## These packages will be installed into "~/OneDrive/Desktop/probability and stat sem 1/groupe_project_stat/renv/library/windows/R-4.5/x86_64-w64-mingw32".
##
## # Installing packages --------------------------------------------------------
## - Installing dplyr ... OK [installed binary and cached in 0.81s]
## Successfully installed 1 package in 0.91 seconds.
# Load the package
library(dplyr)
# Read the file
data <- read_csv("Tech_Use_Stress_Wellness.csv")
## Rows: 5000 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): gender, location_type
## dbl (21): user_id, age, daily_screen_time_hours, phone_usage_hours, laptop_u...
## lgl (2): uses_wellness_apps, eats_healthy
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
variable_info <- data.frame(
Variable = names(data),
Type = sapply(data, class),
Description = c(
"Unique identifier for each user",
"Age of the user in years",
"Gender identity (Male, Female, Other)",
"Total daily screen time in hours",
"Daily phone usage in hours",
"Daily laptop usage in hours",
"Daily tablet usage in hours",
"Daily TV usage in hours",
"Daily social media usage in hours",
"Daily work-related screen time in hours",
"Daily entertainment screen time in hours",
"Daily gaming time in hours",
"Sleep duration in hours per night",
"Sleep quality rating (1-5 scale)",
"Mood rating (1-10 scale)",
"Stress level (1-10 scale)",
"Physical activity hours per week",
"Type of location (Urban, Suburban, Rural)",
"Mental health score (0-100 scale)",
"Whether user uses wellness apps (TRUE/FALSE)",
"Whether user eats healthy (TRUE/FALSE)",
"Daily caffeine intake in milligrams",
"Weekly anxiety score (0-21 scale)",
"Weekly depression score (0-21 scale)",
"Daily mindfulness practice in minutes"
)
)
kable(variable_info, caption = "Variable Descriptions")
| Variable | Type | Description | |
|---|---|---|---|
| user_id | user_id | numeric | Unique identifier for each user |
| age | age | numeric | Age of the user in years |
| gender | gender | character | Gender identity (Male, Female, Other) |
| daily_screen_time_hours | daily_screen_time_hours | numeric | Total daily screen time in hours |
| phone_usage_hours | phone_usage_hours | numeric | Daily phone usage in hours |
| laptop_usage_hours | laptop_usage_hours | numeric | Daily laptop usage in hours |
| tablet_usage_hours | tablet_usage_hours | numeric | Daily tablet usage in hours |
| tv_usage_hours | tv_usage_hours | numeric | Daily TV usage in hours |
| social_media_hours | social_media_hours | numeric | Daily social media usage in hours |
| work_related_hours | work_related_hours | numeric | Daily work-related screen time in hours |
| entertainment_hours | entertainment_hours | numeric | Daily entertainment screen time in hours |
| gaming_hours | gaming_hours | numeric | Daily gaming time in hours |
| sleep_duration_hours | sleep_duration_hours | numeric | Sleep duration in hours per night |
| sleep_quality | sleep_quality | numeric | Sleep quality rating (1-5 scale) |
| mood_rating | mood_rating | numeric | Mood rating (1-10 scale) |
| stress_level | stress_level | numeric | Stress level (1-10 scale) |
| physical_activity_hours_per_week | physical_activity_hours_per_week | numeric | Physical activity hours per week |
| location_type | location_type | character | Type of location (Urban, Suburban, Rural) |
| mental_health_score | mental_health_score | numeric | Mental health score (0-100 scale) |
| uses_wellness_apps | uses_wellness_apps | logical | Whether user uses wellness apps (TRUE/FALSE) |
| eats_healthy | eats_healthy | logical | Whether user eats healthy (TRUE/FALSE) |
| caffeine_intake_mg_per_day | caffeine_intake_mg_per_day | numeric | Daily caffeine intake in milligrams |
| weekly_anxiety_score | weekly_anxiety_score | numeric | Weekly anxiety score (0-21 scale) |
| weekly_depression_score | weekly_depression_score | numeric | Weekly depression score (0-21 scale) |
| mindfulness_minutes_per_day | mindfulness_minutes_per_day | numeric | Daily mindfulness practice in minutes |
variable_info <- data.frame(
Variable = names(data),
Type = sapply(data, class),
Description = c(
"Unique identifier for each user",
"Age of the user in years",
"Gender identity (Male, Female, Other)",
"Total daily screen time in hours",
"Daily phone usage in hours",
"Daily laptop usage in hours",
"Daily tablet usage in hours",
"Daily TV usage in hours",
"Daily social media usage in hours",
"Daily work-related screen time in hours",
"Daily entertainment screen time in hours",
"Daily gaming time in hours",
"Sleep duration in hours per night",
"Sleep quality rating (1-5 scale)",
"Mood rating (1-10 scale)",
"Stress level (1-10 scale)",
"Physical activity hours per week",
"Type of location (Urban, Suburban, Rural)",
"Mental health score (0-100 scale)",
"Whether user uses wellness apps (TRUE/FALSE)",
"Whether user eats healthy (TRUE/FALSE)",
"Daily caffeine intake in milligrams",
"Weekly anxiety score (0-21 scale)",
"Weekly depression score (0-21 scale)",
"Daily mindfulness practice in minutes"
)
)
# 1. Data Structure and Basic Information
str(data)
## spc_tbl_ [5,000 × 25] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ user_id : num [1:5000] 1 2 3 4 5 6 7 8 9 10 ...
## $ age : num [1:5000] 53 66 43 29 57 22 35 53 72 33 ...
## $ gender : chr [1:5000] "Male" "Female" "Male" "Female" ...
## $ daily_screen_time_hours : num [1:5000] 6.8 4.1 4.7 6 6.7 8.6 5.9 7.3 4.2 5.6 ...
## $ phone_usage_hours : num [1:5000] 2.9 2.1 3.6 4.5 3.4 2.4 4.1 2.5 1.2 2 ...
## $ laptop_usage_hours : num [1:5000] 2.1 1.6 0 0 2 3.4 1.8 2.2 0.5 2.6 ...
## $ tablet_usage_hours : num [1:5000] 0.5 0.4 0.5 0.2 0.8 1.4 0 0.9 1.2 0.3 ...
## $ tv_usage_hours : num [1:5000] 2 0.8 1.4 1.3 1.2 1.1 0.4 2.4 2.3 1.4 ...
## $ social_media_hours : num [1:5000] 2.3 2.8 3.7 4.5 3.8 4.5 3.8 4.5 0.3 2.6 ...
## $ work_related_hours : num [1:5000] 3 3.5 3.6 4 3.5 3.8 3.9 3.8 1.1 3.2 ...
## $ entertainment_hours : num [1:5000] 2.3 2 1 0.8 1.2 1 1.9 1.2 2.3 1.6 ...
## $ gaming_hours : num [1:5000] 2.2 1.6 0.9 0.3 2 0.7 1.2 0.9 2.6 1.4 ...
## $ sleep_duration_hours : num [1:5000] 7.1 6.8 6 7.3 7 6 6.6 6.9 7.2 8 ...
## $ sleep_quality : num [1:5000] 4 4 4 4 4 3 4 4 4 4 ...
## $ mood_rating : num [1:5000] 5.6 6 4.3 1.3 3.6 1.2 3.1 1.4 9.1 7.7 ...
## $ stress_level : num [1:5000] 3 4 6 10 6 9 6 10 1 4 ...
## $ physical_activity_hours_per_week: num [1:5000] 4.4 4.1 1.4 0 2.8 1.4 1.9 0 5.1 3.6 ...
## $ location_type : chr [1:5000] "Rural" "Rural" "Urban" "Rural" ...
## $ mental_health_score : num [1:5000] 79 71 67 55 62 51 62 45 77 78 ...
## $ uses_wellness_apps : logi [1:5000] TRUE TRUE TRUE TRUE FALSE FALSE ...
## $ eats_healthy : logi [1:5000] TRUE TRUE TRUE TRUE FALSE FALSE ...
## $ caffeine_intake_mg_per_day : num [1:5000] 150 124 218 134 193 ...
## $ weekly_anxiety_score : num [1:5000] 7 7 11 17 5 14 7 21 2 11 ...
## $ weekly_depression_score : num [1:5000] 3 10 7 14 8 13 5 12 2 8 ...
## $ mindfulness_minutes_per_day : num [1:5000] 14 15.5 19.5 13.9 26.7 9.2 21.4 13.5 19.6 21.4 ...
## - attr(*, "spec")=
## .. cols(
## .. user_id = col_double(),
## .. age = col_double(),
## .. gender = col_character(),
## .. daily_screen_time_hours = col_double(),
## .. phone_usage_hours = col_double(),
## .. laptop_usage_hours = col_double(),
## .. tablet_usage_hours = col_double(),
## .. tv_usage_hours = col_double(),
## .. social_media_hours = col_double(),
## .. work_related_hours = col_double(),
## .. entertainment_hours = col_double(),
## .. gaming_hours = col_double(),
## .. sleep_duration_hours = col_double(),
## .. sleep_quality = col_double(),
## .. mood_rating = col_double(),
## .. stress_level = col_double(),
## .. physical_activity_hours_per_week = col_double(),
## .. location_type = col_character(),
## .. mental_health_score = col_double(),
## .. uses_wellness_apps = col_logical(),
## .. eats_healthy = col_logical(),
## .. caffeine_intake_mg_per_day = col_double(),
## .. weekly_anxiety_score = col_double(),
## .. weekly_depression_score = col_double(),
## .. mindfulness_minutes_per_day = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
summary(data)
## user_id age gender daily_screen_time_hours
## Min. : 1 Min. :15.0 Length:5000 Min. : 1.000
## 1st Qu.:1251 1st Qu.:30.0 Class :character 1st Qu.: 3.700
## Median :2500 Median :45.0 Mode :character Median : 5.000
## Mean :2500 Mean :44.7 Mean : 5.038
## 3rd Qu.:3750 3rd Qu.:60.0 3rd Qu.: 6.300
## Max. :5000 Max. :74.0 Max. :10.000
## phone_usage_hours laptop_usage_hours tablet_usage_hours tv_usage_hours
## Min. :0.200 Min. :0.000 Min. :0.0000 Min. :0.000
## 1st Qu.:1.400 1st Qu.:0.700 1st Qu.:0.2000 1st Qu.:0.700
## Median :2.000 Median :1.500 Median :0.6000 Median :1.400
## Mean :1.994 Mean :1.555 Mean :0.6631 Mean :1.433
## 3rd Qu.:2.600 3rd Qu.:2.300 3rd Qu.:1.0000 3rd Qu.:2.100
## Max. :5.000 Max. :5.000 Max. :2.9000 Max. :4.000
## social_media_hours work_related_hours entertainment_hours gaming_hours
## Min. :0.300 Min. :0.70 Min. :0.000 Min. :0.000
## 1st Qu.:2.400 1st Qu.:3.10 1st Qu.:1.200 1st Qu.:1.100
## Median :3.600 Median :3.70 Median :1.600 Median :1.400
## Mean :3.277 Mean :3.36 Mean :1.662 Mean :1.561
## 3rd Qu.:4.500 3rd Qu.:3.90 3rd Qu.:2.100 3rd Qu.:2.000
## Max. :4.500 Max. :4.50 Max. :3.900 Max. :3.800
## sleep_duration_hours sleep_quality mood_rating stress_level
## Min. :5.40 Min. :1.000 Min. : 1.000 Min. : 1.000
## 1st Qu.:7.00 1st Qu.:4.000 1st Qu.: 1.700 1st Qu.: 3.000
## Median :7.40 Median :4.000 Median : 4.300 Median : 6.000
## Mean :7.37 Mean :4.009 Mean : 4.448 Mean : 5.718
## 3rd Qu.:7.70 3rd Qu.:4.000 3rd Qu.: 6.700 3rd Qu.: 8.000
## Max. :9.00 Max. :5.000 Max. :10.000 Max. :10.000
## physical_activity_hours_per_week location_type mental_health_score
## Min. : 0.000 Length:5000 Min. : 31.00
## 1st Qu.: 0.600 Class :character 1st Qu.: 54.00
## Median : 2.300 Mode :character Median : 65.00
## Mean : 2.659 Mean : 64.77
## 3rd Qu.: 4.200 3rd Qu.: 75.00
## Max. :11.800 Max. :100.00
## uses_wellness_apps eats_healthy caffeine_intake_mg_per_day
## Mode :logical Mode :logical Min. : 0.0
## FALSE:2927 FALSE:2486 1st Qu.:108.5
## TRUE :2073 TRUE :2514 Median :141.7
## Mean :142.3
## 3rd Qu.:176.4
## Max. :341.2
## weekly_anxiety_score weekly_depression_score mindfulness_minutes_per_day
## Min. : 0.000 Min. : 0.00 Min. : 5.00
## 1st Qu.: 5.000 1st Qu.: 4.00 1st Qu.:12.40
## Median : 8.000 Median : 7.00 Median :17.20
## Mean : 8.632 Mean : 7.52 Mean :18.55
## 3rd Qu.:12.000 3rd Qu.:11.00 3rd Qu.:23.80
## Max. :21.000 Max. :21.00 Max. :42.00
# Check for missing values
colSums(is.na(data))
## user_id age
## 0 0
## gender daily_screen_time_hours
## 0 0
## phone_usage_hours laptop_usage_hours
## 0 0
## tablet_usage_hours tv_usage_hours
## 0 0
## social_media_hours work_related_hours
## 0 0
## entertainment_hours gaming_hours
## 0 0
## sleep_duration_hours sleep_quality
## 0 0
## mood_rating stress_level
## 0 0
## physical_activity_hours_per_week location_type
## 0 0
## mental_health_score uses_wellness_apps
## 0 0
## eats_healthy caffeine_intake_mg_per_day
## 0 0
## weekly_anxiety_score weekly_depression_score
## 0 0
## mindfulness_minutes_per_day
## 0
# 2. Data Preprocessing
# Convert categorical variables to factors
data$gender <- as.factor(data$gender)
data$location_type <- as.factor(data$location_type)
data$uses_wellness_apps <- as.factor(data$uses_wellness_apps)
data$eats_healthy <- as.factor(data$eats_healthy)
data$mood_rating <- as.factor(data$mood_rating)
data$stress_level <- as.factor(data$stress_level)
data$sleep_quality<- as.factor(data$sleep_quality)
# Load the data
f <- read.csv('Tech_Use_Stress_Wellness.csv')
# View the first few rows
head(f)
## user_id age gender daily_screen_time_hours phone_usage_hours
## 1 1 53 Male 6.8 2.9
## 2 2 66 Female 4.1 2.1
## 3 3 43 Male 4.7 3.6
## 4 4 29 Female 6.0 4.5
## 5 5 57 Male 6.7 3.4
## 6 6 22 Male 8.6 2.4
## laptop_usage_hours tablet_usage_hours tv_usage_hours social_media_hours
## 1 2.1 0.5 2.0 2.3
## 2 1.6 0.4 0.8 2.8
## 3 0.0 0.5 1.4 3.7
## 4 0.0 0.2 1.3 4.5
## 5 2.0 0.8 1.2 3.8
## 6 3.4 1.4 1.1 4.5
## work_related_hours entertainment_hours gaming_hours sleep_duration_hours
## 1 3.0 2.3 2.2 7.1
## 2 3.5 2.0 1.6 6.8
## 3 3.6 1.0 0.9 6.0
## 4 4.0 0.8 0.3 7.3
## 5 3.5 1.2 2.0 7.0
## 6 3.8 1.0 0.7 6.0
## sleep_quality mood_rating stress_level physical_activity_hours_per_week
## 1 4 5.6 3 4.4
## 2 4 6.0 4 4.1
## 3 4 4.3 6 1.4
## 4 4 1.3 10 0.0
## 5 4 3.6 6 2.8
## 6 3 1.2 9 1.4
## location_type mental_health_score uses_wellness_apps eats_healthy
## 1 Rural 79 True True
## 2 Rural 71 True True
## 3 Urban 67 True True
## 4 Rural 55 True True
## 5 Urban 62 False False
## 6 Urban 51 False False
## caffeine_intake_mg_per_day weekly_anxiety_score weekly_depression_score
## 1 150.4 7 3
## 2 124.1 7 10
## 3 218.3 11 7
## 4 133.7 17 14
## 5 192.8 5 8
## 6 191.4 14 13
## mindfulness_minutes_per_day
## 1 14.0
## 2 15.5
## 3 19.5
## 4 13.9
## 5 26.7
## 6 9.2
# Get structure of the data
str(f)
## 'data.frame': 5000 obs. of 25 variables:
## $ user_id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ age : int 53 66 43 29 57 22 35 53 72 33 ...
## $ gender : chr "Male" "Female" "Male" "Female" ...
## $ daily_screen_time_hours : num 6.8 4.1 4.7 6 6.7 8.6 5.9 7.3 4.2 5.6 ...
## $ phone_usage_hours : num 2.9 2.1 3.6 4.5 3.4 2.4 4.1 2.5 1.2 2 ...
## $ laptop_usage_hours : num 2.1 1.6 0 0 2 3.4 1.8 2.2 0.5 2.6 ...
## $ tablet_usage_hours : num 0.5 0.4 0.5 0.2 0.8 1.4 0 0.9 1.2 0.3 ...
## $ tv_usage_hours : num 2 0.8 1.4 1.3 1.2 1.1 0.4 2.4 2.3 1.4 ...
## $ social_media_hours : num 2.3 2.8 3.7 4.5 3.8 4.5 3.8 4.5 0.3 2.6 ...
## $ work_related_hours : num 3 3.5 3.6 4 3.5 3.8 3.9 3.8 1.1 3.2 ...
## $ entertainment_hours : num 2.3 2 1 0.8 1.2 1 1.9 1.2 2.3 1.6 ...
## $ gaming_hours : num 2.2 1.6 0.9 0.3 2 0.7 1.2 0.9 2.6 1.4 ...
## $ sleep_duration_hours : num 7.1 6.8 6 7.3 7 6 6.6 6.9 7.2 8 ...
## $ sleep_quality : num 4 4 4 4 4 3 4 4 4 4 ...
## $ mood_rating : num 5.6 6 4.3 1.3 3.6 1.2 3.1 1.4 9.1 7.7 ...
## $ stress_level : int 3 4 6 10 6 9 6 10 1 4 ...
## $ physical_activity_hours_per_week: num 4.4 4.1 1.4 0 2.8 1.4 1.9 0 5.1 3.6 ...
## $ location_type : chr "Rural" "Rural" "Urban" "Rural" ...
## $ mental_health_score : num 79 71 67 55 62 51 62 45 77 78 ...
## $ uses_wellness_apps : chr "True" "True" "True" "True" ...
## $ eats_healthy : chr "True" "True" "True" "True" ...
## $ caffeine_intake_mg_per_day : num 150 124 218 134 193 ...
## $ weekly_anxiety_score : num 7 7 11 17 5 14 7 21 2 11 ...
## $ weekly_depression_score : num 3 10 7 14 8 13 5 12 2 8 ...
## $ mindfulness_minutes_per_day : num 14 15.5 19.5 13.9 26.7 9.2 21.4 13.5 19.6 21.4 ...
# Descriptive statistics for numerical variables
desc_stats <- summary(f)
print(desc_stats)
## user_id age gender daily_screen_time_hours
## Min. : 1 Min. :15.0 Length:5000 Min. : 1.000
## 1st Qu.:1251 1st Qu.:30.0 Class :character 1st Qu.: 3.700
## Median :2500 Median :45.0 Mode :character Median : 5.000
## Mean :2500 Mean :44.7 Mean : 5.038
## 3rd Qu.:3750 3rd Qu.:60.0 3rd Qu.: 6.300
## Max. :5000 Max. :74.0 Max. :10.000
## phone_usage_hours laptop_usage_hours tablet_usage_hours tv_usage_hours
## Min. :0.200 Min. :0.000 Min. :0.0000 Min. :0.000
## 1st Qu.:1.400 1st Qu.:0.700 1st Qu.:0.2000 1st Qu.:0.700
## Median :2.000 Median :1.500 Median :0.6000 Median :1.400
## Mean :1.994 Mean :1.555 Mean :0.6631 Mean :1.433
## 3rd Qu.:2.600 3rd Qu.:2.300 3rd Qu.:1.0000 3rd Qu.:2.100
## Max. :5.000 Max. :5.000 Max. :2.9000 Max. :4.000
## social_media_hours work_related_hours entertainment_hours gaming_hours
## Min. :0.300 Min. :0.70 Min. :0.000 Min. :0.000
## 1st Qu.:2.400 1st Qu.:3.10 1st Qu.:1.200 1st Qu.:1.100
## Median :3.600 Median :3.70 Median :1.600 Median :1.400
## Mean :3.277 Mean :3.36 Mean :1.662 Mean :1.561
## 3rd Qu.:4.500 3rd Qu.:3.90 3rd Qu.:2.100 3rd Qu.:2.000
## Max. :4.500 Max. :4.50 Max. :3.900 Max. :3.800
## sleep_duration_hours sleep_quality mood_rating stress_level
## Min. :5.40 Min. :1.000 Min. : 1.000 Min. : 1.000
## 1st Qu.:7.00 1st Qu.:4.000 1st Qu.: 1.700 1st Qu.: 3.000
## Median :7.40 Median :4.000 Median : 4.300 Median : 6.000
## Mean :7.37 Mean :4.009 Mean : 4.448 Mean : 5.718
## 3rd Qu.:7.70 3rd Qu.:4.000 3rd Qu.: 6.700 3rd Qu.: 8.000
## Max. :9.00 Max. :5.000 Max. :10.000 Max. :10.000
## physical_activity_hours_per_week location_type mental_health_score
## Min. : 0.000 Length:5000 Min. : 31.00
## 1st Qu.: 0.600 Class :character 1st Qu.: 54.00
## Median : 2.300 Mode :character Median : 65.00
## Mean : 2.659 Mean : 64.77
## 3rd Qu.: 4.200 3rd Qu.: 75.00
## Max. :11.800 Max. :100.00
## uses_wellness_apps eats_healthy caffeine_intake_mg_per_day
## Length:5000 Length:5000 Min. : 0.0
## Class :character Class :character 1st Qu.:108.5
## Mode :character Mode :character Median :141.7
## Mean :142.3
## 3rd Qu.:176.4
## Max. :341.2
## weekly_anxiety_score weekly_depression_score mindfulness_minutes_per_day
## Min. : 0.000 Min. : 0.00 Min. : 5.00
## 1st Qu.: 5.000 1st Qu.: 4.00 1st Qu.:12.40
## Median : 8.000 Median : 7.00 Median :17.20
## Mean : 8.632 Mean : 7.52 Mean :18.55
## 3rd Qu.:12.000 3rd Qu.:11.00 3rd Qu.:23.80
## Max. :21.000 Max. :21.00 Max. :42.00
# For categorical variables analysis
categorical_vars <- c('gender', 'sleep_quality', 'mood_rating', 'stress_level',
'location_type', 'uses_wellness_apps', 'eats_healthy')
# Get frequency tables for categorical variables
for(col in categorical_vars) {
cat("\n---", col, "---\n")
print(table(f[[col]]))
}
##
## --- gender ---
##
## Female Male Other
## 2359 2446 195
##
## --- sleep_quality ---
##
## 1 2 3 4 5
## 1 31 979 2898 1091
##
## --- mood_rating ---
##
## 1 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9
## 951 54 47 44 60 42 41 49 45 53 51 37 52 46 34 39 62 45 43 49
## 3 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4 4.1 4.2 4.3 4.4 4.5 4.6 4.7 4.8 4.9
## 51 47 46 47 43 39 38 45 56 57 59 65 45 59 51 43 57 43 67 44
## 5 5.1 5.2 5.3 5.4 5.5 5.6 5.7 5.8 5.9 6 6.1 6.2 6.3 6.4 6.5 6.6 6.7 6.8 6.9
## 56 54 47 50 47 59 48 51 57 56 45 57 51 38 36 50 54 52 49 53
## 7 7.1 7.2 7.3 7.4 7.5 7.6 7.7 7.8 7.9 8 8.1 8.2 8.3 8.4 8.5 8.6 8.7 8.8 8.9
## 53 39 50 43 51 40 44 45 45 48 37 50 52 42 41 44 30 33 25 30
## 9 9.1 9.2 9.3 9.4 9.5 9.6 9.7 9.8 9.9 10
## 27 27 21 21 25 23 16 17 18 14 93
##
## --- stress_level ---
##
## 1 2 3 4 5 6 7 8 9 10
## 443 413 515 543 508 489 470 470 408 741
##
## --- location_type ---
##
## Rural Suburban Urban
## 1043 1477 2480
##
## --- uses_wellness_apps ---
##
## False True
## 2927 2073
##
## --- eats_healthy ---
##
## False True
## 2486 2514
# Descriptive Statistics
desc_stats <- descr(data)
print(desc_stats)
## Non-numerical variable(s) ignored: gender, sleep_quality, mood_rating, stress_level, location_type, uses_wellness_apps, eats_healthy
## Descriptive Statistics
## data
## N: 5000
##
## age caffeine_intake_mg_per_day daily_screen_time_hours
## ----------------- --------- ---------------------------- -------------------------
## Mean 44.70 142.32 5.04
## Std.Dev 17.27 50.47 1.84
## Min 15.00 0.00 1.00
## Q1 30.00 108.45 3.70
## Median 45.00 141.65 5.00
## Q3 60.00 176.40 6.30
## Max 74.00 341.20 10.00
## MAD 22.24 50.63 1.93
## IQR 30.00 67.93 2.60
## CV 0.39 0.35 0.36
## Skewness -0.01 0.03 0.15
## SE.Skewness 0.03 0.03 0.03
## Kurtosis -1.18 -0.06 -0.32
## N.Valid 5000.00 5000.00 5000.00
## N 5000.00 5000.00 5000.00
## Pct.Valid 100.00 100.00 100.00
##
## Table: Table continues below
##
##
##
## entertainment_hours gaming_hours laptop_usage_hours mental_health_score
## ----------------- --------------------- -------------- -------------------- ---------------------
## Mean 1.66 1.56 1.56 64.77
## Std.Dev 0.66 0.69 1.03 13.10
## Min 0.00 0.00 0.00 31.00
## Q1 1.20 1.10 0.70 54.00
## Median 1.60 1.40 1.50 65.00
## Q3 2.10 2.00 2.30 75.00
## Max 3.90 3.80 5.00 100.00
## MAD 0.59 0.59 1.19 15.57
## IQR 0.90 0.90 1.60 21.00
## CV 0.40 0.44 0.66 0.20
## Skewness 0.13 0.51 0.30 0.04
## SE.Skewness 0.03 0.03 0.03 0.03
## Kurtosis -0.26 -0.30 -0.45 -0.87
## N.Valid 5000.00 5000.00 5000.00 5000.00
## N 5000.00 5000.00 5000.00 5000.00
## Pct.Valid 100.00 100.00 100.00 100.00
##
## Table: Table continues below
##
##
##
## mindfulness_minutes_per_day phone_usage_hours
## ----------------- ----------------------------- -------------------
## Mean 18.55 1.99
## Std.Dev 7.99 0.89
## Min 5.00 0.20
## Q1 12.40 1.40
## Median 17.20 2.00
## Q3 23.80 2.60
## Max 42.00 5.00
## MAD 8.01 0.89
## IQR 11.40 1.20
## CV 0.43 0.45
## Skewness 0.60 0.13
## SE.Skewness 0.03 0.03
## Kurtosis -0.33 -0.30
## N.Valid 5000.00 5000.00
## N 5000.00 5000.00
## Pct.Valid 100.00 100.00
##
## Table: Table continues below
##
##
##
## physical_activity_hours_per_week sleep_duration_hours social_media_hours
## ----------------- ---------------------------------- ---------------------- --------------------
## Mean 2.66 7.37 3.28
## Std.Dev 2.29 0.54 1.20
## Min 0.00 5.40 0.30
## Q1 0.60 7.00 2.40
## Median 2.30 7.40 3.60
## Q3 4.20 7.70 4.50
## Max 11.80 9.00 4.50
## MAD 2.67 0.59 1.33
## IQR 3.60 0.70 2.10
## CV 0.86 0.07 0.37
## Skewness 0.71 -0.02 -0.67
## SE.Skewness 0.03 0.03 0.03
## Kurtosis -0.06 -0.14 -0.74
## N.Valid 5000.00 5000.00 5000.00
## N 5000.00 5000.00 5000.00
## Pct.Valid 100.00 100.00 100.00
##
## Table: Table continues below
##
##
##
## tablet_usage_hours tv_usage_hours user_id weekly_anxiety_score
## ----------------- -------------------- ---------------- --------- ----------------------
## Mean 0.66 1.43 2500.50 8.63
## Std.Dev 0.53 0.93 1443.52 5.09
## Min 0.00 0.00 1.00 0.00
## Q1 0.20 0.70 1250.50 5.00
## Median 0.60 1.40 2500.50 8.00
## Q3 1.00 2.10 3750.50 12.00
## Max 2.90 4.00 5000.00 21.00
## MAD 0.59 1.04 1853.25 5.93
## IQR 0.80 1.40 2499.50 7.00
## CV 0.79 0.65 0.58 0.59
## Skewness 0.59 0.32 0.00 0.30
## SE.Skewness 0.03 0.03 0.03 0.03
## Kurtosis -0.19 -0.51 -1.20 -0.67
## N.Valid 5000.00 5000.00 5000.00 5000.00
## N 5000.00 5000.00 5000.00 5000.00
## Pct.Valid 100.00 100.00 100.00 100.00
##
## Table: Table continues below
##
##
##
## weekly_depression_score work_related_hours
## ----------------- ------------------------- --------------------
## Mean 7.52 3.36
## Std.Dev 4.67 0.83
## Min 0.00 0.70
## Q1 4.00 3.10
## Median 7.00 3.70
## Q3 11.00 3.90
## Max 21.00 4.50
## MAD 4.45 0.44
## IQR 7.00 0.80
## CV 0.62 0.25
## Skewness 0.37 -1.42
## SE.Skewness 0.03 0.03
## Kurtosis -0.58 1.20
## N.Valid 5000.00 5000.00
## N 5000.00 5000.00
## Pct.Valid 100.00 100.00
# Method 1: Using base R
f <- read.csv('Tech_Use_Stress_Wellness.csv')
# Method 2: Using readr package (better for large files)
# install.packages("readr") # Run this once if you don't have readr
library(readr)
f <- read_csv('Tech_Use_Stress_Wellness.csv')
## Rows: 5000 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): gender, location_type
## dbl (21): user_id, age, daily_screen_time_hours, phone_usage_hours, laptop_u...
## lgl (2): uses_wellness_apps, eats_healthy
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
desc_stats <- descr(data)
print(desc_stats)
## Non-numerical variable(s) ignored: gender, sleep_quality, mood_rating, stress_level, location_type, uses_wellness_apps, eats_healthy
## Descriptive Statistics
## data
## N: 5000
##
## age caffeine_intake_mg_per_day daily_screen_time_hours
## ----------------- --------- ---------------------------- -------------------------
## Mean 44.70 142.32 5.04
## Std.Dev 17.27 50.47 1.84
## Min 15.00 0.00 1.00
## Q1 30.00 108.45 3.70
## Median 45.00 141.65 5.00
## Q3 60.00 176.40 6.30
## Max 74.00 341.20 10.00
## MAD 22.24 50.63 1.93
## IQR 30.00 67.93 2.60
## CV 0.39 0.35 0.36
## Skewness -0.01 0.03 0.15
## SE.Skewness 0.03 0.03 0.03
## Kurtosis -1.18 -0.06 -0.32
## N.Valid 5000.00 5000.00 5000.00
## N 5000.00 5000.00 5000.00
## Pct.Valid 100.00 100.00 100.00
##
## Table: Table continues below
##
##
##
## entertainment_hours gaming_hours laptop_usage_hours mental_health_score
## ----------------- --------------------- -------------- -------------------- ---------------------
## Mean 1.66 1.56 1.56 64.77
## Std.Dev 0.66 0.69 1.03 13.10
## Min 0.00 0.00 0.00 31.00
## Q1 1.20 1.10 0.70 54.00
## Median 1.60 1.40 1.50 65.00
## Q3 2.10 2.00 2.30 75.00
## Max 3.90 3.80 5.00 100.00
## MAD 0.59 0.59 1.19 15.57
## IQR 0.90 0.90 1.60 21.00
## CV 0.40 0.44 0.66 0.20
## Skewness 0.13 0.51 0.30 0.04
## SE.Skewness 0.03 0.03 0.03 0.03
## Kurtosis -0.26 -0.30 -0.45 -0.87
## N.Valid 5000.00 5000.00 5000.00 5000.00
## N 5000.00 5000.00 5000.00 5000.00
## Pct.Valid 100.00 100.00 100.00 100.00
##
## Table: Table continues below
##
##
##
## mindfulness_minutes_per_day phone_usage_hours
## ----------------- ----------------------------- -------------------
## Mean 18.55 1.99
## Std.Dev 7.99 0.89
## Min 5.00 0.20
## Q1 12.40 1.40
## Median 17.20 2.00
## Q3 23.80 2.60
## Max 42.00 5.00
## MAD 8.01 0.89
## IQR 11.40 1.20
## CV 0.43 0.45
## Skewness 0.60 0.13
## SE.Skewness 0.03 0.03
## Kurtosis -0.33 -0.30
## N.Valid 5000.00 5000.00
## N 5000.00 5000.00
## Pct.Valid 100.00 100.00
##
## Table: Table continues below
##
##
##
## physical_activity_hours_per_week sleep_duration_hours social_media_hours
## ----------------- ---------------------------------- ---------------------- --------------------
## Mean 2.66 7.37 3.28
## Std.Dev 2.29 0.54 1.20
## Min 0.00 5.40 0.30
## Q1 0.60 7.00 2.40
## Median 2.30 7.40 3.60
## Q3 4.20 7.70 4.50
## Max 11.80 9.00 4.50
## MAD 2.67 0.59 1.33
## IQR 3.60 0.70 2.10
## CV 0.86 0.07 0.37
## Skewness 0.71 -0.02 -0.67
## SE.Skewness 0.03 0.03 0.03
## Kurtosis -0.06 -0.14 -0.74
## N.Valid 5000.00 5000.00 5000.00
## N 5000.00 5000.00 5000.00
## Pct.Valid 100.00 100.00 100.00
##
## Table: Table continues below
##
##
##
## tablet_usage_hours tv_usage_hours user_id weekly_anxiety_score
## ----------------- -------------------- ---------------- --------- ----------------------
## Mean 0.66 1.43 2500.50 8.63
## Std.Dev 0.53 0.93 1443.52 5.09
## Min 0.00 0.00 1.00 0.00
## Q1 0.20 0.70 1250.50 5.00
## Median 0.60 1.40 2500.50 8.00
## Q3 1.00 2.10 3750.50 12.00
## Max 2.90 4.00 5000.00 21.00
## MAD 0.59 1.04 1853.25 5.93
## IQR 0.80 1.40 2499.50 7.00
## CV 0.79 0.65 0.58 0.59
## Skewness 0.59 0.32 0.00 0.30
## SE.Skewness 0.03 0.03 0.03 0.03
## Kurtosis -0.19 -0.51 -1.20 -0.67
## N.Valid 5000.00 5000.00 5000.00 5000.00
## N 5000.00 5000.00 5000.00 5000.00
## Pct.Valid 100.00 100.00 100.00 100.00
##
## Table: Table continues below
##
##
##
## weekly_depression_score work_related_hours
## ----------------- ------------------------- --------------------
## Mean 7.52 3.36
## Std.Dev 4.67 0.83
## Min 0.00 0.70
## Q1 4.00 3.10
## Median 7.00 3.70
## Q3 11.00 3.90
## Max 21.00 4.50
## MAD 4.45 0.44
## IQR 7.00 0.80
## CV 0.62 0.25
## Skewness 0.37 -1.42
## SE.Skewness 0.03 0.03
## Kurtosis -0.58 1.20
## N.Valid 5000.00 5000.00
## N 5000.00 5000.00
## Pct.Valid 100.00 100.00
install.packages("psych") # Run this once
## # Downloading packages -------------------------------------------------------
## - Downloading psych from CRAN ... OK [3.4 Mb in 7.0s]
## Successfully downloaded 1 package in 7.4 seconds.
##
## The following package(s) will be installed:
## - psych [2.5.6]
## These packages will be installed into "~/OneDrive/Desktop/probability and stat sem 1/groupe_project_stat/renv/library/windows/R-4.5/x86_64-w64-mingw32".
##
## # Installing packages --------------------------------------------------------
## - Installing psych ... OK [installed binary and cached in 0.5s]
## Successfully installed 1 package in 0.62 seconds.
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Now use describe() instead of descr()
descriptive_stats <- describe(data)
print(descriptive_stats)
## vars n mean sd median trimmed
## user_id 1 5000 2500.50 1443.52 2500.50 2500.50
## age 2 5000 44.70 17.27 45.00 44.73
## gender* 3 5000 1.57 0.57 2.00 1.54
## daily_screen_time_hours 4 5000 5.04 1.84 5.00 5.01
## phone_usage_hours 5 5000 1.99 0.89 2.00 1.99
## laptop_usage_hours 6 5000 1.56 1.03 1.50 1.52
## tablet_usage_hours 7 5000 0.66 0.53 0.60 0.62
## tv_usage_hours 8 5000 1.43 0.93 1.40 1.40
## social_media_hours 9 5000 3.28 1.20 3.60 3.42
## work_related_hours 10 5000 3.36 0.83 3.70 3.50
## entertainment_hours 11 5000 1.66 0.66 1.60 1.65
## gaming_hours 12 5000 1.56 0.69 1.40 1.52
## sleep_duration_hours 13 5000 7.37 0.54 7.40 7.37
## sleep_quality* 14 5000 4.01 0.66 4.00 4.02
## mood_rating* 15 5000 35.48 27.68 34.00 33.84
## stress_level* 16 5000 5.72 2.92 6.00 5.76
## physical_activity_hours_per_week 17 5000 2.66 2.29 2.30 2.43
## location_type* 18 5000 2.29 0.79 2.00 2.36
## mental_health_score 19 5000 64.77 13.10 65.00 64.68
## uses_wellness_apps* 20 5000 1.41 0.49 1.00 1.39
## eats_healthy* 21 5000 1.50 0.50 2.00 1.50
## caffeine_intake_mg_per_day 22 5000 142.32 50.47 141.65 142.22
## weekly_anxiety_score 23 5000 8.63 5.09 8.00 8.44
## weekly_depression_score 24 5000 7.52 4.67 7.00 7.31
## mindfulness_minutes_per_day 25 5000 18.55 7.99 17.20 17.97
## mad min max range skew kurtosis
## user_id 1853.25 1.0 5000.0 4999.0 0.00 -1.20
## age 22.24 15.0 74.0 59.0 -0.01 -1.18
## gender* 1.48 1.0 3.0 2.0 0.37 -0.80
## daily_screen_time_hours 1.93 1.0 10.0 9.0 0.15 -0.32
## phone_usage_hours 0.89 0.2 5.0 4.8 0.13 -0.30
## laptop_usage_hours 1.19 0.0 5.0 5.0 0.30 -0.45
## tablet_usage_hours 0.59 0.0 2.9 2.9 0.59 -0.19
## tv_usage_hours 1.04 0.0 4.0 4.0 0.32 -0.51
## social_media_hours 1.33 0.3 4.5 4.2 -0.67 -0.74
## work_related_hours 0.44 0.7 4.5 3.8 -1.42 1.20
## entertainment_hours 0.59 0.0 3.9 3.9 0.13 -0.26
## gaming_hours 0.59 0.0 3.8 3.8 0.51 -0.30
## sleep_duration_hours 0.59 5.4 9.0 3.6 -0.02 -0.14
## sleep_quality* 0.00 1.0 5.0 4.0 -0.15 -0.27
## mood_rating* 37.06 1.0 91.0 90.0 0.27 -1.16
## stress_level* 4.45 1.0 10.0 9.0 -0.02 -1.22
## physical_activity_hours_per_week 2.67 0.0 11.8 11.8 0.71 -0.06
## location_type* 1.48 1.0 3.0 2.0 -0.56 -1.18
## mental_health_score 15.57 31.0 100.0 69.0 0.04 -0.87
## uses_wellness_apps* 0.00 1.0 2.0 1.0 0.35 -1.88
## eats_healthy* 0.00 1.0 2.0 1.0 -0.01 -2.00
## caffeine_intake_mg_per_day 50.63 0.0 341.2 341.2 0.03 -0.06
## weekly_anxiety_score 5.93 0.0 21.0 21.0 0.30 -0.67
## weekly_depression_score 4.45 0.0 21.0 21.0 0.37 -0.58
## mindfulness_minutes_per_day 8.01 5.0 42.0 37.0 0.60 -0.33
## se
## user_id 20.41
## age 0.24
## gender* 0.01
## daily_screen_time_hours 0.03
## phone_usage_hours 0.01
## laptop_usage_hours 0.01
## tablet_usage_hours 0.01
## tv_usage_hours 0.01
## social_media_hours 0.02
## work_related_hours 0.01
## entertainment_hours 0.01
## gaming_hours 0.01
## sleep_duration_hours 0.01
## sleep_quality* 0.01
## mood_rating* 0.39
## stress_level* 0.04
## physical_activity_hours_per_week 0.03
## location_type* 0.01
## mental_health_score 0.19
## uses_wellness_apps* 0.01
## eats_healthy* 0.01
## caffeine_intake_mg_per_day 0.71
## weekly_anxiety_score 0.07
## weekly_depression_score 0.07
## mindfulness_minutes_per_day 0.11
desc_stats <- descr(data)
print(desc_stats)
## Non-numerical variable(s) ignored: gender, sleep_quality, mood_rating, stress_level, location_type, uses_wellness_apps, eats_healthy
## Descriptive Statistics
## data
## N: 5000
##
## age caffeine_intake_mg_per_day daily_screen_time_hours
## ----------------- --------- ---------------------------- -------------------------
## Mean 44.70 142.32 5.04
## Std.Dev 17.27 50.47 1.84
## Min 15.00 0.00 1.00
## Q1 30.00 108.45 3.70
## Median 45.00 141.65 5.00
## Q3 60.00 176.40 6.30
## Max 74.00 341.20 10.00
## MAD 22.24 50.63 1.93
## IQR 30.00 67.93 2.60
## CV 0.39 0.35 0.36
## Skewness -0.01 0.03 0.15
## SE.Skewness 0.03 0.03 0.03
## Kurtosis -1.18 -0.06 -0.32
## N.Valid 5000.00 5000.00 5000.00
## N 5000.00 5000.00 5000.00
## Pct.Valid 100.00 100.00 100.00
##
## Table: Table continues below
##
##
##
## entertainment_hours gaming_hours laptop_usage_hours mental_health_score
## ----------------- --------------------- -------------- -------------------- ---------------------
## Mean 1.66 1.56 1.56 64.77
## Std.Dev 0.66 0.69 1.03 13.10
## Min 0.00 0.00 0.00 31.00
## Q1 1.20 1.10 0.70 54.00
## Median 1.60 1.40 1.50 65.00
## Q3 2.10 2.00 2.30 75.00
## Max 3.90 3.80 5.00 100.00
## MAD 0.59 0.59 1.19 15.57
## IQR 0.90 0.90 1.60 21.00
## CV 0.40 0.44 0.66 0.20
## Skewness 0.13 0.51 0.30 0.04
## SE.Skewness 0.03 0.03 0.03 0.03
## Kurtosis -0.26 -0.30 -0.45 -0.87
## N.Valid 5000.00 5000.00 5000.00 5000.00
## N 5000.00 5000.00 5000.00 5000.00
## Pct.Valid 100.00 100.00 100.00 100.00
##
## Table: Table continues below
##
##
##
## mindfulness_minutes_per_day phone_usage_hours
## ----------------- ----------------------------- -------------------
## Mean 18.55 1.99
## Std.Dev 7.99 0.89
## Min 5.00 0.20
## Q1 12.40 1.40
## Median 17.20 2.00
## Q3 23.80 2.60
## Max 42.00 5.00
## MAD 8.01 0.89
## IQR 11.40 1.20
## CV 0.43 0.45
## Skewness 0.60 0.13
## SE.Skewness 0.03 0.03
## Kurtosis -0.33 -0.30
## N.Valid 5000.00 5000.00
## N 5000.00 5000.00
## Pct.Valid 100.00 100.00
##
## Table: Table continues below
##
##
##
## physical_activity_hours_per_week sleep_duration_hours social_media_hours
## ----------------- ---------------------------------- ---------------------- --------------------
## Mean 2.66 7.37 3.28
## Std.Dev 2.29 0.54 1.20
## Min 0.00 5.40 0.30
## Q1 0.60 7.00 2.40
## Median 2.30 7.40 3.60
## Q3 4.20 7.70 4.50
## Max 11.80 9.00 4.50
## MAD 2.67 0.59 1.33
## IQR 3.60 0.70 2.10
## CV 0.86 0.07 0.37
## Skewness 0.71 -0.02 -0.67
## SE.Skewness 0.03 0.03 0.03
## Kurtosis -0.06 -0.14 -0.74
## N.Valid 5000.00 5000.00 5000.00
## N 5000.00 5000.00 5000.00
## Pct.Valid 100.00 100.00 100.00
##
## Table: Table continues below
##
##
##
## tablet_usage_hours tv_usage_hours user_id weekly_anxiety_score
## ----------------- -------------------- ---------------- --------- ----------------------
## Mean 0.66 1.43 2500.50 8.63
## Std.Dev 0.53 0.93 1443.52 5.09
## Min 0.00 0.00 1.00 0.00
## Q1 0.20 0.70 1250.50 5.00
## Median 0.60 1.40 2500.50 8.00
## Q3 1.00 2.10 3750.50 12.00
## Max 2.90 4.00 5000.00 21.00
## MAD 0.59 1.04 1853.25 5.93
## IQR 0.80 1.40 2499.50 7.00
## CV 0.79 0.65 0.58 0.59
## Skewness 0.59 0.32 0.00 0.30
## SE.Skewness 0.03 0.03 0.03 0.03
## Kurtosis -0.19 -0.51 -1.20 -0.67
## N.Valid 5000.00 5000.00 5000.00 5000.00
## N 5000.00 5000.00 5000.00 5000.00
## Pct.Valid 100.00 100.00 100.00 100.00
##
## Table: Table continues below
##
##
##
## weekly_depression_score work_related_hours
## ----------------- ------------------------- --------------------
## Mean 7.52 3.36
## Std.Dev 4.67 0.83
## Min 0.00 0.70
## Q1 4.00 3.10
## Median 7.00 3.70
## Q3 11.00 3.90
## Max 21.00 4.50
## MAD 4.45 0.44
## IQR 7.00 0.80
## CV 0.62 0.25
## Skewness 0.37 -1.42
## SE.Skewness 0.03 0.03
## Kurtosis -0.58 1.20
## N.Valid 5000.00 5000.00
## N 5000.00 5000.00
## Pct.Valid 100.00 100.00
# Load required packages
library(psych)
library(dplyr)
# Method 1: Using psych package (most comprehensive)
descriptive_stats <- describe(data)
print(descriptive_stats)
## vars n mean sd median trimmed
## user_id 1 5000 2500.50 1443.52 2500.50 2500.50
## age 2 5000 44.70 17.27 45.00 44.73
## gender* 3 5000 1.57 0.57 2.00 1.54
## daily_screen_time_hours 4 5000 5.04 1.84 5.00 5.01
## phone_usage_hours 5 5000 1.99 0.89 2.00 1.99
## laptop_usage_hours 6 5000 1.56 1.03 1.50 1.52
## tablet_usage_hours 7 5000 0.66 0.53 0.60 0.62
## tv_usage_hours 8 5000 1.43 0.93 1.40 1.40
## social_media_hours 9 5000 3.28 1.20 3.60 3.42
## work_related_hours 10 5000 3.36 0.83 3.70 3.50
## entertainment_hours 11 5000 1.66 0.66 1.60 1.65
## gaming_hours 12 5000 1.56 0.69 1.40 1.52
## sleep_duration_hours 13 5000 7.37 0.54 7.40 7.37
## sleep_quality* 14 5000 4.01 0.66 4.00 4.02
## mood_rating* 15 5000 35.48 27.68 34.00 33.84
## stress_level* 16 5000 5.72 2.92 6.00 5.76
## physical_activity_hours_per_week 17 5000 2.66 2.29 2.30 2.43
## location_type* 18 5000 2.29 0.79 2.00 2.36
## mental_health_score 19 5000 64.77 13.10 65.00 64.68
## uses_wellness_apps* 20 5000 1.41 0.49 1.00 1.39
## eats_healthy* 21 5000 1.50 0.50 2.00 1.50
## caffeine_intake_mg_per_day 22 5000 142.32 50.47 141.65 142.22
## weekly_anxiety_score 23 5000 8.63 5.09 8.00 8.44
## weekly_depression_score 24 5000 7.52 4.67 7.00 7.31
## mindfulness_minutes_per_day 25 5000 18.55 7.99 17.20 17.97
## mad min max range skew kurtosis
## user_id 1853.25 1.0 5000.0 4999.0 0.00 -1.20
## age 22.24 15.0 74.0 59.0 -0.01 -1.18
## gender* 1.48 1.0 3.0 2.0 0.37 -0.80
## daily_screen_time_hours 1.93 1.0 10.0 9.0 0.15 -0.32
## phone_usage_hours 0.89 0.2 5.0 4.8 0.13 -0.30
## laptop_usage_hours 1.19 0.0 5.0 5.0 0.30 -0.45
## tablet_usage_hours 0.59 0.0 2.9 2.9 0.59 -0.19
## tv_usage_hours 1.04 0.0 4.0 4.0 0.32 -0.51
## social_media_hours 1.33 0.3 4.5 4.2 -0.67 -0.74
## work_related_hours 0.44 0.7 4.5 3.8 -1.42 1.20
## entertainment_hours 0.59 0.0 3.9 3.9 0.13 -0.26
## gaming_hours 0.59 0.0 3.8 3.8 0.51 -0.30
## sleep_duration_hours 0.59 5.4 9.0 3.6 -0.02 -0.14
## sleep_quality* 0.00 1.0 5.0 4.0 -0.15 -0.27
## mood_rating* 37.06 1.0 91.0 90.0 0.27 -1.16
## stress_level* 4.45 1.0 10.0 9.0 -0.02 -1.22
## physical_activity_hours_per_week 2.67 0.0 11.8 11.8 0.71 -0.06
## location_type* 1.48 1.0 3.0 2.0 -0.56 -1.18
## mental_health_score 15.57 31.0 100.0 69.0 0.04 -0.87
## uses_wellness_apps* 0.00 1.0 2.0 1.0 0.35 -1.88
## eats_healthy* 0.00 1.0 2.0 1.0 -0.01 -2.00
## caffeine_intake_mg_per_day 50.63 0.0 341.2 341.2 0.03 -0.06
## weekly_anxiety_score 5.93 0.0 21.0 21.0 0.30 -0.67
## weekly_depression_score 4.45 0.0 21.0 21.0 0.37 -0.58
## mindfulness_minutes_per_day 8.01 5.0 42.0 37.0 0.60 -0.33
## se
## user_id 20.41
## age 0.24
## gender* 0.01
## daily_screen_time_hours 0.03
## phone_usage_hours 0.01
## laptop_usage_hours 0.01
## tablet_usage_hours 0.01
## tv_usage_hours 0.01
## social_media_hours 0.02
## work_related_hours 0.01
## entertainment_hours 0.01
## gaming_hours 0.01
## sleep_duration_hours 0.01
## sleep_quality* 0.01
## mood_rating* 0.39
## stress_level* 0.04
## physical_activity_hours_per_week 0.03
## location_type* 0.01
## mental_health_score 0.19
## uses_wellness_apps* 0.01
## eats_healthy* 0.01
## caffeine_intake_mg_per_day 0.71
## weekly_anxiety_score 0.07
## weekly_depression_score 0.07
## mindfulness_minutes_per_day 0.11
# Method 2: For specific variables
numeric_vars <- data %>% select(where(is.numeric))
desc_numeric <- describe(numeric_vars)
print(desc_numeric)
## vars n mean sd median trimmed
## user_id 1 5000 2500.50 1443.52 2500.50 2500.50
## age 2 5000 44.70 17.27 45.00 44.73
## daily_screen_time_hours 3 5000 5.04 1.84 5.00 5.01
## phone_usage_hours 4 5000 1.99 0.89 2.00 1.99
## laptop_usage_hours 5 5000 1.56 1.03 1.50 1.52
## tablet_usage_hours 6 5000 0.66 0.53 0.60 0.62
## tv_usage_hours 7 5000 1.43 0.93 1.40 1.40
## social_media_hours 8 5000 3.28 1.20 3.60 3.42
## work_related_hours 9 5000 3.36 0.83 3.70 3.50
## entertainment_hours 10 5000 1.66 0.66 1.60 1.65
## gaming_hours 11 5000 1.56 0.69 1.40 1.52
## sleep_duration_hours 12 5000 7.37 0.54 7.40 7.37
## physical_activity_hours_per_week 13 5000 2.66 2.29 2.30 2.43
## mental_health_score 14 5000 64.77 13.10 65.00 64.68
## caffeine_intake_mg_per_day 15 5000 142.32 50.47 141.65 142.22
## weekly_anxiety_score 16 5000 8.63 5.09 8.00 8.44
## weekly_depression_score 17 5000 7.52 4.67 7.00 7.31
## mindfulness_minutes_per_day 18 5000 18.55 7.99 17.20 17.97
## mad min max range skew kurtosis
## user_id 1853.25 1.0 5000.0 4999.0 0.00 -1.20
## age 22.24 15.0 74.0 59.0 -0.01 -1.18
## daily_screen_time_hours 1.93 1.0 10.0 9.0 0.15 -0.32
## phone_usage_hours 0.89 0.2 5.0 4.8 0.13 -0.30
## laptop_usage_hours 1.19 0.0 5.0 5.0 0.30 -0.45
## tablet_usage_hours 0.59 0.0 2.9 2.9 0.59 -0.19
## tv_usage_hours 1.04 0.0 4.0 4.0 0.32 -0.51
## social_media_hours 1.33 0.3 4.5 4.2 -0.67 -0.74
## work_related_hours 0.44 0.7 4.5 3.8 -1.42 1.20
## entertainment_hours 0.59 0.0 3.9 3.9 0.13 -0.26
## gaming_hours 0.59 0.0 3.8 3.8 0.51 -0.30
## sleep_duration_hours 0.59 5.4 9.0 3.6 -0.02 -0.14
## physical_activity_hours_per_week 2.67 0.0 11.8 11.8 0.71 -0.06
## mental_health_score 15.57 31.0 100.0 69.0 0.04 -0.87
## caffeine_intake_mg_per_day 50.63 0.0 341.2 341.2 0.03 -0.06
## weekly_anxiety_score 5.93 0.0 21.0 21.0 0.30 -0.67
## weekly_depression_score 4.45 0.0 21.0 21.0 0.37 -0.58
## mindfulness_minutes_per_day 8.01 5.0 42.0 37.0 0.60 -0.33
## se
## user_id 20.41
## age 0.24
## daily_screen_time_hours 0.03
## phone_usage_hours 0.01
## laptop_usage_hours 0.01
## tablet_usage_hours 0.01
## tv_usage_hours 0.01
## social_media_hours 0.02
## work_related_hours 0.01
## entertainment_hours 0.01
## gaming_hours 0.01
## sleep_duration_hours 0.01
## physical_activity_hours_per_week 0.03
## mental_health_score 0.19
## caffeine_intake_mg_per_day 0.71
## weekly_anxiety_score 0.07
## weekly_depression_score 0.07
## mindfulness_minutes_per_day 0.11
# Method 3: For categorical variables
categorical_vars <- c('gender', 'location_type', 'uses_wellness_apps', 'eats_healthy')
for(var in categorical_vars) {
cat("\n---", var, "---\n")
print(table(data[[var]]))
print(prop.table(table(data[[var]])))
}
##
## --- gender ---
##
## Female Male Other
## 2359 2446 195
##
## Female Male Other
## 0.4718 0.4892 0.0390
##
## --- location_type ---
##
## Rural Suburban Urban
## 1043 1477 2480
##
## Rural Suburban Urban
## 0.2086 0.2954 0.4960
##
## --- uses_wellness_apps ---
##
## FALSE TRUE
## 2927 2073
##
## FALSE TRUE
## 0.5854 0.4146
##
## --- eats_healthy ---
##
## FALSE TRUE
## 2486 2514
##
## FALSE TRUE
## 0.4972 0.5028
desc_stats <- descr(data)
print(desc_stats)
## Non-numerical variable(s) ignored: gender, sleep_quality, mood_rating, stress_level, location_type, uses_wellness_apps, eats_healthy
## Descriptive Statistics
## data
## N: 5000
##
## age caffeine_intake_mg_per_day daily_screen_time_hours
## ----------------- --------- ---------------------------- -------------------------
## Mean 44.70 142.32 5.04
## Std.Dev 17.27 50.47 1.84
## Min 15.00 0.00 1.00
## Q1 30.00 108.45 3.70
## Median 45.00 141.65 5.00
## Q3 60.00 176.40 6.30
## Max 74.00 341.20 10.00
## MAD 22.24 50.63 1.93
## IQR 30.00 67.93 2.60
## CV 0.39 0.35 0.36
## Skewness -0.01 0.03 0.15
## SE.Skewness 0.03 0.03 0.03
## Kurtosis -1.18 -0.06 -0.32
## N.Valid 5000.00 5000.00 5000.00
## N 5000.00 5000.00 5000.00
## Pct.Valid 100.00 100.00 100.00
##
## Table: Table continues below
##
##
##
## entertainment_hours gaming_hours laptop_usage_hours mental_health_score
## ----------------- --------------------- -------------- -------------------- ---------------------
## Mean 1.66 1.56 1.56 64.77
## Std.Dev 0.66 0.69 1.03 13.10
## Min 0.00 0.00 0.00 31.00
## Q1 1.20 1.10 0.70 54.00
## Median 1.60 1.40 1.50 65.00
## Q3 2.10 2.00 2.30 75.00
## Max 3.90 3.80 5.00 100.00
## MAD 0.59 0.59 1.19 15.57
## IQR 0.90 0.90 1.60 21.00
## CV 0.40 0.44 0.66 0.20
## Skewness 0.13 0.51 0.30 0.04
## SE.Skewness 0.03 0.03 0.03 0.03
## Kurtosis -0.26 -0.30 -0.45 -0.87
## N.Valid 5000.00 5000.00 5000.00 5000.00
## N 5000.00 5000.00 5000.00 5000.00
## Pct.Valid 100.00 100.00 100.00 100.00
##
## Table: Table continues below
##
##
##
## mindfulness_minutes_per_day phone_usage_hours
## ----------------- ----------------------------- -------------------
## Mean 18.55 1.99
## Std.Dev 7.99 0.89
## Min 5.00 0.20
## Q1 12.40 1.40
## Median 17.20 2.00
## Q3 23.80 2.60
## Max 42.00 5.00
## MAD 8.01 0.89
## IQR 11.40 1.20
## CV 0.43 0.45
## Skewness 0.60 0.13
## SE.Skewness 0.03 0.03
## Kurtosis -0.33 -0.30
## N.Valid 5000.00 5000.00
## N 5000.00 5000.00
## Pct.Valid 100.00 100.00
##
## Table: Table continues below
##
##
##
## physical_activity_hours_per_week sleep_duration_hours social_media_hours
## ----------------- ---------------------------------- ---------------------- --------------------
## Mean 2.66 7.37 3.28
## Std.Dev 2.29 0.54 1.20
## Min 0.00 5.40 0.30
## Q1 0.60 7.00 2.40
## Median 2.30 7.40 3.60
## Q3 4.20 7.70 4.50
## Max 11.80 9.00 4.50
## MAD 2.67 0.59 1.33
## IQR 3.60 0.70 2.10
## CV 0.86 0.07 0.37
## Skewness 0.71 -0.02 -0.67
## SE.Skewness 0.03 0.03 0.03
## Kurtosis -0.06 -0.14 -0.74
## N.Valid 5000.00 5000.00 5000.00
## N 5000.00 5000.00 5000.00
## Pct.Valid 100.00 100.00 100.00
##
## Table: Table continues below
##
##
##
## tablet_usage_hours tv_usage_hours user_id weekly_anxiety_score
## ----------------- -------------------- ---------------- --------- ----------------------
## Mean 0.66 1.43 2500.50 8.63
## Std.Dev 0.53 0.93 1443.52 5.09
## Min 0.00 0.00 1.00 0.00
## Q1 0.20 0.70 1250.50 5.00
## Median 0.60 1.40 2500.50 8.00
## Q3 1.00 2.10 3750.50 12.00
## Max 2.90 4.00 5000.00 21.00
## MAD 0.59 1.04 1853.25 5.93
## IQR 0.80 1.40 2499.50 7.00
## CV 0.79 0.65 0.58 0.59
## Skewness 0.59 0.32 0.00 0.30
## SE.Skewness 0.03 0.03 0.03 0.03
## Kurtosis -0.19 -0.51 -1.20 -0.67
## N.Valid 5000.00 5000.00 5000.00 5000.00
## N 5000.00 5000.00 5000.00 5000.00
## Pct.Valid 100.00 100.00 100.00 100.00
##
## Table: Table continues below
##
##
##
## weekly_depression_score work_related_hours
## ----------------- ------------------------- --------------------
## Mean 7.52 3.36
## Std.Dev 4.67 0.83
## Min 0.00 0.70
## Q1 4.00 3.10
## Median 7.00 3.70
## Q3 11.00 3.90
## Max 21.00 4.50
## MAD 4.45 0.44
## IQR 7.00 0.80
## CV 0.62 0.25
## Skewness 0.37 -1.42
## SE.Skewness 0.03 0.03
## Kurtosis -0.58 1.20
## N.Valid 5000.00 5000.00
## N 5000.00 5000.00
## Pct.Valid 100.00 100.00
# Load required packages
library(ggplot2)
library(dplyr)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Load the data
data <- read.csv('Tech_Use_Stress_Wellness.csv')
# 1. Distribution plots
p1 <- ggplot(data, aes(x = daily_screen_time_hours)) +
geom_histogram(bins = 30, fill = "yellow", alpha = 0.7) +
labs(title = "Distribution of Daily Screen Time", x = "Hours", y = "Frequency")
p2 <- ggplot(data, aes(x = mental_health_score)) +
geom_histogram(bins = 30, fill = "white", alpha = 0.7) +
labs(title = "Distribution of Mental Health Score", x = "Score", y = "Frequency")
p3 <- ggplot(data, aes(x = stress_level)) +
geom_histogram(bins = 30, fill = "red", alpha = 0.7) +
labs(title = "Distribution of Stress Level", x = "Stress Level", y = "Frequency")
# 2. Technology usage by gender
p4 <- ggplot(data, aes(x = gender, y = daily_screen_time_hours, fill = gender)) +
geom_boxplot() +
labs(title = "Daily Screen Time by Gender", x = "Gender", y = "Hours")
# 3. Mental health by location type
p5 <- ggplot(data, aes(x = location_type, y = mental_health_score, fill = location_type)) +
geom_boxplot() +
labs(title = "Mental Health Score by Location Type", x = "Location", y = "Mental Health Score")
# 4. Sleep patterns analysis - FIXED: sleep_quality as factor
p6 <- ggplot(data, aes(x = sleep_duration_hours, y = mental_health_score)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", color = "red") +
labs(title = "Mental Health vs Sleep Duration",
x = "Sleep Duration (hours)", y = "Mental Health Score")
p7 <- ggplot(data, aes(x = factor(sleep_quality), y = stress_level)) +
geom_boxplot(fill = "lightblue") +
labs(title = "Stress Level by Sleep Quality",
x = "Sleep Quality", y = "Stress Level")
# 5. Social media impact
p8 <- ggplot(data, aes(x = social_media_hours, y = mental_health_score)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", color = "purple") +
labs(title = "Mental Health vs Social Media Usage",
x = "Social Media Hours", y = "Mental Health Score")
# 6. Wellness apps effectiveness
p9 <- ggplot(data, aes(x = uses_wellness_apps, y = mental_health_score, fill = uses_wellness_apps)) +
geom_boxplot() +
labs(title = "Mental Health Score: Wellness App Users vs Non-Users",
x = "Uses Wellness Apps", y = "Mental Health Score")
# 7. Physical activity impact
p10 <- ggplot(data, aes(x = physical_activity_hours_per_week, y = stress_level)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", color = "darkgreen") +
labs(title = "Stress Level vs Physical Activity",
x = "Physical Activity (hours/week)", y = "Stress Level")
# 8. Caffeine intake analysis
p11 <- ggplot(data, aes(x = caffeine_intake_mg_per_day, y = stress_level)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", color = "brown") +
labs(title = "Stress Level vs Caffeine Intake",
x = "Caffeine Intake (mg/day)", y = "Stress Level")
# 9. Mindfulness practice
p12 <- ggplot(data, aes(x = mindfulness_minutes_per_day, y = mental_health_score)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", color = "orange") +
labs(title = "Mental Health vs Mindfulness Practice",
x = "Mindfulness Minutes/Day", y = "Mental Health Score")
# 10. Healthy eating impact
p13 <- ggplot(data, aes(x = eats_healthy, y = mental_health_score, fill = eats_healthy)) +
geom_boxplot() +
labs(title = "Mental Health Score: Healthy Eaters vs Non-Healthy Eaters",
x = "Eats Healthy", y = "Mental Health Score")
# 11. Age distribution and mental health
p14 <- ggplot(data, aes(x = age, y = mental_health_score)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", color = "darkblue") +
labs(title = "Mental Health Score by Age",
x = "Age", y = "Mental Health Score")
# 12. Correlation heatmap of key variables
cor_vars <- data %>% select(mental_health_score, stress_level, daily_screen_time_hours,
social_media_hours, sleep_duration_hours,
physical_activity_hours_per_week, mindfulness_minutes_per_day)
cor_matrix <- cor(cor_vars, use = "complete.obs")
p15 <- ggplot(melt(cor_matrix), aes(Var1, Var2, fill = value)) +
geom_tile() +
scale_fill_gradient2(low = "blue", high = "red", mid = "white",
midpoint = 0, limit = c(-1,1)) +
labs(title = "Correlation Heatmap of Key Variables",
x = "", y = "", fill = "Correlation") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Print all plots
print(p1)
print(p2)
print(p3)
print(p4)
print(p5)
print(p6)
## `geom_smooth()` using formula = 'y ~ x'
print(p7)
print(p8)
## `geom_smooth()` using formula = 'y ~ x'
print(p9)
print(p10)
## `geom_smooth()` using formula = 'y ~ x'
print(p11)
## `geom_smooth()` using formula = 'y ~ x'
print(p12)
## `geom_smooth()` using formula = 'y ~ x'
print(p13)
print(p14)
## `geom_smooth()` using formula = 'y ~ x'
print(p15)