# Attach the tidyverse; it supplies read_csv() used below
library(tidyverse)
# Import the student survey CSV into `data`
data <- read_csv(file = "Students Social Media Addiction.csv")
Rows: 705 Columns: 13
── Column specification ────────────────────────────────────────────────────────────
Delimiter: ","
chr (6): Gender, Academic_Level, Country, Most_Used_Platform, Affects_Academic_P...
dbl (7): Student_ID, Age, Avg_Daily_Usage_Hours, Sleep_Hours_Per_Night, Mental_H...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Set up the environment by loading the tidyverse and importing the dataset on students’ social media addiction for the analysis that follows.
# Simple linear regression: addiction score explained by nightly sleep hours
model <- lm(
  formula = Addicted_Score ~ Sleep_Hours_Per_Night,
  data = data
)
# Print coefficients, residual summary and fit statistics
summary(model)
Call:
lm(formula = Addicted_Score ~ Sleep_Hours_Per_Night, data = data)
Residuals:
Min 1Q Median 3Q Max
-3.2184 -0.7570 0.0884 0.7347 2.4280
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 13.83680 0.23820 58.09 <2e-16 ***
Sleep_Hours_Per_Night -1.07730 0.03422 -31.48 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.023 on 703 degrees of freedom
Multiple R-squared: 0.585, Adjusted R-squared: 0.5844
F-statistic: 991 on 1 and 703 DF, p-value: < 2.2e-16
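This model examines the relationship between students’ nightly sleep and their social media addiction scores: each additional hour of sleep is associated with an addiction score about 1.08 points lower (R² ≈ 0.585).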
# Attach the tidyverse
library(tidyverse)
# Import the survey CSV with base R's reader
data <- read.csv(file = "Students Social Media Addiction.csv")
# Fit addiction score against nightly sleep hours
model_linear <- lm(
  formula = Addicted_Score ~ Sleep_Hours_Per_Night,
  data = data
)
# Print coefficients, residual summary and fit statistics
summary(model_linear)
Call:
lm(formula = Addicted_Score ~ Sleep_Hours_Per_Night, data = data)
Residuals:
Min 1Q Median 3Q Max
-3.2184 -0.7570 0.0884 0.7347 2.4280
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 13.83680 0.23820 58.09 <2e-16 ***
Sleep_Hours_Per_Night -1.07730 0.03422 -31.48 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.023 on 703 degrees of freedom
Multiple R-squared: 0.585, Adjusted R-squared: 0.5844
F-statistic: 991 on 1 and 703 DF, p-value: < 2.2e-16
library(tidyverse)
# Import the survey CSV with base R's reader
data <- read.csv(file = "Students Social Media Addiction.csv")
# Show each column's name, type and first few values
glimpse(data)
Rows: 705
Columns: 13
$ Student_ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14…
$ Age <int> 19, 22, 20, 18, 21, 19, 23, 20, 18, 21, 19, 2…
$ Gender <chr> "Female", "Male", "Female", "Male", "Male", "…
$ Academic_Level <chr> "Undergraduate", "Graduate", "Undergraduate",…
$ Country <chr> "Bangladesh", "India", "USA", "UK", "Canada",…
$ Avg_Daily_Usage_Hours <dbl> 5.2, 2.1, 6.0, 3.0, 4.5, 7.2, 1.5, 5.8, 4.0, …
$ Most_Used_Platform <chr> "Instagram", "Twitter", "TikTok", "YouTube", …
$ Affects_Academic_Performance <chr> "Yes", "No", "Yes", "No", "Yes", "Yes", "No",…
$ Sleep_Hours_Per_Night <dbl> 6.5, 7.5, 5.0, 7.0, 6.0, 4.5, 8.0, 6.0, 6.5, …
$ Mental_Health_Score <int> 6, 8, 5, 7, 6, 4, 9, 6, 7, 7, 5, 6, 8, 5, 7, …
$ Relationship_Status <chr> "In Relationship", "Single", "Complicated", "…
$ Conflicts_Over_Social_Media <int> 3, 0, 4, 1, 2, 5, 0, 2, 1, 1, 3, 2, 1, 4, 2, …
$ Addicted_Score <int> 8, 3, 9, 4, 7, 9, 2, 8, 5, 4, 7, 8, 4, 9, 5, …
# Numeric indicator of the outcome: 1 where the answer is "Yes", 0 otherwise
data[["Academic_Binary"]] <- ifelse(
  data[["Affects_Academic_Performance"]] == "Yes",
  1,
  0
)
library(dplyr)
library(ggplot2)

# One row per distinct daily-usage value, holding the mean mental health score
line_data <- summarise(
  group_by(data, Avg_Daily_Usage_Hours),
  mean_mental_health = mean(Mental_Health_Score, na.rm = TRUE)
)

# Scatter of the per-value means with a linear trend line.
# The trend is fitted to the aggregated means in `line_data`, not the raw rows.
line_data %>%
  ggplot(aes(x = Avg_Daily_Usage_Hours, y = mean_mental_health)) +
  geom_point(color = "black", size = 1.5) +
  geom_smooth(method = "lm", color = "blue") +
  theme_minimal() +
  labs(
    title = "Avg Social Media Usage vs Mental Health",
    x = "Average Daily Usage (hours)",
    y = "Average Mental Health Score"
  )
`geom_smooth()` using formula = 'y ~ x'
This plot checks whether there is a pattern or trend between the time students spend on social media and their mental health scores.
library(dplyr)
library(ggplot2)

# One row per distinct sleep duration, holding the mean addiction score
line_data2 <- summarise(
  group_by(data, Sleep_Hours_Per_Night),
  mean_addicted_score = mean(Addicted_Score, na.rm = TRUE)
)

# Scatter of the per-value means with a linear trend line and its
# confidence band. The trend is fitted to the means in `line_data2`.
line_data2 %>%
  ggplot(aes(x = Sleep_Hours_Per_Night, y = mean_addicted_score)) +
  geom_point(color = "black") +
  geom_smooth(method = "lm", color = "red", se = TRUE) +
  theme_minimal() +
  labs(
    title = "Sleep Hours vs Addicted Score",
    x = "Sleep Hours Per Night",
    y = "Average Addicted Score"
  )
`geom_smooth()` using formula = 'y ~ x'
The chart shows that the more students sleep, the lower their average social media addiction score.
# Distribution of nightly sleep hours within each addiction score.
# The score is converted to a factor so every value gets its own box.
data %>%
  ggplot(aes(x = as.factor(Addicted_Score), y = Sleep_Hours_Per_Night)) +
  geom_boxplot(fill = "lightblue") +
  labs(
    title = "Sleep Hours by Addiction Score",
    x = "Addicted Score",
    y = "Sleep Hours per Night"
  )
The chart shows that the higher the addiction score, the less students sleep per night.
# Mean mental health score for every gender / daily-usage combination.
# `.groups = "drop"` returns an ungrouped result.
usage_summary <- data %>%
  group_by(Gender, Avg_Daily_Usage_Hours) %>%
  summarise(
    avg_mental_health = mean(Mental_Health_Score, na.rm = TRUE),
    .groups = "drop"
  )

# Scatter of the group means with one linear trend line (and confidence
# band) per gender. The trends are fitted to the means in `usage_summary`.
ggplot(
  usage_summary,
  aes(x = Avg_Daily_Usage_Hours, y = avg_mental_health, color = Gender)
) +
  geom_point(size = 1.4) +
  # Line thickness is set with `linewidth`; using `size` for lines is
  # deprecated since ggplot2 3.4.0 and raises a warning.
  geom_smooth(method = "lm", se = TRUE, linewidth = 1.2) +
  labs(
    title = "Mental Health Score vs Social Media Usage by Gender",
    x = "Average Daily Social Media Usage (hours)",
    y = "Average Mental Health Score"
  ) +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'
The chart shows that the longer the daily usage time, the lower the average mental health score (a negative relationship).