Kelompok 4: 1. Sharliz Mayalpen Zafirah 5052241003 2. Athaya Salsabila Alhen 5052241004 3. Shafira Fajriyatul Hasanah 5052241011
Untuk melakukan analisis mengenai hubungan antara musik dan kesehatan mental, kami menggunakan dataset yang tersedia di Kaggle. Dataset tersebut dapat diakses melalui tautan berikut: Mental Health & Music Relationship Analysis - EDA. Dataset ini memuat informasi mengenai genre musik yang sering didengarkan individu serta kondisi kesehatan mental mereka, sehingga sangat relevan untuk dianalisis dalam konteks ini.
# Load the dataset: fields are separated by ';', and both empty cells and the
# literal "N/A" are read as missing values.
data <- read.csv(
  file = "dataset.csv",
  sep = ";",
  na.strings = c("", "N/A")
)
# Show each column's type together with its first few values.
str(data)
## 'data.frame': 736 obs. of 33 variables:
## $ Timestamp : chr "8/27/2022 19:29:02" "8/27/2022 19:57:31" "8/27/2022 21:28:18" "8/27/2022 21:40:40" ...
## $ Age : int 18 63 18 61 18 18 18 21 19 18 ...
## $ Primary.streaming.service : chr "Spotify" "Pandora" "Spotify" "YouTube Music" ...
## $ Hours.per.day : num 3 1.5 4 2.5 4 5 3 1 6 1 ...
## $ While.working : chr "Yes" "Yes" "No" "Yes" ...
## $ Instrumentalist : chr "Yes" "No" "No" "No" ...
## $ Composer : chr "Yes" "No" "No" "Yes" ...
## $ Fav.genre : chr "Latin" "Rock" "Video game music" "Jazz" ...
## $ Exploratory : chr "Yes" "Yes" "No" "Yes" ...
## $ Foreign.languages : chr "Yes" "No" "Yes" "Yes" ...
## $ BPM : int 156 119 132 84 107 86 66 95 94 155 ...
## $ Frequency..Classical. : chr "Rarely" "Sometimes" "Never" "Sometimes" ...
## $ Frequency..Country. : chr "Never" "Never" "Never" "Never" ...
## $ Frequency..EDM. : chr "Rarely" "Never" "Very frequently" "Never" ...
## $ Frequency..Folk. : chr "Never" "Rarely" "Never" "Rarely" ...
## $ Frequency..Gospel. : chr "Never" "Sometimes" "Never" "Sometimes" ...
## $ Frequency..Hip.hop. : chr "Sometimes" "Rarely" "Rarely" "Never" ...
## $ Frequency..Jazz. : chr "Never" "Very frequently" "Rarely" "Very frequently" ...
## $ Frequency..K.pop. : chr "Very frequently" "Rarely" "Very frequently" "Sometimes" ...
## $ Frequency..Latin. : chr "Very frequently" "Sometimes" "Never" "Very frequently" ...
## $ Frequency..Lofi. : chr "Rarely" "Rarely" "Sometimes" "Sometimes" ...
## $ Frequency..Metal. : chr "Never" "Never" "Sometimes" "Never" ...
## $ Frequency..Pop. : chr "Very frequently" "Sometimes" "Rarely" "Sometimes" ...
## $ Frequency..R.B. : chr "Sometimes" "Sometimes" "Never" "Sometimes" ...
## $ Frequency..Rap. : chr "Very frequently" "Rarely" "Rarely" "Never" ...
## $ Frequency..Rock. : chr "Never" "Very frequently" "Rarely" "Never" ...
## $ Frequency..Video.game.music.: chr "Sometimes" "Rarely" "Very frequently" "Never" ...
## $ Anxiety : num 3 7 7 9 7 8 4 5 2 2 ...
## $ Depression : num 0 2 7 7 2 8 8 3 0 2 ...
## $ Insomnia : num 1 2 10 3 5 7 6 5 0 5 ...
## $ OCD : num 0 1 2 3 9 7 0 3 0 1 ...
## $ Music.effects : chr NA NA "No effect" "Improve" ...
## $ Permissions : chr "I understand." "I understand." "I understand." "I understand." ...
# Print per-column summary statistics for the freshly loaded data.
summary(data)
## Timestamp Age Primary.streaming.service Hours.per.day
## Length:736 Min. :10.00 Length:736 Min. : 0.000
## Class :character 1st Qu.:18.00 Class :character 1st Qu.: 2.000
## Mode :character Median :21.00 Mode :character Median : 3.000
## Mean :25.21 Mean : 3.573
## 3rd Qu.:28.00 3rd Qu.: 5.000
## Max. :89.00 Max. :24.000
## NA's :1
## While.working Instrumentalist Composer Fav.genre
## Length:736 Length:736 Length:736 Length:736
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Exploratory Foreign.languages BPM Frequency..Classical.
## Length:736 Length:736 Min. :0.00e+00 Length:736
## Class :character Class :character 1st Qu.:1.00e+02 Class :character
## Mode :character Mode :character Median :1.20e+02 Mode :character
## Mean :1.59e+06
## 3rd Qu.:1.44e+02
## Max. :1.00e+09
## NA's :107
## Frequency..Country. Frequency..EDM. Frequency..Folk. Frequency..Gospel.
## Length:736 Length:736 Length:736 Length:736
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Frequency..Hip.hop. Frequency..Jazz. Frequency..K.pop. Frequency..Latin.
## Length:736 Length:736 Length:736 Length:736
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Frequency..Lofi. Frequency..Metal. Frequency..Pop. Frequency..R.B.
## Length:736 Length:736 Length:736 Length:736
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Frequency..Rap. Frequency..Rock. Frequency..Video.game.music.
## Length:736 Length:736 Length:736
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Anxiety Depression Insomnia OCD
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 4.000 1st Qu.: 2.000 1st Qu.: 1.000 1st Qu.: 0.000
## Median : 6.000 Median : 5.000 Median : 3.000 Median : 2.000
## Mean : 5.838 Mean : 4.796 Mean : 3.738 Mean : 2.637
## 3rd Qu.: 8.000 3rd Qu.: 7.000 3rd Qu.: 6.000 3rd Qu.: 5.000
## Max. :10.000 Max. :10.000 Max. :10.000 Max. :10.000
##
## Music.effects Permissions
## Length:736 Length:736
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
# Number of rows in the dataset.
nrow(data)
## [1] 736
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
library(naniar)
library(tidyr)
library(patchwork)
library(reshape2)
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
library(ggcorrplot)
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
# Total number of missing values across the whole data frame.
sum(is.na(data))
## [1] 129
# Missing-value count for each individual column.
colSums(is.na(data))
## Timestamp Age
## 0 1
## Primary.streaming.service Hours.per.day
## 1 0
## While.working Instrumentalist
## 3 4
## Composer Fav.genre
## 1 0
## Exploratory Foreign.languages
## 0 4
## BPM Frequency..Classical.
## 107 0
## Frequency..Country. Frequency..EDM.
## 0 0
## Frequency..Folk. Frequency..Gospel.
## 0 0
## Frequency..Hip.hop. Frequency..Jazz.
## 0 0
## Frequency..K.pop. Frequency..Latin.
## 0 0
## Frequency..Lofi. Frequency..Metal.
## 0 0
## Frequency..Pop. Frequency..R.B.
## 0 0
## Frequency..Rap. Frequency..Rock.
## 0 0
## Frequency..Video.game.music. Anxiety
## 0 0
## Depression Insomnia
## 0 0
## OCD Music.effects
## 0 8
## Permissions
## 0
# Plot the number of missing values per column (labels are in Indonesian).
print(
  gg_miss_var(data) +
    labs(
      title = "Jumlah Missing Value ",
      x = "Kolom",
      y = "Jumlah"
    )
)
# BPM ----
# Mean BPM within each favourite genre, ignoring rows where BPM is missing.
# NOTE(review): the mean is sensitive to the extreme BPM values reported by
# summary(data) (maximum 1e+09), so individual genre means can be inflated.
bpm_per_genre <- summarise(
  group_by(data, Fav.genre),
  mean_bpm = mean(BPM, na.rm = TRUE)
)
bpm_per_genre
## # A tibble: 16 × 2
## Fav.genre mean_bpm
## <chr> <dbl>
## 1 Classical 115.
## 2 Country 110.
## 3 EDM 148.
## 4 Folk 110.
## 5 Gospel 89.2
## 6 Hip hop 112.
## 7 Jazz 124.
## 8 K pop 129.
## 9 Latin 134.
## 10 Lofi 114.
## 11 Metal 139.
## 12 Pop 119.
## 13 R&B 114.
## 14 Rap 127.
## 15 Rock 124.
## 16 Video game music 27027144.
# Impute missing BPM with the median BPM of the respondent's favourite genre.
#
# The median is used instead of the mean because BPM contains extreme values
# (summary(data) reports a maximum of 1e+09). Those values inflate the
# per-genre mean (about 27 million for "Video game music"), and a mean-based
# imputation would copy that figure into every imputed row of the genre.
# ave() returns, for every row, the statistic of that row's genre group, so no
# helper column or join is needed.
genre_median_bpm <- ave(
  as.numeric(data$BPM),
  data$Fav.genre,
  FUN = function(x) median(x, na.rm = TRUE)
)
data$BPM <- ifelse(is.na(data$BPM), genre_median_bpm, data$BPM)
# Drop the helper vector once it is no longer needed.
rm(genre_median_bpm)
# NOTE(review): the implausible observed BPM values themselves are left
# untouched here and still feed every later numeric analysis -- consider
# cleaning them explicitly.
# Check the result of the imputation
colSums(is.na(data))
## Timestamp Age
## 0 1
## Primary.streaming.service Hours.per.day
## 1 0
## While.working Instrumentalist
## 3 4
## Composer Fav.genre
## 1 0
## Exploratory Foreign.languages
## 0 4
## BPM Frequency..Classical.
## 0 0
## Frequency..Country. Frequency..EDM.
## 0 0
## Frequency..Folk. Frequency..Gospel.
## 0 0
## Frequency..Hip.hop. Frequency..Jazz.
## 0 0
## Frequency..K.pop. Frequency..Latin.
## 0 0
## Frequency..Lofi. Frequency..Metal.
## 0 0
## Frequency..Pop. Frequency..R.B.
## 0 0
## Frequency..Rap. Frequency..Rock.
## 0 0
## Frequency..Video.game.music. Anxiety
## 0 0
## Depression Insomnia
## 0 0
## OCD Music.effects
## 0 8
## Permissions
## 0
# AGE ----
# Replace missing ages with the mean of the observed ages.
data$Age <- replace(
  data$Age,
  is.na(data$Age),
  mean(data$Age, na.rm = TRUE)
)
# Return the most frequent non-missing value of `v`.
#
# v: an atomic vector (or factor); missing values are ignored.
# Returns a length-one vector of the same type as `v`. When several values
# share the highest count, the one that appears first in `v` is returned.
# When `v` is empty or contains only NA, a single NA of the same type is
# returned, so that `x[is.na(x)] <- getmode(x)` stays a valid assignment
# instead of failing on a zero-length replacement.
getmode <- function(v) {
  uniqv <- unique(v[!is.na(v)])
  if (length(uniqv) == 0L) {
    return(v[NA_integer_])
  }
  # match() maps every element to its position in `uniqv` (NA for missing
  # values, which tabulate() ignores); which.max() picks the first maximum.
  uniqv[which.max(tabulate(match(v, uniqv)))]
}
# Fill the remaining gaps in the categorical columns with each column's most
# frequent value, as returned by getmode().
data <- data %>%
  mutate(across(
    all_of(c(
      "Instrumentalist",
      "Music.effects",
      "Foreign.languages",
      "While.working",
      "Composer",
      "Primary.streaming.service"
    )),
    function(column) replace(column, is.na(column), getmode(column))
  ))
# Confirm that no missing values are left, overall and per column.
sum(is.na(data))
## [1] 0
colSums(is.na(data))
## Timestamp Age
## 0 0
## Primary.streaming.service Hours.per.day
## 0 0
## While.working Instrumentalist
## 0 0
## Composer Fav.genre
## 0 0
## Exploratory Foreign.languages
## 0 0
## BPM Frequency..Classical.
## 0 0
## Frequency..Country. Frequency..EDM.
## 0 0
## Frequency..Folk. Frequency..Gospel.
## 0 0
## Frequency..Hip.hop. Frequency..Jazz.
## 0 0
## Frequency..K.pop. Frequency..Latin.
## 0 0
## Frequency..Lofi. Frequency..Metal.
## 0 0
## Frequency..Pop. Frequency..R.B.
## 0 0
## Frequency..Rap. Frequency..Rock.
## 0 0
## Frequency..Video.game.music. Anxiety
## 0 0
## Depression Insomnia
## 0 0
## OCD Music.effects
## 0 0
## Permissions
## 0
Berikut adalah grafik tentang informasi responden yang ada pada dataset.
# Histogram of respondent ages in 5-year bins, with a frequency polygon drawn
# over the same bins. The y axis is the number of respondents.
ggplot(data, aes(x = Age)) +
  geom_histogram(
    binwidth = 5,
    fill = "steelblue",
    color = "white"
  ) +
  # `linewidth` replaces `size` for line geoms; `size` has been deprecated for
  # lines since ggplot2 3.4.0 and triggers a warning.
  geom_freqpoly(
    binwidth = 5,
    color = "red",
    linewidth = 1.2
  ) +
  labs(
    title = "Distribusi Umur Responden",
    x = "Umur (tahun)",
    y = "Jumlah Responden"
  ) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Mayoritas responden berada pada rentang usia 15–30 tahun, dengan puncak
frekuensi sekitar umur 21 tahun. Hal ini menunjukkan bahwa hasil survei
lebih banyak merepresentasikan pandangan dan pengalaman generasi muda,
khususnya Gen Z dan awal milenial.
# Count respondents per primary streaming service and express each count as a
# whole-number percentage of all respondents.
streaming_counts <- summarise(
  group_by(data, Primary.streaming.service),
  count = n()
)
streaming_counts <- mutate(
  streaming_counts,
  percentage = round(count / sum(count) * 100),
  label = Primary.streaming.service
)

# Pie chart: one stacked bar wrapped into polar coordinates, with the
# percentage printed in the middle of every slice.
ggplot(streaming_counts, aes(x = "", y = percentage, fill = label)) +
  geom_col(color = "white", width = 1) +
  geom_text(
    aes(label = paste0(percentage, "%")),
    position = position_stack(vjust = 0.5),
    size = 4
  ) +
  coord_polar(theta = "y") +
  labs(title = "Primary Streaming Service Distribution") +
  theme_void() +
  theme(
    plot.title = element_text(hjust = 0.1, size = 5),
    legend.text = element_text(size = 5),
    legend.title = element_blank()
  )
Dominasi ini mencerminkan kuatnya brand presence Spotify, serta
kemungkinan bahwa pengguna merasa lebih puas dengan fitur, rekomendasi
musik, atau antarmuka pengguna dibanding kompetitor seperti Apple Music
atau YouTube Music. Hal ini juga bisa menunjukkan bahwa Spotify menjadi
platform utama dalam membentuk pengalaman musik generasi muda.
Heatmap berikut memperlihatkan hubungan korelasi antara variabel gangguan mental, umur responden, BPM lagu yang didengarkan, serta lama waktu mendengarkan musik setiap harinya.
# Keep only the numeric columns and compute their pairwise correlations.
# NOTE(review): BPM still contains the extreme values reported by
# summary(data) (maximum 1e+09); with cor()'s default method they can dominate
# every BPM coefficient -- confirm whether they should be cleaned first.
df_num <- select(data, where(is.numeric))
cor_matrix <- cor(df_num, use = "pairwise.complete.obs")

# Lower-triangle heatmap with hierarchically ordered variables and the
# coefficients printed in the cells.
ggcorrplot(
  cor_matrix,
  type = "lower",
  hc.order = TRUE,
  lab = TRUE,
  lab_size = 3,
  colors = c("#6D9EC1", "white", "#E46726"),
  title = "Analysis of Correlations",
  show.legend = FALSE
) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.text.x = element_text(angle = 60, hjust = 1, size = 10),
    axis.text.y = element_text(size = 10)
  )
Visualisasi di bawah ini merupakan boxplot yang membandingkan kebiasaan mendengarkan musik saat bekerja (ya atau tidak) terhadap empat kondisi kesehatan mental, yaitu OCD, insomnia, depresi, dan anxiety.
# Recode the "While.working" answers as integers (1 = "Yes", 0 = "No") and
# drop rows holding any other value.
df_cleaned <- data %>%
  mutate(While_working_numeric = case_when(
    While.working == "Yes" ~ 1L,
    While.working == "No" ~ 0L,
    TRUE ~ NA_integer_
  )) %>%
  drop_na(While_working_numeric)

mental_health_conditions <- c("Anxiety", "Depression", "Insomnia", "OCD")

# Build one boxplot of the score column named by `condition`, split by the
# music-while-working habit.
#
# df:        data frame containing While_working_numeric and the score column.
# condition: name of the score column, as a string.
# Returns a ggplot object.
#
# The plot is built inside a function on purpose: aes() expressions are only
# evaluated when the plot is rendered. In a top-level for loop that stores the
# plots and renders them later, `.data[[condition]]` would be resolved with
# the loop variable's final value, so every panel would show the same column.
# A function call gives each plot its own fixed `condition`.
plot_condition_boxplot <- function(df, condition) {
  force(condition)
  ggplot(df, aes(x = factor(While_working_numeric), y = .data[[condition]])) +
    geom_boxplot(fill = "darkolivegreen3") +
    labs(
      title = paste("Distribusi", condition, "Berdasarkan Kebiasaan Mendengarkan Musik Saat Bekerja"),
      x = "Mendengarkan Musik Saat Bekerja (0 = Tidak, 1 = Ya)",
      y = condition
    ) +
    scale_x_discrete(labels = c("0" = "Tidak", "1" = "Ya")) +
    theme_minimal()
}

# Named list with one plot per condition, in the order listed above.
plot_list <- lapply(
  setNames(mental_health_conditions, mental_health_conditions),
  function(condition) plot_condition_boxplot(df_cleaned, condition)
)

# Arrange the four boxplots in a two-column grid.
grid.arrange(
  grobs = plot_list,
  ncol = 2
)
Sebagian besar responden (75%) merasakan perbaikan kondisi mental akibat musik, terutama pada masalah depresi dan insomnia, yang terlihat dari dominasi bar “Improve” di berbagai skor kesehatan mental. Hal ini ditunjukkan oleh sebagian besar orang yang memilih menjadikan musik sebagai media coping mechanism di sela-sela berbagai aktivitas mereka.
# Reshape the four score columns into long form (one row per respondent and
# condition) and round every score to a whole number.
data_long <- data %>%
  pivot_longer(
    cols = all_of(c("Anxiety", "Depression", "Insomnia", "OCD")),
    names_to = "Mental_Condition",
    values_to = "Score"
  ) %>%
  mutate(Score = round(Score))

# Number of rows for every combination of music effect, condition and score.
summary_count <- summarise(
  group_by(data_long, Music.effects, Mental_Condition, Score),
  count = n(),
  .groups = "drop"
)

# Dodged bars per score, one facet per condition, coloured by music effect.
ggplot(summary_count, aes(x = factor(Score), y = count, fill = Music.effects)) +
  geom_col(width = 0.7, position = "dodge") +
  facet_wrap(~ Mental_Condition, scales = "free_y") +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Jumlah Responden per Skor Kesehatan Mental dan Efek Musik",
    x = "Skor Kesehatan Mental",
    y = "Jumlah Responden",
    fill = "Efek Musik"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(size = 10))
# Count respondents per reported music effect and build a legend label that
# carries the whole-number percentage.
music_effect <- count(data, Music.effects)
music_effect <- mutate(
  music_effect,
  percent = round(100 * n / sum(n)),
  label = paste0(Music.effects, ": ", percent, "%")
)

# Build the pie chart
ggplot(music_effect, aes(x = "", y = n, fill = label)) +
  geom_col(color = "white", width = 1) +
  coord_polar(theta = "y") +
  scale_fill_manual(
    values = c("#66c2a5", "#fc8d62", "#8da0cb", "#e78ac3", "#a6d854", "#ffd92f")
  ) +
  labs(
    title = "Distribusi Efek Musik terhadap Responden",
    fill = "Efek Musik"
  ) +
  theme_void() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    legend.position = "right"
  )
Grafik ini memperkuat asumsi bahwa preferensi genre musik bisa berkaitan dengan kondisi mental seseorang. Musik digunakan tidak hanya untuk hiburan, tetapi juga sebagai coping mechanism yang bersifat personal dan spesifik. Genre favorit bisa mencerminkan kebutuhan emosional berdasarkan kondisi mental masing-masing individu.
mental_health_list <- c("OCD", "Insomnia", "Depression", "Anxiety")
# All listening-frequency columns, i.e. those named "Frequency..<genre>.".
genre_cols <- names(data)[grepl("^Frequency\\.\\.", names(data))]
# Bar colour used for each condition's panel.
condition_colors <- c(
  OCD = "orchid",
  Insomnia = "lightgreen",
  Depression = "salmon",
  Anxiety = "skyblue"
)

# Bar chart of the genres most often marked "Very frequently" among
# respondents whose score for one condition is at least 6.
#
# df:         data frame with the score columns and the frequency columns.
# mh:         name of the score column to filter on, as a string.
# genre_cols: names of the "Frequency..<genre>." columns.
# fill_color: bar fill colour.
# Returns a ggplot object showing the top 7 genres (ties at the cut-off are
# kept by slice_max()).
plot_top_genres <- function(df, mh, genre_cols, fill_color) {
  long_data <- df %>%
    filter(.data[[mh]] >= 6) %>%
    select(all_of(genre_cols)) %>%
    pivot_longer(cols = everything(), names_to = "Genre", values_to = "Frequency") %>%
    filter(Frequency == "Very frequently")

  # Turn column names such as "Frequency..Hip.hop." into "Hip hop".
  genre_label <- gsub("Frequency\\.\\.|\\.$", "", long_data$Genre)
  genre_label <- gsub("\\.", " ", genre_label)
  # The R&B column is named "Frequency..R.B.", so the generic clean-up yields
  # "R B"; restore the spelling used in Fav.genre.
  genre_label[genre_label == "R B"] <- "R&B"
  long_data$Genre <- genre_label

  # slice_max() already orders by count, so no separate arrange() is needed.
  genre_count <- long_data %>%
    group_by(Genre) %>%
    summarise(count = n()) %>%
    slice_max(order_by = count, n = 7)

  ggplot(genre_count, aes(x = reorder(Genre, -count), y = count)) +
    geom_bar(stat = "identity", fill = fill_color) +
    labs(
      title = paste("Top Genre -", mh),
      x = "Genre", y = "Count"
    ) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 9))
}

# Named list with one plot per condition.
plot_list <- lapply(
  setNames(mental_health_list, mental_health_list),
  function(mh) plot_top_genres(data, mh, genre_cols, condition_colors[[mh]])
)

# 2 x 2 layout composed with patchwork.
(plot_list[["OCD"]] | plot_list[["Insomnia"]]) /
  (plot_list[["Depression"]] | plot_list[["Anxiety"]])