# Load necessary libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(cluster)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Import the survey responses from the Excel workbook
data <- read_excel("survey_results.xlsx")

# The workbook names one column `device access`; give it a syntactic name so
# the rest of the script can refer to it without backticks
data <- rename(data, device_access = `device access`)

# Restrict the data to the survey items used as clustering inputs
# NOTE(review): these columns are presumably all numeric -- confirm, since
# sd()/scale() further down require numeric input
data_selected <- select(
  data,
  overall_mental_health,
  negative_feelings,
  satisfaction_living_conditions,
  frequency_noise_disturbances,
  mental_resources,
  safety,
  city_anxiety,
  support,
  personalization,
  device_access
)

# Keep complete cases only: drop every row with an NA in any selected column
data_clean <- data_selected %>%
  drop_na()
# Flag the columns whose standard deviation is strictly positive.
# Constant columns must be removed before scaling: scale() divides each
# column by its standard deviation, so a zero-variance column would turn
# into NaN and break the clustering step.
# vapply() guarantees a named logical vector (even for zero columns), and the
# is.na() guard treats an undefined sd (e.g. fewer than two rows) as
# "no usable variance" instead of producing an NA column index.
nzv_cols <- vapply(
  data_clean,
  function(x) {
    col_sd <- sd(x, na.rm = TRUE)
    !is.na(col_sd) && col_sd > 0
  },
  logical(1)
)

# Keep only the flagged columns; drop = FALSE keeps a table even if a single
# column survives
data_for_clust <- data_clean[, nzv_cols, drop = FALSE]

# Standardise every remaining column (center to mean 0, scale to sd 1) so
# that no variable dominates the distance computation because of its units
data_scaled <- scale(data_for_clust)
# Fix the RNG seed so the random starting configurations are reproducible
set.seed(123)

# Fit k-means with 3 clusters on the standardised data, trying 25 random
# starts (nstart) and keeping the best one
kmeans_result <- kmeans(x = data_scaled, centers = 3, nstart = 25)

# Print the cluster centers (expressed in the standardised units of
# data_scaled, not in the original survey scale)
kmeans_result[["centers"]]
## overall_mental_health negative_feelings satisfaction_living_conditions
## 1 -1.3145361 -0.726372 -2.32417420
## 2 0.9716137 -0.726372 -0.07042952
## 3 -0.5016829 0.564956 0.30519459
## frequency_noise_disturbances mental_resources city_anxiety support
## 1 -1.6137431 -1.8874586 -3.75 -1.2173146
## 2 -0.5379144 -0.1655665 0.25 -0.2898368
## 3 0.5379144 0.3200953 0.25 0.3284817
## personalization device_access
## 1 -1.0978876 0.46513025
## 2 0.8539126 0.05168114
## 3 -0.4472875 -0.08613523
# Print the cluster label assigned to each row of data_scaled
kmeans_result[["cluster"]]
## [1] 2 2 2 3 3 2 2 3 3 3 2 3 3 1 3 3
# Plot the fitted clusters; the scaled data is passed explicitly because the
# kmeans object itself does not store the observations
fviz_cluster(object = kmeans_result, data = data_scaled)
