MKTG Survey Results 4000 - Cluster Analysis

# Load necessary libraries
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(readxl)
library(cluster)
library(factoextra)

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

# Import the survey spreadsheet and, in the same pipeline, rename the one
# column whose name contains a space so later steps can use a plain identifier
data <- read_excel("survey_results.xlsx") %>%
  rename(device_access = `device access`)

# Keep only the survey items that feed the cluster analysis
data_selected <- select(
  data,
  overall_mental_health,
  negative_feelings,
  satisfaction_living_conditions,
  frequency_noise_disturbances,
  mental_resources,
  safety,
  city_anxiety,
  support,
  personalization,
  device_access
)

# Discard every row that has a missing value in any of the retained columns
data_clean <- data_selected %>%
  drop_na()

# Flag the columns whose standard deviation is strictly positive. Constant
# columns carry no information and would become NaN (0 / 0) once scaled.
# vapply() guarantees a logical vector (sapply() can return a list on empty
# input), and isTRUE() treats an undefined sd (NA, e.g. fewer than two rows)
# as "no variance" instead of letting NA leak into the column index.
nzv_cols <- vapply(
  data_clean,
  function(x) isTRUE(sd(x, na.rm = TRUE) > 0),
  logical(1)
)

# Fail early with a clear message rather than deep inside scale()/kmeans()
if (!any(nzv_cols)) {
  stop(
    "No columns with non-zero variance are left to cluster on.",
    call. = FALSE
  )
}

# drop = FALSE keeps a two-dimensional table even if only one column survives
data_for_clust <- data_clean[, nzv_cols, drop = FALSE]

# Standardise the remaining columns (scale() centres and scales each column)
data_scaled <- scale(data_for_clust)

# Fit a 3-cluster k-means model on the scaled data; the seed is fixed first
# so that the result of the nstart = 25 run is reproducible
set.seed(123)
kmeans_result <- kmeans(
  x = data_scaled,
  centers = 3,
  nstart = 25
)

# Inspect the fitted model, starting with the cluster centers
# (one row per cluster, expressed in the scaled units of each variable)
kmeans_result[["centers"]]

##   overall_mental_health negative_feelings satisfaction_living_conditions
## 1            -1.3145361         -0.726372                    -2.32417420
## 2             0.9716137         -0.726372                    -0.07042952
## 3            -0.5016829          0.564956                     0.30519459
##   frequency_noise_disturbances mental_resources city_anxiety    support
## 1                   -1.6137431       -1.8874586        -3.75 -1.2173146
## 2                   -0.5379144       -0.1655665         0.25 -0.2898368
## 3                    0.5379144        0.3200953         0.25  0.3284817
##   personalization device_access
## 1      -1.0978876    0.46513025
## 2       0.8539126    0.05168114
## 3      -0.4472875   -0.08613523

kmeans_result[["cluster"]]    # Cluster label assigned to each row

##  [1] 2 2 2 3 3 2 2 3 3 3 2 3 3 1 3 3

# Plot the k-means result against the scaled data it was fitted on
fviz_cluster(object = kmeans_result, data = data_scaled)

MKTG Survey Results 4000 - Cluster Analysis

2025-05-02