       
    

  
  


 
 
 
   
 
 

 

 
 
  
    
 
  
     
     
    
       
 
  

                  
            
               
               
                    
                
                 
                
         
                  
             
             
 

                
              
                  
                 
 
 
library(tidyverse)
library(here)
library(cluster)
library(ClusterR)
library(factoextra)
library(kmed)
library(plyr)
library(lattice)
library(graphics)
library(grid)
library(gridExtra)
library(pander)
library(clusterSim)
library(patchwork)
library(GGally)
 

     
   
    
   
   
   
   
    
   
   
   

  
   
   
   
   
   
   
   
   
   
 
 
 
 
 
 
 
 
 
 

   
      
       
      
      
      
      
       
      
      
   
                  
                 
                   
                   
 

 
     
      
      
      
      
      
               
              
             
               
              
           
 

      
0
25
50
75
4 6 8
happy
count
0
10
20
25 50 75 100
transpo
count
              
            
                
            
                 
                  
   

      
0
10
20
30
0 5000 10000 15000
pop_density
count
0
5
10
15
20
500 1000 1500 2000 2500
rent
count
0
5
10
15
2000 4000 6000
income
count
              
                 
                   
     
                
                
       


   
    
    
    
    
    
    
               
                  
               
               
                 
              
                 
 

 
Corr:
0.942***
Corr:
−0.187**
Corr:
−0.201***
Corr:
0.578***
Corr:
0.538***
Corr:
0.282***
Corr:
0.858***
Corr:
0.815***
Corr:
−0.400***
Corr:
0.464***
income
rent
pop_density
transpo
happy
income
rent
pop_density
transpo
happy
10002000300040005000 500 10001500200025000 5000 10000 15000 25 50 75 4 6 8
0e+00
1e−04
2e−04
3e−04
500
1000
1500
2000
2500
0
5000
10000
15000
25
50
75
4
6
8
               
              
              
             
                  
               
                 
              


   
  
  
  
  
  
  
               
              
             
                 
                
     
 

    
0
5000
10000
15000
happy income pop_density rent transpo
name
value
               
               
               
                 
               
         
 
                  
              
               
                  
               
             
  

     
    
    
    
    
    
    
    
    
    

      
400
800
1200
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
Number of clusters k
Total Within Sum of Square
Optimal number of clusters
                  
                 
                 
                    
                
        
 

   
−4
−2
0
2
−2.5 0.0 2.5
Dim1 (63.6%)
Dim2 (25.6%)
cluster
1
2
3
K−Means Plot: 3 centers
−4
−2
0
2
−2.5 0.0 2.5
Dim1 (63.6%)
Dim2 (25.6%)
cluster
1
2
3
4
K−Means Plot: 4 centers
−4
−2
0
2
−2.5 0.0 2.5
Dim1 (63.6%)
Dim2 (25.6%)
cluster
1
2
3
4
5
K−Means Plot: 5 centers
                
                 
           
               
            
              
              
           
          
 

   

 
 
 
               
                
              
               
             
 
    

 
     
    
    
    
                  
               
              
 
                
                
            
            
              
              
              
           
    
 
 
  
   
  
  
  
  
   
  

  
  
  
   
  
  
  
  
   
  
  
  
 
  
  
   

  
−4
−2
0
2
−2.5 0.0 2.5
Dim1 (63.6%)
Dim2 (25.6%)
Cluster Group
Under Developed
Developing
Fully Developed
K−Means Plot: 3 centers
                
               
                
              
              
               
                  
            

      

    
88
75
137
0
50
100
Under Developed Developing Fully Developed
label
count
               
                
                    
       
                 
           


 
library(tidyverse)
library(here)
library(cluster)
library(ClusterR)
library(factoextra)
library(kmed)
library(plyr)
library(lattice)
library(graphics)
library(grid)
library(gridExtra)
library(GGally)
library(clusterSim)
# Load the raw city lifestyle data, then keep only the columns used in the
# analysis and give them short names in the same step.
data <- read_csv("city_lifestyle_dataset.csv")
# NOTE(review): the source column `country` is exposed as `continent` below --
# confirm that this is the intended name.
data1 <- dplyr::select(
  data,
  continent   = country,
  city        = city_name,
  income      = avg_income,
  rent        = avg_rent,
  pop_density = population_density,
  transpo     = public_transport_score,
  happy       = happiness_score
)
# Summary statistics for the five numeric variables.
data1 %>%
  dplyr::select(income, rent, pop_density, transpo, happy) %>%
  summary()

# One histogram per numeric variable.
# `bins = 30` is ggplot2's default bin count; it is stated explicitly so the
# binning is a visible choice and geom_histogram() no longer emits its
# "using `bins = 30`" message. The resulting plots are unchanged.
ggplot(data1, aes(x = happy)) +
  geom_histogram(bins = 30, fill = "steelblue")

ggplot(data1, aes(x = transpo)) +
  geom_histogram(bins = 30, fill = "steelblue")

ggplot(data1, aes(x = pop_density)) +
  geom_histogram(bins = 30, fill = "steelblue")

ggplot(data1, aes(x = rent)) +
  geom_histogram(bins = 30, fill = "steelblue")

ggplot(data1, aes(x = income)) +
  geom_histogram(bins = 30, fill = "steelblue")

# Pairwise plot matrix (GGally::ggpairs) of the five numeric variables.
ggpairs(
  dplyr::select(data1, income, rent, pop_density, transpo, happy)
)
# Shapiro-Wilk normality test, one per numeric variable.
normal1 <- shapiro.test(data1$income)
normal2 <- shapiro.test(data1$rent)
normal3 <- shapiro.test(data1$pop_density)
normal4 <- shapiro.test(data1$transpo)
normal5 <- shapiro.test(data1$happy)

# Collect the test statistic and p-value of every test into a single table,
# one row per variable (same order as the tests above).
normality <- local({
  tests <- list(normal1, normal2, normal3, normal4, normal5)
  tibble(
    variable  = c("income", "rent", "pop_density", "transpo", "happy"),
    statistic = unlist(lapply(tests, `[[`, "statistic")),
    p_value   = unlist(lapply(tests, `[[`, "p.value"))
  )
})
normality
# Boxplots of the five numeric variables side by side on one shared,
# unscaled value axis (long format: `name` = variable, `value` = observation).
ggplot(
  pivot_longer(
    dplyr::select(data1, income, rent, pop_density, transpo, happy),
    cols = everything()
  ),
  aes(x = name, y = value)
) +
  geom_boxplot()

# Correlation matrix of the numeric variables (cor() defaults).
cor(dplyr::select(data1, income, rent, pop_density, transpo, happy))

# Number of missing values in each column of data1.
colSums(is.na(data1))
# k means clustering ===============
# Standardise the numeric variables with scale() (defaults: centre and scale
# each column) so that no variable dominates the distance computation.
dataset <- scale(
  dplyr::select(data1, income, rent, pop_density, transpo, happy)
)
dataset
# identifying optimal clusters
# Elbow plot: total within-cluster sum of squares for k = 1..20.
# The k-means fits run inside fviz_nbclust() are random, so the seed is fixed
# to make the curve reproducible, and `nstart = 25` is forwarded to kmeans()
# through `...` so each k uses the same number of restarts as the fits below
# instead of a single random start.
set.seed(2026)
optimal <- fviz_nbclust(
  x = dataset,
  FUNcluster = kmeans,
  method = "wss",
  k.max = 20,
  nstart = 25,
  barfill = "steelblue",
  barcolor = "black",
  linecolor = "steelblue",
  print.summary = TRUE  # TRUE rather than T: T is an ordinary, reassignable variable
)
optimal

# start clustering
# Fit the 3-cluster solution. The seed is fixed immediately before the fit so
# the partition (and the cluster numbering) is repeatable.
set.seed(2026)
km_cluster <- kmeans(x = dataset, centers = 3, nstart = 25)
summary(km_cluster)

# Did the fit use exactly 10 iterations (kmeans' default iter.max)?
# The original run reported FALSE.
km_cluster[["iter"]] == 10

# Cluster centres on the standardised scale, inspected to decide which
# cluster number gets which descriptive name.
labels <- km_cluster[["centers"]]
rownames(labels) <- as.character(1:3)
labels
# Attach the cluster number and a descriptive label to every city.
#
# NOTE: the number -> name mapping was chosen by inspecting the cluster
# centres of the seeded kmeans() fit. k-means numbering is arbitrary, so
# re-check the mapping if the seed, the data or the kmeans() arguments change.
#
# `mutate` is namespaced (like `dplyr::select` elsewhere in this script)
# because plyr is attached after tidyverse and masks dplyr's `mutate`.
dataset1 <- data1 %>%
  dplyr::mutate(
    cluster = km_cluster$cluster,
    label = factor(
      cluster,
      levels = c(1, 2, 3),
      labels = c("Under Developed", "Developing", "Fully Developed")
    )
  )

# Compact city -> cluster lookup. Derived from dataset1 instead of repeating
# the whole mutate() so the two objects cannot drift apart.
dataset2 <- dataset1 %>%
  dplyr::select(city, cluster, label)
# Cluster plot of the 3-cluster solution, drawn twice: first with repelled
# (non-overlapping) point labels, then with the default label placement.
# TRUE/FALSE are used instead of T/F, which are ordinary variables that can be
# reassigned.
fviz_cluster(
  km_cluster,
  data = dataset,
  repel = TRUE,
  main = "K-Means Plot: 3 centers"
)
fviz_cluster(
  km_cluster,
  data = dataset,
  repel = FALSE,
  main = "K-Means Plot: 3 centers"
)
# 4-cluster alternative, fitted for comparison with the 3-cluster solution.
# No seed is set here: the result depends on the RNG state left by the code
# that ran before it, so run the script top to bottom to reproduce it.
km_cluster2 <- kmeans(dataset, centers = 4, nstart = 25)
summary(km_cluster2)
# Did the fit use exactly 10 iterations? The original run reported FALSE.
km_cluster2$iter == 10
# FALSE instead of F: F is an ordinary, reassignable variable.
fviz_cluster(
  km_cluster2,
  data = dataset,
  repel = FALSE,
  main = "K-Means Plot: 4 centers"
)

# 5-cluster alternative, fitted for comparison with the 3-cluster solution.
# No seed is set here: the result depends on the RNG state left by the code
# that ran before it, so run the script top to bottom to reproduce it.
km_cluster3 <- kmeans(dataset, centers = 5, nstart = 25)
summary(km_cluster3)
# Did the fit use exactly 10 iterations? The original run reported FALSE.
km_cluster3$iter == 10
# FALSE instead of F: F is an ordinary, reassignable variable.
fviz_cluster(
  km_cluster3,
  data = dataset,
  repel = FALSE,
  main = "K-Means Plot: 5 centers"
)
# Davies-Bouldin index (clusterSim::index.DB) for the 3-, 4- and 5-cluster
# solutions; lower values indicate a better partition. The original author
# judged the 3-cluster solution best because its value was low.
index.DB(x = dataset, cl = km_cluster$cluster)[["DB"]]
index.DB(x = dataset, cl = km_cluster2$cluster)[["DB"]]
index.DB(x = dataset, cl = km_cluster3$cluster)[["DB"]]
# Bar chart of the number of cities per labelled cluster, with each count
# printed just above its bar.
ggplot(dataset1, aes(x = label)) +
  geom_bar(color = "black", fill = "steelblue") +
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    size = 4,
    vjust = -0.5
  )
# Final 3-cluster plot (points only) with descriptive legend entries in place
# of the raw cluster numbers.
# The number -> name mapping is defined once and reused for the colour, shape
# and fill scales so the three legends cannot get out of sync. It mirrors the
# mapping used for the `label` factor earlier in the script.
cluster_names <- c(
  "1" = "Under Developed",
  "2" = "Developing",
  "3" = "Fully Developed"
)
fviz_cluster(
  km_cluster,
  data = dataset,
  geom = "point",
  repel = FALSE,  # FALSE rather than F: F is an ordinary, reassignable variable
  main = "K-Means Plot: 3 centers"
) +
  labs(
    color = "Cluster Group",
    shape = "Cluster Group",
    fill = "Cluster Group"
  ) +
  scale_color_discrete(labels = cluster_names) +
  scale_shape_discrete(labels = cluster_names) +
  scale_fill_discrete(labels = cluster_names)
