# Hierarchical Clustering
#
# EastWest Airlines
#
# Assignment 19

library(readxl)
# Load the "data" worksheet of the EastWest Airlines workbook.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where the workbook exists at exactly this location.
mydata <- read_excel(
  path = "C:\\Users\\RISHI RAHUL\\Desktop\\DS\\5 HClust\\Assignment\\EastWestAirlines.xlsx",
  sheet = "data"
)

# Keep at most the first 100 rows and drop the first column (the ID), which
# must not take part in the distance computation. head() caps the row count at
# what is actually available instead of indexing rows that may not exist.
mydata <- head(mydata[, -1], 100L)

# Standardise every remaining column (centre on the mean, divide by the
# standard deviation) so the variables enter the distance on a common scale.
# NOTE(review): a column that is constant over the retained rows has sd 0 and
# turns into NaN here; dist() then leaves that column out and rescales the
# result. Confirm whether such columns should be dropped before scaling.
normalized_data <- scale(mydata)

# Calculate Distance
# Pairwise Euclidean distances between the standardised observations.
d <- dist(normalized_data, method = "euclidean") # Distance matrix
str(d)
##  'dist' num [1:4950] 0.151 0.404 0.163 5.224 0.199 ...
##  - attr(*, "Size")= int 100
##  - attr(*, "Diag")= logi FALSE
##  - attr(*, "Upper")= logi FALSE
##  - attr(*, "method")= chr "euclidean"
##  - attr(*, "call")= language dist(x = normalized_data, method = "euclidean")

# Agglomerative clustering of the same observations under four linkage rules.
fit <- hclust(d, method = "complete") # Complete Linkage

fit1 <- hclust(d, method = "single") # Single Linkage

fit2 <- hclust(d, method = "average") # Average Linkage

# Centroid linkage is meant to be used with squared Euclidean distances
# (see ?hclust), so the dissimilarities are squared before clustering.
fit3 <- hclust(d^2, method = "centroid") # Centroid Linkage

# Plot Dendrogram
# Draw the complete-linkage tree; hang = -1 makes the leaf labels hang down
# from height 0. The call is made for its drawing side effect only, so its
# return value is not stored (storing it would create a global named `plot`
# that shadows the base function's name).
plot(fit, hang = -1) # Complete Linkage
# Outline the five clusters obtained by cutting the tree at k = 5.
rect.hclust(fit, k = 5, border = "red")

# Assign every observation to one of five clusters by cutting the
# complete-linkage tree.
groups <- cutree(fit, k = 5)

clustno <- as.data.frame(groups)

# Put the cluster label in front of the unscaled data. The result is built
# once, with cbind(); the recorded output below shows the original column
# names (e.g. "Award?") carried through.
final <- cbind(clustno, mydata)
# The data viewer is only useful in an interactive session, so it is skipped
# when the script runs in batch mode.
if (interactive()) {
  View(final)
}

# Per-cluster mean of every variable (first column, the cluster label itself,
# is excluded from the averaged columns and used as the grouping key).
aggregate(final[, -1], by = list(final$groups), FUN = mean)
##   Group.1  Balance  Qual_miles cc1_miles cc2_miles cc3_miles Bonus_miles
## 1       1  83421.3    5.494505  2.120879         1         1    16661.69
## 2       2 443003.0    0.000000  3.000000         2         1     1753.00
## 3       3 132716.0  598.200000  1.000000         1         1    11343.60
## 4       4 119731.5    0.000000  4.000000         1         1    72313.00
## 5       5  84409.0 5031.000000  2.000000         1         1    15436.00
##   Bonus_trans Flight_miles_12mo Flight_trans_12 Days_since_enroll
## 1    11.16484          287.8132       0.9120879          7198.121
## 2    43.00000         3850.0000      12.0000000          6948.000
## 3    14.40000         3373.6000       7.6000000          7067.400
## 4    42.00000         4250.0000      22.5000000          6802.000
## 5    16.00000         1150.0000       4.0000000          7766.000
##      Award?
## 1 0.3956044
## 2 1.0000000
## 3 0.6000000
## 4 1.0000000
## 5 0.0000000