# Assignment 19
library(readxl)
# Read the "data" sheet of the workbook, keeping only the first 100 rows and
# dropping the first column (the ID) so it does not take part in clustering.
mydata <- read_excel(
  path = "C:\\Users\\RISHI RAHUL\\Desktop\\DS\\5 HClust\\Assignment\\EastWestAirlines.xlsx",
  sheet = "data"
)[seq_len(100), -1]
# Centre and scale every column so the distance computation is not dominated
# by the variables with the largest raw magnitudes.
normalized_data <- scale(x = mydata)
# Pairwise Euclidean distances between the rows of the normalised data.
d <- dist(x = normalized_data, method = "euclidean")
# Print the structure of the resulting 'dist' object.
str(object = d)
## 'dist' num [1:4950] 0.151 0.404 0.163 5.224 0.199 ...
## - attr(*, "Size")= int 100
## - attr(*, "Diag")= logi FALSE
## - attr(*, "Upper")= logi FALSE
## - attr(*, "method")= chr "euclidean"
## - attr(*, "call")= language dist(x = normalized_data, method = "euclidean")
# Hierarchical clustering of the same observations under four linkage rules.
fit <- hclust(d, method = "complete") # Complete linkage
fit1 <- hclust(d, method = "single") # Single linkage
fit2 <- hclust(d, method = "average") # Average linkage
# Per ?hclust, the centroid method is meant to be used with *squared*
# Euclidean distances, so the distances are squared for this fit only.
fit3 <- hclust(d^2, method = "centroid") # Centroid linkage
# Number of clusters used both for the dendrogram boxes and for cutting the
# tree, kept in one place so the two always agree.
n_clusters <- 5

# Plot the dendrogram of the complete-linkage fit. plot() is called only for
# its side effect; its result is deliberately not stored, because binding it
# to `plot` would create a variable that masks the function name.
plot(fit, hang = -1) # Complete linkage
# Outline the n_clusters clusters on the dendrogram in red.
rect.hclust(fit, k = n_clusters, border = "red")

# Cluster membership (1..n_clusters) for every observation.
groups <- cutree(fit, k = n_clusters)
clustno <- as.data.frame(groups)
# Put the cluster label in front of the original (un-normalised) variables.
final <- cbind(clustno, mydata)
# Open the data viewer only in an interactive session; scripted or knitted
# runs have no viewer to open.
if (interactive()) {
  View(final)
}
# Mean of every variable within each cluster (the first column, the cluster
# label itself, is excluded from the averaged columns).
aggregate(final[, -1], by = list(final$groups), FUN = mean)
## Group.1 Balance Qual_miles cc1_miles cc2_miles cc3_miles Bonus_miles
## 1 1 83421.3 5.494505 2.120879 1 1 16661.69
## 2 2 443003.0 0.000000 3.000000 2 1 1753.00
## 3 3 132716.0 598.200000 1.000000 1 1 11343.60
## 4 4 119731.5 0.000000 4.000000 1 1 72313.00
## 5 5 84409.0 5031.000000 2.000000 1 1 15436.00
## Bonus_trans Flight_miles_12mo Flight_trans_12 Days_since_enroll
## 1 11.16484 287.8132 0.9120879 7198.121
## 2 43.00000 3850.0000 12.0000000 6948.000
## 3 14.40000 3373.6000 7.6000000 7067.400
## 4 42.00000 4250.0000 22.5000000 6802.000
## 5 16.00000 1150.0000 4.0000000 7766.000
## Award?
## 1 0.3956044
## 2 1.0000000
## 3 0.6000000
## 4 1.0000000
## 5 0.0000000