East West Airlines customer segmentation using K-Means clustering:

The cluster containing 4.8% of the customers appears to hold the most premium customers in terms of their balance (number of miles), miles qualifying for top-flight status, miles earned using the frequent-flier credit card, bonus miles earned, bonus transactions, and award status.

The cluster containing 32.1% of the customers appears to be the next tier of premium customers.

The cluster containing more than 60% of the customers falls into the average category.

# Install plyr only when it is not already available, so that re-running the
# script does not download and reinstall the package on every execution.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr", repos = "http://cran.us.r-project.org")
}

## Installing package into 'C:/Users/tswaminathan/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)

## package 'plyr' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\tswaminathan\AppData\Local\Temp\RtmpGyMpzi\downloaded_packages

library(plyr)

# Ask the user to pick the CSV file interactively and load it as a data frame.
mydata <- utils::read.csv(file = file.choose())
# Show a compact overview of the columns and their types.
utils::str(mydata)

## 'data.frame':    3999 obs. of  12 variables:
##  $ ID.              : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Balance          : int  28143 19244 41354 14776 97752 16420 84914 20856 443003 104860 ...
##  $ Qual_miles       : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ cc1_miles        : int  1 1 1 1 4 1 3 1 3 3 ...
##  $ cc2_miles        : int  1 1 1 1 1 1 1 1 2 1 ...
##  $ cc3_miles        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Bonus_miles      : int  174 215 4123 500 43300 0 27482 5250 1753 28426 ...
##  $ Bonus_trans      : int  1 2 4 1 26 0 25 4 43 28 ...
##  $ Flight_miles_12mo: int  0 0 0 0 2077 0 0 250 3850 1150 ...
##  $ Flight_trans_12  : int  0 0 0 0 4 0 0 1 12 3 ...
##  $ Days_since_enroll: int  7000 6968 7034 6952 6935 6942 6994 6938 6948 6931 ...
##  $ Award.           : int  0 0 0 0 1 0 0 1 1 1 ...

# Center and scale columns 2 to 12 (column 1 is left out) so that every
# variable contributes on a comparable scale to the distance computations.
normalized_data <- scale(x = mydata[, 2:12], center = TRUE, scale = TRUE)

# Scree ("elbow") plot: total within-cluster sum of squares for k = 1..12.
# k-means starts from randomly chosen centers, so fix the seed to make the
# curve reproducible between runs.
set.seed(42)

# Preallocate one slot per candidate k instead of growing the vector.
wss <- numeric(12)

# k = 1: with a single cluster the within-group sum of squares is the total
# sum of squares, i.e. (n - 1) times the sum of the column variances.
wss[1] <- (nrow(normalized_data) - 1) * sum(apply(normalized_data, 2, var))

# k = 2..12: several random starts (nstart) and a higher iteration cap make
# each point on the curve less dependent on one unlucky initialisation.
for (i in 2:12) {
  wss[i] <- kmeans(normalized_data, centers = i, nstart = 10, iter.max = 50)$tot.withinss
}

# Look for an "elbow" in the curve to choose the number of clusters.
plot(1:12, wss, type = "b", xlab = "Number of Clusters", ylab = "Within groups sum of squares")
title(sub = "K-Means Clustering Scree-Plot")

# Final 3-cluster solution.
# k-means is randomly initialised: without a seed the cluster numbering (and
# possibly the cluster sizes) change from run to run. Fix the seed and use
# several random starts so the reported solution is stable.
# NOTE(review): no seed is visible elsewhere in this file, so previously
# recorded output may use a different cluster numbering -- identify clusters
# by their size/profile rather than by their label.
set.seed(42)
fit <- kmeans(normalized_data, centers = 3, nstart = 25, iter.max = 50)

# Append the cluster membership to the original (unscaled) data.
final2 <- data.frame(mydata, fit$cluster)

# Per-cluster means of the original variables, used to profile each cluster.
aggregate(mydata[, 2:12], by = list(fit$cluster), FUN = mean)

##   Group.1   Balance Qual_miles cc1_miles cc2_miles cc3_miles Bonus_miles
## 1       1  43548.72   98.24822  1.208565  1.019429  1.000397    4602.883
## 2       2 114593.86  151.07004  3.679377  1.002335  1.004669   38182.099
## 3       3 194003.85  700.03646  2.395833  1.031250  1.218750   41092.568
##   Bonus_trans Flight_miles_12mo Flight_trans_12 Days_since_enroll
## 1    6.855274          201.9211       0.6268834          3683.918
## 2   18.529183          307.9113       0.9151751          4874.100
## 3   27.588542         4869.0208      14.2500000          4771.146
##      Award.
## 1 0.2002379
## 2 0.6389105
## 3 0.8072917

# Number of customers assigned to each cluster.
table(fit[["cluster"]])

## 
##    1    2    3 
## 2522 1285  192