This is a transnational data set that contains all the transactions occurring between 12/01/2010 and 12/09/2011 for a UK-based and registered non-store online retailer. The company mainly sells unique all-occasion gifts. Many customers of the company are wholesalers.
The raw data includes 541909 observations of 8 variables. I omitted the 135080 observations that contain missing values, and the new data set has 406829 observations.
Create an aggregated variable named Amount by multiplying Quantity by UnitPrice, which gives the total amount of money spent per product / item in each transaction. The Amount variable is assumed to represent sales revenue. Separate the variable InvoiceDate into two variables: InvoiceDate (the date) and InvoiceTime (the hour of the day). This allows different transactions created by the same customer on the same day but at different times to be treated separately. Create an SKU variable that holds the first 3 characters of StockCode to indicate different stock keeping units; it is used later to calculate the "Breadth" customer behavior indicator.
# Drop every row that has a missing value in any column.
eRetail <- na.omit(eRetail)

# Amount: money spent on one invoice line (quantity times unit price).
eRetail$Amount <- eRetail$Quantity * eRetail$UnitPrice

# SKU: first three characters of the stock code (used later for "Breadth").
eRetail$SKU <- substr(eRetail$StockCode, 1, 3)

# Split the "month/day/year hour:minute" timestamp into an hour-of-day string
# (InvoiceTime) and a calendar date (InvoiceDate).
# The parsed timestamp lives in a temporary POSIXct vector instead of being
# written into the data frame as a list-based POSIXlt column, and the time
# zone is pinned to UTC so the parsed wall-clock hour and date do not depend
# on the time zone of the machine running the script.
invoice_stamp <- as.POSIXct(
  as.character(eRetail$InvoiceDate),
  format = "%m/%d/%Y %H:%M",
  tz = "UTC"
)
eRetail$InvoiceTime <- format(invoice_stamp, "%H")
eRetail$InvoiceDate <- as.Date(invoice_stamp, tz = "UTC")
rm(invoice_stamp)
# Inspect the internal structure of the cleaned data. The lines that start
# with "##" are the output recorded when the analysis was run.
str(eRetail)## 'data.frame': 406829 obs. of 11 variables:
## $ InvoiceNo : Factor w/ 25900 levels "536365","536366",..: 1 1 1 1 1 1 1 2 2 3 ...
## $ StockCode : Factor w/ 4070 levels "10002","10080",..: 3538 2795 3045 2986 2985 1663 801 1548 1547 3306 ...
## $ Description: Factor w/ 4223 levels " 4 PURPLE FLOCK DINNER CANDLES",..: 4026 4034 931 1958 2979 3234 1572 1697 1694 258 ...
## $ Quantity : int 6 6 8 6 6 2 6 6 6 32 ...
## $ InvoiceDate: Date, format: "2010-12-01" "2010-12-01" ...
## $ UnitPrice : num 2.55 3.39 2.75 3.39 3.39 7.65 4.25 1.85 1.85 1.69 ...
## $ CustomerID : int 17850 17850 17850 17850 17850 17850 17850 17850 17850 13047 ...
## $ Country : Factor w/ 38 levels "Australia","Austria",..: 36 36 36 36 36 36 36 36 36 36 ...
## $ Amount : num 15.3 20.3 22 20.3 20.3 ...
## $ SKU : chr "851" "710" "844" "840" ...
## $ InvoiceTime: chr "08" "08" "08" "08" ...
## - attr(*, "na.action")=Class 'omit' Named int [1:135080] 623 1444 1445 1446 1447 1448 1449 1450 1451 1452 ...
## .. ..- attr(*, "names")= chr [1:135080] "623" "1444" "1445" "1446" ...
# Per-column summary of the cleaned data (recorded output follows).
summary(eRetail)## InvoiceNo StockCode
## 576339 : 542 85123A : 2077
## 579196 : 533 22423 : 1905
## 580727 : 529 85099B : 1662
## 578270 : 442 84879 : 1418
## 573576 : 435 47566 : 1416
## 567656 : 421 20725 : 1359
## (Other):403927 (Other):396992
## Description Quantity
## WHITE HANGING HEART T-LIGHT HOLDER: 2070 Min. :-80995.00
## REGENCY CAKESTAND 3 TIER : 1905 1st Qu.: 2.00
## JUMBO BAG RED RETROSPOT : 1662 Median : 5.00
## ASSORTED COLOUR BIRD ORNAMENT : 1418 Mean : 12.06
## PARTY BUNTING : 1416 3rd Qu.: 12.00
## LUNCH BAG RED RETROSPOT : 1358 Max. : 80995.00
## (Other) :397000
## InvoiceDate UnitPrice CustomerID
## Min. :2010-12-01 Min. : 0.00 Min. :12346
## 1st Qu.:2011-04-06 1st Qu.: 1.25 1st Qu.:13953
## Median :2011-07-31 Median : 1.95 Median :15152
## Mean :2011-07-10 Mean : 3.46 Mean :15288
## 3rd Qu.:2011-10-20 3rd Qu.: 3.75 3rd Qu.:16791
## Max. :2011-12-09 Max. :38970.00 Max. :18287
##
## Country Amount SKU
## United Kingdom:361878 Min. :-168469.6 Length:406829
## Germany : 9495 1st Qu.: 4.2 Class :character
## France : 8491 Median : 11.1 Mode :character
## EIRE : 7485 Mean : 20.4
## Spain : 2533 3rd Qu.: 19.5
## Netherlands : 2371 Max. : 168469.6
## (Other) : 14576
## InvoiceTime
## Length:406829
## Class :character
## Mode :character
##
##
##
##
# First rows of the cleaned data (recorded output follows).
head(eRetail)## InvoiceNo StockCode Description Quantity
## 1 536365 85123A WHITE HANGING HEART T-LIGHT HOLDER 6
## 2 536365 71053 WHITE METAL LANTERN 6
## 3 536365 84406B CREAM CUPID HEARTS COAT HANGER 8
## 4 536365 84029G KNITTED UNION FLAG HOT WATER BOTTLE 6
## 5 536365 84029E RED WOOLLY HOTTIE WHITE HEART. 6
## 6 536365 22752 SET 7 BABUSHKA NESTING BOXES 2
## InvoiceDate UnitPrice CustomerID Country Amount SKU InvoiceTime
## 1 2010-12-01 2.55 17850 United Kingdom 15.30 851 08
## 2 2010-12-01 3.39 17850 United Kingdom 20.34 710 08
## 3 2010-12-01 2.75 17850 United Kingdom 22.00 844 08
## 4 2010-12-01 3.39 17850 United Kingdom 20.34 840 08
## 5 2010-12-01 3.39 17850 United Kingdom 20.34 840 08
## 6 2010-12-01 7.65 17850 United Kingdom 15.30 227 08
# Boxplot of Amount; print the whisker ends, which are rows 1 and 5 of the
# "stats" matrix returned by boxplot().
boxplot(eRetail[["Amount"]])$stats[c(1, 5), ]
## [1] -18.75 42.45

# Cut off outliers: keep only invoice lines whose Amount lies in [0, 10000].
Retail <- eRetail[
  which(eRetail[["Amount"]] >= 0 & eRetail[["Amount"]] <= 10000),
]

# Same whisker check on the trimmed data.
boxplot(Retail[["Amount"]])$stats[c(1, 5), ]
## [1] 0.00 42.45
# Product-level totals: one row per (StockCode, Description) pair holding
# revenue, units sold, number of distinct customers and number of distinct
# invoices.
Retail1 <- ddply(
  Retail,
  .(StockCode, Description),
  summarize,
  sumAmount = sum(Amount),
  sumQuantity = sum(Quantity),
  nCustomer = length(unique(CustomerID)),
  nPurchase = length(unique(InvoiceNo))
)

# Top six products by units sold (recorded output follows).
head(Retail1[order(-Retail1[["sumQuantity"]]), ], n = 6L)
## StockCode Description sumAmount sumQuantity
## 3020 84077 WORLD WAR 2 GLIDERS ASSTD DESIGNS 13586.25 54415
## 3444 85099B JUMBO BAG RED RETROSPOT 85220.78 46181
## 3459 85123A WHITE HANGING HEART T-LIGHT HOLDER 100448.15 36725
## 3278 84879 ASSORTED COLOUR BIRD ORNAMENT 56580.34 35362
## 433 21212 PACK OF 72 RETROSPOT CAKE CASES 16394.53 33693
## 1109 22197 POPCORN HOLDER 23427.71 30931
## nCustomer nPurchase
## 3020 307 472
## 3444 635 1600
## 3459 856 1971
## 3278 678 1375
## 433 635 1029
## 1109 295 632
# Top six products by number of distinct customers (recorded output follows).
head(Retail1[order(-Retail1[["nCustomer"]]), ], n = 6L)
## StockCode Description sumAmount sumQuantity
## 1319 22423 REGENCY CAKESTAND 3 TIER 142592.95 12412
## 3459 85123A WHITE HANGING HEART T-LIGHT HOLDER 100448.15 36725
## 2799 47566 PARTY BUNTING 68844.33 15295
## 3278 84879 ASSORTED COLOUR BIRD ORNAMENT 56580.34 35362
## 1608 22720 SET OF 3 CAKE TINS PANTRY DESIGN 33347.80 7020
## 433 21212 PACK OF 72 RETROSPOT CAKE CASES 16394.53 33693
## nCustomer nPurchase
## 1319 881 1704
## 3459 856 1971
## 2799 708 1380
## 3278 678 1375
## 1608 640 1146
## 433 635 1029
# Top six products by revenue (recorded output follows).
head(Retail1[order(-Retail1[["sumAmount"]]), ], n = 6L)
## StockCode Description sumAmount sumQuantity
## 1319 22423 REGENCY CAKESTAND 3 TIER 142592.95 12412
## 3459 85123A WHITE HANGING HEART T-LIGHT HOLDER 100448.15 36725
## 3444 85099B JUMBO BAG RED RETROSPOT 85220.78 46181
## 3896 POST POSTAGE 77803.96 3120
## 2799 47566 PARTY BUNTING 68844.33 15295
## 3278 84879 ASSORTED COLOUR BIRD ORNAMENT 56580.34 35362
## nCustomer nPurchase
## 1319 881 1704
## 3459 856 1971
## 3444 635 1600
## 3896 331 1099
## 2799 708 1380
## 3278 678 1375
# Top six products by number of distinct invoices (recorded output follows).
head(Retail1[order(-Retail1[["nPurchase"]]), ], n = 6L)
## StockCode Description sumAmount sumQuantity
## 3459 85123A WHITE HANGING HEART T-LIGHT HOLDER 100448.15 36725
## 1319 22423 REGENCY CAKESTAND 3 TIER 142592.95 12412
## 3444 85099B JUMBO BAG RED RETROSPOT 85220.78 46181
## 2799 47566 PARTY BUNTING 68844.33 15295
## 3278 84879 ASSORTED COLOUR BIRD ORNAMENT 56580.34 35362
## 175 20725 LUNCH BAG RED RETROSPOT 28048.45 17697
## nCustomer nPurchase
## 3459 856 1971
## 1319 881 1704
## 3444 635 1600
## 2799 708 1380
## 3278 678 1375
## 175 532 1288
# Monthly sales of six selected products.
Retail2 <- subset(
  Retail,
  Description %in% c(
    "MEDIUM CERAMIC TOP STORAGE JAR",
    "JUMBO BAG RED RETROSPOT",
    "REGENCY CAKESTAND 3 TIER",
    "WHITE HANGING HEART T-LIGHT HOLDER",
    "PARTY BUNTING",
    "WORLD WAR 2 GLIDERS ASSTD DESIGNS"
  ),
  select = c(
    Description, InvoiceDate, InvoiceTime, Quantity,
    CustomerID, Amount, InvoiceNo
  )
)

# month() keeps only the month number, so December 2010 and December 2011
# fall into the same bar.
Retail2$Invoice_month <- month(Retail2$InvoiceDate)
# Convert Description itself to character (the earlier spelling "Decription"
# created an extra column and left Description untouched).
Retail2$Description <- as.character(Retail2$Description)

# Aggregate once per product and month. Distinct-customer and distinct-invoice
# counts have to be computed per group: length(unique(...)) written inside
# aes() is evaluated over the whole table and yields one constant for every
# row, so the stacked bars would only reflect the number of rows.
Retail2_monthly <- ddply(
  Retail2,
  .(Description, Invoice_month),
  summarize,
  sumQuantity = sum(Quantity),
  sumAmount = sum(Amount),
  nCustomer = length(unique(CustomerID)),
  nPurchase = length(unique(InvoiceNo))
)

ggplot(Retail2_monthly, aes(x = Invoice_month, y = sumQuantity)) +
  facet_wrap(~Description, ncol = 2) +
  geom_bar(stat = "identity") +
  labs(title = "Sales by month", x = "Month", y = "Sales Volume")

ggplot(Retail2_monthly, aes(x = Invoice_month, y = nCustomer)) +
  facet_wrap(~Description, ncol = 2) +
  geom_bar(stat = "identity") +
  labs(title = "Sales by month", x = "Month", y = "Number of Customer")

ggplot(Retail2_monthly, aes(x = Invoice_month, y = sumAmount)) +
  facet_wrap(~Description, ncol = 2) +
  geom_bar(stat = "identity") +
  labs(title = "Sales by month", x = "Month", y = "Sales Revenue")

ggplot(Retail2_monthly, aes(x = Invoice_month, y = nPurchase)) +
  facet_wrap(~Description, ncol = 2) +
  geom_bar(stat = "identity") +
  labs(title = "Sales by month", x = "Month", y = "Number of Purchases")

# * The sales of products change with time.
# Hour-of-day totals: revenue, units sold and number of distinct customers.
Retail3 <- ddply(
  Retail,
  .(InvoiceTime),
  summarize,
  sumAmount = sum(Amount),
  sumQuantity = sum(Quantity),
  nCustomer = length(unique(CustomerID))
)
# The grouping column (first column) is presented as "InvoiceHour".
colnames(Retail3)[1] <- "InvoiceHour"

# Units sold per hour.
ggplot(Retail3, aes(x = InvoiceHour, y = sumQuantity)) +
  labs(title = "Sales by hours", x = "Hours", y = "Sales Volume") +
  geom_bar(stat = "identity")

# Distinct customers per hour.
ggplot(Retail3, aes(x = InvoiceHour, y = nCustomer)) +
  labs(title = "Sales by hours", x = "Hours", y = "Number of customer") +
  geom_bar(stat = "identity")

# * The busiest hour of the day is around 12 pm for sales volume, and 12 pm
#   for number of customers.
#' Build one row of RFM-style indicators per customer.
#'
#' The indicators are computed from the data passed in as `RFM_raw` (not from
#' a global object) and are matched to customers by CustomerID. Grouped
#' results come back ordered by group key, while the customer rows are kept
#' in order of first appearance, so they must be aligned by key rather than
#' by position.
#'
#' @param RFM_raw Transaction-level data frame with the columns CustomerID,
#'   InvoiceDate (Date), InvoiceNo, Amount, SKU and Quantity.
#' @return A data frame with one row per customer (the customer's first row of
#'   `RFM_raw`, in order of first appearance) followed by these columns, in
#'   this order: First_date, Last_date, Recency, First_purchase, Frequency,
#'   Monetary, AvgM, maxM, Breadth, Tenure, sumQuant. Durations are expressed
#'   in units of 30 days.
getRFMdf <- function(RFM_raw) {
  transactions <- RFM_raw
  tx_key <- as.character(transactions$CustomerID)

  # One row per customer: the first row on which the customer appears.
  RFM_raw <- transactions[!duplicated(transactions$CustomerID), ]
  customer_key <- as.character(RFM_raw$CustomerID)

  # Aggregate `values` per customer and return the results in the row order
  # of RFM_raw (looked up by customer key, never by position).
  per_customer <- function(values, fun) {
    grouped <- tapply(values, tx_key, fun)
    as.numeric(grouped[customer_key])
  }

  # First and last purchase date (dates are aggregated as day counts).
  invoice_day <- as.numeric(transactions$InvoiceDate)
  RFM_raw$First_date <- as.Date(
    per_customer(invoice_day, min), origin = "1970-01-01"
  )
  RFM_raw$Last_date <- as.Date(
    per_customer(invoice_day, max), origin = "1970-01-01"
  )

  # Recency: time from the customer's last purchase to the latest purchase
  # date in the data.
  AsOfDate <- max(RFM_raw$Last_date)
  RFM_raw$Recency <-
    as.numeric(difftime(AsOfDate, RFM_raw$Last_date, units = "days")) / 30
  # First_purchase: time from the customer's first purchase to AsOfDate.
  RFM_raw$First_purchase <-
    as.numeric(difftime(AsOfDate, RFM_raw$First_date, units = "days")) / 30

  # Frequency: number of distinct invoices.
  RFM_raw$Frequency <- per_customer(
    transactions$InvoiceNo, function(x) length(unique(x))
  )

  # Monetary and related: total, mean and maximum Amount per invoice line.
  RFM_raw$Monetary <- per_customer(transactions$Amount, sum)
  RFM_raw$AvgM <- per_customer(transactions$Amount, mean)
  RFM_raw$maxM <- per_customer(transactions$Amount, max)

  # Breadth: number of distinct SKU values bought.
  RFM_raw$Breadth <- per_customer(
    transactions$SKU, function(x) length(unique(x))
  )

  # Tenure: time between first and last purchase.
  RFM_raw$Tenure <- as.numeric(
    difftime(RFM_raw$Last_date, RFM_raw$First_date, units = "days")
  ) / 30

  # sumQuant: total quantity bought (a sum, as the name says; it was
  # previously computed as a mean).
  RFM_raw$sumQuant <- per_customer(as.numeric(transactions$Quantity), sum)

  RFM_raw
}

#' Standardise the indicator columns and shorten their names.
#'
#' @param RFMn Per-customer data frame shaped like the result of getRFMdf():
#'   columns 1-13 are carried over unchanged and columns 14-22 are the numeric
#'   indicators.
#' @return `RFMn` with columns 14-22 centred and scaled by scale(), and with
#'   Recency, Frequency, Monetary, Breadth, Tenure and sumQuant renamed to
#'   R, Fq, M, B, Ten and Q.
getRFMnor <- function(RFMn) {
  scaled <- as.data.frame(scale(RFMn[14:22], center = TRUE))
  RFMn <- cbind(RFMn[, c(1:13)], scaled)
  # plyr's rename() takes c(old = new); the namespace is spelled out so that a
  # rename() from another attached package cannot be picked up instead.
  plyr::rename(
    RFMn,
    c(
      "Recency" = "R", "Frequency" = "Fq", "Monetary" = "M",
      "Breadth" = "B", "Tenure" = "Ten", "sumQuant" = "Q"
    )
  )
}

# score 1 to 5
#' Map a numeric vector to rank-based scores: ceiling(rank / n * 5).
#'
#' @param x Numeric vector.
#' @return Numeric vector of the same length; larger values of `x` receive
#'   larger scores.
score15 <- function(x) {
  ceiling(rank(x) / length(x) * 5)
}

#' Turn normalised indicators into 1-5 scores and a combined RFMScore.
#'
#' @param RFMs Data frame shaped like the result of getRFMnor(): column 14 is
#'   R and columns 15-22 are the remaining indicators.
#' @return Columns 1-13 of `RFMs`, then R (scored on the negated values, so a
#'   smaller recency gets a higher score), then the scores of columns 15-22,
#'   then RFMScore = 100 * R + 10 * Fq + M.
getRFMscore <- function(RFMs) {
  other_scores <- as.data.frame(lapply(RFMs[, c(15:22)], score15))
  RFMs <- cbind(RFMs[, c(1:13)], R = score15(-RFMs[["R"]]), other_scores)
  RFMs$RFMScore <- 100 * RFMs[["R"]] + 10 * RFMs[["Fq"]] + RFMs[["M"]]
  RFMs
}

# Transaction table that the indicators are computed from.
df <- eRetail
# Per-customer indicator table built from the transaction table.
rawRFM <- as.data.frame(getRFMdf(df))

# Take a look at the distributions: three boxplots side by side, printing the
# whisker ends (rows 1 and 5 of $stats) of each.
par(mfrow = c(1, 3))
boxplot(rawRFM[["Recency"]])$stats[c(1, 5), ]
## [1] 0.0 11.1
boxplot(rawRFM[["Frequency"]])$stats[c(1, 5), ]
## [1] 1 11
boxplot(rawRFM[["Monetary"]])$stats[c(1, 5), ]
## [1] -1592.49 3580.13
Strong right-skewness (long right tails) for Recency, Frequency, TotalAmount, Monetary, Breadth, and Tenure.
# Trim extreme customers: keep Recency <= 12, Frequency <= 25 and Monetary
# within [0, 10000].
RFM <- rawRFM[
  which(
    rawRFM$Recency <= 12 &
      rawRFM$Frequency <= 25 &
      rawRFM$Monetary >= 0 &
      rawRFM$Monetary <= 10000
  ),
]

# Summary of Monetary before trimming (recorded output follows).
summary(rawRFM[["Monetary"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -4288.0 293.4 648.1 1898.0 1612.0 279500.0

# Histograms of the trimmed indicators, side by side.
par(mfrow = c(1, 3))
hist(RFM$Recency)
hist(RFM$Frequency)
hist(RFM$Monetary)
# Now the skewness is less severe.
# Take a look at the distributions of the trimmed data: whisker ends (rows 1
# and 5 of $stats) of three side-by-side boxplots.
par(mfrow = c(1, 3))
boxplot(RFM[["Recency"]])$stats[c(1, 5), ]
## [1] 0.0 10.3
boxplot(RFM[["Frequency"]])$stats[c(1, 5), ]
## [1] 1 11
boxplot(RFM[["Monetary"]])$stats[c(1, 5), ]
## [1] 0.00 3250.64
# Data normalization: standardise the indicator columns of the trimmed table.
# The copy is kept under the name df2, which is what is handed to getRFMnor().
df2<- RFM
nRFM<-as.data.frame(getRFMnor(df2))
# Scoring: convert the normalised indicators to 1-5 scores plus RFMScore.
# The copy is kept under the name df3, which is what is handed to
# getRFMscore().
df3 <- nRFM
RFMs<-as.data.frame(getRFMscore(df3))
# Histograms of the R, Fq and M scores in a 1 x 3 layout (the M histogram is
# drawn by the next statement).
par(mfrow = c(1,3))
hist(RFMs$R)
hist(RFMs$Fq)
hist(RFMs$M)

# K-means (5 clusters) on the normalised Recency / Frequency / Monetary values.
# data.frame() names the columns nRFM.R, nRFM.Fq and nRFM.M.
RFM_cluster <- data.frame(nRFM$R, nRFM$Fq, nRFM$M)
# kmeans() starts from randomly chosen centres, so without a fixed seed both
# the partition and the cluster numbers can change from run to run. The
# outputs recorded in the "##" comments below come from an earlier unseeded
# run; re-running may assign different numbers to the clusters.
set.seed(1234)
km <- kmeans(RFM_cluster, centers = 5)
RFM_cluster$cluster <- as.factor(km$cluster)
# Attach the per-customer rows and scores to the cluster labels.
RFM_cluster <- cbind(RFM_cluster, RFMs)

# Clusters in normalised units, then in 1-5 score units.
ggplot(RFM_cluster, aes(x = nRFM.R, y = nRFM.M, color = cluster, size = nRFM.Fq)) +
  geom_point() +
  scale_size_area(max_size = 10) +
  labs(x = "Recency", y = "Monetary")
## Warning: Removed 2877 rows containing missing values (geom_point).
ggplot(RFM_cluster, aes(x = R, y = M, color = cluster, size = Fq)) +
  geom_point() +
  scale_size_area(max_size = 20) +
  labs(x = "Recency", y = "Monetary")

# Score columns profiled for every cluster, selected by name instead of by
# position (these are columns 18 and 20:27 of RFM_cluster).
score_cols <- c("R", "Fq", "M", "AvgM", "maxM", "B", "Ten", "Q", "RFMScore")

# NOTE(review): in the per-cluster scatter plots `size = 10` is written inside
# aes(), so it is mapped as a constant aesthetic rather than setting the point
# size to 10; it is left as is to keep the plots unchanged.

# Cluster 1
RFM_cluster1 <- RFM_cluster[which(RFM_cluster$cluster == 1), ]
ggplot(RFM_cluster1, aes(x = R, y = M, color = Fq, size = 10)) +
  geom_point() +
  labs(x = "Recency", y = "Monetary")
colMeans(RFM_cluster1[, score_cols])
## R Fq M AvgM maxM B
## 1.000000 1.457961 1.826476 3.048301 2.536673 1.955277
## Ten Q RFMScore
## 1.443649 2.783542 116.406082
ggplot(RFM_cluster1, aes(x = RFMScore)) +
  geom_histogram(bins = 50) +
  labs(x = "RFMScore", y = "Count")

# Cluster 2
RFM_cluster2 <- RFM_cluster[which(RFM_cluster$cluster == 2), ]
ggplot(RFM_cluster2, aes(x = R, y = M, color = Fq, size = 10)) +
  geom_point() +
  labs(x = "Recency", y = "Monetary")
colMeans(RFM_cluster2[, score_cols])
## R Fq M AvgM maxM B
## 3.891827 4.537260 4.591346 3.165865 3.765625 4.165865
## Ten Q RFMScore
## 4.271635 3.216346 439.146635
ggplot(RFM_cluster2, aes(x = RFMScore)) +
  geom_histogram(bins = 50) +
  labs(x = "RFMScore", y = "Count")

# Cluster 3
RFM_cluster3 <- RFM_cluster[which(RFM_cluster$cluster == 3), ]
ggplot(RFM_cluster3, aes(x = R, y = M, color = Fq, size = 10)) +
  geom_point() +
  labs(x = "Recency", y = "Monetary")
colMeans(RFM_cluster3[, score_cols])
## R Fq M AvgM maxM B
## 4.365759 4.933852 4.996109 3.498054 4.291829 4.389105
## Ten Q RFMScore
## 4.715953 3.463035 490.910506
ggplot(RFM_cluster3, aes(x = RFMScore)) +
  geom_histogram(bins = 50) +
  labs(x = "RFMScore", y = "Count")

# Cluster 4
RFM_cluster4 <- RFM_cluster[which(RFM_cluster$cluster == 4), ]
ggplot(RFM_cluster4, aes(x = R, y = M, color = Fq, size = 10)) +
  geom_point() +
  labs(x = "Recency", y = "Monetary")
colMeans(RFM_cluster4[, score_cols])
## R Fq M AvgM maxM B
## 1.620329 2.143498 2.379671 3.073244 2.799701 2.430493
## Ten Q RFMScore
## 2.239163 2.979073 185.847534
ggplot(RFM_cluster4, aes(x = RFMScore)) +
  geom_histogram(bins = 50) +
  labs(x = "RFMScore", y = "Count")

# Cluster 5 (its RFMScore histogram is drawn by the next statement)
RFM_cluster5 <- RFM_cluster[which(RFM_cluster$cluster == 5), ]
ggplot(RFM_cluster5, aes(x = R, y = M, color = Fq, size = 10)) +
  geom_point() +
  labs(x = "Recency", y = "Monetary")
colMeans(RFM_cluster5[, score_cols])
## R Fq M AvgM maxM B
## 3.525641 2.326644 2.574136 2.810479 2.698997 2.840022
## Ten Q RFMScore
## 2.680045 2.910256 378.404682
ggplot(RFM_cluster5, aes(x = RFMScore)) +
  geom_histogram(bins = 50) +
  labs(x = "RFMScore", y = "Count")

# ---- Products bought by each cluster ----
# RFM_cluster1 ... RFM_cluster5 hold ONE row per customer, and the StockCode,
# Description, Amount, Quantity and InvoiceNo on that row belong to a single
# invoice line of that customer. Summarising those frames directly therefore
# describes one line per customer, not what the cluster buys (every product
# row ends up with nCustomer equal to nPurchase). The product tables are
# built from ALL transactions of the customers in each cluster instead.
# The tables printed by the previous version are not reproduced here because
# they were computed from the one-row-per-customer frames; re-run to
# regenerate them.

#' Product-level totals for the customers of one cluster.
#'
#' @param cluster_customers Data frame with a CustomerID column (one of the
#'   RFM_cluster1 ... RFM_cluster5 frames).
#' @param transactions Transaction-level data frame; defaults to eRetail, the
#'   table the customer indicators were computed from.
#' @return One row per (StockCode, Description) with sumAmount, sumQuantity,
#'   nCustomer and nPurchase.
summarise_cluster_products <- function(cluster_customers,
                                       transactions = eRetail) {
  in_cluster <- transactions$CustomerID %in% cluster_customers$CustomerID
  ddply(
    transactions[in_cluster, ],
    .(StockCode, Description),
    summarize,
    sumAmount = sum(Amount),
    sumQuantity = sum(Quantity),
    nCustomer = length(unique(CustomerID)),
    nPurchase = length(unique(InvoiceNo))
  )
}

#' Print the top six products by each of the four product metrics.
#'
#' @param products Result of summarise_cluster_products().
#' @return `products`, invisibly.
show_top_products <- function(products) {
  for (metric in c("sumQuantity", "nCustomer", "sumAmount", "nPurchase")) {
    print(head(products[order(-products[[metric]]), ]))
  }
  invisible(products)
}

cluster1 <- summarise_cluster_products(RFM_cluster1)
show_top_products(cluster1)

cluster2 <- summarise_cluster_products(RFM_cluster2)
show_top_products(cluster2)

cluster3 <- summarise_cluster_products(RFM_cluster3)
show_top_products(cluster3)

cluster4 <- summarise_cluster_products(RFM_cluster4)
show_top_products(cluster4)

cluster5 <- summarise_cluster_products(RFM_cluster5)
show_top_products(cluster5)