Blackwell Electronics’ board of directors is considering acquiring Electronidex, a start-up electronics online retailer. The board of directors has asked us to help them better understand the clientele that Electronidex currently is serving and if it would be an optimal partnership. For this task, we’ll identify purchasing patterns using a market basket analysis.
These tasks were carried out following the next steps:You can see a section of the used datasets and the complete code we used in R in the Appendix.
After a thorough analysis, we have the next conclusions:
We start the exploration thinking about which levels of analysis could be useful for analyzing this dataset. We decided to use two levels:
Before starting the creation of rules, we realized that Electronide’s clients are not only final customers, but also companies. In order to split the transactions into the two categories, we follow the next steps.
We classify some products as “main products”, other as “complementary products”. We give them scores according to the next categories:
Firstly, we visualize the frequency of the different categories. The most purchased categories are Desktops, Laptops and Monitors.
This way, we obtain the next 10 top rules (sorted by confidence):
Firstly, we visualize the frequency of the different products The most purchased products are 3 Apple products: the active earpods, the laptop MacBookAir and the iMac. It seems that products from Apple appear to be the best seller in the retail channel.
This way, we obtain the next 10 top rules (sorted by confidence):
Firstly, we visualize the frequency of the different categories. The most purchased categories are again Desktops, Laptops and Monitors.
This way, we obtain the next 10 top rules (sorted by confidence):
Firstly, we visualize the frequency of the different products The most purchased products are the iMac, the HP Laptop and the Cyberpower Gamer Desktop.
This way, we obtain the next 10 top rules (sorted by confidence):
Here you can obtain all the code used for this task.
Includes
#Load Libraries: p_load can install, load, and update packages
if(require("pacman")=="FALSE"){
install.packages("pacman")
}
pacman::p_load(arules, arulesViz, dplyr, ggplot2, treemap,RColorBrewer)
# Load Data
setwd("C:/SARA/Ubiqum/Section2/Task4")
DataTran<- read.transactions("ElectronidexTransactions2017.csv", format = "basket", sep=",", rm.duplicates=TRUE)
# Rename Variables using the same format
namesDataTran<-itemLabels(DataTran)
myLabels<- c("HardDrive_External_1TB_Portable","HardDrive_External_2TBPortable", "ComputerMouse_3Button", "HardDrive_External_3TBPortable","HardDrive_Desktop_5TB","Laptop_Acer_Aspire", "Desktop_Acer", "Monitor_Acer", "ComputerHeadPhones_Ailihen_Stereo", "Laptop_Alienware_AW17R4-7345SLV-PUS17", "Monitor_AOC", "ActiveHeadPhones_APIE_Bluetooh","ActiveHeadPhones_Earpods_Apple", "Laptop_AppleMacBookAir", "Laptop_AppleMacBookPro", "Keyboard_AppleMagic", "SmartHomeDevice_TV_Apple","Keyboard_AppleWired", "Keyboard_AppleWireless", "Monitor_ASUS2", "Laptop_ChromebookASUS", "Desktop_ASUS", "Monitor_ASUS","ComputerCord_AudioCable", "Keyboard_BacklitLED_Gaming", "Accessories_MousePad_Belkin", "Speaker_BoseCompanion", "Printer_Brother", "PrinterInk_Toner_Brother", "Speaker_Bluetooth_Cambridge","PrinterInk_Canon", "Printer_Canon_Office", "Software_ComputerGame", "Speaker_CyberAcoustics", "Desktop_CYBERPOWERGamer", "Desktop_Dell2", "Desktop_Dell", "MouseAndKeyboardCombo_Dell_KM117Wireless", "Laptop_Dell", "Monitor_Dell", "Keyboard_Dell_Wired", "Speaker_DOSS_Touch_Wireless_Bluetooth", "Printer_DYMO_LabelMaker","PrinterInk_DYMOLabelingTape", "MouseAndKeyboardCombo_EagleTec_Wireless", "Laptop_Eluktronics_ProGaming", "PrinterInk_Epson_Black", "Printer_Epson", "ComputerCord_Etekcity_PowerExtension","ComputerCord_EthernetCable", "ComputerTablet_FireHD", "SmartHomeDevice_FireTVStick", "ComputerStand_MonitorFullMotion", "ComputerMouse_GamingProfessional", "ComputerMouse_GenericBlack_3Button", "SmartHomeDevice_GoogleHome", "ComputerStand_HalterAcrylicMonitorStand", "ComputerStand_Halter_MeshMetal_MonitorStand", "ComputerCord_HDMIAdapter", "ComputerCord_HDMICable6ft", "ComputerStand_HeightAdjustable_StandingDesk", "PrinterInk_HP_BlackAndTricolor", "Desktop_HP", "Laptop_HP", "Monitor_HP", "Laptop_HP_Notebook_TouchScreen_PC", "Keyboard_HP_USB", "ComputerMouse_HP_Wireless", "Printer_HP_Wireless", "Desktop_iMac", "Desktop_Intel", "ComputerTablet_iPad", "ComputerTablet_iPadPro", "Accessories_iPhone_Charger_Cable", "Speaker_JBL_Splashproof_Portable_Bluetooth", "ComputerHeadPhones_Kensington", "ComputerTablet_Kindle","ComputerHeadPhones_Koss_Home", "Accessories_LargeMousePad", "Desktop_Lenovo_Computer", "Monitor_LG", "Laptop_LG_Touchscreen", "ComputerMouse_Logitech_3button", "ComputerHeadPhones_HeadSet_Logitech_ClearChat", "MouseAndKeyboardCombo_Logitech_Desktop_MK120", "Keyboard_Logitech", "MouseAndKeyboardCombo_Logitech_MK270_Wireless", "MouseAndKeyboardCombo_Logitech_MK360_Wireless", "MouseAndKeyboardCombo_Logitech_MK550_Wireless_Wave", "Speaker_Logitech_Multimedia", "ComputerHeadPhones_Headset_Logitech_Stereo", "Keyboard_Logitech_Wireless", "ComputerMouse_Logitech_Wireless", "Speaker_Mackie_CR", "ComputerMouse_Microsoft_BasicOptical", "ComputerHeadPhones_HeadSet_Microsoft", "Software_MicrosoftOffice_HomeandStudent2016","MouseAndKeyboardCombo_Microsoft_Wireless_Comfort","MouseAndKeyboardCombo_Microsoft_Wireless_Desktop", "ActiveHeadPhones_MonsterBeats_ByDrDre","ComputerStand_MultiMediaStand","ActiveHeadPhones_Otium_Wireless_Sports_Bluetooth", "ActiveHeadPhones_Panasonic_In-Ear", "ComputerHeadPhones_Panasonic_On-Ear_Stereo_RP-HT21", "ComputerHeadPhones_HeadSet_PC_Gaming", "ActiveHeadPhones_Philips_Flexible_Earhook","ComputerMouse_Redragon_Gaming", "MouseAndKeyboardCombo_Rii_LED_Gaming", "Keyboard_Rii_LED", "Speaker_Rokono_Mini", "SmartHomeDevice_RokuExpress", "ComputerCord_Samsung_ChargingCable", "ComputerTablet_SamsungGalaxy", "Monitor_Samsung", "Monitor_Sceptre","HardDrive_External_Slim_2TB_Portable", "ComputerMouse_Slim_Wireless", "SmartHomeDevice_SmartLightBulb", "Speaker_Sonos", "ComputerCord_USB_Cable", "ComputerCord_VGA_MonitorCable","Monitor_ViewSonic", "ComputerMouse_Wireless_Portable", "ComputerHeadPhones_HeadSet_XIBERIA_Gaming", "ComputerHeadPhones_HeadSet_Zombie_Gaming")
itemLabels(DataTran)<-myLabels
# Create Level
myLevel1<-as.factor(sub("\\_.*", "", myLabels))
myLevel1
# Add Level
DataTran@itemInfo$level1<-myLevel1
# Remove empty rows
DataTran<-DataTran[which(size(DataTran)!= 0)]
Creating groups (Retail / Business)
# We have 9833 transactions
dim(DataTran)[1]
# Create Scores
scores<-c()
for (i in 1:9833){
PAccessories=0
PActiveHeadPhones=0
PComputerCord=0
PComputerHeadPhones=0
PComputerMouse=0
PComputerStand=0
PComputerTablet=0
PDesktop_Laptop=0
PHardDrive=0
PKeyboard=0
PMonitor=0
PMouseAndKeyboardCombo=0
PPrinter=0
PPrinterInk=0
PSmartHomeDevice=0
PSoftware=0
PSpeaker=0
print("New Transaction")
print(i)
if (length(grep("Accessories_", LIST(DataTran)[[i]]))>=1){
PAccessories<-(3^length(grep("Accessories_", LIST(DataTran)[[i]])))
# print("PAccessories")
# print(PAccessories)
}
if (length(grep("ActiveHeadPhones_", LIST(DataTran)[[i]]))>=1){
PActiveHeadPhones<-(3^length(grep("ActiveHeadPhones_", LIST(DataTran)[[i]])))
# print("PActiveHeadPhones")
# print(PActiveHeadPhones)
}
if (length(grep("ComputerCord_", LIST(DataTran)[[i]]))>=1){
PComputerCord<-(3^length(grep("ComputerCord_", LIST(DataTran)[[i]])))
# print("PComputerCord")
# print(PComputerCord)
}
if (length(grep("ComputerHeadPhones_", LIST(DataTran)[[i]]))>=1){
PComputerHeadPhones<-(3^length(grep("ComputerHeadPhones_", LIST(DataTran)[[i]])))
# print("PComputerHeadPhones")
# print(PComputerHeadPhones)
}
if (length(grep("ComputerMouse_", LIST(DataTran)[[i]]))>=1){
PComputerMouse<-(3^length(grep("ComputerMouse_", LIST(DataTran)[[i]])))
# print("PComputerMouse")
# print(PComputerMouse)
}
if (length(grep("ComputerStand_", LIST(DataTran)[[i]]))>=1){
PComputerStand<-(3^length(grep("ComputerStand_", LIST(DataTran)[[i]])))
# print("PComputerStand")
# print(PComputerStand)
}
if (length(grep("ComputerTablet_", LIST(DataTran)[[i]]))>=1){
PComputerTablet<-(3^length(grep("ComputerTablet_", LIST(DataTran)[[i]])))
# print("PComputerTablet")
# print(PComputerTablet)
}
if (length(grep("Desktop_", LIST(DataTran)[[i]])>=1) | length(grep("Laptop_", LIST(DataTran)[[i]])>=1)) {
PDesktop_Laptop<-(10^(length(grep("Desktop_", LIST(DataTran)[[i]]))
+ length(grep("Laptop", LIST(DataTran)[[i]]))))
# print("PDesktop")
# print(PDesktop)
}
if (length(grep("HardDrive_", LIST(DataTran)[[i]]))>=1){
PHardDrive<-(3^length(grep("HardDrive_", LIST(DataTran)[[i]])))
# print("PHardDrive")
# print(PHardDrive)
}
if (length(grep("Keyboard_", LIST(DataTran)[[i]]))>=1){
PKeyboard<-(3^length(grep("Keyboard_", LIST(DataTran)[[i]])))
# print("PKeyboard")
# print(PKeyboard)
}
if (length(grep("Monitor_", LIST(DataTran)[[i]]))>=1){
PMonitor<-(3^length(grep("Monitor_", LIST(DataTran)[[i]])))
# print("PMonitor")
# print(PMonitor)
}
if (length(grep("MouseAndKeyboardCombo_", LIST(DataTran)[[i]]))>=1){
PMouseAndKeyboardCombo<-(3^length(grep("MouseAndKeyboardCombo_", LIST(DataTran)[[i]])))
# print("PMouseAndKeyboardCombo")
# print(PMouseAndKeyboardCombo)
}
if (length(grep("Printer_", LIST(DataTran)[[i]]))>=1){
PPrinter<-(3^length(grep("Printer_", LIST(DataTran)[[i]])))
# print("PPrinter")
# print(PPrinter)
}
if (length(grep("PrinterInk_", LIST(DataTran)[[i]]))>=1){
PPrinterInk<-(1^length(grep("PrinterInk_", LIST(DataTran)[[i]])))
# print("PPrinterInk")
# print(PPrinterInk)
}
if (length(grep("SmartHomeDevice_", LIST(DataTran)[[i]]))>=1){
PSmartHomeDevice<-(3^length(grep("SmartHomeDevice_", LIST(DataTran)[[i]])))
# print("PSmartHomeDevice")
# print(PSmartHomeDevice)
}
if (length(grep("Software_", LIST(DataTran)[[i]]))>=1){
PSoftware<-(3^length(grep("Software_", LIST(DataTran)[[i]])))
# print("PSoftware")
# print(PSoftware)
}
if (length(grep("Speaker_", LIST(DataTran)[[i]]))>=1){
PSpeaker<-(3^length(grep("Speaker_", LIST(DataTran)[[i]])))
# print("PSpeaker")
# print(PSpeaker)
}
finalpuntuation<-sum(PAccessories, PActiveHeadPhones, PComputerCord, PComputerHeadPhones,
PComputerMouse, PComputerStand, PComputerTablet, PDesktop_Laptop, PHardDrive,
PKeyboard, PMonitor, PMouseAndKeyboardCombo, PPrinter,
PPrinterInk, PSmartHomeDevice,PSoftware, PSpeaker)
# print(finalpuntuation)
#scores<-union(scores,finalpuntuation)
scores<-rbind(scores, finalpuntuation)
# print("scores")
# print(scores)
}
# Create groups
numbers_small<-c()
for(i in 1:9833)
{ if(scores[i, ]<40){
numbers_small<-rbind(numbers_small, i)
}
}
DataTran_Small<-DataTran[numbers_small]
numbers_high<-c()
for(i in 1:9833)
{ if(scores[i, ]>=40){
numbers_high<-rbind(numbers_high, i)
}
}
DataTran_High<-DataTran[numbers_high]
Retail Transactions: analysis per category
# We have 18 different product-types
print(levels(itemInfo(DataTran)[["level1"]]))
DataTran_ProdTypes<- aggregate(DataTran, itemInfo(DataTran)[["level1"]])
DataTran_ProdTypes_Small<-DataTran_ProdTypes[numbers_small]
DataTran_ProdTypes_High<-DataTran_ProdTypes[numbers_high]
#### Frequency Plot ####
itemFrequencyPlot(DataTran_ProdTypes_Small, topN = 18, type ="relative", horiz=TRUE, cex.names=0.8,
xlim=c(0,0.4), las=1, col="dark blue",
xlab = "% Transactions containing Products")
#### TreeMap ####
Labels_Small_ProdTypes<-levels(itemInfo(DataTran)[["level1"]])
Size_Small_ProdTypes<-c()
for (i in 1: length(levels(itemInfo(DataTran)[["level1"]]))) {
Size_Small_ProdTypes<-rbind(Size_Small_ProdTypes, itemFrequency(DataTran_ProdTypes_Small)[i] * 5308)
}
Size_Small_ProdTypes<-as.vector(Size_Small_ProdTypes)
dfsmallprod<-data.frame(Labels_Small_ProdTypes, Size_Small_ProdTypes)
treemap(dfsmallprod,index="Labels_Small_ProdTypes", vSize="Size_Small_ProdTypes",title="",palette="Dark2",border.col="#FFFFFF")
#### Rules ####
"%ni%" <- Negate("%in%")
# Rules with No Laptops/ Desktops on the right side
Rules_Small_ProductType<-apriori(DataTran_ProdTypes_Small,
parameter = list(support=0.025, confidence=0.1, minlen=2),
appearance = list(rhs=c("Accessories", "ActiveHeadPhones",
"ComputerCord","ComputerHeadPhones","ComputerMouse",
"ComputerStand", "ComputerTablet","HardDrive", "Keyboard",
"Monitor","MouseAndKeyboardCombo","Printer", "PrinterInk",
"SmartHomeDevice", "Software", "Speaker")))
norhs_laptpos_desktop<-myLabels[!myLabels %in%
# Inspect
inspect(sort(Rules_Small_ProductType, by='confidence', decreasing = T)[1:10])
# Rules Plots
plot(sort(Rules_Small_ProductType, by='confidence', decreasing = T)[1:10], method="grouped")
plot(sort(Rules_Small_ProductType, by='confidence', decreasing = T)[1:10], method="graph", engine="interactive")
Retail Transactions: analysis per product
#### Frequency Plot ####
itemFrequencyPlot(DataTran_Small, topN = 18, type ="relative", horiz=TRUE, cex.names=0.8,
xlim=c(0,0.2), las=1, col="dark blue",
xlab = "% Transactions containing Items")
#### TreeMap ####
Labels_Small_Items<-itemInfo(DataTran_Small)$level1
Labels_Small_ItemsProd<-itemInfo(DataTran_Small)$labels
Size_Small_Items<-c()
for (i in 1: length(itemInfo(DataTran_Small)[["labels"]])) {
Size_Small_Items<-rbind(Size_Small_Items, itemFrequency(DataTran_Small)[i] * 5308)
}
Size_Small_Items<-as.vector(Size_Small_Items)
Labels_Small_ItemsProd<-c(itemInfo(DataTran_Small)$level1)
dfsmallitem<-data.frame(itemInfo(DataTran_Small)$level1, itemInfo(DataTran_Small)$labels, Size_Small_Items)
treemap(dfsmallitem,index=c("itemInfo.DataTran_Small..level1","itemInfo.DataTran_Small..labels"), vSize="Size_Small_Items",title="",palette="Dark2",border.col="#FFFFFF")
#### Rules ####
# No Laptops/ Desktops on the right side
norhs_laptpos_desktop<-myLabels[!myLabels %in%
c("Laptop_Acer_Aspire", "Laptop_Alienware_AW17R4-7345SLV-PUS17",
"Laptop_AppleMacBookAir","Laptop_AppleMacBookPro",
"Laptop_ChromebookASUS", "Laptop_Dell",
"Laptop_Eluktronics_ProGaming", "Laptop_HP",
"Laptop_HP_Notebook_TouchScreen_PC", "Laptop_LG_Touchscreen",
"Desktop_Acer" ,"Desktop_ASUS", "Desktop_CYBERPOWERGamer",
"Desktop_Dell","Desktop_Dell2", "Desktop_HP", "Desktop_iMac",
"Desktop_Intel","Desktop_Lenovo_Computer")]
RulesSmallItems<-apriori(DataTran_Small, parameter = list(support=0.001, confidence=0.1, minlen=2),
appearance = list(rhs=norhs_laptpos_desktop))
inspect(sort(RulesSmallItems, by='confidence', decreasing = T)[1:10])
# Rules Plots
plot(sort(RulesSmallItems, by='confidence', decreasing = T)[1:10], method="grouped")
plot(sort(RulesSmallItems, by='confidence', decreasing = T)[1:10], method="graph", engine="interactive")
Business Transactions: analysis per category
#### Frequency Plot ####
itemFrequencyPlot(DataTran_ProdTypes_High, topN = 18, type ="relative", horiz=TRUE, cex.names=0.8,
xlim=c(0,1), las=1, col="dark blue",
xlab = "% Transactions containing Products")
#### TreeMap ####
Labels_High_ProdTypes<-levels(itemInfo(DataTran)[["level1"]])
Size_High_ProdTypes<-c()
for (i in 1: length(levels(itemInfo(DataTran)[["level1"]]))) {
Size_High_ProdTypes<-rbind(Size_High_ProdTypes, itemFrequency(DataTran_ProdTypes_High)[i] * 5308)
}
Size_High_ProdTypes<-as.vector(Size_High_ProdTypes)
dfhighprod<-data.frame(Labels_High_ProdTypes, Size_High_ProdTypes)
treemap(dfhighprod,index="Labels_High_ProdTypes", vSize="Size_High_ProdTypes",title="",palette="Dark2",border.col="#FFFFFF")
#### Rules ####
"%ni%" <- Negate("%in%")
# Rules with No Laptops/ Desktops on the right side
Rules_High_ProductType<-apriori(DataTran_ProdTypes_High,
parameter = list(support=0.025, confidence=0.1, minlen=2),
appearance = list(rhs=c("Accessories", "ActiveHeadPhones",
"ComputerCord","ComputerHeadPhones","ComputerMouse",
"ComputerStand", "ComputerTablet","HardDrive",
"Keyboard", "Monitor","MouseAndKeyboardCombo","Printer",
"PrinterInk", "SmartHomeDevice", "Software", "Speaker")))
inspect(sort(Rules_High_ProductType, by='confidence', decreasing = T)[1:10])
# Rules Plots
plot(sort(Rules_High_ProductType, by='confidence', decreasing = T)[1:10], method="grouped")
plot(sort(Rules_High_ProductType, by='confidence', decreasing = T)[1:10], method="graph", engine="interactive")
Business Transactions: analysis per product
#### Frequency Plot ####
itemFrequencyPlot(DataTran_High, topN = 18, type ="relative", horiz=TRUE, cex.names=0.8,
xlim=c(0,0.5), las=1, col="dark blue",
xlab = "% Transactions containing Items")
#### Rules ####
# No Laptops/ Desktops on the right side
norhs_laptpos_desktop2<-myLabels[!myLabels %in%
c("Laptop_Acer_Aspire",
"Laptop_Alienware_AW17R4-7345SLV-PUS17",
"Laptop_AppleMacBookAir","Laptop_AppleMacBookPro",
"Laptop_ChromebookASUS", "Laptop_Dell",
"Laptop_Eluktronics_ProGaming","Laptop_HP",
"Laptop_HP_Notebook_TouchScreen_PC",
"Laptop_LG_Touchscreen","Desktop_Acer",
"Desktop_ASUS", "Desktop_CYBERPOWERGamer",
"Desktop_Dell","Desktop_Dell2", "Desktop_HP",
"Desktop_iMac", "Desktop_Intel",
"Desktop_Lenovo_Computer")]
RulesHighItems<-apriori(DataTran_High, parameter = list(support=0.001, confidence=0.1, minlen=2),
appearance = list(rhs=norhs_laptpos_desktop))
inspect(sort(RulesHighItems, by='confidence', decreasing = T)[1:10])
# Rules Plots
plot(sort(RulesHighItems, by='confidence', decreasing = T)[1:10], method="grouped")
plot(sort(RulesHighItems, by='confidence', decreasing = T)[1:10], method="graph", engine="interactive")