Main Body
Pre-process
Packages
We used this packages: (readr)
(dplyr)
(ggplot2)
(plyr)
(sqldf)
(arulesViz)
(arules)
(plotly)
(dplyr)
(lubridate)
(stringr)
(knitr)
(kableExtra)
Import Data
orders <- read.csv("orders_translated.csv", header = TRUE, sep = ";")
lineitem <- read.csv("lineitems.csv", header = TRUE, sep = ";")
trans <- read.csv2("trans.csv")
brands = readLines("products_with_brands.txt")
## Warning in readLines("products_with_brands.txt"): incomplete final line
## found on 'products_with_brands.txt'
categories = readLines("products_with_category.txt")
## Warning in readLines("products_with_category.txt"): incomplete final line
## found on 'products_with_category.txt'
Exploration and Preparation
orders
glimpse(orders)
## Observations: 226,909
## Variables: 4
## $ id_order <int> 241319, 241423, 242832, 243330, 243784, 245275, 245…
## $ created_date <fct> 2017-01-02T13:35:40Z, 2017-11-06T13:10:02Z, 2017-12…
## $ state <fct> Cancelled, Completed, Completed, Completed, Cancell…
## $ total_paid <fct> " 44,99", " 136,15", " 15,76", " 84,98",…
lineitem
glimpse(lineitem)
## Observations: 293,983
## Variables: 7
## $ id <int> 1119109, 1119110, 1119111, 1119112, 1119113, 11…
## $ id_order <int> 299539, 299540, 299541, 299542, 299543, 295310,…
## $ product_id <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ product_quantity <int> 1, 1, 1, 1, 1, 10, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1…
## $ sku <fct> OTT0127, LGE0037, PAR0065, WDT0309, JBL0098, WD…
## $ date <fct> 2017-01-01T00:07:19Z, 2017-01-01T00:19:45Z, 201…
## $ unit_price <fct> " 18,99", " 399,00", " 474,05", " 68,…
trans
glimpse(trans)
## Observations: 10,453
## Variables: 1
## $ items <fct> "IFX0049,TUC0302,IFX0014,MOP0083", "CRU0045,OWC0048,SEA003…
brands
Transformation .txt to data frame. Rename the columns. Remove first row.
brands = as.data.frame(do.call(rbind, strsplit(brands, split=" {2,10}")),
stringsAsFactors=FALSE)
names(brands) = c("labels", "Brand")
brands = brands[-c(1),]
glimpse(brands)
## Observations: 4,696
## Variables: 2
## $ labels <chr> "RAI0001", "APP0017", "APP0019", "APP0066", "KIN0001", "A…
## $ Brand <chr> "Rain Design", "Apple", "Apple", "Apple", "Kingston", "Ap…
categories
Transformation .txt to data frame. Rename the columns. Remove first row.
categories = as.data.frame(do.call(rbind, strsplit(categories, split=" {2,10}")),
stringsAsFactors=FALSE)
names(categories) = c("labels", "Category")
categories = categories[-c(1),]
glimpse(categories)
## Observations: 4,207
## Variables: 2
## $ labels <chr> "RAI0001", "APP0017", "APP0019", "APP0066", "KIN0001", …
## $ Category <chr> "accessories", "accessories", "accessories", "accessori…
Pre-Process 1
Select Completed orders - Join Tables
For this goal, we considered only the completed orders, so we had to filter them. And we made a join to see all the information in the same table.
# Select only Completed orders
orders <- orders[which(orders$state %in% "Completed"),]
# Inner Join by id_order
orders_items <- merge(x=lineitem, y=orders, by="id_order")
Convert variables
# convert unit price and total_paid as number, id_order as a factor
orders_items$unit_price <- as.character(orders_items$unit_price)
orders_items$unit_price<- as.numeric(sub(",", ".", orders_items$unit_price , fixed = TRUE))
orders_items$total_paid <- as.character(orders_items$total_paid)
orders_items$total_paid<- as.numeric(sub(",", ".", orders_items$total_paid , fixed = TRUE))
orders_items$id_order <- as.factor(orders_items$id_order)
orders_items$date <- as.Date(orders_items$date)
PxQ vs Total Paid
Total.paid is not equal to the price x unit_price. So, we created a new column (pxq) to check the difference.
#Create pxq: Price x Quantity
orders_items$pxq <- orders_items$unit_price * orders_items$product_quantity
#SUM PxQ
group <- aggregate(orders_items$pxq, by=list(id_order=orders_items$id_order), FUN=sum)
group <- group[order(-group$x),]
#Merge: total_paid vs PxQ ----
Check_total <- merge(x=group,y=orders_items, by="id_order")
Check_total <- Check_total[c(1,2,11)]
Check_total$dif <- Check_total$total_paid - Check_total$x
Check_total$dif <- round(Check_total$dif)
colnames(Check_total)[colnames(Check_total)=="x"] <- "pxq"
head(arrange(Check_total, dif))
## id_order pxq total_paid dif
## 1 432303 426.86 261.86 -165
## 2 289685 135.97 15.95 -120
## 3 289685 135.97 15.95 -120
## 4 289685 135.97 15.95 -120
## 5 319901 142.44 60.43 -82
## 6 319901 142.44 60.43 -82
This difference maybe corresponds to delivery costs, fees of transactions, etc. This is the reason why we considered “pxq”.
Quantity Items x Order
We created a new column to see how many items per transactions there are.
# Quantity Different Products per transaccion ----
Quant_items_x_order <- sqldf("SELECT id_order, COUNT(*) as Freq
FROM orders_items
GROUP BY id_order
HAVING freq > 1")
#Create Table with IDs and Orders ----
trans_id_orders <- trans
trans_id_orders$id_order <- Quant_items_x_order$id_order
trans_id_orders$quantity_dif_products <- Quant_items_x_order$Freq
#Add Date to transactions
orders_items$date <- as.Date(orders_items$date)
tt <- sqldf("SELECT id_order, date, pxq
FROM orders_items
GROUP BY id_order")
## New Table: id, items, date, pxq, quantity.
new_trans <- merge(x=trans_id_orders, y=tt, by="id_order")
new_trans$Month_Yr <- format(as.Date(new_trans$date), "%Y-%m")
Comparison: Blackwell vs. Other Company
Blackwell (table)
existing <- read.csv("existingProductAttributes.csv")
existing <- existing[c("Product.Type", "Product..", "Price", "Profit.margin", "Volume")]
existing$profit <- existing$Price * existing$Profit.margin * existing$Volume
existing <- existing[order(-existing$profit),]
existing$ExistNew <- paste0("Exist", existing$ExistNew)
existing$Profit.margin <- NULL
names(existing) = c("Product.Type", "Product", "Price", "Volume", "Profit", "Type")
existing[-which(existing$Product.Type %in% "Extended Warranty"),] %>%
mutate(Total_exist = Price * Volume) %>%
group_by(Product.Type) %>%
summarise(Total_Volume = sum(Volume),
Total_exist_sum = sum(Total_exist)) %>%
mutate(Total_Profits_Dol = paste(round((Total_exist_sum), 0),"$")) %>%
arrange(desc(Total_exist_sum)) -> categories_analysis_exist
kable(categories_analysis_exist %>% select(-Total_exist_sum)) %>%
kable_styling(bootstrap_options = "striped", full_width = F, position = "center")
| Product.Type | Total_Volume | Total_Profits_Dol |
|---|---|---|
| Game Console | 8720 | 1412827 $ |
| Display | 2428 | 1281467 $ |
| Accessories | 25548 | 1092806 $ |
| Smartphone | 1808 | 794440 $ |
| Tablet | 948 | 472092 $ |
| Printer | 2036 | 415124 $ |
| Software | 4268 | 401106 $ |
| Laptop | 516 | 398729 $ |
| PC | 116 | 85415 $ |
| Netbook | 92 | 32319 $ |
| Printer Supplies | 44 | 1507 $ |
Blackwell (graph)
p1 <- categories_analysis_exist %>%
arrange(Total_exist_sum) %>%
ggplot(aes(x = reorder(Product.Type, Total_exist_sum))) +
geom_col(aes(y = Total_exist_sum), fill = "green4") +
geom_col(aes(y = -Total_Volume), fill = "firebrick1") +
geom_label(aes(x = Product.Type, y = Total_exist_sum,
label = Total_Profits_Dol),
hjust = 0,
vjust = 0.4,
colour = "green4",
fill = NA,
label.size = NA,
size = 3.5) +
geom_label(aes(x = Product.Type, y = -Total_Volume,
label = round(Total_Volume, 0)),
hjust = 1,
vjust = 0.4,
colour = "firebrick1",
fill = NA,
label.size = NA,
size = 3.5) +
coord_flip() +
scale_y_continuous(limits = c(-400000, 2300000), labels = NULL) +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_text()) +
labs(title= "Blackwell overview", subtitle = "Total Volume (items) vs. Total PxQ")
p1
Other Company (table)
#Merge with Brands
orders_items <- sqldf("SELECT * FROM orders_items LEFT JOIN brands ON (orders_items.sku = brands.labels)")
orders_items <- unique(orders_items)
#Merge with categories
orders_items <- sqldf("SELECT * FROM orders_items LEFT JOIN categories ON (orders_items.sku = categories.labels)")
orders_items %>%
mutate(Total_profit = pxq) %>%
group_by(Category) %>%
summarise(Total_Volume_ = sum(product_quantity),
Total_Profits = sum(pxq)) %>%
mutate(Total_Profits_Dols = paste(round((Total_Profits), 0),"$")) %>%
arrange(desc(Total_Profits)) -> categories_analysis
kable(categories_analysis %>% select(-Total_Profits)) %>%
kable_styling(bootstrap_options = "striped", full_width = F, position = "center")
| Category | Total_Volume_ | Total_Profits_Dols |
|---|---|---|
| accessories | 50784 | 4408129 $ |
| laptop | 1491 | 2839010 $ |
| pc | 1220 | 2627820 $ |
| NA | 5849 | 2169328 $ |
| smartphone | 2374 | 1668345 $ |
| display | 1985 | 744930 $ |
| tablet | 1807 | 672402 $ |
| smartwhatch | 625 | 224265 $ |
| other | 1498 | 181152 $ |
| extended warranty | 1408 | 107823 $ |
| camera | 232 | 29869 $ |
| service | 283 | 27295 $ |
| software | 44 | 4974 $ |
| printer | 31 | 4363 $ |
Other Company (graph)
p2 <- categories_analysis %>%
arrange(Total_Profits) %>%
ggplot(aes(x = reorder(Category, Total_Profits))) +
geom_col(aes(y = Total_Profits), fill = "green4") +
geom_col(aes(y = -Total_Volume_), fill = "firebrick1") +
geom_label(aes(x = Category, y = Total_Profits,
label = Total_Profits_Dols),
hjust = 0,
vjust = 0.4,
colour = "green4",
fill = NA,
label.size = NA,
size = 3.5) +
geom_label(aes(x = Category, y = -Total_Volume_,
label = round(Total_Volume_, 0)),
hjust = 1,
vjust = 0.4,
colour = "firebrick1",
fill = NA,
label.size = NA,
size = 3.5) +
coord_flip() +
scale_y_continuous(limits = c(-350000, 5000000), labels = NULL) +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_text()) +
labs(title= "Other Company overview", subtitle = "Total Volume (items) vs. Total PxQ")
p2
Analysis of the Apple Laptop sales
How many laptops are Apple?
many_apple_laptop <- orders_items[which(orders_items$Category %in% "laptop"),] %>%
group_by(Brand) %>%
summarise(total = sum(product_quantity))
kable(head(many_apple_laptop)) %>%
kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
| Brand | total |
|---|---|
| NA | 3 |
| Apple | 1468 |
| Pack | 20 |
We have sold 1468 Apple laptop, that means the 98.4% of the laptops are Apple.
How many items are sold with one Apple Laptop?
t <- merge(orders_items, Quant_items_x_order, by = "id_order", x.all=TRUE)
t <- t[which(t$Brand %in% "Apple" & t$Category %in% "laptop"),]
q <- sqldf("SELECT Freq as Quantity, count(*) as Times FROM t GROUP BY
Freq")
kable(q) %>%
kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
| Quantity | Times |
|---|---|
| 2 | 300 |
| 3 | 122 |
| 4 | 48 |
| 5 | 28 |
| 6 | 12 |
| 7 | 10 |
| 8 | 2 |
| 9 | 3 |
| 11 | 1 |
This is how many Apple laptops are sold with more than 1 item. And the rest 965 are sold by themselves. That means 35% of the Apple laptops are sold with another item.
When are the Apple laptops sold?
plot_Laptop <- sqldf("SELECT *
FROM orders_items
WHERE Category = 'laptop'")
# Plot transactions Laptop per Month
ggplot(plot_Laptop, aes(x=date, y=pxq)) +
geom_line(aes(group=date))+
stat_summary(fun.y = sum, geom ='line')
This graph shows that during the week of blackfriday and cybermonday there is a clear improvement of the sales.
day_sale <- plot_Laptop %>%
group_by(date) %>%
summarise(total = sum(product_quantity))
day_sale <- day_sale[order(-day_sale$total),]
kable(head(day_sale)) %>%
kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
| date | total |
|---|---|
| 2017-11-24 | 74 |
| 2017-11-23 | 43 |
| 2017-11-27 | 42 |
| 2017-09-25 | 22 |
| 2017-12-28 | 19 |
| 2017-11-25 | 15 |
This table shows that the highest amount of sales happened during november and december, and some of them happened in september. So, we suggest to launch the Apple laptop before september to boost our sales during these days and have time for this product to adjust to the market.
Relationship Laptop vs Category/Brands
Model 1
Model
# Load transactions as transactions
transactions <- read.transactions("trans.csv", sep = ",")
# Useful functions to explore transactions
# inspect(transactions)
# length(transactions)
# size(transactions)
# LIST(transactions)
# itemLabels(transactions)
# itemFrequencyPlot(transactions, topN=10)
# itemFrequencyPlot(transactions, topN=10)
# Transform data as data.frame to join with categories
# Data of transactions
data <- as.data.frame(transactions@itemInfo)
data$labels <- as.factor(data$labels)
# Join with Categories
trans_category <- sqldf("SELECT data.labels, categories.labels, categories.Category
FROM data LEFT JOIN categories ON data.labels = categories.labels")
# Put Categories inside transactions
Transaction_Catego <- aggregate(transactions, by = trans_category$Category)
# Run the Model
RulesCatego <- apriori (Transaction_Catego, parameter = list(supp = 0.073,
conf = 0.91))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.91 0.1 1 none FALSE TRUE 5 0.073 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 763
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[13 item(s), 10454 transaction(s)] done [0.00s].
## sorting and recoding items ... [13 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [92 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
#Remove duplicates
redundat_category <- is.redundant(RulesCatego)
RulesCatego <- RulesCatego[!redundat_category]
#Select only laptops
LaptopRules <- subset(RulesCatego, items %in% "laptop")
Inspect
inspect(LaptopRules[1:5])
## lhs rhs support confidence lift
## [1] {software} => {laptop} 0.07413430 0.9835025 8.462169
## [2] {display,software} => {laptop} 0.07308207 1.0000000 8.604115
## [3] {laptop,software} => {display} 0.07308207 0.9858065 9.292715
## [4] {display,laptop} => {software} 0.07308207 0.9695431 12.862442
## [5] {laptop,software} => {accessories} 0.07375167 0.9948387 1.044076
## count
## [1] 775
## [2] 764
## [3] 764
## [4] 764
## [5] 771
Graph
plot(LaptopRules[1:10], method="graph", control=list(type="items"))
## Warning: Unknown control parameters: type
## Available control parameters (with default values):
## main = Graph for 10 rules
## nodeColors = c("#66CC6680", "#9999CC80")
## nodeCol = c("#EE0000FF", "#EE0303FF", "#EE0606FF", "#EE0909FF", "#EE0C0CFF", "#EE0F0FFF", "#EE1212FF", "#EE1515FF", "#EE1818FF", "#EE1B1BFF", "#EE1E1EFF", "#EE2222FF", "#EE2525FF", "#EE2828FF", "#EE2B2BFF", "#EE2E2EFF", "#EE3131FF", "#EE3434FF", "#EE3737FF", "#EE3A3AFF", "#EE3D3DFF", "#EE4040FF", "#EE4444FF", "#EE4747FF", "#EE4A4AFF", "#EE4D4DFF", "#EE5050FF", "#EE5353FF", "#EE5656FF", "#EE5959FF", "#EE5C5CFF", "#EE5F5FFF", "#EE6262FF", "#EE6666FF", "#EE6969FF", "#EE6C6CFF", "#EE6F6FFF", "#EE7272FF", "#EE7575FF", "#EE7878FF", "#EE7B7BFF", "#EE7E7EFF", "#EE8181FF", "#EE8484FF", "#EE8888FF", "#EE8B8BFF", "#EE8E8EFF", "#EE9191FF", "#EE9494FF", "#EE9797FF", "#EE9999FF", "#EE9B9BFF", "#EE9D9DFF", "#EE9F9FFF", "#EEA0A0FF", "#EEA2A2FF", "#EEA4A4FF", "#EEA5A5FF", "#EEA7A7FF", "#EEA9A9FF", "#EEABABFF", "#EEACACFF", "#EEAEAEFF", "#EEB0B0FF", "#EEB1B1FF", "#EEB3B3FF", "#EEB5B5FF", "#EEB7B7FF", "#EEB8B8FF", "#EEBABAFF", "#EEBCBCFF", "#EEBDBDFF", "#EEBFBFFF", "#EEC1C1FF", "#EEC3C3FF", "#EEC4C4FF", "#EEC6C6FF", "#EEC8C8FF", "#EEC9C9FF", "#EECBCBFF", "#EECDCDFF", "#EECFCFFF", "#EED0D0FF", "#EED2D2FF", "#EED4D4FF", "#EED5D5FF", "#EED7D7FF", "#EED9D9FF", "#EEDBDBFF", "#EEDCDCFF", "#EEDEDEFF", "#EEE0E0FF", "#EEE1E1FF", "#EEE3E3FF", "#EEE5E5FF", "#EEE7E7FF", "#EEE8E8FF", "#EEEAEAFF", "#EEECECFF", "#EEEEEEFF")
## edgeCol = c("#474747FF", "#494949FF", "#4B4B4BFF", "#4D4D4DFF", "#4F4F4FFF", "#515151FF", "#535353FF", "#555555FF", "#575757FF", "#595959FF", "#5B5B5BFF", "#5E5E5EFF", "#606060FF", "#626262FF", "#646464FF", "#666666FF", "#686868FF", "#6A6A6AFF", "#6C6C6CFF", "#6E6E6EFF", "#707070FF", "#727272FF", "#747474FF", "#767676FF", "#787878FF", "#7A7A7AFF", "#7C7C7CFF", "#7E7E7EFF", "#808080FF", "#828282FF", "#848484FF", "#868686FF", "#888888FF", "#8A8A8AFF", "#8C8C8CFF", "#8D8D8DFF", "#8F8F8FFF", "#919191FF", "#939393FF", "#959595FF", "#979797FF", "#999999FF", "#9A9A9AFF", "#9C9C9CFF", "#9E9E9EFF", "#A0A0A0FF", "#A2A2A2FF", "#A3A3A3FF", "#A5A5A5FF", "#A7A7A7FF", "#A9A9A9FF", "#AAAAAAFF", "#ACACACFF", "#AEAEAEFF", "#AFAFAFFF", "#B1B1B1FF", "#B3B3B3FF", "#B4B4B4FF", "#B6B6B6FF", "#B7B7B7FF", "#B9B9B9FF", "#BBBBBBFF", "#BCBCBCFF", "#BEBEBEFF", "#BFBFBFFF", "#C1C1C1FF", "#C2C2C2FF", "#C3C3C4FF", "#C5C5C5FF", "#C6C6C6FF", "#C8C8C8FF", "#C9C9C9FF", "#CACACAFF", "#CCCCCCFF", "#CDCDCDFF", "#CECECEFF", "#CFCFCFFF", "#D1D1D1FF", "#D2D2D2FF", "#D3D3D3FF", "#D4D4D4FF", "#D5D5D5FF", "#D6D6D6FF", "#D7D7D7FF", "#D8D8D8FF", "#D9D9D9FF", "#DADADAFF", "#DBDBDBFF", "#DCDCDCFF", "#DDDDDDFF", "#DEDEDEFF", "#DEDEDEFF", "#DFDFDFFF", "#E0E0E0FF", "#E0E0E0FF", "#E1E1E1FF", "#E1E1E1FF", "#E2E2E2FF", "#E2E2E2FF", "#E2E2E2FF")
## alpha = 0.5
## cex = 1
## itemLabels = TRUE
## labelCol = #000000B3
## measureLabels = FALSE
## precision = 3
## layout = NULL
## layoutParams = list()
## arrowSize = 0.5
## engine = igraph
## plot = TRUE
## plot_options = list()
## max = 100
## verbose = FALSE
Observations
Software and Displays have a high correlation with Laptop, with a high lift.
Laptop also has a high correlation with Accesories, but has low lift.
There is a high relation of purchase between sofware, display and apple laptops.
This are the brands sold for Displays:
display <- orders_items[which(orders_items$Category %in% "display"),] %>%
group_by(Brand) %>%
summarise(total = sum(product_quantity))
kable(display)
| Brand | total |
|---|---|
| NA | 9 |
| BenQ | 154 |
| Dell | 689 |
| Eizo | 39 |
| Elgato | 7 |
| Jawbone | 208 |
| LG | 764 |
| Samsung | 88 |
| Wacom | 24 |
| Withings | 3 |
Model 2
For this model we lower the parameters to include other items that were sold less.
Model
# Run the Model
RulesCatego_less_param <- apriori (Transaction_Catego, parameter = list(supp = 0.05,
conf = 0.91))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.91 0.1 1 none FALSE TRUE 5 0.05 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 522
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[13 item(s), 10454 transaction(s)] done [0.00s].
## sorting and recoding items ... [13 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 7 8 9 10
## Warning in apriori(Transaction_Catego, parameter = list(supp = 0.05, conf =
## 0.91)): Mining stopped (maxlen reached). Only patterns up to a length of 10
## returned!
## done [0.00s].
## writing ... [51976 rule(s)] done [0.02s].
## creating S4 object ... done [0.03s].
#Remove duplicates
redundat_category_less_param <- is.redundant(RulesCatego_less_param)
RulesCatego_less_param <- RulesCatego_less_param[!redundat_category_less_param]
#Select only laptops
LaptopRules_les_param <- subset(RulesCatego_less_param, items %in% "laptop")
Inspect
inspect(LaptopRules_les_param[1:5])
## lhs rhs support confidence lift count
## [1] {printer} => {laptop} 0.07289076 0.9719388 8.362673 762
## [2] {software} => {laptop} 0.07413430 0.9835025 8.462169 775
## [3] {camera} => {laptop} 0.07289076 0.9513109 8.185188 762
## [4] {printer,software} => {laptop} 0.07289076 1.0000000 8.604115 762
## [5] {laptop,printer} => {software} 0.07289076 1.0000000 13.266497 762
Graph
plot(LaptopRules_les_param[1:10], method="graph", control=list(type="items"))
## Warning: Unknown control parameters: type
## Available control parameters (with default values):
## main = Graph for 10 rules
## nodeColors = c("#66CC6680", "#9999CC80")
## nodeCol = c("#EE0000FF", "#EE0303FF", "#EE0606FF", "#EE0909FF", "#EE0C0CFF", "#EE0F0FFF", "#EE1212FF", "#EE1515FF", "#EE1818FF", "#EE1B1BFF", "#EE1E1EFF", "#EE2222FF", "#EE2525FF", "#EE2828FF", "#EE2B2BFF", "#EE2E2EFF", "#EE3131FF", "#EE3434FF", "#EE3737FF", "#EE3A3AFF", "#EE3D3DFF", "#EE4040FF", "#EE4444FF", "#EE4747FF", "#EE4A4AFF", "#EE4D4DFF", "#EE5050FF", "#EE5353FF", "#EE5656FF", "#EE5959FF", "#EE5C5CFF", "#EE5F5FFF", "#EE6262FF", "#EE6666FF", "#EE6969FF", "#EE6C6CFF", "#EE6F6FFF", "#EE7272FF", "#EE7575FF", "#EE7878FF", "#EE7B7BFF", "#EE7E7EFF", "#EE8181FF", "#EE8484FF", "#EE8888FF", "#EE8B8BFF", "#EE8E8EFF", "#EE9191FF", "#EE9494FF", "#EE9797FF", "#EE9999FF", "#EE9B9BFF", "#EE9D9DFF", "#EE9F9FFF", "#EEA0A0FF", "#EEA2A2FF", "#EEA4A4FF", "#EEA5A5FF", "#EEA7A7FF", "#EEA9A9FF", "#EEABABFF", "#EEACACFF", "#EEAEAEFF", "#EEB0B0FF", "#EEB1B1FF", "#EEB3B3FF", "#EEB5B5FF", "#EEB7B7FF", "#EEB8B8FF", "#EEBABAFF", "#EEBCBCFF", "#EEBDBDFF", "#EEBFBFFF", "#EEC1C1FF", "#EEC3C3FF", "#EEC4C4FF", "#EEC6C6FF", "#EEC8C8FF", "#EEC9C9FF", "#EECBCBFF", "#EECDCDFF", "#EECFCFFF", "#EED0D0FF", "#EED2D2FF", "#EED4D4FF", "#EED5D5FF", "#EED7D7FF", "#EED9D9FF", "#EEDBDBFF", "#EEDCDCFF", "#EEDEDEFF", "#EEE0E0FF", "#EEE1E1FF", "#EEE3E3FF", "#EEE5E5FF", "#EEE7E7FF", "#EEE8E8FF", "#EEEAEAFF", "#EEECECFF", "#EEEEEEFF")
## edgeCol = c("#474747FF", "#494949FF", "#4B4B4BFF", "#4D4D4DFF", "#4F4F4FFF", "#515151FF", "#535353FF", "#555555FF", "#575757FF", "#595959FF", "#5B5B5BFF", "#5E5E5EFF", "#606060FF", "#626262FF", "#646464FF", "#666666FF", "#686868FF", "#6A6A6AFF", "#6C6C6CFF", "#6E6E6EFF", "#707070FF", "#727272FF", "#747474FF", "#767676FF", "#787878FF", "#7A7A7AFF", "#7C7C7CFF", "#7E7E7EFF", "#808080FF", "#828282FF", "#848484FF", "#868686FF", "#888888FF", "#8A8A8AFF", "#8C8C8CFF", "#8D8D8DFF", "#8F8F8FFF", "#919191FF", "#939393FF", "#959595FF", "#979797FF", "#999999FF", "#9A9A9AFF", "#9C9C9CFF", "#9E9E9EFF", "#A0A0A0FF", "#A2A2A2FF", "#A3A3A3FF", "#A5A5A5FF", "#A7A7A7FF", "#A9A9A9FF", "#AAAAAAFF", "#ACACACFF", "#AEAEAEFF", "#AFAFAFFF", "#B1B1B1FF", "#B3B3B3FF", "#B4B4B4FF", "#B6B6B6FF", "#B7B7B7FF", "#B9B9B9FF", "#BBBBBBFF", "#BCBCBCFF", "#BEBEBEFF", "#BFBFBFFF", "#C1C1C1FF", "#C2C2C2FF", "#C3C3C4FF", "#C5C5C5FF", "#C6C6C6FF", "#C8C8C8FF", "#C9C9C9FF", "#CACACAFF", "#CCCCCCFF", "#CDCDCDFF", "#CECECEFF", "#CFCFCFFF", "#D1D1D1FF", "#D2D2D2FF", "#D3D3D3FF", "#D4D4D4FF", "#D5D5D5FF", "#D6D6D6FF", "#D7D7D7FF", "#D8D8D8FF", "#D9D9D9FF", "#DADADAFF", "#DBDBDBFF", "#DCDCDCFF", "#DDDDDDFF", "#DEDEDEFF", "#DEDEDEFF", "#DFDFDFFF", "#E0E0E0FF", "#E0E0E0FF", "#E1E1E1FF", "#E1E1E1FF", "#E2E2E2FF", "#E2E2E2FF", "#E2E2E2FF")
## alpha = 0.5
## cex = 1
## itemLabels = TRUE
## labelCol = #000000B3
## measureLabels = FALSE
## precision = 3
## layout = NULL
## layoutParams = list()
## arrowSize = 0.5
## engine = igraph
## plot = TRUE
## plot_options = list()
## max = 100
## verbose = FALSE
Observations
There is a correlation between printers and laptops. This company sold only 31 printers:
printers <- orders_items[which(orders_items$Category %in% "printer"),] %>%
group_by(Brand) %>%
summarise(total = sum(product_quantity))
kable(printers)
| Brand | total |
|---|---|
| Polaroid | 11 |
| Prynt | 20 |
There is a high relation of purchase between sofware, printers and apple laptops.
Model 3
To made this model we created a table joining Brands and Categories. As we had a lot of types, we
Model
trans_b <- merge(x= data, y=brands, by = "labels", all.x = TRUE)
trans_brand <- unique(trans_b)
trans_brand_category <- merge(x= trans_b, y=categories, by = "labels", all.x = TRUE)
trans_brand_category <- unique(trans_brand_category)
trans_brand_category$brand_category <- paste(trans_brand_category$Brand,"_",
trans_brand_category$Category)
Transaction_Brand_Category <- aggregate(transactions, by =
trans_brand_category$brand_category)
RulesBrand_Category <- apriori (Transaction_Brand_Category, parameter =
list(supp = 0.001, conf = 0.5))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 10
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[280 item(s), 10454 transaction(s)] done [0.00s].
## sorting and recoding items ... [138 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [16 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# is.redundant
redundat_Brand_Category <- is.redundant(RulesBrand_Category)
RulesBrand_Category <- RulesBrand_Category[!redundat_Brand_Category]
Laptop_Apple <- subset(RulesBrand_Category, items %in% "Apple _ laptop")
Inspect
# Laptop - Apple ----
inspect(Laptop_Apple)
## lhs rhs support confidence
## [1] {My MW _ accessories} => {Apple _ laptop} 0.002582743 0.54
## lift count
## [1] 11.00421 27
Observations
We found a clear relationship between the Accessories My MV with Apple laptop with a high lift. That means people who bought a laptop also bought a Accessories My MV.