Yong Jiang 03/29/2022
library(tidyverse)## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(readr)
library(readxl)
library(data.table)##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
library(dplyr)
library(lubridate)##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(stringr)purchase_behavior <- read_csv("QVI_purchase_behaviour.csv")## Rows: 72637 Columns: 3
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): LIFESTAGE, PREMIUM_CUSTOMER
## dbl (1): LYLTY_CARD_NBR
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
transaction_data <- read_excel("QVI_transaction_data.xlsx")colnames(purchase_behavior)## [1] "LYLTY_CARD_NBR" "LIFESTAGE" "PREMIUM_CUSTOMER"
colnames(transaction_data)## [1] "DATE" "STORE_NBR" "LYLTY_CARD_NBR" "TXN_ID"
## [5] "PROD_NBR" "PROD_NAME" "PROD_QTY" "TOT_SALES"
str(purchase_behavior)## spec_tbl_df [72,637 x 3] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ LYLTY_CARD_NBR : num [1:72637] 1000 1002 1003 1004 1005 ...
## $ LIFESTAGE : chr [1:72637] "YOUNG SINGLES/COUPLES" "YOUNG SINGLES/COUPLES" "YOUNG FAMILIES" "OLDER SINGLES/COUPLES" ...
## $ PREMIUM_CUSTOMER: chr [1:72637] "Premium" "Mainstream" "Budget" "Mainstream" ...
## - attr(*, "spec")=
## .. cols(
## .. LYLTY_CARD_NBR = col_double(),
## .. LIFESTAGE = col_character(),
## .. PREMIUM_CUSTOMER = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
str(transaction_data)## tibble [264,836 x 8] (S3: tbl_df/tbl/data.frame)
## $ DATE : num [1:264836] 43390 43599 43605 43329 43330 ...
## $ STORE_NBR : num [1:264836] 1 1 1 2 2 4 4 4 5 7 ...
## $ LYLTY_CARD_NBR: num [1:264836] 1000 1307 1343 2373 2426 ...
## $ TXN_ID : num [1:264836] 1 348 383 974 1038 ...
## $ PROD_NBR : num [1:264836] 5 66 61 69 108 57 16 24 42 52 ...
## $ PROD_NAME : chr [1:264836] "Natural Chip Compny SeaSalt175g" "CCs Nacho Cheese 175g" "Smiths Crinkle Cut Chips Chicken 170g" "Smiths Chip Thinly S/Cream&Onion 175g" ...
## $ PROD_QTY : num [1:264836] 2 3 2 5 3 1 1 1 1 2 ...
## $ TOT_SALES : num [1:264836] 6 6.3 2.9 15 13.8 5.1 5.7 3.6 3.9 7.2 ...
head(purchase_behavior)## # A tibble: 6 x 3
## LYLTY_CARD_NBR LIFESTAGE PREMIUM_CUSTOMER
## <dbl> <chr> <chr>
## 1 1000 YOUNG SINGLES/COUPLES Premium
## 2 1002 YOUNG SINGLES/COUPLES Mainstream
## 3 1003 YOUNG FAMILIES Budget
## 4 1004 OLDER SINGLES/COUPLES Mainstream
## 5 1005 MIDAGE SINGLES/COUPLES Mainstream
## 6 1007 YOUNG SINGLES/COUPLES Budget
head(transaction_data)## # A tibble: 6 x 8
## DATE STORE_NBR LYLTY_CARD_NBR TXN_ID PROD_NBR PROD_NAME PROD_QTY TOT_SALES
## <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 43390 1 1000 1 5 Natural Chi~ 2 6
## 2 43599 1 1307 348 66 CCs Nacho C~ 3 6.3
## 3 43605 1 1343 383 61 Smiths Crin~ 2 2.9
## 4 43329 2 2373 974 69 Smiths Chip~ 5 15
## 5 43330 2 2426 1038 108 Kettle Tort~ 3 13.8
## 6 43604 4 4074 2982 57 Old El Paso~ 1 5.1
transaction_data$DATE <- as.Date(transaction_data$DATE, origin = "1899-12-30")
head(transaction_data)## # A tibble: 6 x 8
## DATE STORE_NBR LYLTY_CARD_NBR TXN_ID PROD_NBR PROD_NAME PROD_QTY
## <date> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 2018-10-17 1 1000 1 5 Natural Chip ~ 2
## 2 2019-05-14 1 1307 348 66 CCs Nacho Cheese~ 3
## 3 2019-05-20 1 1343 383 61 Smiths Crinkle C~ 2
## 4 2018-08-17 2 2373 974 69 Smiths Chip Thin~ 5
## 5 2018-08-18 2 2426 1038 108 Kettle Tortilla ~ 3
## 6 2019-05-19 4 4074 2982 57 Old El Paso Sals~ 1
## # ... with 1 more variable: TOT_SALES <dbl>
sum(purchase_behavior$LYLTY_CARD_NBR == '')## [1] 0
sum(purchase_behavior$LIFESTAGE == '')## [1] 0
sum(purchase_behavior$PREMIUM_CUSTOMER == '')## [1] 0
sum(transaction_data$DATE == '')## [1] NA
sum(transaction_data$STORE_NBR == '')## [1] 0
sum(transaction_data$LYLTY_CARD_NBR == '')## [1] 0
sum(transaction_data$TXN_ID == '')## [1] 0
sum(transaction_data$PROD_NBR == '')## [1] 0
sum(transaction_data$PROD_NAME == '')## [1] 0
sum(transaction_data$PROD_QTY == '')## [1] 0
sum(transaction_data$TOT_SALES == '')## [1] 0
There is no empty cell from both data sets, but the date column has N/A.
#### product name percentage
PN <- transaction_data %>%
group_by(PROD_NAME) %>%
summarise(cnt = n()) %>%
mutate(freq = round(cnt/sum(cnt), 3)) %>%
arrange(desc(freq))
View(PN)
#### product quantity percentage
PQ <- transaction_data %>%
group_by(PROD_QTY) %>%
summarise(cnt = n()) %>%
mutate(freq = round(cnt/sum(cnt), 3)) %>%
arrange(desc(freq))
View(PQ)There are two transactions that customer bought 200, they are outliers that need to be cleaned.
transaction_data_cleaned <- transaction_data %>%
filter(LYLTY_CARD_NBR != 226000)
View(transaction_data_cleaned)transaction_data_cleaned %>% summarise_if(is.numeric, max)## # A tibble: 1 x 6
## STORE_NBR LYLTY_CARD_NBR TXN_ID PROD_NBR PROD_QTY TOT_SALES
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 272 2373711 2415841 114 5 29.5
transaction_data_cleaned %>% summarise_if(is.numeric, min)## # A tibble: 1 x 6
## STORE_NBR LYLTY_CARD_NBR TXN_ID PROD_NBR PROD_QTY TOT_SALES
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 1000 1 1 1 1.5
transaction_data_cleaned %>% summarise_if(is.numeric, mean)## # A tibble: 1 x 6
## STORE_NBR LYLTY_CARD_NBR TXN_ID PROD_NBR PROD_QTY TOT_SALES
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 135. 135549. 135158. 56.6 1.91 7.30
transaction_data_cleaned %>% summarise_if(is.numeric, median)## # A tibble: 1 x 6
## STORE_NBR LYLTY_CARD_NBR TXN_ID PROD_NBR PROD_QTY TOT_SALES
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 130 130357 135136. 56 2 7.4
product_name <- transaction_data_cleaned %>%
group_by(PROD_NAME) %>%
summarise(count = n())
View(product_name)Need to double confirm the definition of chip with Category Manager for Chips, for example, is the puff product in it? For now, I will exclude salsa only.
transaction_data_cleaned %>% filter(!grepl("Salsa", PROD_NAME))## # A tibble: 246,740 x 8
## DATE STORE_NBR LYLTY_CARD_NBR TXN_ID PROD_NBR PROD_NAME PROD_QTY
## <date> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 2018-10-17 1 1000 1 5 Natural Chip ~ 2
## 2 2019-05-14 1 1307 348 66 CCs Nacho Chees~ 3
## 3 2019-05-20 1 1343 383 61 Smiths Crinkle ~ 2
## 4 2018-08-17 2 2373 974 69 Smiths Chip Thi~ 5
## 5 2018-08-18 2 2426 1038 108 Kettle Tortilla~ 3
## 6 2019-05-16 4 4149 3333 16 Smiths Crinkle ~ 1
## 7 2019-05-16 4 4196 3539 24 Grain Waves ~ 1
## 8 2018-08-20 5 5026 4525 42 Doritos Corn Ch~ 1
## 9 2018-08-18 7 7150 6900 52 Grain Waves Sou~ 2
## 10 2019-05-17 7 7215 7176 16 Smiths Crinkle ~ 1
## # ... with 246,730 more rows, and 1 more variable: TOT_SALES <dbl>
product_name <- transaction_data_cleaned %>%
group_by(PROD_NAME) %>%
summarise(count = n())
View(product_name)transaction_data_cleaned$PACK_SIZE <- parse_number(transaction_data_cleaned$PROD_NAME)
max(transaction_data_cleaned$PACK_SIZE)## [1] 380
min(transaction_data_cleaned$PACK_SIZE)## [1] 70
pack_size_transaction <- transaction_data_cleaned %>%
group_by(PACK_SIZE) %>%
summarise(count = n())
ggplot(transaction_data_cleaned, aes(factor(PACK_SIZE))) +
geom_bar(color="black", fill="yellow") +
labs(x = "Pack Size", y = "Number of transactions", title = "Transactions by Pack Size")The top 5 best seller sizes are 175g, 150g, 134g 110g, and 170g.
transaction_data_cleaned$BrandName <- word(transaction_data_cleaned$PROD_NAME, 1)
BrandName_count <- transaction_data_cleaned %>%
group_by(BrandName) %>%
summarise(count = n())Some brand names are the same brands but the spells are different. We can convert same brands into the same spells.
transaction_data_cleaned$BrandName[transaction_data_cleaned$BrandName == "Dorito"] <- "Doritos"
transaction_data_cleaned$BrandName[transaction_data_cleaned$BrandName == "Red"] <- "RRD"
transaction_data_cleaned$BrandName[transaction_data_cleaned$BrandName == "Smith"] <- "Smiths"
BrandName_count <- transaction_data_cleaned %>%
group_by(BrandName) %>%
summarise(count = n()) %>%
arrange(desc(count))BrandName_count <- transaction_data_cleaned %>%
group_by(BrandName) %>%
summarise(cnt = n()) %>%
arrange(desc(cnt)) %>%
ggplot(aes(x= reorder(BrandName, cnt), y = cnt)) +
geom_col(color = "black", fill = "yellow")+
labs(title = "Transaction by Brand Name", y = "Count", x = "Brand Name") +
scale_y_continuous(labels = scales::comma) +
coord_flip() +
theme_minimal()
print(BrandName_count)5 top popular brands are Kettle, Smiths, Doritos, Pringles and RRD.
date_number <- transaction_data_cleaned %>%
group_by(DATE) %>%
summarise(count = n())
print(date_number)## # A tibble: 364 x 2
## DATE count
## <date> <int>
## 1 2018-07-01 724
## 2 2018-07-02 711
## 3 2018-07-03 722
## 4 2018-07-04 714
## 5 2018-07-05 712
## 6 2018-07-06 762
## 7 2018-07-07 750
## 8 2018-07-08 696
## 9 2018-07-09 749
## 10 2018-07-10 705
## # ... with 354 more rows
There are 364 rows in total while there should be 365 days from 2018/07/01 to 2019/06/30.
### Fulfill the date
start_date <- ymd("2018/07/01")
end_date <- ymd("2019/06/30")
range_date <- seq(start_date, by = "day", length.out = 365)
date_2018_2019 <- data.frame(DATE = range_date)
View(date_2018_2019)
transaction_data_2018_2019 <- full_join(date_2018_2019, transaction_data_cleaned)## Joining, by = "DATE"
### check
date_number <- transaction_data_2018_2019 %>%
group_by(DATE) %>%
summarise(count = n())
print(date_number)## # A tibble: 365 x 2
## DATE count
## <date> <int>
## 1 2018-07-01 724
## 2 2018-07-02 711
## 3 2018-07-03 722
## 4 2018-07-04 714
## 5 2018-07-05 712
## 6 2018-07-06 762
## 7 2018-07-07 750
## 8 2018-07-08 696
## 9 2018-07-09 749
## 10 2018-07-10 705
## # ... with 355 more rows
ggplot(date_number, aes(x = DATE, y = count)) +
geom_line() +
labs(x = "Day", y = "Number of transactions", title = "Transactions over time") +
scale_x_date(breaks = "1 month") + theme(axis.text.x = element_text(angle = 65, vjust = 0.5))There are big changes in Dec 2018, let’s take a close look.
Dec_2018 <- transaction_data_2018_2019 %>%
filter(between(DATE, as.Date("2018-12-01"), as.Date("2018-12-31")))
View(Dec_2018)
transaction_data_Dec_2018 <- Dec_2018 %>%
group_by(DATE) %>%
summarise(count = n())
ggplot(transaction_data_Dec_2018, aes(x = DATE, y = count)) +
geom_line() +
labs(x = "Day", y = "Number of transactions", title = "Transactions over time") +
scale_x_date(breaks = "1 day") + theme(axis.text.x = element_text(angle = 65, vjust = 0.5))The missing date with no transaction is Dec-24, the Christmas eve that no shops are open. The increased sales on Dec may because of the Christmas holiday sales.
LP <- purchase_behavior %>%
group_by(LIFESTAGE) %>%
summarise(cnt = n()) %>%
mutate(freq = round(cnt/sum(cnt), 3)) %>%
arrange(desc(freq))
View(LP)blank_theme <- theme_minimal() +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.border = element_blank(),
panel.grid=element_blank(),
axis.ticks = element_blank(),
plot.title=element_text(size=14, face="bold")
)
ggplot(LP, aes(x = "", y = cnt, fill = LIFESTAGE)) +
geom_bar(width = 1, stat = "identity") +
coord_polar("y", start = 0) + scale_fill_brewer("") +
blank_theme +
theme(axis.text.x = element_blank())PC <- purchase_behavior %>%
group_by(PREMIUM_CUSTOMER) %>%
summarise(count = n()) %>%
arrange(desc(count))
ggplot(PC, aes(x= PREMIUM_CUSTOMER, y = count)) +
geom_bar(stat = "identity", color = "Black", fill = "Yellow")+
labs(title = "Transaction by Premium Customer", x = "Premium Customer") +
geom_text(aes(label = count), size = 3, position = position_stack(vjust = 1.05))QVI_data <- merge(transaction_data_2018_2019, purchase_behavior, all.x = TRUE)QVI_data$CUSTOMER_SEGMENT <- paste(QVI_data$PREMIUM_CUSTOMER, QVI_data$LIFESTAGE)ggplot(data = QVI_data) +
geom_col(mapping = aes(x = BrandName, y = PROD_QTY, fill = BrandName)) +
labs(x = "Brand Name", y = "Sale Count", title = "Brand Preference by Customer Segment") +
theme(axis.text.x=element_blank()) +
facet_wrap(~CUSTOMER_SEGMENT)## Warning: Removed 1 rows containing missing values (position_stack).
Too many different customer segments.
X <- QVI_data %>%
group_by(PREMIUM_CUSTOMER, LIFESTAGE) %>%
summarise(
cnt = n(),
total_sale = sum(TOT_SALES),
total_chip_amount = sum(PROD_QTY),
sales_per_purchase = round(sum(TOT_SALES)/cnt, 3),
average_chip_amt = round(sum(PROD_QTY)/cnt, 3),
chip_unit_price = round(sum(TOT_SALES)/sum(PROD_QTY), 3)
) %>%
arrange(desc(total_sale))## `summarise()` has grouped output by 'PREMIUM_CUSTOMER'. You can override using
## the `.groups` argument.
Top 3 sales come from Budget - Older families, Mainstream - Young singles/couples, and Mainstream - Retirees. Top 3 sales every purchase come from Mainstream - Midages singles/couples and Young singles/couples, and Premium - Retirees.
Top 3 chip sales quantity come from Older families. Top 3 chip unit price come from Mainstream - Young singles/couples, Midages singles/couples and New families
ggplot(data = X) +
geom_col(mapping = aes(x = LIFESTAGE, y = total_sale, fill = LIFESTAGE)) +
labs(x = "Life Stage", title = "Total Sales by Customer Segment 1")+
theme(axis.text.x=element_blank())+
facet_wrap(~PREMIUM_CUSTOMER)## Warning: Removed 1 rows containing missing values (position_stack).
ggplot(data = X) +
geom_col(mapping = aes(x = PREMIUM_CUSTOMER, y = total_sale, fill = PREMIUM_CUSTOMER)) +
labs(x = "Purchase Behavior", title = "Total Sales by Customer Segment 2")+
theme(axis.text.x=element_blank())+
facet_wrap(~LIFESTAGE)## Warning: Removed 1 rows containing missing values (position_stack).
X_1 <- QVI_data %>%
group_by(PREMIUM_CUSTOMER, LIFESTAGE) %>%
summarise(
cnt = n(),
total_sale = sum(TOT_SALES)) ## `summarise()` has grouped output by 'PREMIUM_CUSTOMER'. You can override using
## the `.groups` argument.
X_1$CUSTOMER_SEGMENT <- paste(X_1$PREMIUM_CUSTOMER, X_1$LIFESTAGE)
x_1 <- X_1[, c("CUSTOMER_SEGMENT", "total_sale")] %>%
arrange(desc(total_sale)) %>%
top_n(3)## Selecting by total_sale
ggplot(x_1, aes(x = reorder(CUSTOMER_SEGMENT, desc(total_sale)), y = total_sale)) +
geom_col(color = "Black", fill = "Yellow", width = 0.75) +
labs(x = "Customer Segment", y = "Total Sale", title = "Top 3 Total Sale by Customer Segment") +
theme(axis.text.x = element_text(angle = 15, vjust = 0.5))X_2 <- QVI_data %>%
group_by(PREMIUM_CUSTOMER, LIFESTAGE) %>%
summarise(
cnt = n(),
sales_per_purchase = round(sum(TOT_SALES)/cnt, 3))## `summarise()` has grouped output by 'PREMIUM_CUSTOMER'. You can override using
## the `.groups` argument.
X_2$CUSTOMER_SEGMENT <- paste(X_2$PREMIUM_CUSTOMER, X_2$LIFESTAGE)
x_2 <- X_2[, c("CUSTOMER_SEGMENT", "sales_per_purchase")] %>%
arrange(desc(sales_per_purchase)) %>%
top_n(3)## Selecting by sales_per_purchase
ggplot(x_2, aes(x = reorder(CUSTOMER_SEGMENT, desc(sales_per_purchase)), y = sales_per_purchase)) +
geom_col(color = "Black", fill = "Yellow", width = 0.75) +
labs(x = "Customer Segment", y = "Sales per Purchase", title = "Top 3 Sale per Purchase by Customer Segment") +
theme(axis.text.x = element_text(angle = 15, vjust = 0.5))X_3 <- QVI_data %>%
group_by(PREMIUM_CUSTOMER, LIFESTAGE) %>%
summarise(
cnt = n(),
average_chip_amt = round(sum(PROD_QTY)/cnt, 3))## `summarise()` has grouped output by 'PREMIUM_CUSTOMER'. You can override using
## the `.groups` argument.
X_3$CUSTOMER_SEGMENT <- paste(X_3$PREMIUM_CUSTOMER, X_3$LIFESTAGE)
x_3 <- X_3[, c("CUSTOMER_SEGMENT", "average_chip_amt")] %>%
arrange(desc(average_chip_amt)) %>%
top_n(3)## Selecting by average_chip_amt
ggplot(x_3, aes(x = reorder(CUSTOMER_SEGMENT, desc(average_chip_amt)), y = average_chip_amt)) +
geom_col(color = "Black", fill = "Yellow", width = 0.75) +
labs(x = "Customer Segment", y = "Chip Quantity", title = "Top 3 Average Chip Purchase Quantity by Customer Segment") +
theme(axis.text.x = element_text(angle = 15, vjust = 0.5))X_4 <- QVI_data %>%
group_by(PREMIUM_CUSTOMER, LIFESTAGE) %>%
summarise(
cnt = n(),
chip_unit_price = round(sum(TOT_SALES)/sum(PROD_QTY), 3))## `summarise()` has grouped output by 'PREMIUM_CUSTOMER'. You can override using
## the `.groups` argument.
X_4$CUSTOMER_SEGMENT <- paste(X_4$PREMIUM_CUSTOMER, X_4$LIFESTAGE)
x_4 <- X_4[, c("CUSTOMER_SEGMENT", "chip_unit_price")] %>%
arrange(desc(chip_unit_price)) %>%
top_n(3)## Selecting by chip_unit_price
ggplot(x_4, aes(x = reorder(CUSTOMER_SEGMENT, desc(chip_unit_price)), y = chip_unit_price)) +
geom_col(color = "Black", fill = "Yellow", width = 0.75) +
labs(x = "Customer Segment", y = "Unit Price", title = "Top 3 Unit Price by Customer Segment") +
theme(axis.text.x = element_text(angle = 15, vjust = 0.5))From conclusions above, these are seven customer segments that have more potential to purchase chips: Premium - Older families, Retirees Mainstream - Older families, Midages singles/couples, Young singles/couples, Retirees Budget - Older families
target_customer <- filter(QVI_data, CUSTOMER_SEGMENT %in% c("Premium OLDER FAMILIES",
"Premium RETIREES",
"Mainstream OLDER FAMILIES",
"Mainstream MIDAGE SINGLES/COUPLES",
"Mainstream YOUNG SINGLES/COUPLES",
"Mainstream RETIREES",
"Budget OLDER FAMILIES"
))
P <- target_customer %>%
group_by(CUSTOMER_SEGMENT, PACK_SIZE) %>%
summarise(cnt = n()) %>%
arrange(CUSTOMER_SEGMENT, desc(cnt)) ## `summarise()` has grouped output by 'CUSTOMER_SEGMENT'. You can override using
## the `.groups` argument.
ggplot(P, aes(x= PACK_SIZE, y = cnt, fill = PACK_SIZE))+
geom_col()+
labs(x = "Pack Size", y = "Count", title = "Pack Size Preference by Target Customer Segment") +
facet_wrap(~CUSTOMER_SEGMENT) theme(axis.text.x = element_text(angle = 15, vjust = 0.5))## List of 1
## $ axis.text.x:List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : num 0.5
## ..$ angle : num 15
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi FALSE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## - attr(*, "class")= chr [1:2] "theme" "gg"
## - attr(*, "complete")= logi FALSE
## - attr(*, "validate")= logi TRUE
Top 3 pack size preference for targeted customer segments: 175g, 150g, 134g
B <- target_customer %>%
group_by(CUSTOMER_SEGMENT, BrandName) %>%
summarise(cnt = n())## `summarise()` has grouped output by 'CUSTOMER_SEGMENT'. You can override using
## the `.groups` argument.
ggplot(B, aes(x= BrandName, y = cnt, fill = BrandName))+
geom_col()+
labs(x = "BrandName", y = "Count", title = "Brand Name Preference by Target Customer Segment") +
facet_wrap(~CUSTOMER_SEGMENT) theme(axis.text.x = element_text(angle = 15, vjust = 0.5))## List of 1
## $ axis.text.x:List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : num 0.5
## ..$ angle : num 15
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi FALSE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## - attr(*, "class")= chr [1:2] "theme" "gg"
## - attr(*, "complete")= logi FALSE
## - attr(*, "validate")= logi TRUE