library(data.table)
library(ggplot2)
library(readr)
library(stringr)
library(dplyr)
filePath <- '/Users/daniel/Desktop/Data/R/Quantium/'
transactionData <- fread(paste0(filePath, 'QVI_transaction_data.csv'))
customerData <- fread(paste0(filePath, 'QVI_purchase_behaviour.csv'))
head(transactionData) # inspecting the first six rows of the dataset
## DATE STORE_NBR LYLTY_CARD_NBR TXN_ID PROD_NBR
## <int> <int> <int> <int> <int>
## 1: 43390 1 1000 1 5
## 2: 43599 1 1307 348 66
## 3: 43605 1 1343 383 61
## 4: 43329 2 2373 974 69
## 5: 43330 2 2426 1038 108
## 6: 43604 4 4074 2982 57
## PROD_NAME PROD_QTY TOT_SALES
## <char> <int> <num>
## 1: Natural Chip Compny SeaSalt175g 2 6.0
## 2: CCs Nacho Cheese 175g 3 6.3
## 3: Smiths Crinkle Cut Chips Chicken 170g 2 2.9
## 4: Smiths Chip Thinly S/Cream&Onion 175g 5 15.0
## 5: Kettle Tortilla ChpsHny&Jlpno Chili 150g 3 13.8
## 6: Old El Paso Salsa Dip Tomato Mild 300g 1 5.1
The ‘DATE’ column is not in DATE format
str(transactionData)
## Classes 'data.table' and 'data.frame': 264836 obs. of 8 variables:
## $ DATE : int 43390 43599 43605 43329 43330 43604 43601 43601 43332 43330 ...
## $ STORE_NBR : int 1 1 1 2 2 4 4 4 5 7 ...
## $ LYLTY_CARD_NBR: int 1000 1307 1343 2373 2426 4074 4149 4196 5026 7150 ...
## $ TXN_ID : int 1 348 383 974 1038 2982 3333 3539 4525 6900 ...
## $ PROD_NBR : int 5 66 61 69 108 57 16 24 42 52 ...
## $ PROD_NAME : chr "Natural Chip Compny SeaSalt175g" "CCs Nacho Cheese 175g" "Smiths Crinkle Cut Chips Chicken 170g" "Smiths Chip Thinly S/Cream&Onion 175g" ...
## $ PROD_QTY : int 2 3 2 5 3 1 1 1 1 2 ...
## $ TOT_SALES : num 6 6.3 2.9 15 13.8 5.1 5.7 3.6 3.9 7.2 ...
## - attr(*, ".internal.selfref")=<externalptr>
# converting DATE column to Date format
transactionData$DATE <- as.Date(transactionData$DATE, origin = "1899-12-30")
head(transactionData) # checking DATE column is accurate
## DATE STORE_NBR LYLTY_CARD_NBR TXN_ID PROD_NBR
## <Date> <int> <int> <int> <int>
## 1: 2018-10-17 1 1000 1 5
## 2: 2019-05-14 1 1307 348 66
## 3: 2019-05-20 1 1343 383 61
## 4: 2018-08-17 2 2373 974 69
## 5: 2018-08-18 2 2426 1038 108
## 6: 2019-05-19 4 4074 2982 57
## PROD_NAME PROD_QTY TOT_SALES
## <char> <int> <num>
## 1: Natural Chip Compny SeaSalt175g 2 6.0
## 2: CCs Nacho Cheese 175g 3 6.3
## 3: Smiths Crinkle Cut Chips Chicken 170g 2 2.9
## 4: Smiths Chip Thinly S/Cream&Onion 175g 5 15.0
## 5: Kettle Tortilla ChpsHny&Jlpno Chili 150g 3 13.8
## 6: Old El Paso Salsa Dip Tomato Mild 300g 1 5.1
summary(transactionData) # checking for null values and outliers
## DATE STORE_NBR LYLTY_CARD_NBR TXN_ID
## Min. :2018-07-01 Min. : 1.0 Min. : 1000 Min. : 1
## 1st Qu.:2018-09-30 1st Qu.: 70.0 1st Qu.: 70021 1st Qu.: 67602
## Median :2018-12-30 Median :130.0 Median : 130358 Median : 135138
## Mean :2018-12-30 Mean :135.1 Mean : 135550 Mean : 135158
## 3rd Qu.:2019-03-31 3rd Qu.:203.0 3rd Qu.: 203094 3rd Qu.: 202701
## Max. :2019-06-30 Max. :272.0 Max. :2373711 Max. :2415841
## PROD_NBR PROD_NAME PROD_QTY TOT_SALES
## Min. : 1.00 Length:264836 Min. : 1.000 Min. : 1.500
## 1st Qu.: 28.00 Class :character 1st Qu.: 2.000 1st Qu.: 5.400
## Median : 56.00 Mode :character Median : 2.000 Median : 7.400
## Mean : 56.58 Mean : 1.907 Mean : 7.304
## 3rd Qu.: 85.00 3rd Qu.: 2.000 3rd Qu.: 9.200
## Max. :114.00 Max. :200.000 Max. :650.000
transactionData[PROD_QTY == 200] # inspecting outlier of PROD_QTY at 200
## DATE STORE_NBR LYLTY_CARD_NBR TXN_ID PROD_NBR
## <Date> <int> <int> <int> <int>
## 1: 2018-08-19 226 226000 226201 4
## 2: 2019-05-20 226 226000 226210 4
## PROD_NAME PROD_QTY TOT_SALES
## <char> <int> <num>
## 1: Dorito Corn Chp Supreme 380g 200 650
## 2: Dorito Corn Chp Supreme 380g 200 650
transactionData[LYLTY_CARD_NBR == 226000] # other transactions for this customer
## DATE STORE_NBR LYLTY_CARD_NBR TXN_ID PROD_NBR
## <Date> <int> <int> <int> <int>
## 1: 2018-08-19 226 226000 226201 4
## 2: 2019-05-20 226 226000 226210 4
## PROD_NAME PROD_QTY TOT_SALES
## <char> <int> <num>
## 1: Dorito Corn Chp Supreme 380g 200 650
## 2: Dorito Corn Chp Supreme 380g 200 650
# removing outlier, not representative of customer spending habits
transactionData <- transactionData[!LYLTY_CARD_NBR == 226000]
productWords <- data.table(words = unlist(strsplit(transactionData$PROD_NAME, " ")))
productWordsCount <- productWords[, .(count = .N), by = words][order(-count)]
head(productWordsCount, 25) # checking for non-chip items
## words count
## <char> <int>
## 1: 504830
## 2: 175g 60561
## 3: Chips 49770
## 4: 150g 41633
## 5: Kettle 41288
## 6: & 35565
## 7: Smiths 28860
## 8: Salt 27976
## 9: Cheese 27890
## 10: Pringles 25102
## 11: 134g 25102
## 12: Doritos 24962
## 13: Crinkle 23960
## 14: 110g 22387
## 15: Corn 22061
## 16: Original 21560
## 17: Cut 20754
## 18: Chip 18645
## 19: 170g 18502
## 20: Salsa 18094
## 21: Chicken 15407
## 22: 165g 15297
## 23: Sea 14145
## 24: Thins 14075
## 25: Sour 13882
## words count
chip_keywords <- c("chips", "crisps", "potato", "snack", "pringles", "lays", "nacho", "tortilla", "crispy")
non_chip_products <- transactionData[!grepl(paste(chip_keywords, collapse = "|"), PROD_NAME, ignore.case = TRUE)]
head(non_chip_products)
## DATE STORE_NBR LYLTY_CARD_NBR TXN_ID PROD_NBR
## <Date> <int> <int> <int> <int>
## 1: 2018-10-17 1 1000 1 5
## 2: 2018-08-17 2 2373 974 69
## 3: 2019-05-19 4 4074 2982 57
## 4: 2019-05-16 4 4196 3539 24
## 5: 2018-08-20 5 5026 4525 42
## 6: 2018-08-18 7 7150 6900 52
## PROD_NAME PROD_QTY TOT_SALES
## <char> <int> <num>
## 1: Natural Chip Compny SeaSalt175g 2 6.0
## 2: Smiths Chip Thinly S/Cream&Onion 175g 5 15.0
## 3: Old El Paso Salsa Dip Tomato Mild 300g 1 5.1
## 4: Grain Waves Sweet Chilli 210g 1 3.6
## 5: Doritos Corn Chip Mexican Jalapeno 150g 1 3.9
## 6: Grain Waves Sour Cream&Chives 210G 2 7.2
# Salsa products are not relevant to chips spending habits
# Adding a column to identify rows with 'salsa'
transactionData[, SALSA := grepl('salsa', tolower(PROD_NAME))]
# Removing rows with 'salsa' and the SALSA column
transactionData <- transactionData[SALSA == FALSE][, SALSA := NULL]
# New features (brand name, pack size)
transactionData[, BRAND := word(transactionData$PROD_NAME, 1)]
unique(transactionData[, BRAND])
## [1] "Natural" "CCs" "Smiths" "Kettle" "Grain"
## [6] "Doritos" "Twisties" "WW" "Thins" "Burger"
## [11] "NCC" "Cheezels" "Infzns" "Red" "Pringles"
## [16] "Dorito" "Infuzions" "Smith" "GrnWves" "Tyrrells"
## [21] "Cobs" "French" "RRD" "Tostitos" "Cheetos"
## [26] "Woolworths" "Snbts" "Sunbites"
There are brands in this dataset with very similar names. By standardising brand names, we can acquire more accurate insights e.g. concerning sales of Doritos
transactionData[BRAND == "Red", BRAND := "RRD"]
transactionData[BRAND == "Infzns", BRAND := "Infuzions"]
transactionData[BRAND == "Snbts", BRAND := "Sunbites"]
transactionData[BRAND == "Smith", BRAND := "Smiths"]
transactionData[BRAND == "Dorito", BRAND := "Doritos"]
unique(transactionData[, BRAND])
## [1] "Natural" "CCs" "Smiths" "Kettle" "Grain"
## [6] "Doritos" "Twisties" "WW" "Thins" "Burger"
## [11] "NCC" "Cheezels" "Infuzions" "RRD" "Pringles"
## [16] "GrnWves" "Tyrrells" "Cobs" "French" "Tostitos"
## [21] "Cheetos" "Woolworths" "Sunbites"
# Pack Size column
transactionData[, PACK_SIZE := parse_number(transactionData$PROD_NAME)]
# Inspecting sales over time
transactions_day <- transactionData[, .(Count = .N), by = DATE][order(DATE)]
transactions_day
## DATE Count
## <Date> <int>
## 1: 2018-07-01 663
## 2: 2018-07-02 650
## 3: 2018-07-03 674
## 4: 2018-07-04 669
## 5: 2018-07-05 660
## ---
## 360: 2019-06-26 657
## 361: 2019-06-27 669
## 362: 2019-06-28 673
## 363: 2019-06-29 703
## 364: 2019-06-30 704
# There are only 364 rows, what day has 0 orders?
dates <- data.table(DATE = seq(as.Date('2018-07-01'), as.Date('2019-06-30'), 'day'))
dates_count <- dates[, .(Count = .N), by = DATE]
dates_count
## DATE Count
## <Date> <int>
## 1: 2018-07-01 1
## 2: 2018-07-02 1
## 3: 2018-07-03 1
## 4: 2018-07-04 1
## 5: 2018-07-05 1
## ---
## 361: 2019-06-26 1
## 362: 2019-06-27 1
## 363: 2019-06-28 1
## 364: 2019-06-29 1
## 365: 2019-06-30 1
merged_dates <- transactions_day[dates, on = .(DATE)]
merged_dates[is.na(Count), DATE] # date of day with no orders
## [1] "2018-12-25"
merged_dates[is.na(Count), Count := 0]
merged_dates
## DATE Count
## <Date> <int>
## 1: 2018-07-01 663
## 2: 2018-07-02 650
## 3: 2018-07-03 674
## 4: 2018-07-04 669
## 5: 2018-07-05 660
## ---
## 361: 2019-06-26 657
## 362: 2019-06-27 669
## 363: 2019-06-28 673
## 364: 2019-06-29 703
## 365: 2019-06-30 704
# Plot orders over time
ggplot(transactions_day, aes(x = DATE, y = Count)) +
geom_line() +
scale_x_date(breaks = "1 month") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
# Peak in December
dec_dates <- transactions_day[DATE > '2018-11-30' & DATE < '2019-01-01']
# dec_dates
ggplot(dec_dates, aes(x = DATE, y = Count)) +
geom_line() + # creates line graph
scale_x_date(breaks = '1 day') + # scale of x-axis
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, size = 7.5)) +
labs(x = 'Date of Order', y = 'Number of Orders', title = 'Orders over December')
str(customerData) # checking column data types
## Classes 'data.table' and 'data.frame': 72637 obs. of 3 variables:
## $ LYLTY_CARD_NBR : int 1000 1002 1003 1004 1005 1007 1009 1010 1011 1012 ...
## $ LIFESTAGE : chr "YOUNG SINGLES/COUPLES" "YOUNG SINGLES/COUPLES" "YOUNG FAMILIES" "OLDER SINGLES/COUPLES" ...
## $ PREMIUM_CUSTOMER: chr "Premium" "Mainstream" "Budget" "Mainstream" ...
## - attr(*, ".internal.selfref")=<externalptr>
summary(customerData) # check for null values/outliers
## LYLTY_CARD_NBR LIFESTAGE PREMIUM_CUSTOMER
## Min. : 1000 Length:72637 Length:72637
## 1st Qu.: 66202 Class :character Class :character
## Median : 134040 Mode :character Mode :character
## Mean : 136186
## 3rd Qu.: 203375
## Max. :2373711
# Lifestage Distribution
lifestageCount <- customerData[, .(Count = .N), by = LIFESTAGE]
lifestageCount
## LIFESTAGE Count
## <char> <int>
## 1: YOUNG SINGLES/COUPLES 14441
## 2: YOUNG FAMILIES 9178
## 3: OLDER SINGLES/COUPLES 14609
## 4: MIDAGE SINGLES/COUPLES 7275
## 5: NEW FAMILIES 2549
## 6: OLDER FAMILIES 9780
## 7: RETIREES 14805
# Distribution of Life stages plot
ggplot(lifestageCount, aes(x = '', y = Count, fill = LIFESTAGE)) +
geom_bar(stat = 'identity') +
coord_polar('y') + # converts bar chart to pie chart
theme_void() + # clearer format
geom_text(aes(label = Count),
position = position_stack(vjust = 0.5), # positions label centrally
size = 2) +
labs(title = "Distribution of Life Stages")
# Premium Distribution
premiumCount <- customerData[, .(Count = .N), by = PREMIUM_CUSTOMER]
premiumCount
## PREMIUM_CUSTOMER Count
## <char> <int>
## 1: Premium 18922
## 2: Mainstream 29245
## 3: Budget 24470
ggplot(premiumCount, aes(x = "", y = Count, fill = PREMIUM_CUSTOMER)) +
geom_bar(stat = 'identity') +
coord_polar("y") +
theme_void() +
geom_text(aes(label = paste0(PREMIUM_CUSTOMER, "\n", Count)),
position = position_stack(vjust = 0.5)) +
labs(title = "Distribution of Spending Profiles",
fill = 'Spending Profile')
# Merging the two data sets
data <- customerData[transactionData, on = .(LYLTY_CARD_NBR)]
summary(data)
## LYLTY_CARD_NBR LIFESTAGE PREMIUM_CUSTOMER DATE
## Min. : 1000 Length:246740 Length:246740 Min. :2018-07-01
## 1st Qu.: 70015 Class :character Class :character 1st Qu.:2018-09-30
## Median : 130367 Mode :character Mode :character Median :2018-12-30
## Mean : 135530 Mean :2018-12-30
## 3rd Qu.: 203083 3rd Qu.:2019-03-31
## Max. :2373711 Max. :2019-06-30
## STORE_NBR TXN_ID PROD_NBR PROD_NAME
## Min. : 1.0 Min. : 1 Min. : 1.00 Length:246740
## 1st Qu.: 70.0 1st Qu.: 67569 1st Qu.: 26.00 Class :character
## Median :130.0 Median : 135182 Median : 53.00 Mode :character
## Mean :135.1 Mean : 135130 Mean : 56.35
## 3rd Qu.:203.0 3rd Qu.: 202652 3rd Qu.: 87.00
## Max. :272.0 Max. :2415841 Max. :114.00
## PROD_QTY TOT_SALES BRAND PACK_SIZE
## Min. :1.000 Min. : 1.700 Length:246740 Min. : 70.0
## 1st Qu.:2.000 1st Qu.: 5.800 Class :character 1st Qu.:150.0
## Median :2.000 Median : 7.400 Mode :character Median :170.0
## Mean :1.906 Mean : 7.316 Mean :175.6
## 3rd Qu.:2.000 3rd Qu.: 8.800 3rd Qu.:175.0
## Max. :5.000 Max. :29.500 Max. :380.0
# Move DATE column to the front
setcolorder(data, c("DATE", setdiff(names(data), "DATE")))
names(data)
## [1] "DATE" "LYLTY_CARD_NBR" "LIFESTAGE" "PREMIUM_CUSTOMER"
## [5] "STORE_NBR" "TXN_ID" "PROD_NBR" "PROD_NAME"
## [9] "PROD_QTY" "TOT_SALES" "BRAND" "PACK_SIZE"
# Variable Summaries ------------------------------------------------------
# Brands by Life Stage
unique(data[, BRAND])
## [1] "Natural" "CCs" "Smiths" "Kettle" "Grain"
## [6] "Doritos" "Twisties" "WW" "Thins" "Burger"
## [11] "NCC" "Cheezels" "Infuzions" "RRD" "Pringles"
## [16] "GrnWves" "Tyrrells" "Cobs" "French" "Tostitos"
## [21] "Cheetos" "Woolworths" "Sunbites"
lifestage_brand <- data[, .(Count = .N), by = .(LIFESTAGE, BRAND)][order(-Count)]
lifestage_brand[order(LIFESTAGE)]
## LIFESTAGE BRAND Count
## <char> <char> <int>
## 1: MIDAGE SINGLES/COUPLES Kettle 4055
## 2: MIDAGE SINGLES/COUPLES Smiths 2790
## 3: MIDAGE SINGLES/COUPLES Doritos 2423
## 4: MIDAGE SINGLES/COUPLES Pringles 2389
## 5: MIDAGE SINGLES/COUPLES RRD 1478
## ---
## 157: YOUNG SINGLES/COUPLES French 194
## 158: YOUNG SINGLES/COUPLES Woolworths 192
## 159: YOUNG SINGLES/COUPLES Burger 178
## 160: YOUNG SINGLES/COUPLES GrnWves 163
## 161: YOUNG SINGLES/COUPLES NCC 158
brandCount <- data[, .(Count = .N), by = BRAND][order(-Count)]
brandCount
## BRAND Count
## <char> <int>
## 1: Kettle 41288
## 2: Smiths 30353
## 3: Doritos 25224
## 4: Pringles 25102
## 5: RRD 16321
## 6: Infuzions 14201
## 7: Thins 14075
## 8: WW 10320
## 9: Cobs 9693
## 10: Tostitos 9471
## 11: Twisties 9454
## 12: Tyrrells 6442
## 13: Grain 6272
## 14: Natural 6050
## 15: Cheezels 4603
## 16: CCs 4551
## 17: Sunbites 3008
## 18: Cheetos 2927
## 19: Burger 1564
## 20: Woolworths 1516
## 21: GrnWves 1468
## 22: NCC 1419
## 23: French 1418
## BRAND Count
# Stacked Bar Chart, lifestage and brand
ggplot(lifestage_brand, aes(x = BRAND, y = Count, fill = LIFESTAGE)) +
geom_bar(stat = 'identity') +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5))
most purchased brands are Kettle, Smiths and Doritos
lifestage_brand[BRAND %in% c('Kettle', 'Smiths', 'Doritos')][order(BRAND)]
## LIFESTAGE BRAND Count
## <char> <char> <int>
## 1: OLDER SINGLES/COUPLES Doritos 5326
## 2: RETIREES Doritos 4987
## 3: OLDER FAMILIES Doritos 4218
## 4: YOUNG FAMILIES Doritos 3894
## 5: YOUNG SINGLES/COUPLES Doritos 3650
## 6: MIDAGE SINGLES/COUPLES Doritos 2423
## 7: NEW FAMILIES Doritos 726
## 8: OLDER SINGLES/COUPLES Kettle 8847
## 9: RETIREES Kettle 8194
## 10: OLDER FAMILIES Kettle 6851
## 11: YOUNG FAMILIES Kettle 6277
## 12: YOUNG SINGLES/COUPLES Kettle 5893
## 13: MIDAGE SINGLES/COUPLES Kettle 4055
## 14: NEW FAMILIES Kettle 1171
## 15: OLDER FAMILIES Smiths 6138
## 16: OLDER SINGLES/COUPLES Smiths 6032
## 17: YOUNG FAMILIES Smiths 5399
## 18: RETIREES Smiths 5374
## 19: YOUNG SINGLES/COUPLES Smiths 3893
## 20: MIDAGE SINGLES/COUPLES Smiths 2790
## 21: NEW FAMILIES Smiths 727
## LIFESTAGE BRAND Count
# Percentage of Sales by Life stage -----------------------------------------------------
sales_lifestage <- data[, .(sum = round(sum(TOT_SALES))), by = LIFESTAGE]
sales_lifestage <- sales_lifestage %>%
mutate(Percentage = sum / sum(sum) * 100)
ggplot(sales_lifestage, aes(x = LIFESTAGE, y = Percentage, fill = LIFESTAGE)) +
geom_bar(stat = 'identity', width = 1) +
geom_text(aes(label = round(Percentage, 2)),
position = position_stack(vjust = 0.5),
size = 3,
show.legend = FALSE) +
labs(title = 'Sales by Life stage') +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
OLDER SINGLES/COUPLES, RETIREES and OLDER FAMILIES are the greatest contributors to total sales.
lifestage_profile <- data[, .(Count = .N), by = .(LIFESTAGE, PREMIUM_CUSTOMER)][order(-Count)]
lifestage_profile
## LIFESTAGE PREMIUM_CUSTOMER Count
## <char> <char> <int>
## 1: OLDER FAMILIES Budget 21514
## 2: RETIREES Mainstream 19970
## 3: YOUNG SINGLES/COUPLES Mainstream 19544
## 4: YOUNG FAMILIES Budget 17763
## 5: OLDER SINGLES/COUPLES Budget 17172
## 6: OLDER SINGLES/COUPLES Mainstream 17061
## 7: OLDER SINGLES/COUPLES Premium 16560
## 8: RETIREES Budget 14225
## 9: OLDER FAMILIES Mainstream 13241
## 10: RETIREES Premium 12236
## 11: YOUNG FAMILIES Mainstream 11947
## 12: MIDAGE SINGLES/COUPLES Mainstream 11095
## 13: YOUNG FAMILIES Premium 10784
## 14: OLDER FAMILIES Premium 10403
## 15: YOUNG SINGLES/COUPLES Budget 8573
## 16: MIDAGE SINGLES/COUPLES Premium 7612
## 17: YOUNG SINGLES/COUPLES Premium 5852
## 18: MIDAGE SINGLES/COUPLES Budget 4691
## 19: NEW FAMILIES Budget 2824
## 20: NEW FAMILIES Mainstream 2185
## 21: NEW FAMILIES Premium 1488
## LIFESTAGE PREMIUM_CUSTOMER Count
ggplot(lifestage_profile, aes(x = LIFESTAGE, y = Count, fill = PREMIUM_CUSTOMER)) +
geom_bar(stat = 'identity') +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
geom_text(aes(label = Count),
position = position_stack(vjust = 0.5),
size = 3) +
labs(title = "Distribution by Lifestage and Spending Profile",
x = 'Life stage of Customer',
y = 'Number of customers',
fill = 'Spending Profile')
# Sales metrics by Life Stage and Spending Profile
segment_metrics <- data[, .(
avg_spend = mean(TOT_SALES),
total_sales = sum(TOT_SALES),
avg_order_size = mean(PROD_QTY)
), by = .(LIFESTAGE, PREMIUM_CUSTOMER)]
high_value_segments <- segment_metrics[, .(
LIFESTAGE,
PREMIUM_CUSTOMER,
Contribution = round(total_sales / sum(total_sales) * 100, 2))][order(-Contribution)]
ggplot(high_value_segments, aes(x = LIFESTAGE,
y = Contribution,
fill = PREMIUM_CUSTOMER)) +
geom_bar(stat = 'identity') +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
geom_text(aes(label = Contribution),
position = position_stack(vjust = 0.5)) +
labs(title = '% Contribution across Lifestage and Spending Profile',
fill = 'Spending Profile')
OLDER SINGLES/COUPLES contribute the most () to sales as a Life stage
segment
OLDER FAMILIES - BUDGET contribute the most as a Life stage/Spending
Profile segment
NEW FAMILIES as a segment are the smallest contributor to sales,
contributing less than 3% of sales.