Giphy
Main task is to generate actionable insights from this pet food customer orders data set. As a focus area, I to address the following questions:
This data set is a selection of customers and their orders. This task was provided by Jahangir Raina on Kaggle.
The data is in wide format. There’s no information on where the data came from or how it was collected.The data set has several missing entries.
Load libraries
#load libraries
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.4 ✓ purrr 0.3.4
## ✓ tibble 3.1.2 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
Load data set
#load csv
pet_food_customer <- read.csv("~/Downloads/pet_food_customer_orders.csv")
Taking a look small into the data set
head(pet_food_customer)
## customer_id pet_id pet_order_number wet_food_order_number
## 1 1.057485e+19 4.466839e+18 2 1
## 2 1.057485e+19 4.466839e+18 1 NA
## 3 1.057485e+19 4.466839e+18 8 7
## 4 1.057485e+19 4.466839e+18 4 3
## 5 1.057485e+19 4.466839e+18 9 8
## 6 6.342773e+18 1.184281e+19 6 NA
## orders_since_first_wet_trays_order pet_has_active_subscription pet_food_tier
## 1 1 True superpremium
## 2 NA True superpremium
## 3 7 True superpremium
## 4 3 True superpremium
## 5 8 True superpremium
## 6 NA True premium
## pet_signup_datetime pet_allergen_list pet_fav_flavour_list
## 1 1970-01-01 00:00:01.552397819 Chicken
## 2 1970-01-01 00:00:01.552397819 Chicken
## 3 1970-01-01 00:00:01.552397819 Chicken
## 4 1970-01-01 00:00:01.552397819 Chicken
## 5 1970-01-01 00:00:01.552397819 Chicken
## 6 1970-01-01 00:00:01.551530404
## pet_health_issue_list neutered gender pet_breed_size signup_promo
## 1 digestion, joints True female large Null & Default
## 2 digestion, joints True female large Null & Default
## 3 digestion, joints True female large Null & Default
## 4 digestion, joints True female large Null & Default
## 5 digestion, joints True female large Null & Default
## 6 True female medium Refer a Friend
## ate_wet_food_pre_tails dry_food_brand_pre_tails pet_life_stage_at_order
## 1 True Canagan mature
## 2 True Canagan mature
## 3 True Canagan mature
## 4 True Canagan mature
## 5 True Canagan mature
## 6 True Pets at Home Complete mature
## order_payment_date kibble_kcal wet_kcal total_order_kcal wet_trays
## 1 2019-04-11 00:00:00.000000000 36876.72 1234.65 38111.37 11
## 2 2019-03-18 00:00:00.000000000 21419.31 0.00 21419.31 0
## 3 2019-10-27 00:00:00.000000000 18352.84 6624.00 24976.84 60
## 4 2019-05-20 00:00:00.000000000 36617.21 2901.90 39519.11 26
## 5 2019-11-20 00:00:00.000000000 18340.30 6624.00 24964.30 60
## 6 2019-08-15 00:00:00.000000000 14435.99 0.00 14435.99 0
## wet_food_discount_percent wet_tray_size premium_treat_packs
## 1 0 150g 0
## 2 NA None 0
## 3 0 150g 0
## 4 0 150g 0
## 5 0 150g 0
## 6 NA None 0
## dental_treat_packs wet_food_textures_in_order total_web_sessions
## 1 0 gravy jelly pate 6
## 2 0 2
## 3 0 gravy jelly pate 18
## 4 0 gravy jelly pate 13
## 5 0 gravy jelly pate 19
## 6 0 15
## total_web_sessions_since_last_order total_minutes_on_website
## 1 4 101
## 2 1 69
## 3 0 184
## 4 6 130
## 5 1 184
## 6 2 118
## total_minutes_on_website_since_last_order total_wet_food_updates
## 1 32 0
## 2 3 0
## 3 0 0
## 4 15 0
## 5 0 0
## 6 1 0
## total_wet_food_updates_since_last_order last_customer_support_ticket_date
## 1 0 2019-03-16 09:10:12+00:00
## 2 0 2019-03-16 09:10:12+00:00
## 3 0 2019-10-26 06:10:13+00:00
## 4 0 2019-04-23 06:28:12+00:00
## 5 0 2019-10-26 06:10:13+00:00
## 6 0
## customer_support_ticket_category
## 1 proactive
## 2 proactive
## 3 account
## 4 account
## 5 account
## 6
Format ‘order_payment_date’ dates and creating dates,weekdays and month/year columns
#format date and create weekday,month and year col
pet_food_customer$date <- as.Date(pet_food_customer$order_payment_date)
pet_food_customer$weekday <- wday(pet_food_customer$date,label = TRUE, abbr = TRUE)
pet_food_customer$month <- format(as.Date(pet_food_customer$date), "%y %m")
Filtering wet pet food orders
#filter out
customer_wet_food <- filter(pet_food_customer,pet_food_customer$wet_food_order_number > 1 )
#omit NA rows
customer_wet_food <- drop_na(customer_wet_food)
A quick analytic review of the data set
summary(customer_wet_food)
## customer_id pet_id pet_order_number wet_food_order_number
## Min. :1.969e+15 Min. :3.456e+15 Min. : 2.000 Min. : 2.000
## 1st Qu.:4.747e+18 1st Qu.:4.477e+18 1st Qu.: 2.000 1st Qu.: 2.000
## Median :9.255e+18 Median :9.168e+18 Median : 4.000 Median : 3.000
## Mean :9.236e+18 Mean :9.168e+18 Mean : 4.406 Mean : 3.845
## 3rd Qu.:1.375e+19 3rd Qu.:1.372e+19 3rd Qu.: 5.000 3rd Qu.: 5.000
## Max. :1.844e+19 Max. :1.844e+19 Max. :20.000 Max. :20.000
##
## orders_since_first_wet_trays_order pet_has_active_subscription
## Min. : 2.000 Length:8607
## 1st Qu.: 2.000 Class :character
## Median : 3.000 Mode :character
## Mean : 3.912
## 3rd Qu.: 5.000
## Max. :20.000
##
## pet_food_tier pet_signup_datetime pet_allergen_list pet_fav_flavour_list
## Length:8607 Length:8607 Length:8607 Length:8607
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## pet_health_issue_list neutered gender pet_breed_size
## Length:8607 Length:8607 Length:8607 Length:8607
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## signup_promo ate_wet_food_pre_tails dry_food_brand_pre_tails
## Length:8607 Length:8607 Length:8607
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## pet_life_stage_at_order order_payment_date kibble_kcal
## Length:8607 Length:8607 Min. : 597.1
## Class :character Class :character 1st Qu.: 9672.0
## Mode :character Mode :character Median : 14897.0
## Mean : 17991.6
## 3rd Qu.: 23841.9
## Max. :133261.0
##
## wet_kcal total_order_kcal wet_trays wet_food_discount_percent
## Min. : 108.2 Min. : 1840 Min. : 1.00 Min. :0.000000
## 1st Qu.: 1910.4 1st Qu.: 12997 1st Qu.: 16.00 1st Qu.:0.000000
## Median : 3460.8 Median : 18577 Median : 21.00 Median :0.000000
## Mean : 3717.2 Mean : 22017 Mean : 23.26 Mean :0.005103
## 3rd Qu.: 3753.3 3rd Qu.: 28490 3rd Qu.: 31.00 3rd Qu.:0.000000
## Max. :50467.5 Max. :138231 Max. :225.00 Max. :1.999939
##
## wet_tray_size premium_treat_packs dental_treat_packs
## Length:8607 Min. : 0.0000 Min. : 0.0000
## Class :character 1st Qu.: 0.0000 1st Qu.: 0.0000
## Mode :character Median : 0.0000 Median : 0.0000
## Mean : 0.1599 Mean : 0.5063
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## Max. :23.0000 Max. :20.0000
##
## wet_food_textures_in_order total_web_sessions
## Length:8607 Min. : 0.00
## Class :character 1st Qu.: 4.00
## Mode :character Median : 8.00
## Mean : 11.41
## 3rd Qu.: 15.00
## Max. :119.00
##
## total_web_sessions_since_last_order total_minutes_on_website
## Min. : 0.000 Min. : 0.0
## 1st Qu.: 1.000 1st Qu.: 35.0
## Median : 2.000 Median : 160.0
## Mean : 2.774 Mean : 554.8
## 3rd Qu.: 4.000 3rd Qu.: 708.0
## Max. :28.000 Max. :23734.0
##
## total_minutes_on_website_since_last_order total_wet_food_updates
## Min. : 0.0 Min. :0.0000
## 1st Qu.: 0.0 1st Qu.:0.0000
## Median : 6.0 Median :0.0000
## Mean : 125.9 Mean :0.2216
## 3rd Qu.: 41.5 3rd Qu.:0.0000
## Max. :6446.0 Max. :9.0000
##
## total_wet_food_updates_since_last_order last_customer_support_ticket_date
## Min. :0.0000 Length:8607
## 1st Qu.:0.0000 Class :character
## Median :0.0000 Mode :character
## Mean :0.1508
## 3rd Qu.:0.0000
## Max. :9.0000
##
## customer_support_ticket_category date weekday
## Length:8607 Min. :2019-01-01 Sun:1938
## Class :character 1st Qu.:2019-10-28 Mon:1364
## Mode :character Median :2019-12-20 Tue:1694
## Mean :2019-12-07 Wed:1341
## 3rd Qu.:2020-02-09 Thu:1655
## Max. :2020-03-30 Fri: 477
## Sat: 138
## month
## Length:8607
## Class :character
## Mode :character
##
##
##
##
The highest month of wet pet food orders
customer_wet_food %>%
group_by(month) %>%
summarise(num_of_order = n()) %>%
arrange(desc(num_of_order))
## # A tibble: 15 x 2
## month num_of_order
## <chr> <int>
## 1 19 12 1383
## 2 20 01 1362
## 3 20 02 1317
## 4 20 03 1156
## 5 19 11 1068
## 6 19 10 817
## 7 19 09 543
## 8 19 08 303
## 9 19 07 217
## 10 19 06 142
## 11 19 05 97
## 12 19 04 76
## 13 19 03 63
## 14 19 02 35
## 15 19 01 28
The most popular food type tier
#food type
customer_wet_food %>%
group_by(pet_food_tier) %>%
summarise(num_of_order = n()) %>%
arrange(desc(num_of_order))
## # A tibble: 3 x 2
## pet_food_tier num_of_order
## <chr> <int>
## 1 superpremium 5242
## 2 mid 1876
## 3 premium 1489
How customers heard of the store and purchased wet pet food
customer_wet_food %>%
group_by(signup_promo) %>%
summarise(num = n()) %>%
arrange(desc(num))
## # A tibble: 12 x 2
## signup_promo num
## <chr> <int>
## 1 Null & Default 2191
## 2 Search Generic 1648
## 3 Digital Display 1109
## 4 Incompletes 959
## 5 Refer a Friend 864
## 6 Search Brand 772
## 7 Events 362
## 8 Affiliates 324
## 9 Inserts 141
## 10 Social Marketing 122
## 11 Other 114
## 12 Shopping Centres 1
List of popular food flavors
#pet food flavor
customer_wet_food %>%
group_by(pet_fav_flavour_list) %>%
summarise(num = n(),) %>%
filter(pet_fav_flavour_list != "") %>%
arrange(desc(num))
## # A tibble: 15 x 2
## pet_fav_flavour_list num
## <chr> <int>
## 1 Chicken 836
## 2 Beef Chicken Lamb 775
## 3 Beef Chicken Fish Lamb 422
## 4 Beef Chicken 380
## 5 Fish 222
## 6 Beef 214
## 7 Chicken Fish 208
## 8 Chicken Lamb 160
## 9 Lamb 132
## 10 Beef Chicken Fish 120
## 11 Chicken Fish Lamb 118
## 12 Beef Lamb 63
## 13 Beef Fish 56
## 14 Beef Fish Lamb 39
## 15 Fish Lamb 25
The common health issue in pets that bought wet pet food
#health issues
customer_wet_food %>%
group_by(pet_health_issue_list) %>%
filter(pet_health_issue_list != "") %>%
summarise(num = n()) %>%
arrange(desc(num))
## # A tibble: 14 x 2
## pet_health_issue_list num
## <chr> <int>
## 1 digestion 1541
## 2 skin and coat 717
## 3 joints 574
## 4 digestion, skin and coat 444
## 5 digestion, joints 331
## 6 joints, skin and coat 266
## 7 digestion, joints, skin and coat 246
## 8 pancreatitis 5
## 9 digestion, pancreatitis 4
## 10 digestion, pancreatitis, skin and coat 4
## 11 digestion, joints, pancreatitis, skin and coat 2
## 12 joints, pancreatitis, skin and coat 2
## 13 joints, pancreatitis 1
## 14 pancreatitis, skin and coat 1
The common allergies among pets that bought wet pet food
#pet allergies
customer_wet_food %>%
group_by(pet_allergen_list) %>%
filter(pet_allergen_list != "") %>%
summarise(num=n()) %>%
arrange(desc(num))
## # A tibble: 68 x 2
## pet_allergen_list num
## <chr> <int>
## 1 grain 196
## 2 beef dairy egg soya wheat 141
## 3 grain maize wheat 70
## 4 grain wheat 52
## 5 wheat 52
## 6 fish 41
## 7 beef dairy egg grain soya wheat 34
## 8 beef 31
## 9 beef dairy egg grain maize soya wheat 24
## 10 beef dairy egg fish soya wheat 20
## # … with 58 more rows
Gender of pets
customer_wet_food %>%
group_by(gender) %>%
summarise(num_of_order = n()) %>%
ggplot(aes(x=1,y=num_of_order,fill=gender))+geom_bar(color="Black",stat = "identity",width = 1) +
coord_polar(theta = "y")+
theme_void()+
scale_fill_brewer(palette = "Set1")
#weekday
customer_wet_food %>%
group_by(weekday, customer_id) %>%
ggplot(aes(x=weekday)) + geom_bar(color="Black",fill="#4B9CD3")+
labs(title = "Number of wet pet food orders per weekday in a year",x="Weekdays",y="Wet pet food orders")
#month/year
customer_wet_food %>%
group_by(month, customer_id) %>%
ggplot(aes(x=month,)) + geom_bar(color="Black",fill="#4B9CD3")+
theme(axis.text.x = element_text(angle = 45))+
scale_x_discrete(labels = c("Jan '19","Feb '20","Mar '19","Apr '19","May '19","Jun '19","Jul '19","Aug '19","Sep '19","Oct '19","Nov '19","Dec '19","Jan '20","Feb '20","Mar '20"))+
labs(title = "Number of wet pet food orders per month",x="Month-Year",y="Wet pet food orders")
The percentage of pet in genders
#how many reoccurring orders
# orders were mostly for small dogs
customer_wet_food %>%
group_by(pet_breed_size) %>%
summarise(num_of_order = n()) %>%
ggplot(aes(x=pet_breed_size,y=num_of_order)) + geom_bar(color="Black",stat="identity",fill="#4B9CD3")+
labs(title = "Pet breed size vs wet pet food orders", x="Pet breed size", y="Number of wet pet food orders")
The ages of pets
customer_wet_food %>%
group_by(pet_life_stage_at_order) %>%
summarise(num_of_order = n()) %>%
ggplot(aes(x=pet_life_stage_at_order,y=num_of_order)) + geom_bar(color="Black",stat="identity",fill="#4B9CD3")+
labs(title = "Pet life stage vs Wet pet food orders", x="Pet life stage", y="Number of wet pet food orders")