Setup

# load packages
library(tidyverse)
library(ggthemes)
theme_set(theme_minimal())
# read in data
orders <- read_csv("casestudy.csv")

Data Cleaning and Manipulation

# create output data and add year to output
year <- orders %>%
  select(year) %>%
  unique()
output <- tibble(year)
output$year <- as.integer(output$year)
# compute total revenue
total_revenue <- orders %>%
  group_by(year) %>%
  mutate(total_revenue = sum(net_revenue)) %>%
  select(total_revenue, year) %>%
  unique()
# add total revenue to output
output <- output %>%
  left_join(total_revenue, by = "year")
# add boolean column new_customer to orders
orders <- orders %>%
  group_by(customer_email) %>%
  mutate(new_customer = case_when(year == min(year) ~ TRUE, year > min(year) ~ FALSE))
# compute new customer revenue
new_customer_revenue <- orders %>%
  filter(new_customer == TRUE) %>%
  group_by(year) %>%
  mutate(new_customer_revenue = sum(net_revenue)) %>%
  select(new_customer_revenue, year) %>%
  unique()
# add new customer revenue to output
output <- output %>%
  left_join(new_customer_revenue, by = "year")
# compute existing customer growth and add to output
output <- output %>%
  arrange(year) %>%
  mutate(existing_customer_growth = (total_revenue - new_customer_revenue) - (lag(total_revenue) - lag(new_customer_revenue)))
# compute revenue lost from attrition and add to output
output <- output %>%
  arrange(year) %>%
  mutate(revenue_lost_attrition = lag(total_revenue) - (total_revenue - new_customer_revenue))
# compute existing customer revenue and add to output
output <- output %>%
  mutate(existing_customer_revenue = total_revenue - new_customer_revenue)
# compute existing customer revenue prior year and add to ouput
output <- output %>%
  arrange(year) %>%
  mutate(existing_customer_revenue_prior = lag(total_revenue) - lag(new_customer_revenue))
# compute total customers and total customers prior
total_customers <- orders %>%
  group_by(year) %>%
  mutate(total_customers = n()) %>%
  select(total_customers, year) %>%
  unique() %>%
  ungroup() %>%
  arrange(year) %>%
  mutate(total_customers_prior = lag(total_customers))
# add total customers and total customers prior to output
output <- output %>%
  left_join(total_customers, by = "year")
# compute new customers
new_customers <- orders %>%
  filter(new_customer == TRUE) %>%
  group_by(year) %>%
  mutate(new_customers = n()) %>%
  select(new_customers, year) %>%
  unique()
# add new customers to output
output <- output %>%
  left_join(new_customers, by = "year")
# compute lost customers and add to output
output <- output %>%
  arrange(year) %>%
  mutate(lost_customers = lag(total_customers) - (total_customers - new_customers))

Final Output

# final table of information
output

Visualizations and Observations

output %>%
  ggplot() +
  geom_line(aes(x = year, y = new_customer_revenue/new_customers, color = "New")) +
  geom_line(aes(x = year, y = (total_revenue - new_customer_revenue)/ (total_customers - new_customers), , color = "Existing")) +
  labs(x = "Year", y = "Average Revenue per Customer", title = "Average Revenue for New and Existing Customers") +
  scale_color_manual("", breaks = c("Existing", "New"), values = c("Existing" = "red", "New" = "blue"))

Existing customers tend to generate more revenue on average over the sample period.

output %>%
  ggplot() +
  geom_line(aes(x = year, y = new_customers, color = "New")) +
  geom_line(aes(x = year, y = lost_customers, color = "Lost")) +
  labs(x = "Year", y = "", title = "Number of Customers Gained and Lost per Year") +
  scale_color_manual("", breaks = c("New", "Lost"), values = c("Lost" = "blue", "New" = "orange"))

In 2016, more customers were lost than gained, but in 2017, more customers were gained.

output %>%
  ggplot() +
  geom_area(aes(x = year, y = new_customers / total_customers), fill = "green", alpha = .5) +
  labs(x = "Year", y = "", title = "Percent of New Customers by Year")

The percent of total customers that are new customers is somewhat constant around 66% after the first year.

LS0tCnRpdGxlOiAiQ2FzZSBTdHVkeSAjMiIKYXV0aG9yOiAiSmFzb24gV2hpdGVsYXciCmRhdGU6ICI0LzEwLzIwMjIiCm91dHB1dDogCiAgaHRtbF9kb2N1bWVudDoKICAgIGtlZXBfbWQ6IFRSVUUKICAgIHRvYzogVFJVRQogICAgdG9jX2Zsb2F0OiBUUlVFCiAgICBkZl9wcmludDogcGFnZWQKICAgIGNvZGVfZG93bmxvYWQ6IFRSVUUKLS0tCgojIyBTZXR1cAoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CiMgciBzZXR1cAprbml0cjo6b3B0c19jaHVuayRzZXQoZWNobyA9IFRSVUUsIGVycm9yPVRSVUUsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0UpCmBgYAoKYGBge3J9CiMgbG9hZCBwYWNrYWdlcwpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShnZ3RoZW1lcykKdGhlbWVfc2V0KHRoZW1lX21pbmltYWwoKSkKYGBgCgpgYGB7cn0KIyByZWFkIGluIGRhdGEKb3JkZXJzIDwtIHJlYWRfY3N2KCJjYXNlc3R1ZHkuY3N2IikKYGBgCgojIyBEYXRhIENsZWFuaW5nIGFuZCBNYW5pcHVsYXRpb24KCmBgYHtyfQojIGNyZWF0ZSBvdXRwdXQgZGF0YSBhbmQgYWRkIHllYXIgdG8gb3V0cHV0CnllYXIgPC0gb3JkZXJzICU+JQogIHNlbGVjdCh5ZWFyKSAlPiUKICB1bmlxdWUoKQpvdXRwdXQgPC0gdGliYmxlKHllYXIpCm91dHB1dCR5ZWFyIDwtIGFzLmludGVnZXIob3V0cHV0JHllYXIpCmBgYAoKYGBge3J9CiMgY29tcHV0ZSB0b3RhbCByZXZlbnVlCnRvdGFsX3JldmVudWUgPC0gb3JkZXJzICU+JQogIGdyb3VwX2J5KHllYXIpICU+JQogIG11dGF0ZSh0b3RhbF9yZXZlbnVlID0gc3VtKG5ldF9yZXZlbnVlKSkgJT4lCiAgc2VsZWN0KHRvdGFsX3JldmVudWUsIHllYXIpICU+JQogIHVuaXF1ZSgpCmBgYAoKYGBge3J9CiMgYWRkIHRvdGFsIHJldmVudWUgdG8gb3V0cHV0Cm91dHB1dCA8LSBvdXRwdXQgJT4lCiAgbGVmdF9qb2luKHRvdGFsX3JldmVudWUsIGJ5ID0gInllYXIiKQpgYGAKCmBgYHtyfQojIGFkZCBib29sZWFuIGNvbHVtbiBuZXdfY3VzdG9tZXIgdG8gb3JkZXJzCm9yZGVycyA8LSBvcmRlcnMgJT4lCiAgZ3JvdXBfYnkoY3VzdG9tZXJfZW1haWwpICU+JQogIG11dGF0ZShuZXdfY3VzdG9tZXIgPSBjYXNlX3doZW4oeWVhciA9PSBtaW4oeWVhcikgfiBUUlVFLCB5ZWFyID4gbWluKHllYXIpIH4gRkFMU0UpKQpgYGAKCmBgYHtyfQojIGNvbXB1dGUgbmV3IGN1c3RvbWVyIHJldmVudWUKbmV3X2N1c3RvbWVyX3JldmVudWUgPC0gb3JkZXJzICU+JQogIGZpbHRlcihuZXdfY3VzdG9tZXIgPT0gVFJVRSkgJT4lCiAgZ3JvdXBfYnkoeWVhcikgJT4lCiAgbXV0YXRlKG5ld19jdXN0b21lcl9yZXZlbnVlID0gc3VtKG5ldF9yZXZlbnVlKSkgJT4lCiAgc2VsZWN0KG5ld19jdXN0b21lcl9yZXZlbnVlLCB5ZWFyKSAlPiUKICB1bmlxdWUoKQpgYGAKCmBgYHtyfQojIGFkZCBuZXcgY3VzdG9tZXIgcmV2ZW51ZSB0byBvdXRwdXQKb3V0cHV0IDwtIG91dHB1dCAlPiUKICBsZWZ0X2pvaW4obmV3X2N1c3RvbWVyX3JldmVudWUsIGJ5ID0gInllYXIiKQpgYGAKCmBgYHtyfQojIGNvbXB1dGUgZXhpc3RpbmcgY3VzdG9tZXIgZ3Jvd3RoIGFuZCBhZGQgdG8gb3V0cHV0Cm91dHB1dCA8LSBvdXRwdXQgJT4lCiAgYXJyYW5nZSh5ZWFyKSAlPiUKICBtdXRhdGUoZXhpc3RpbmdfY3VzdG9tZXJfZ3Jvd3RoID0gKHRvdGFsX3JldmVudWUgLSBuZXdfY3VzdG9tZXJfcmV2ZW51ZSkgLSAobGFnKHRvdGFsX3JldmVudWUpIC0gbGFnKG5ld19jdXN0b21lcl9yZXZlbnVlKSkpCmBgYAoKYGBge3J9CiMgY29tcHV0ZSByZXZlbnVlIGxvc3QgZnJvbSBhdHRyaXRpb24gYW5kIGFkZCB0byBvdXRwdXQKb3V0cHV0IDwtIG91dHB1dCAlPiUKICBhcnJhbmdlKHllYXIpICU+JQogIG11dGF0ZShyZXZlbnVlX2xvc3RfYXR0cml0aW9uID0gbGFnKHRvdGFsX3JldmVudWUpIC0gKHRvdGFsX3JldmVudWUgLSBuZXdfY3VzdG9tZXJfcmV2ZW51ZSkpCmBgYAoKYGBge3J9CiMgY29tcHV0ZSBleGlzdGluZyBjdXN0b21lciByZXZlbnVlIGFuZCBhZGQgdG8gb3V0cHV0Cm91dHB1dCA8LSBvdXRwdXQgJT4lCiAgbXV0YXRlKGV4aXN0aW5nX2N1c3RvbWVyX3JldmVudWUgPSB0b3RhbF9yZXZlbnVlIC0gbmV3X2N1c3RvbWVyX3JldmVudWUpCmBgYAoKYGBge3J9CiMgY29tcHV0ZSBleGlzdGluZyBjdXN0b21lciByZXZlbnVlIHByaW9yIHllYXIgYW5kIGFkZCB0byBvdXB1dApvdXRwdXQgPC0gb3V0cHV0ICU+JQogIGFycmFuZ2UoeWVhcikgJT4lCiAgbXV0YXRlKGV4aXN0aW5nX2N1c3RvbWVyX3JldmVudWVfcHJpb3IgPSBsYWcodG90YWxfcmV2ZW51ZSkgLSBsYWcobmV3X2N1c3RvbWVyX3JldmVudWUpKQpgYGAKCmBgYHtyfQojIGNvbXB1dGUgdG90YWwgY3VzdG9tZXJzIGFuZCB0b3RhbCBjdXN0b21lcnMgcHJpb3IKdG90YWxfY3VzdG9tZXJzIDwtIG9yZGVycyAlPiUKICBncm91cF9ieSh5ZWFyKSAlPiUKICBtdXRhdGUodG90YWxfY3VzdG9tZXJzID0gbigpKSAlPiUKICBzZWxlY3QodG90YWxfY3VzdG9tZXJzLCB5ZWFyKSAlPiUKICB1bmlxdWUoKSAlPiUKICB1bmdyb3VwKCkgJT4lCiAgYXJyYW5nZSh5ZWFyKSAlPiUKICBtdXRhdGUodG90YWxfY3VzdG9tZXJzX3ByaW9yID0gbGFnKHRvdGFsX2N1c3RvbWVycykpCmBgYAoKYGBge3J9CiMgYWRkIHRvdGFsIGN1c3RvbWVycyBhbmQgdG90YWwgY3VzdG9tZXJzIHByaW9yIHRvIG91dHB1dApvdXRwdXQgPC0gb3V0cHV0ICU+JQogIGxlZnRfam9pbih0b3RhbF9jdXN0b21lcnMsIGJ5ID0gInllYXIiKQpgYGAKCmBgYHtyfQojIGNvbXB1dGUgbmV3IGN1c3RvbWVycwpuZXdfY3VzdG9tZXJzIDwtIG9yZGVycyAlPiUKICBmaWx0ZXIobmV3X2N1c3RvbWVyID09IFRSVUUpICU+JQogIGdyb3VwX2J5KHllYXIpICU+JQogIG11dGF0ZShuZXdfY3VzdG9tZXJzID0gbigpKSAlPiUKICBzZWxlY3QobmV3X2N1c3RvbWVycywgeWVhcikgJT4lCiAgdW5pcXVlKCkKYGBgCgpgYGB7cn0KIyBhZGQgbmV3IGN1c3RvbWVycyB0byBvdXRwdXQKb3V0cHV0IDwtIG91dHB1dCAlPiUKICBsZWZ0X2pvaW4obmV3X2N1c3RvbWVycywgYnkgPSAieWVhciIpCmBgYAoKYGBge3J9CiMgY29tcHV0ZSBsb3N0IGN1c3RvbWVycyBhbmQgYWRkIHRvIG91dHB1dApvdXRwdXQgPC0gb3V0cHV0ICU+JQogIGFycmFuZ2UoeWVhcikgJT4lCiAgbXV0YXRlKGxvc3RfY3VzdG9tZXJzID0gbGFnKHRvdGFsX2N1c3RvbWVycykgLSAodG90YWxfY3VzdG9tZXJzIC0gbmV3X2N1c3RvbWVycykpCmBgYAoKIyMgRmluYWwgT3V0cHV0CgpgYGB7cn0KIyBmaW5hbCB0YWJsZSBvZiBpbmZvcm1hdGlvbgpvdXRwdXQKYGBgCgojIyBWaXN1YWxpemF0aW9ucyBhbmQgT2JzZXJ2YXRpb25zCgpgYGB7cn0Kb3V0cHV0ICU+JQogIGdncGxvdCgpICsKICBnZW9tX2xpbmUoYWVzKHggPSB5ZWFyLCB5ID0gbmV3X2N1c3RvbWVyX3JldmVudWUvbmV3X2N1c3RvbWVycywgY29sb3IgPSAiTmV3IikpICsKICBnZW9tX2xpbmUoYWVzKHggPSB5ZWFyLCB5ID0gKHRvdGFsX3JldmVudWUgLSBuZXdfY3VzdG9tZXJfcmV2ZW51ZSkvICh0b3RhbF9jdXN0b21lcnMgLSBuZXdfY3VzdG9tZXJzKSwgLCBjb2xvciA9ICJFeGlzdGluZyIpKSArCiAgbGFicyh4ID0gIlllYXIiLCB5ID0gIkF2ZXJhZ2UgUmV2ZW51ZSBwZXIgQ3VzdG9tZXIiLCB0aXRsZSA9ICJBdmVyYWdlIFJldmVudWUgZm9yIE5ldyBhbmQgRXhpc3RpbmcgQ3VzdG9tZXJzIikgKwogIHNjYWxlX2NvbG9yX21hbnVhbCgiIiwgYnJlYWtzID0gYygiRXhpc3RpbmciLCAiTmV3IiksIHZhbHVlcyA9IGMoIkV4aXN0aW5nIiA9ICJyZWQiLCAiTmV3IiA9ICJibHVlIikpCmBgYAoKRXhpc3RpbmcgY3VzdG9tZXJzIHRlbmQgdG8gZ2VuZXJhdGUgbW9yZSByZXZlbnVlIG9uIGF2ZXJhZ2Ugb3ZlciB0aGUgc2FtcGxlIHBlcmlvZC4KCmBgYHtyfQpvdXRwdXQgJT4lCiAgZ2dwbG90KCkgKwogIGdlb21fbGluZShhZXMoeCA9IHllYXIsIHkgPSBuZXdfY3VzdG9tZXJzLCBjb2xvciA9ICJOZXciKSkgKwogIGdlb21fbGluZShhZXMoeCA9IHllYXIsIHkgPSBsb3N0X2N1c3RvbWVycywgY29sb3IgPSAiTG9zdCIpKSArCiAgbGFicyh4ID0gIlllYXIiLCB5ID0gIiIsIHRpdGxlID0gIk51bWJlciBvZiBDdXN0b21lcnMgR2FpbmVkIGFuZCBMb3N0IHBlciBZZWFyIikgKwogIHNjYWxlX2NvbG9yX21hbnVhbCgiIiwgYnJlYWtzID0gYygiTmV3IiwgIkxvc3QiKSwgdmFsdWVzID0gYygiTG9zdCIgPSAiYmx1ZSIsICJOZXciID0gIm9yYW5nZSIpKQpgYGAKCkluIDIwMTYsIG1vcmUgY3VzdG9tZXJzIHdlcmUgbG9zdCB0aGFuIGdhaW5lZCwgYnV0IGluIDIwMTcsIG1vcmUgY3VzdG9tZXJzIHdlcmUgZ2FpbmVkLgoKYGBge3J9Cm91dHB1dCAlPiUKICBnZ3Bsb3QoKSArCiAgZ2VvbV9hcmVhKGFlcyh4ID0geWVhciwgeSA9IG5ld19jdXN0b21lcnMgLyB0b3RhbF9jdXN0b21lcnMpLCBmaWxsID0gImdyZWVuIiwgYWxwaGEgPSAuNSkgKwogIGxhYnMoeCA9ICJZZWFyIiwgeSA9ICIiLCB0aXRsZSA9ICJQZXJjZW50IG9mIE5ldyBDdXN0b21lcnMgYnkgWWVhciIpCmBgYAoKVGhlIHBlcmNlbnQgb2YgdG90YWwgY3VzdG9tZXJzIHRoYXQgYXJlIG5ldyBjdXN0b21lcnMgaXMgc29tZXdoYXQgY29uc3RhbnQgYXJvdW5kIDY2JSBhZnRlciB0aGUgZmlyc3QgeWVhci4K