df <- read.csv('marketing_one.csv')
df$date_served <- mdy(df$date_served)
df$date_subscribed <- mdy(df$date_subscribed)
df$date_canceled <- mdy(df$date_canceled)
head(df,5)
## user_id date_served marketing_channel variant converted
## 1 a100000029 2018-01-01 House Ads personalization TRUE
## 2 a100000030 2018-01-01 House Ads personalization TRUE
## 3 a100000031 2018-01-01 House Ads personalization TRUE
## 4 a100000032 2018-01-01 House Ads personalization TRUE
## 5 a100000033 2018-01-01 House Ads personalization TRUE
## language_displayed language_preferred age_group date_subscribed
## 1 English English 0-18 years 2018-01-01
## 2 English English 19-24 years 2018-01-01
## 3 English English 24-30 years 2018-01-01
## 4 English English 30-36 years 2018-01-01
## 5 English English 36-45 years 2018-01-01
## date_canceled subscribing_channel is_retained
## 1 <NA> House Ads TRUE
## 2 <NA> House Ads TRUE
## 3 <NA> House Ads TRUE
## 4 <NA> House Ads TRUE
## 5 <NA> House Ads TRUE
## 'data.frame': 10037 obs. of 12 variables:
## $ user_id : chr "a100000029" "a100000030" "a100000031" "a100000032" ...
## $ date_served : Date, format: "2018-01-01" "2018-01-01" ...
## $ marketing_channel : chr "House Ads" "House Ads" "House Ads" "House Ads" ...
## $ variant : chr "personalization" "personalization" "personalization" "personalization" ...
## $ converted : logi TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ language_displayed : chr "English" "English" "English" "English" ...
## $ language_preferred : chr "English" "English" "English" "English" ...
## $ age_group : chr "0-18 years" "19-24 years" "24-30 years" "30-36 years" ...
## $ date_subscribed : Date, format: "2018-01-01" "2018-01-01" ...
## $ date_canceled : Date, format: NA NA ...
## $ subscribing_channel: chr "House Ads" "House Ads" "House Ads" "House Ads" ...
## $ is_retained : logi TRUE TRUE TRUE TRUE TRUE TRUE ...
factor = c("marketing_channel",
"subscribing_channel",
"variant",
"language_preferred",
"language_displayed")
df <- df %>%
mutate(marketing_channel = ifelse(marketing_channel == '', 'Organic', marketing_channel)) %>%
mutate(subscribing_channel = ifelse(subscribing_channel == '', 'Organic', subscribing_channel)) %>%
mutate_at(vars(factor), as.factor)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(factor)
##
## # Now:
## data %>% select(all_of(factor))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Jan summary
summary(df)
## user_id date_served marketing_channel
## Length:10037 Min. :2018-01-01 Email : 565
## Class :character 1st Qu.:2018-01-08 Facebook :1860
## Mode :character Median :2018-01-15 House Ads:4733
## Mean :2018-01-15 Instagram:1871
## 3rd Qu.:2018-01-22 Organic : 15
## Max. :2018-01-31 Push : 993
## NA's :16
## variant converted language_displayed language_preferred
## control :5091 Mode :logical Arabic : 27 Arabic : 145
## personalization:4946 FALSE:8946 English:9793 English:9275
## TRUE :1076 German : 81 German : 167
## NA's :15 Spanish: 136 Spanish: 450
##
##
##
## age_group date_subscribed date_canceled
## Length:10037 Min. :2018-01-01 Min. :2018-01-05
## Class :character 1st Qu.:2018-01-07 1st Qu.:2018-02-07
## Mode :character Median :2018-01-15 Median :2018-03-04
## Mean :2018-01-14 Mean :2018-03-03
## 3rd Qu.:2018-01-19 3rd Qu.:2018-04-01
## Max. :2018-01-31 Max. :2018-05-09
## NA's :8181 NA's :9460
## subscribing_channel is_retained
## Email : 290 Mode :logical
## Facebook : 445 FALSE:577
## House Ads: 354 TRUE :1279
## Instagram: 600 NA's :8181
## Organic :8181
## Push : 167
##
#Percentage of NA values in each row
round((colSums(is.na(df))/ nrow(df)),2)
## user_id date_served marketing_channel variant
## 0.00 0.00 0.00 0.00
## converted language_displayed language_preferred age_group
## 0.00 0.00 0.00 0.00
## date_subscribed date_canceled subscribing_channel is_retained
## 0.82 0.94 0.00 0.82
# Conversion Rate for January
converion_rate = round((1076/nrow(df)),2)
print(converion_rate)
## [1] 0.11
# Retention Rate for January
retention_rate = round((1279/nrow(df)),2)
print(retention_rate)
## [1] 0.13
# NA values dropped
jsr <- df %>% select(c(user_id,date_subscribed)) %>%
group_by(date_subscribed) %>%
summarise(user_id_count = n_distinct(user_id)) %>%
drop_na()
jsr_plot <- ggplot(jsr, aes(x=date_subscribed, y=user_id_count))
(jsr_plot + geom_line() +
ylab("Count") + xlab("Date of Subscription") + theme_bw() +
ggtitle("January Subcription Rate"))

age_sub <- df %>%
group_by(age_group, subscribing_channel) %>%
summarise(user_count = n()) %>%
drop_na()
## `summarise()` has grouped output by 'age_group'. You can override using the
## `.groups` argument.
age_mark <- df %>%
group_by(age_group, marketing_channel) %>%
summarise(user_count = n()) %>%
drop_na()
## `summarise()` has grouped output by 'age_group'. You can override using the
## `.groups` argument.
age_sub_plot <- ggplot(age_sub, aes(x=subscribing_channel, y= user_count, color = age_group))
p1 <- (age_sub_plot + geom_jitter(aes(color= age_group),position = position_dodge(width = .75))
+ xlab('Subscribing Channel') + ylab('Count') +
ggtitle('Customer Capture January') + theme(legend.position = "none") )
age_mark_plot <- ggplot(age_mark, aes(x=marketing_channel, y= user_count, color = age_group))
p2 <- (age_mark_plot + geom_jitter(aes(color= age_group),position = position_dodge(width = .75))
+ xlab('Marketing Channel') + ylab(''))
cowplot::plot_grid(p1, p2, label_size = 7)
