library(readxl)
table1<-read_excel("Data_2019.xlsx", sheet = 1)
head(table1)
str(table1)
## tibble [1,000 Ă— 6] (S3: tbl_df/tbl/data.frame)
## $ CustomerID : num [1:1000] 1 2 3 4 5 6 7 8 9 10 ...
## $ FreeAccountCreatedDate: POSIXct[1:1000], format: "2017-05-29" "2017-12-17" ...
## $ LastSeen : POSIXct[1:1000], format: "2019-04-05 08:02:32" "2019-04-27 13:23:56" ...
## $ SessionCount : num [1:1000] 1379 531 317 349 1158 ...
## $ Segment : chr [1:1000] "Marketing Segment 2" "Marketing Segment 1" "Marketing Segment 1" "Marketing Segment 1" ...
## $ BillingAccount : chr [1:1000] "AX#93195" "AX#20438" "AX#60779" "AX#91209" ...
table2<-read_excel("Data_2019.xlsx", sheet = 2)
head(table2,5)
str(table2)
## tibble [1,000 Ă— 2] (S3: tbl_df/tbl/data.frame)
## $ CustomerID: num [1:1000] 1 2 3 4 5 6 7 8 9 10 ...
## $ FirstName : chr [1:1000] "Joshua" "Ka" "Clint" "Hong" ...
table3<-read_excel("Data_2019.xlsx", sheet = 3)
head(table3,5)
str(table3)
## tibble [1,000 Ă— 5] (S3: tbl_df/tbl/data.frame)
## $ BillingAccount : chr [1:1000] "AX#10042" "AX#10114" "AX#10259" "AX#10695" ...
## $ AccountStatus : chr [1:1000] "paying" "paying" "paying" "paying" ...
## $ UpgradedAccountCreatedDate : POSIXct[1:1000], format: "2019-03-20" "2019-01-17" ...
## $ UpgradedAccountCancelledDate: POSIXct[1:1000], format: NA NA ...
## $ TotalCharges : num [1:1000] 333 451 1887 798 189 ...
table4<-read_excel("Data_2019.xlsx", sheet = 4)
head(table4,5)
str(table4)
## tibble [3 Ă— 2] (S3: tbl_df/tbl/data.frame)
## $ Release Date : POSIXct[1:3], format: "2017-06-28" "2018-11-13" ...
## $ Release Description: chr [1:3] "V2 released" "Product pricing changed" "V3 released"
#Which of the existing marketing segments is the most effective?
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
table1$last_seen=as.Date(table1$LastSeen)
table3$UpgradedAccount_CreatedDate <- as.Date(table3$UpgradedAccountCreatedDate)
s<- sqldf("select a.CustomerID,
b.FirstName,
a.FreeAccountCreatedDate,
c.UpgradedAccountCreatedDate,
a.last_seen,
a.last_seen - c.UpgradedAccount_CreatedDate as days,
a.SessionCount,
a.Segment,
a.BillingAccount,
c.TotalCharges
from table1 a
left join table2 b on a.CustomerID = b.CustomerID
left join table3 c on a.BillingAccount = c.BillingAccount
")
head(s,10)
library(ggplot2)
#Cleanup code for the Graphs and Plots
cleanup = theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line.x = element_line(color = 'black'),
axis.line.y = element_line(color = 'black'),
legend.key = element_rect(fill = 'white'),
text = element_text(size = 15))
x_mid <- mean(c(max(s$TotalCharges, na.rm = TRUE),
min(s$TotalCharges, na.rm = TRUE)))
y_mid <- mean(c(max(s$SessionCount, na.rm = TRUE),
min(s$SessionCount, na.rm = TRUE)))
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
s %>%
group_by(Segment)%>%
summarise(TotalCharges=sum(TotalCharges), totalsession=sum(SessionCount))%>%
ggplot(aes(TotalCharges, totalsession)) + geom_point(aes(colour = Segment, size=3)) +
geom_vline(xintercept = x_mid)+ geom_hline(yintercept = y_mid)+
guides(alpha=F,size=F) +
labs(title = "Session vs TotalCharges by Segment ") + labs(x = "Total Charges", y = "Total Sessions")+ cleanup
## `summarise()` ungrouping output (override with `.groups` argument)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
v<-s %>%
group_by(Segment)%>%
summarise(TotalCharges=sum(TotalCharges), totalsession=sum(SessionCount))%>%
ggplot(aes(TotalCharges, totalsession)) + geom_point(aes(colour = Segment, size=3)) +
geom_vline(xintercept = x_mid)+ geom_hline(yintercept = y_mid)+
geom_text(aes(label =round(TotalCharges/totalsession,2)),vjust = -0.5)+
guides(alpha=F,size=F) +
labs(title = "Session vs TotalCharges by Segment ") + labs(x = "Total Charges", y = "Total Sessions")+ cleanup
## `summarise()` ungrouping output (override with `.groups` argument)
ggplotly(v)
#From the analysis Market Segment 3 is the most effective. This segment is bringing in more money(revenue) and even though the usage is less, the per session charge (4.14) is also higher than the other two marketing segments.
#The team is considering running a test campaign on Facebook ads. How should they decide if it’s a success?
#The team should do A/B testing of the two different versions of ads in the Facebook campaign to see which one performed better. The A?b testing sample should be as such that it gives statistically significant results. In R usually its done by using Power Analysis (Power Mediation Library).
#Each of the campaign URLs should be utm tagged so that further analysis could be done on how many sessions each campaign is generating and how many conversions we are getting from each.
#Other Recommendations
#If given more time I would like to understand the LTV of each of the market segments to know the possible revenue each segment will generate for the business.