Loading packages. Importing and cleaning data.
#loading packages
#########################################################################
library(tidyverse)
library(viridis)
library(extrafont)
library(lubridate)
library(dygraphs)
library(xts)
#########################################################################
#importing data
#########################################################################
#Historic Yipit Estimates in tabular form
YipitHistoricData <- read.csv("C:\\Users\\NathanielDeacon\\Desktop\\HistoricDataTidy.csv")
#Q4 2013 Raw Data
Q4Data <- read.csv("C:\\Users\\NathanielDeacon\\Desktop\\Q4Data.csv")
#Groupon Historic Earnings data, from the Morgan Stanley Report
GrouponHistData <- read.csv("C:\\Users\\NathanielDeacon\\Desktop\\MS_GrouponHistoricData.csv", skip = 2)
#########################################################################
#cleaning data
#########################################################################
#column names
colnames(Q4Data)[1] <- "Deal.ID"
colnames(YipitHistoricData)[1] <- "month.beginning"
#convert to date types
YipitHistoricData$month.beginning <- as.Date(YipitHistoricData$month.beginning, "%m/%d/%Y")
Q4Data$Start.Date <- as.Date(Q4Data$Start.Date, "%m/%d/%Y")
#Change Billings column in Q4Data to num, as it is recognized by R as a character currently
Q4Data$Billings <- gsub(",", "", Q4Data$Billings) #remove commas to avoid NAs being introduced by coercion
Q4Data$Billings <- as.numeric(Q4Data$Billings)
#Create column for historic billings in dollars instead of millions of dollars
YipitHistoricData$billings <- YipitHistoricData$billings.millions*1000000
#add quarter + year column to historic data table
YipitHistoricData$qtryr <- quarter(YipitHistoricData$month.beginning, with_year = TRUE, fiscal_start = 1)
#########################################################################
#aggregate the Yipit historic data by quarter
YipitQuarterlyHistData <- YipitHistoricData %>%
group_by(qtryr, segment) %>%
summarize(billings = sum(billings))
## `summarise()` has grouped output by 'qtryr'. You can override using the `.groups` argument.
#combine dfs for groupon reported and Yipit estimated historic data
YipitQuarterlyHistData$billings.Millions.Yipit <- YipitQuarterlyHistData$billings/1000000
YipitVGroupon <- left_join(GrouponHistData, YipitQuarterlyHistData)
## Joining, by = c("qtryr", "segment")
#scatter plot
ggplot(data = YipitVGroupon, aes(x = billings.Millions.Yipit, y = billings.millions, color = segment)) +
geom_point()+
geom_smooth(method = lm) +
labs(x = "Yipit Estimated Billings ($Millions)", y = "Actual Reported Billings ($Millions)", color = "Segment",
title = "Exhibit 1: Yipit Estimated vs. Reported Actual Gross Billings, Q3'12-Q3'13") +
theme(text=element_text(size=12, family="Arial"), panel.grid.major = element_line(color = "#31577d", size = (0.5)))
## `geom_smooth()` using formula 'y ~ x'
#get intercepts and coefficients
lm(formula = billings.millions ~ billings.Millions.Yipit, data = filter(YipitVGroupon, segment == "Goods"))
##
## Call:
## lm(formula = billings.millions ~ billings.Millions.Yipit, data = filter(YipitVGroupon,
## segment == "Goods"))
##
## Coefficients:
## (Intercept) billings.Millions.Yipit
## 65.9998 0.7193
lm(formula = billings.millions ~ billings.Millions.Yipit, data = filter(YipitVGroupon, segment == "Local"))
##
## Call:
## lm(formula = billings.millions ~ billings.Millions.Yipit, data = filter(YipitVGroupon,
## segment == "Local"))
##
## Coefficients:
## (Intercept) billings.Millions.Yipit
## -126.67 1.25
lm(formula = billings.millions ~ billings.Millions.Yipit, data = filter(YipitVGroupon, segment == "Travel"))
##
## Call:
## lm(formula = billings.millions ~ billings.Millions.Yipit, data = filter(YipitVGroupon,
## segment == "Travel"))
##
## Coefficients:
## (Intercept) billings.Millions.Yipit
## -6.047 1.131
#filter to local data only
LocalData <- filter(Q4Data, Segment == "Local")
#group billings by deal start date,
LocalData <- LocalData %>%
group_by(Start.Date) %>%
summarise(sum(Billings))
colnames(LocalData) [2] <- "Billings"
#create time series
localts <- xts(x = LocalData$Billings, order.by = LocalData$Start.Date)
#plot
billingschart <- dygraph(localts, main = "Exhibit 2 Zoomed: Gross Billings by Deal Start Date, Local Segment",
xlab= "Deal Start Date", ylab = "Gross Billings ($millions)") %>%
dyOptions(labelsUTC = TRUE, fillGraph=TRUE, fillAlpha=0.1, drawGrid = TRUE, colors="#31577d")%>%
dyRangeSelector() %>%
dyCrosshair(direction = "vertical")
billingschart
#10 weekdays before and after outage
Weekdays <- c(as.Date(as.Date("2013-10-07"):as.Date("2013-10-11")), as.Date(as.Date("2013-10-14"):as.Date("2013-10-18")),
as.Date(as.Date("2013-10-31"):as.Date("2013-11-01")), as.Date(as.Date("2013-11-04"):as.Date("2013-11-08")),
as.Date(as.Date("2013-11-11"):as.Date("2013-11-13")))
#4 weekend days before and after outage
Weekenddays <- c(as.Date("2013-10-19"), as.Date("2013-10-13"), as.Date("2013-10-12"),
as.Date("2013-10-06"), as.Date("2013-11-02"), as.Date("2013-11-03"),
as.Date("2013-11-09"), as.Date("2013-11-10"))
#filter data to seleced days and local segment
WeekdaysAvg <- Q4Data %>%
filter(Start.Date %in% Weekdays, Segment == "Local")
WeekenddaysAvg <- Q4Data %>%
filter(Start.Date %in% Weekenddays, Segment == "Local")
#take average of weekend and weekdays and multiply by the number of week and weekend
#days from 10/20 - 10/30 of 2013
WeekdayBillings <- (sum(WeekdaysAvg$Billings)/20)*8
WeekenddayBillings <- (sum(WeekenddaysAvg$Billings)/4)*3
#add missed billings from weekends and weekdays
EstMissedBillings <- WeekdayBillings+WeekenddayBillings
#Total recorded local billings
TotalRecordedBillings <- sum(Q4Data[Q4Data$Segment == "Local",]$Billings)
#make table
LocalTable <- data.frame(EstMissedBillings, TotalRecordedBillings, EstMissedBillings+TotalRecordedBillings)
colnames(LocalTable) = c("Estimnated Missed Billings", "Recored Billings", "Estimated Total Billings")
LocalTable
## Estimnated Missed Billings Recored Billings Estimated Total Billings
## 1 41810134 409222658 451032792
BySeg <- data.frame(EstMissedBillings+TotalRecordedBillings, sum(Q4Data[Q4Data$Segment == "Goods",]$Billings),
sum(Q4Data[Q4Data$Segment == "Travel",]$Billings))
colnames(BySeg) = c("Local Billings", "Goods Billings", "Travel Billings")
BySeg
## Local Billings Goods Billings Travel Billings
## 1 451032792 282245671 70552062