# Print the working directory: the relative path "tickets.csv" below is
# resolved against it.
getwd()
# Load the ticket data; the first row of the file supplies the column names.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
tickets <- read.csv("tickets.csv", header = TRUE)
Analysis of Daily Revenues
# Daily analysis ----
# Collapse the tickets to one row per Citation.Issue.Date, where Total is the
# sum of Amount.Paid over every ticket issued on that date.
DailyTotals <- tickets %>%
  group_by(Citation.Issue.Date) %>%
  summarise(Total = sum(Amount.Paid))
head(DailyTotals)
## # A tibble: 6 × 2
## Citation.Issue.Date Total
## <fctr> <int>
## 1 2009-01-11 313
## 2 2009-01-19 97
## 3 2009-02-01 39
## 4 2009-02-04 114
## 5 2009-02-10 89
## 6 2009-02-15 3

# Plain vector of the daily totals, reused by every statistic below.
Daily <- DailyTotals$Total

# Centre of the distribution: mean and median of the daily totals.
AverageDay <- mean(Daily)
AverageDay
## [1] 884.5944
MedianDay <- median(Daily)
MedianDay
## [1] 619

# Spread: variance and standard deviation.
var(Daily)
## [1] 799183.9
sd(Daily)
## [1] 893.9708

# 10th, 30th, 50th, 70th and 90th percentiles.
quantile(Daily, probs = c(0.1, 0.3, 0.5, 0.7, 0.9))
## 10% 30% 50% 70% 90%
## 68.9 278.0 619.0 1100.0 2120.8

# Shapiro-Wilk test of normality; the recorded p-value (< 2.2e-16) rejects
# the hypothesis that the daily totals are normally distributed.
shapiro.test(Daily)
##
## Shapiro-Wilk normality test
##
## data: Daily
## W = 0.83609, p-value < 2.2e-16
# EDA graphs for the daily totals ----
# Normal Q-Q plot of the daily totals together with its reference line.
qqnorm(Daily)
qqline(Daily)

# Density-scaled histogram of the daily totals with a kernel density overlay.
# The blue long-dashed line marks the mean, the green two-dashed line the
# median. No binwidth is given, so stat_bin() uses its default of 30 bins and
# emits the message recorded below.
ggplot(DailyTotals, aes(x = Total)) +
  geom_histogram(aes(y = ..density..), colour = "black", fill = "pink") +
  geom_density(alpha = 0.5, fill = "red") +
  geom_vline(xintercept = AverageDay, linetype = "longdash", colour = "blue") +
  geom_vline(xintercept = MedianDay, linetype = "twodash", colour = "green")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Analysis of Weekly Revenues
# Create the week variable ----
# Convert the issue date column to Date so it can be formatted.
tickets$Citation.Issue.Date <- as.Date(tickets$Citation.Issue.Date)
# Keep a standalone copy of the dates and its broken-down-time (POSIXlt) form.
dateRange <- tickets$Citation.Issue.Date
x <- as.POSIXlt(dateRange)
# "%W" is the week of the year as a two-digit string ("00"-"53"), with Monday
# as the first day of the week. The label carries no year, so it only
# identifies a week when combined with the year.
tickets$Citation.Issue.Week <- strftime(x, format = "%W")
# Weekly analysis ----
# Total Amount.Paid per (week, month, year) combination, followed by summary
# statistics of those totals.
# NOTE(review): because the month is part of the grouping key, a week that
# straddles a month boundary yields two rows (one per month) rather than one
# row for the whole week -- confirm this is intended before comparing these
# figures with true calendar-week revenue.
WeeklyTotals <- tickets %>%
  group_by(Citation.Issue.Week, Citation.Issue.Month, Citation.Issue.Year) %>%
  summarise(Total = sum(Amount.Paid))
head(WeeklyTotals)
## Source: local data frame [6 x 4]
## Groups: Citation.Issue.Week, Citation.Issue.Month [2]
##
## Citation.Issue.Week Citation.Issue.Month Citation.Issue.Year Total
## <chr> <fctr> <int> <int>
## 1 00 Jan 2010 1482
## 2 00 Jan 2011 247
## 3 00 Jan 2012 92
## 4 00 Jan 2013 3466
## 5 00 Jan 2014 789
## 6 01 Jan 2009 313

# Plain vector of the weekly totals, reused by every statistic below.
Weekly <- WeeklyTotals$Total

# Centre of the distribution: mean and median of the weekly totals.
MeanWeek <- mean(Weekly)
MeanWeek
## [1] 4059.011
MedianWeek <- median(Weekly)
MedianWeek
## [1] 3512

# Spread: variance and standard deviation.
var(Weekly)
## [1] 9399593
sd(Weekly)
## [1] 3065.876

# 10th, 30th, 50th, 70th and 90th percentiles.
quantile(Weekly, probs = c(0.1, 0.3, 0.5, 0.7, 0.9))
## 10% 30% 50% 70% 90%
## 395.4 1888.4 3512.0 5428.0 8323.4

# Shapiro-Wilk test of normality; the recorded p-value (1.966e-11) rejects
# the hypothesis that the weekly totals are normally distributed.
shapiro.test(Weekly)
##
## Shapiro-Wilk normality test
##
## data: Weekly
## W = 0.93628, p-value = 1.966e-11
# EDA graphs for the weekly totals ----
# Normal Q-Q plot of the weekly totals together with its reference line.
qqnorm(Weekly)
qqline(Weekly)

# Density-scaled histogram (bins 500 wide) with a kernel density overlay.
# The blue long-dashed line marks the mean, the green two-dashed line the
# median.
ggplot(WeeklyTotals, aes(x = Total)) +
  geom_histogram(
    aes(y = ..density..),
    fill = "pink",
    colour = "black",
    binwidth = 500
  ) +
  geom_density(alpha = 0.5, fill = "red") +
  geom_vline(xintercept = MeanWeek, linetype = "longdash", colour = "blue") +
  geom_vline(xintercept = MedianWeek, linetype = "twodash", colour = "green")

# Keep the standard deviation of the weekly totals under its own name.
sd.Week <- sd(WeeklyTotals[["Total"]])
sd.Week
## [1] 3065.876
Analysis of Monthly Revenues
# Monthly analysis ----
# Total Amount.Paid for each (month, year) combination.
MonthlyTotals <- tickets %>%
  group_by(Citation.Issue.Month, Citation.Issue.Year) %>%
  summarise(Total = sum(Amount.Paid))
head(MonthlyTotals)
## Source: local data frame [6 x 3]
## Groups: Citation.Issue.Month [1]
##
## Citation.Issue.Month Citation.Issue.Year Total
## <fctr> <int> <int>
## 1 Apr 2009 10409
## 2 Apr 2010 812
## 3 Apr 2011 23206
## 4 Apr 2012 26792
## 5 Apr 2013 31983
## 6 Apr 2014 31477

# Summary statistics of monthly revenue ----
# Plain vector of the monthly totals, reused by every statistic below.
Monthly <- MonthlyTotals$Total

# Centre of the distribution: mean and median of the monthly totals.
AverageMonth <- mean(Monthly)
AverageMonth
## [1] 20689.68
MedianMonth <- median(Monthly)
MedianMonth
## [1] 20284.5

# Spread: variance and standard deviation.
var(Monthly)
## [1] 117427178
sd(Monthly)
## [1] 10836.38

# 10th, 30th, 50th, 70th and 90th percentiles.
quantile(Monthly, probs = c(0.1, 0.3, 0.5, 0.7, 0.9))
## 10% 30% 50% 70% 90%
## 9058.8 13586.1 20284.5 26730.8 32311.2

# Shapiro-Wilk test of normality; the recorded p-value (0.2037) does not
# reject normality at the usual 5% level.
shapiro.test(Monthly)
##
## Shapiro-Wilk normality test
##
## data: Monthly
## W = 0.97677, p-value = 0.2037
# EDA graphs for the monthly totals ----
# Normal Q-Q plot of the monthly totals together with its reference line.
qqnorm(Monthly)
qqline(Monthly)

# Density-scaled histogram (bins 2000 wide) with a kernel density overlay.
# The blue long-dashed line marks the mean, the green two-dashed line the
# median.
ggplot(MonthlyTotals, aes(x = Total)) +
  geom_histogram(
    aes(y = ..density..),
    fill = "pink",
    colour = "black",
    binwidth = 2000
  ) +
  geom_density(alpha = 0.5, fill = "red") +
  geom_vline(xintercept = AverageMonth, linetype = "longdash", colour = "blue") +
  geom_vline(xintercept = MedianMonth, linetype = "twodash", colour = "green")
