# Packages used by the analysis below: dplyr provides `%>%`, group_by() and
# summarise(); ggplot2 provides ggplot() and the geom_*() layers.
library(dplyr)
library(ggplot2)

# Show the working directory, since "tickets.csv" is given as a relative path.
getwd()
# Load the ticket data; the first line of the file holds the column names.
tickets <- read.csv("tickets.csv", header = TRUE)

# Analysis of Daily Revenues ----

# Daily analysis: total amount paid per citation issue date.
DailyTotals <- tickets %>%
  group_by(Citation.Issue.Date) %>%
  summarise(Total = sum(Amount.Paid))

head(DailyTotals)
## # A tibble: 6 × 2
##   Citation.Issue.Date Total
##                <fctr> <int>
## 1          2009-01-11   313
## 2          2009-01-19    97
## 3          2009-02-01    39
## 4          2009-02-04   114
## 5          2009-02-10    89
## 6          2009-02-15     3
# Centre of the daily totals: mean and median (also drawn on the plot below).
AverageDay <- mean(DailyTotals$Total)
AverageDay
## [1] 884.5944
MedianDay <- median(DailyTotals$Total)
MedianDay
## [1] 619
# Bare vector of the daily totals for the base-R statistics and plots below.
Daily <- DailyTotals$Total

# Spread of the daily totals: variance, standard deviation, selected quantiles.
var(Daily)
## [1] 799183.9
sd(Daily)
## [1] 893.9708
quantile(Daily, c(.1, .3, .5, .7, .9))
##    10%    30%    50%    70%    90% 
##   68.9  278.0  619.0 1100.0 2120.8
# Shapiro-Wilk normality test on the daily totals.
shapiro.test(Daily)
## 
##  Shapiro-Wilk normality test
## 
## data:  Daily
## W = 0.83609, p-value < 2.2e-16
# EDA graphs for the daily totals
# Normal Q-Q plot with its reference line.
qqnorm(Daily)
qqline(Daily)

# Density-scaled histogram of the daily totals with a kernel density overlay.
# The blue long-dashed line marks the mean, the green two-dashed line the
# median. `bins = 30` is the value stat_bin() was already falling back to; it
# is stated explicitly so the "Pick better value with `binwidth`" message is
# no longer emitted and the binning choice is visible, as in the weekly and
# monthly histograms.
ggplot(DailyTotals, aes(x = Total)) +
  geom_histogram(
    aes(y = ..density..),
    bins = 30,
    colour = "black",
    fill = "pink"
  ) +
  geom_density(alpha = .5, fill = "red") +
  geom_vline(xintercept = AverageDay, linetype = "longdash", colour = "blue") +
  geom_vline(xintercept = MedianDay, linetype = "twodash", colour = "green")

# Analysis of Weekly Revenues ----

# Tag every ticket with a week-of-year label derived from its issue date.
tickets$Citation.Issue.Date <- as.Date(tickets$Citation.Issue.Date)
dateRange <- c(tickets$Citation.Issue.Date)
x <- as.POSIXlt(dateRange)
# "%W" yields the week of the year as a two-digit string, e.g. "00", "01".
tickets$Citation.Issue.Week <- strftime(x, format = "%W")

# Weekly analysis: total amount paid per (week, month, year) combination,
# later compared against the mean of those totals.
# NOTE(review): because the month is part of the grouping key, a week whose
# tickets fall in two different months produces two rows, and "%W" restarts at
# every new year, so some rows presumably cover only part of a calendar week.
# Confirm this is intended before reading the statistics below as
# "per full week".
WeeklyTotals <- tickets %>%
  group_by(Citation.Issue.Week, Citation.Issue.Month, Citation.Issue.Year) %>%
  summarise(Total = sum(Amount.Paid))

head(WeeklyTotals)
## Source: local data frame [6 x 4]
## Groups: Citation.Issue.Week, Citation.Issue.Month [2]
## 
##   Citation.Issue.Week Citation.Issue.Month Citation.Issue.Year Total
##                 <chr>               <fctr>               <int> <int>
## 1                  00                  Jan                2010  1482
## 2                  00                  Jan                2011   247
## 3                  00                  Jan                2012    92
## 4                  00                  Jan                2013  3466
## 5                  00                  Jan                2014   789
## 6                  01                  Jan                2009   313
# Bare vector of the weekly totals; every statistic below is computed from it.
Weekly <- WeeklyTotals$Total
# Centre of the weekly totals: mean and median (also drawn on the plot below).
MeanWeek <- mean(Weekly)
MeanWeek
## [1] 4059.011
MedianWeek <- median(Weekly)
MedianWeek
## [1] 3512

# Spread of the weekly totals: variance, standard deviation, selected quantiles.
var(Weekly)
## [1] 9399593
sd(Weekly)
## [1] 3065.876
quantile(Weekly, probs = c(.1, .3, .5, .7, .9))
##    10%    30%    50%    70%    90% 
##  395.4 1888.4 3512.0 5428.0 8323.4
# Shapiro-Wilk normality test on the weekly totals.
shapiro.test(Weekly)
## 
##  Shapiro-Wilk normality test
## 
## data:  Weekly
## W = 0.93628, p-value = 1.966e-11
# Normal Q-Q plot of the weekly totals with its reference line.
qqnorm(Weekly)
qqline(Weekly)

# Density-scaled histogram (bin width 500) with a kernel density overlay.
# The blue long-dashed line marks the mean, the green two-dashed line the
# median.
ggplot(WeeklyTotals, aes(x = Total)) +
  geom_histogram(
    aes(y = ..density..),
    fill = "pink",
    colour = "black",
    binwidth = 500
  ) +
  geom_density(alpha = .5, fill = "red") +
  geom_vline(xintercept = MeanWeek, linetype = "longdash", colour = "blue") +
  geom_vline(xintercept = MedianWeek, linetype = "twodash", colour = "green")

# Keep the standard deviation of the weekly totals under its own name.
sd.Week <- sd(Weekly)
sd.Week
## [1] 3065.876

# Analysis of Monthly Revenues ----

# Monthly analysis: total amount paid per (month, year) combination.
MonthlyTotals <- tickets %>%
  group_by(Citation.Issue.Month, Citation.Issue.Year) %>%
  summarise(Total = sum(Amount.Paid))

head(MonthlyTotals)
## Source: local data frame [6 x 3]
## Groups: Citation.Issue.Month [1]
## 
##   Citation.Issue.Month Citation.Issue.Year Total
##                 <fctr>               <int> <int>
## 1                  Apr                2009 10409
## 2                  Apr                2010   812
## 3                  Apr                2011 23206
## 4                  Apr                2012 26792
## 5                  Apr                2013 31983
## 6                  Apr                2014 31477
# Summary statistics of the monthly revenue.
# Bare vector of the monthly totals; every statistic below is computed from it.
Monthly <- MonthlyTotals$Total
# Centre of the monthly totals: mean and median (also drawn on the plot below).
AverageMonth <- mean(Monthly)
AverageMonth
## [1] 20689.68
MedianMonth <- median(Monthly)
MedianMonth
## [1] 20284.5

# Spread of the monthly totals: variance, standard deviation, selected
# quantiles.
var(Monthly)
## [1] 117427178
sd(Monthly)
## [1] 10836.38
quantile(Monthly, probs = c(.1, .3, .5, .7, .9))
##     10%     30%     50%     70%     90% 
##  9058.8 13586.1 20284.5 26730.8 32311.2
# Shapiro-Wilk normality test on the monthly totals.
shapiro.test(Monthly)
## 
##  Shapiro-Wilk normality test
## 
## data:  Monthly
## W = 0.97677, p-value = 0.2037
# EDA graphs for the monthly totals
# Normal Q-Q plot with its reference line.
qqnorm(Monthly)
qqline(Monthly)

# Density-scaled histogram (bin width 2000) with a kernel density overlay.
# The blue long-dashed line marks the mean, the green two-dashed line the
# median.
ggplot(MonthlyTotals, aes(x = Total)) +
  geom_histogram(
    aes(y = ..density..),
    fill = "pink",
    colour = "black",
    binwidth = 2000
  ) +
  geom_density(alpha = .5, fill = "red") +
  geom_vline(xintercept = AverageMonth, linetype = "longdash", colour = "blue") +
  geom_vline(xintercept = MedianMonth, linetype = "twodash", colour = "green")