# Show the working directory so it is clear where "tickets.csv" is read from.
getwd()

# Load the citation data. read.csv() already returns a data.frame, so no
# data.frame() wrapper is needed. stringsAsFactors is set explicitly because
# later steps call levels() on the text columns; since R 4.0.0 read.csv()
# would otherwise return them as character vectors.
tickets <- read.csv("tickets.csv", header = TRUE, sep = ",",
    stringsAsFactors = TRUE)

Violations

What TOLE violations are most common?

## 
##   BUS ZONE   DBL PARK PK PHB OTD PRK PROHIB TWAWY ZN#1 TWAWY ZONE 
##       2338       2876       1479       2523       5293       2295

What TOLE violations produce the most revenue?

What TOLE Violations produce the most revenue on weekends and weekdays, on average?

##     Violation Weekday Total.Revenue Daily.Average.Revenue
## 1    BUS ZONE Weekday        114256               57128.0
## 2    DBL PARK Weekday        131245               65622.5
## 3  PK PHB OTD Weekday        112951               56475.5
## 4  PRK PROHIB Weekday        219356              109678.0
## 5  TWAWY ZN#1 Weekday        401554              200777.0
## 6  TWAWY ZONE Weekday        167374               83687.0
## 7    BUS ZONE Weekend         52657               26328.5
## 8    DBL PARK Weekend         45969               22984.5
## 9  PK PHB OTD Weekend         35518               17759.0
## 10 PRK PROHIB Weekend         60396               30198.0
## 11 TWAWY ZN#1 Weekend         99012               49506.0
## 12 TWAWY ZONE Weekend         49369               24684.5

## Source: local data frame [12 x 3]
## Groups: Violation [?]
## 
##     Violation Citation.Issue.Weekday Amount_Paid
##        <fctr>                 <fctr>       <int>
## 1    BUS ZONE                Weekday      114256
## 2    BUS ZONE                Weekend       52657
## 3    DBL PARK                Weekday      131245
## 4    DBL PARK                Weekend       45969
## 5  PK PHB OTD                Weekday      112951
## 6  PK PHB OTD                Weekend       35518
## 7  PRK PROHIB                Weekday      219356
## 8  PRK PROHIB                Weekend       60396
## 9  TWAWY ZN#1                Weekday      401554
## 10 TWAWY ZN#1                Weekend       99012
## 11 TWAWY ZONE                Weekday      167374
## 12 TWAWY ZONE                Weekend       49369

For the top 3 violations, what hours are most ticketed?

## 
##    4    6    7    8    9   10   11   12   13   14   15   16   17   18   19 
##    4    2  182 1016  394  493  530  516  383  335  730 3375 2589   82   37 
##   20   21   22 
##   22    1    1

For the top 3 violations, what is the usual amount paid?

Do TOLE violations vary by time?

Do TOLE violations vary by hour of the day?

# Count how many citations were issued in each hour of the day
Tickets.per.Hour <- summarise(
    group_by(tickets, Citation.Issue.Hour),
    count = n()
)
Tickets.per.Hour
## # A tibble: 20 × 2
##    Citation.Issue.Hour count
##                  <int> <int>
## 1                    2     1
## 2                    4     6
## 3                    5     2
## 4                    6     4
## 5                    7   214
## 6                    8  1142
## 7                    9   619
## 8                   10   754
## 9                   11   812
## 10                  12   837
## 11                  13   673
## 12                  14   575
## 13                  15   966
## 14                  16  6190
## 15                  17  3669
## 16                  18   183
## 17                  19    99
## 18                  20    52
## 19                  21     4
## 20                  22     2
## Plot violations by hour.
## position = "fill" scales every bar to height 1, so the y axis shows each
## violation's share of that hour's citations rather than a count.
ggplot(tickets, aes(x = Citation.Issue.Hour, fill = Violation)) +
    geom_bar(position = "fill") +
    scale_x_continuous(breaks = seq(0, 24, 1)) +
    xlab("Hour of Citation") +
    ylab("Proportion of Citations") +
    ggtitle("Stacked Bar Chart of Violations by Citation Issue Hour") +
    theme(plot.title = element_text(hjust = 0.5))

# NOTE: The graph shows almost entirely Double Parking and Bus Zone
# Violations during non-rush-hour and almost entirely Towaway and Prohibited
# Parking during rush hour
# Distribution of Violation by type and hour of day.
# Citation hours are whole numbers, so one bin per hour (binwidth = 1) keeps
# every bar aligned with its hour; a fixed bin count spread over the observed
# range of hours does not.
ggplot(tickets, aes(x = Citation.Issue.Hour)) +
    geom_histogram(binwidth = 1, fill = "salmon", color = "black") +
    facet_wrap(~Violation) +
    xlab("Citation Issue Hour") +
    ggtitle("Number of Tickets by Violation and Hour of Citation") +
    theme(plot.title = element_text(hjust = 0.5))

Do TOLE violations vary by year?

# Violations broken down by year (one dodged bar per citation year).
# Columns are referenced by bare name inside aes(); ggplot2 looks them up in
# the data argument.
ggplot(tickets, aes(x = Violation, fill = as.factor(Citation.Issue.Year))) +
    geom_bar(position = "dodge") +
    coord_flip() +
    xlab("Violation") +
    ggtitle("Number of Tickets by Violation and Year of Citation") +
    labs(fill = "Citation Issue Year") +
    theme(plot.title = element_text(hjust = 0.5))

How does Fine Amount vary?

# Violations: number of tickets per violation type, drawn as horizontal bars
ggplot(tickets, aes(x = Violation)) +
    geom_bar() +
    coord_flip()

# Violations broken down by year
ggplot(tickets, aes(x = Violation, fill = as.factor(Citation.Issue.Year))) +
    geom_bar(position = "dodge") +
    coord_flip() +
    labs(title = "Violations by Year", x = "Violation", y = "Total Citations",
        fill = "Year")

# Fine amount: frequency of each distinct fine value
table(tickets$Fine.Amount)
## 
##   70   73   75   78   80   83   85   88   93   95   98  100  110  250  253 
##   68  124 1280  396  882 1547 4421  573 1254 1576  947  148 1250  194  362 
##  255  267  271  279 
##  785  451  332  214
# Bar chart of the same counts; the column is named directly in aes() so the
# plot does not depend on the global `tickets` object at draw time.
fine.plot <- ggplot(tickets, aes(x = Fine.Amount)) +
    geom_bar() +
    labs(x = "Fine Amount", y = "Total Citations")
fine.plot

## By Violation: one panel per violation type. The facet variable is looked
## up in the plot's own data.
fine.plot + facet_wrap(~Violation)

## Fine amounts fall into a low group and a high group. Is the split related
## to time of day, weekday vs weekend, year, etc.? Start with weekday vs
## weekend, one panel per violation.
ggplot(tickets, aes(x = Fine.Amount, fill = Citation.Issue.Weekday)) +
    geom_bar(position = "dodge") +
    facet_wrap(~Violation) +
    labs(x = "Fine Amount", y = "Total Citations", fill = "Weekday or Weekend")

# ggplot(data = tickets) + geom_bar(mapping = aes(x = tickets$Fine.Amount,
# fill = as.factor(tickets$Citation.Issue.Year)), position = 'fill') +
# facet_wrap(~tickets$Violation)
table(tickets$Fine.Amount)
## 
##   70   73   75   78   80   83   85   88   93   95   98  100  110  250  253 
##   68  124 1280  396  882 1547 4421  573 1254 1576  947  148 1250  194  362 
##  255  267  271  279 
##  785  451  332  214
# Histogram of fine amount. bins = 30 is geom_histogram()'s default, stated
# explicitly so the bin count is a visible choice.
fine.plot <- ggplot(tickets, aes(x = Fine.Amount)) +
    geom_histogram(bins = 30, fill = "salmon")
fine.plot + xlab("Fine Amount") + ggtitle("Histogram of Fine Amount") +
    theme(plot.title = element_text(hjust = 0.5))

## By Violation: the same histogram split into one panel per violation type
fine.plot + facet_wrap(~Violation) + xlab("Fine Amount") +
    ggtitle("Histogram of Fine Amount by Violation") +
    theme(plot.title = element_text(hjust = 0.5))

## It is clear that tickets have a low fine amount and high fine amount. Is
## it time of day, weekday vs weekend, year, etc.? Let's make a factor
## variable to check this

# Drop the temporary plot object from the workspace
rm(list = "fine.plot")

There are 2 levels of Fine Amount: Low and High

# Create the factor Fine.Level: "Low" when Fine.Amount is below 175, "High"
# otherwise. The levels are given explicitly so "Low" is the first level.
tickets$Fine.Level <- factor(ifelse(tickets$Fine.Amount < 175, "Low", "High"),
    levels = c("Low", "High"))
## Generate table
table(tickets$Fine.Level)
## 
##   Low  High 
## 14466  2338
# Bar chart of the number of tickets in each fine level
fine.plot2 <- ggplot(tickets, aes(x = Fine.Level)) +
    geom_bar(fill = "salmon")
fine.plot2 + xlab("Fine Level") + ggtitle("Bar Chart of Fine Level") +
    theme(plot.title = element_text(hjust = 0.5))

## Fine level by Violation: one panel per violation type
fine.plot2 + facet_wrap(~Violation) + xlab("Fine Level") +
    ggtitle("Bar Chart of Fine Level by Violation") +
    theme(plot.title = element_text(hjust = 0.5))

# Drop the temporary plot object from the workspace
rm(list = "fine.plot2")

Fine Level by Hour of Citation

# Plot Fine level by Hour: tickets per citation hour, one panel per fine
# level. The x axis is the hour, so it is labelled as such; the fine level
# appears in the panel strips.
fine.plot3 <- ggplot(tickets, aes(x = Citation.Issue.Hour)) +
    geom_bar(fill = "salmon")
fine.plot3 + facet_wrap(~Fine.Level) + xlab("Hour of Citation") +
    ggtitle("Bar Chart of Fine Level by Hour of Citation") +
    theme(plot.title = element_text(hjust = 0.5))

## ******High Fine.Level seems to spike at 4 and 5 pm. Is that during Rush
## Hour? but there is a similar spike in Low Fine.Level too*******

# Drop the temporary plot object from the workspace
rm(list = "fine.plot3")

Fine Level by weekday and weekend

# Plot Fine level by weekday vs weekend: one panel per fine level. The x axis
# is the weekday/weekend flag, so it is labelled as such.
ggplot(tickets, aes(x = Citation.Issue.Weekday)) +
    geom_bar(fill = "salmon") +
    facet_wrap(~Fine.Level) +
    xlab("Weekday or Weekend") +
    ggtitle("Bar Chart of Fine Level by Weekday and Weekend") +
    theme(plot.title = element_text(hjust = 0.5))

## Does not seem to be a correlation

Fine Level by day of week

# Plot Fine level by day of week: one panel per fine level. The x axis is the
# day of the week, so it is labelled as such.
ggplot(tickets, aes(x = Citation.Issue.DayOfWeek)) +
    geom_bar(fill = "salmon") +
    facet_wrap(~Fine.Level) +
    xlab("Day of Week") +
    ggtitle("Bar Chart of Fine Level by Day of the Week") +
    theme(plot.title = element_text(hjust = 0.5))

Fine Level by Month

# Plot fine level by month: one panel per fine level. The x axis is the
# citation month, so it is labelled as such.
ggplot(tickets, aes(x = Citation.Issue.Month)) +
    geom_bar(fill = "salmon") +
    facet_wrap(~Fine.Level) +
    xlab("Month") +
    ggtitle("Bar Chart of Fine Level by Month") +
    theme(plot.title = element_text(hjust = 0.5))

Fine level by Year

# Plot fine level by year: one panel per fine level.
# fill = "salmon" is a constant colour, so it is set on the geom; inside
# aes() it would be treated as a data value and mapped through the fill scale
# (with a legend) instead of colouring the bars salmon.
ggplot(tickets, aes(x = as.factor(Citation.Issue.Year))) +
    geom_bar(fill = "salmon") +
    facet_wrap(~Fine.Level) +
    xlab("Year") +
    ggtitle("Bar Chart of Fine Level by Year") +
    theme(plot.title = element_text(hjust = 0.5))

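From the chart by year, it looks like “High” level fines were introduced in 2014, as it appears to be the only year in the dataset in which a “High” fine level occurs. This should be double-checked: the 2,338 “High” tickets equal the number of BUS ZONE tickets, and the median BUS ZONE fine is already 253 in 2009, which suggests that “High” may simply identify bus-zone violations in every year.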

Citation Status

# Citation Status: counts of closed vs open citations
table(tickets$Citation.Status)
## 
## Closed   Open 
##  15110   1694
# Plain bar chart of the counts
ggplot(tickets, aes(x = Citation.Status)) +
    geom_bar() +
    labs(x = "Citation Status", y = "Total Citations")

# Same chart with the salmon fill and centred title
ggplot(tickets, aes(x = Citation.Status)) +
    geom_bar(fill = "salmon") +
    xlab("Citation Status") +
    ggtitle("Bar Chart of Citation Status") +
    theme(plot.title = element_text(hjust = 0.5))

## Most are closed. Which ones are still open though?

Citation status by year

# Citation status counts, one panel per citation year
ggplot(tickets, aes(x = Citation.Status)) +
    geom_bar(fill = "salmon") +
    facet_wrap(~Citation.Issue.Year) +
    xlab("Citation Status") +
    ggtitle("Bar Chart of Citation Status by Year") +
    theme(plot.title = element_text(hjust = 0.5))

## It does not make sense to break citation status down by fine level or similar variables

Citation Status by Amount Due

# Amount due for each citation status as individual points; the low alpha
# makes heavily overplotted values show up darker.
ggplot(tickets, aes(x = Citation.Status, y = Amount.Due)) +
    geom_point(alpha = 1/50) +
    xlab("Citation Status") +
    ggtitle("Dot plot of Citation Status by Amount Due") +
    theme(plot.title = element_text(hjust = 0.5))

# The same comparison summarised as a boxplot per citation status
ggplot(tickets, aes(x = Citation.Status, y = Amount.Due)) +
    geom_boxplot() +
    xlab("Citation Status") +
    ggtitle("Boxplot of Citation Status by Amount Due") +
    theme(plot.title = element_text(hjust = 0.5))

Checking Fine amount vs Amount Due by Citation Status

# Fine amount against amount still due, coloured by citation status
ggplot(tickets, aes(x = Fine.Amount, y = Amount.Due, color = Citation.Status)) +
    geom_point() +
    xlab("Fine Amount") +
    ylab("Amount Due") +
    ggtitle("Dot plot of Fine Amount vs Amount Due by Citation Status") +
    theme(plot.title = element_text(hjust = 0.5))

# Amount paid: frequency of each distinct paid amount
table(tickets[["Amount.Paid"]])
## 
##    1    2    3    4    5    7    8    9   10   11   12   13   14   15   17 
##  643  665 1124  846  739  175   23   31   23 1159   78   73    1   42    1 
##   18   19   20   21   22   23   24   25   26   27   28   29   30   31   32 
##  102  107   72   89    1    1   13   11  129   42   63   16   55    1   26 
##   33   34   35   36   37   39   40   41   42   43   44   45   46   47   48 
##  272   96    4   72    1    3   22  100    3   10  107    6    1    6   18 
##   49   50   51   52   53   54   55   56   57   58   59   60   62   63   64 
##    9    1    5    2    4   44    1   49   29    9    4   17    5   52   16 
##   65   67   68   69   70   71   72   73   74   75   76   77   78   79   80 
##   20   12    1   16    9   23    1   19    5    2   25   16    2    1   14 
##   81   82   83   84   86   88   89   90   92   93   94   96   97   99  100 
##    3    7    2    1    1    1   90  157  392    1  255  193   12   24  116 
##  101  102  103  104  105  106  107  108  110  111  112  113  114  115  116 
##   42    9    4    1   24   20   16    1    1    1   13    5   25   32   64 
##  117  120  121  122  123  124  125  126  127  128  129  131  132  133  134 
##    1    1   44    8   33   15    1    5    4    1    7    2   10    1    1 
##  136  137  138  139  140  141  142  143  144  145  147  150  151  152  153 
##    3    1   10    4    3    1    1    4    1    1    3    1    2    1   39 
##  154  155  156  157  158  159  161  163  165  167  168 
##   71  717  251  571  867 2376  332  794 1011  605    2
# Bar chart of the number of tickets at each paid amount
ggplot(tickets, aes(x = Amount.Paid)) +
    geom_bar()

# Why are some amounts paid so low? Are these cases where the status is
# still open?
# NOTE(review): the printed summary looks like whole-table statistics, so the
# grouping does not seem to affect summary() - confirm.
tickets_by_paid_status <- group_by(tickets, Amount.Paid, Citation.Status)
summary(tickets_by_paid_status)
##  Ticket.Number       Citation.Issue.Date      Violation     Fine.Amount   
##  Min.   :100000476   2012-09-25:   58    BUS ZONE  :2338   Min.   : 70.0  
##  1st Qu.:794817169   2010-07-19:   53    DBL PARK  :2876   1st Qu.: 83.0  
##  Median :814718876   2012-08-20:   50    PK PHB OTD:1479   Median : 85.0  
##  Mean   :805965065   2012-09-06:   50    PRK PROHIB:2523   Mean   :112.3  
##  3rd Qu.:831587005   2012-09-20:   50    TWAWY ZN#1:5293   3rd Qu.: 98.0  
##  Max.   :977362606   2011-07-28:   48    TWAWY ZONE:2295   Max.   :279.0  
##                      (Other)   :16495                                     
##  Citation.Status  Amount.Paid       Amount.Due     Citation.Issue.Month
##  Closed:15110    Min.   :  1.00   Min.   :  0.00   Jul    :1865        
##  Open  : 1694    1st Qu.:  8.00   1st Qu.:  0.00   Sep    :1862        
##                  Median : 96.00   Median :  0.00   Mar    :1736        
##                  Mean   : 88.65   Mean   : 19.02   Aug    :1652        
##                  3rd Qu.:159.00   3rd Qu.:  0.00   Oct    :1627        
##                  Max.   :168.00   Max.   :392.00   Apr    :1533        
##                                                    (Other):6529        
##   Paid.On.Time       Latitude       Longitude      Citation.Issue.Year
##  Min.   :0.0000   Min.   :37.71   Min.   :-122.5   Min.   :2009       
##  1st Qu.:1.0000   1st Qu.:37.79   1st Qu.:-122.4   1st Qu.:2011       
##  Median :1.0000   Median :37.79   Median :-122.4   Median :2012       
##  Mean   :0.9015   Mean   :37.79   Mean   :-122.4   Mean   :2012       
##  3rd Qu.:1.0000   3rd Qu.:37.79   3rd Qu.:-122.4   3rd Qu.:2013       
##  Max.   :1.0000   Max.   :37.80   Max.   :-122.4   Max.   :2014       
##                                                                       
##  Citation.Issue.DayOfWeek Citation.Issue.Weekday Citation.Issue.Hour
##  Fri  : 546               Weekday:12779          Min.   : 2.00      
##  Mon  :3558               Weekend: 4025          1st Qu.:12.00      
##  Sat  : 809                                      Median :16.00      
##  Sun  :3216                                      Mean   :14.39      
##  Thurs:2978                                      3rd Qu.:16.00      
##  Tues :3077                                      Max.   :22.00      
##  Wed  :2620                                                         
##  Citation.Issue.MonthYear Fine.Level  
##  Sep-2012:  528           Low :14466  
##  Jul-2014:  471           High: 2338  
##  Sep-2014:  428                       
##  Aug-2012:  410                       
##  Mar-2011:  402                       
##  Apr-2013:  365                       
##  (Other) :14200
# Paid amounts coloured by citation status. The aesthetics name columns of
# the data frame passed to ggplot() rather than reaching into a different
# object (`tickets`).
ggplot(tickets_by_paid_status, aes(x = Amount.Paid, fill = Citation.Status)) +
    geom_bar()

# Most of the low paid amounts belong to citations that are still open, but
# that does not fully explain them. Which types of violations are these?
tickets_paid_viol <- tickets %>% group_by(Amount.Paid, Violation)
# Paid amounts coloured by violation type, using the columns of the data
# frame that is actually passed to ggplot()
ggplot(tickets_paid_viol, aes(x = Amount.Paid, fill = Violation)) +
    geom_bar()

# Looks like prices of violations vary. By time? Weekend?

# Amount Due: frequency of each distinct outstanding amount
table(tickets[["Amount.Due"]])
## 
##      0      7     25     29     30     38     39     60     68     75 
##  15110      1      2      2      2      1      7      3      7      1 
##     82     83     85     87     88     90     93     95     98    100 
##      1     46     34      1     23      2     17     20     17      2 
##    102    103    105    106    107    108    110    111    113    114 
##      1      4      5      5      1      1     34      1      5      1 
##    115    117    118    120    121    125    127    128    129    130 
##     39     17      1      6      1     40     14      1      1      1 
##    133    135    137    139    140    141    143    144    145    148 
##      1      3      1      5     11      1      1      1      5      1 
##    150    151    152    155    156    158    160    161    162    163 
##      6     12      3      3     53      7      9     33      1     13 
##    164    165    166    168    170 170.95    172    173    175    176 
##      1     11    107      7      2      1      1      4    153      4 
##    177    178    179    180    182    185    187    189    191    195 
##     14     10      2     21      5     81     29     33     38     35 
##    196    199    200    201    202    205    206    209    211    216 
##     20     39      1     51      6     62     58      1     53     14 
##    218    220    221    223    250    267    271    278    279    285 
##      5     12      9      6      1     15     13      6     18      4 
##    295    297    308    309    310    313    315    320    331    335 
##      5      2      4      2      3      1      1      2      1     15 
##    337    338    340    343    345    347    354    355    357    359 
##      4     18      2      5      9     17      1     22     12      1 
##    373    375    381    382    392 
##     25     12     16     11     12
# Histogram of amount due, stacked by violation type. bins = 30 is
# geom_histogram()'s default, stated explicitly.
ggplot(tickets, aes(x = Amount.Due, fill = Violation)) +
    geom_histogram(bins = 30)

## The vast majority of tickets are fully paid off (amount due of 0)
head(table(tickets[["Amount.Due"]]))
## 
##     0     7    25    29    30    38 
## 15110     1     2     2     2     1
## Amount due by citation status?
## NOTE(review): the printed summary looks like whole-table statistics, so
## the grouping does not seem to affect summary() - confirm.
tickets_due_status <- group_by(tickets, Amount.Due, Citation.Status)
summary(tickets_due_status)
##  Ticket.Number       Citation.Issue.Date      Violation     Fine.Amount   
##  Min.   :100000476   2012-09-25:   58    BUS ZONE  :2338   Min.   : 70.0  
##  1st Qu.:794817169   2010-07-19:   53    DBL PARK  :2876   1st Qu.: 83.0  
##  Median :814718876   2012-08-20:   50    PK PHB OTD:1479   Median : 85.0  
##  Mean   :805965065   2012-09-06:   50    PRK PROHIB:2523   Mean   :112.3  
##  3rd Qu.:831587005   2012-09-20:   50    TWAWY ZN#1:5293   3rd Qu.: 98.0  
##  Max.   :977362606   2011-07-28:   48    TWAWY ZONE:2295   Max.   :279.0  
##                      (Other)   :16495                                     
##  Citation.Status  Amount.Paid       Amount.Due     Citation.Issue.Month
##  Closed:15110    Min.   :  1.00   Min.   :  0.00   Jul    :1865        
##  Open  : 1694    1st Qu.:  8.00   1st Qu.:  0.00   Sep    :1862        
##                  Median : 96.00   Median :  0.00   Mar    :1736        
##                  Mean   : 88.65   Mean   : 19.02   Aug    :1652        
##                  3rd Qu.:159.00   3rd Qu.:  0.00   Oct    :1627        
##                  Max.   :168.00   Max.   :392.00   Apr    :1533        
##                                                    (Other):6529        
##   Paid.On.Time       Latitude       Longitude      Citation.Issue.Year
##  Min.   :0.0000   Min.   :37.71   Min.   :-122.5   Min.   :2009       
##  1st Qu.:1.0000   1st Qu.:37.79   1st Qu.:-122.4   1st Qu.:2011       
##  Median :1.0000   Median :37.79   Median :-122.4   Median :2012       
##  Mean   :0.9015   Mean   :37.79   Mean   :-122.4   Mean   :2012       
##  3rd Qu.:1.0000   3rd Qu.:37.79   3rd Qu.:-122.4   3rd Qu.:2013       
##  Max.   :1.0000   Max.   :37.80   Max.   :-122.4   Max.   :2014       
##                                                                       
##  Citation.Issue.DayOfWeek Citation.Issue.Weekday Citation.Issue.Hour
##  Fri  : 546               Weekday:12779          Min.   : 2.00      
##  Mon  :3558               Weekend: 4025          1st Qu.:12.00      
##  Sat  : 809                                      Median :16.00      
##  Sun  :3216                                      Mean   :14.39      
##  Thurs:2978                                      3rd Qu.:16.00      
##  Tues :3077                                      Max.   :22.00      
##  Wed  :2620                                                         
##  Citation.Issue.MonthYear Fine.Level  
##  Sep-2012:  528           Low :14466  
##  Jul-2014:  471           High: 2338  
##  Sep-2014:  428                       
##  Aug-2012:  410                       
##  Mar-2011:  402                       
##  Apr-2013:  365                       
##  (Other) :14200
# Histogram of amount due, stacked by citation status. The aesthetics name
# columns of the data frame passed to ggplot(); bins = 30 is
# geom_histogram()'s default, stated explicitly.
ggplot(tickets_due_status, aes(x = Amount.Due, fill = Citation.Status)) +
    geom_histogram(bins = 30)

# That seems to tell the whole story

# Paid on time: counts of the 0/1 flag
table(tickets$Paid.On.Time)
## 
##     0     1 
##  1656 15148
# The flag is converted to a factor so it is drawn as two discrete bars
ggplot(tickets, aes(x = as.factor(Paid.On.Time))) +
    geom_bar()

# Day of week: put the factor levels in calendar order (Sun ... Sat). The
# levels are named explicitly rather than permuted by position, so re-running
# this statement gives the same result.
tickets$Citation.Issue.DayOfWeek <- factor(tickets$Citation.Issue.DayOfWeek,
    levels = c("Sun", "Mon", "Tues", "Wed", "Thurs", "Fri", "Sat"))

# Month
table(tickets$Citation.Issue.Month)
## 
##  Apr  Aug  Dec  Feb  Jan  Jul  Jun  Mar  May  Nov  Oct  Sep 
## 1533 1652 1042 1184  790 1865 1199 1736 1270 1044 1627 1862
# geom_bar() counts rows, so each bar is the total number of citations in
# that month, not an average. Months are drawn in calendar order
# (month.abb) instead of alphabetically.
ggplot(tickets, aes(x = factor(Citation.Issue.Month, levels = month.abb))) +
    geom_bar(fill = "skyblue3", color = "gray29") +
    labs(title = "Total Citations per Month", x = "Month",
        y = "Total Citations")

# Year: total citations issued in each year
table(tickets$Citation.Issue.Year)
## 
## 2009 2010 2011 2012 2013 2014 
## 1288 2090 3058 3726 3108 3534
ggplot(tickets, aes(x = Citation.Issue.Year)) +
    geom_bar(fill = "skyblue3", color = "gray29") +
    labs(title = "Total Citations per Year", x = "Year",
        y = "Total Citations") +
    scale_x_continuous(breaks = seq(2009, 2014, 1))

# Day of Week: total citations issued on each day
table(tickets[["Citation.Issue.DayOfWeek"]])
## 
##   Sun   Mon  Tues   Wed Thurs   Fri   Sat 
##  3216  3558  3077  2620  2978   546   809
ggplot(data = tickets, mapping = aes(x = Citation.Issue.DayOfWeek)) +
    geom_bar(fill = "indianred2", color = "black") +
    labs(title = "Total Citations by Day of Week", x = "Day of Week", y = "Total Citations")

# ******* Let's look at how we coded the Day of Week variable. We would expect fewer citations on Sunday ******

# Weekday: citations issued on weekdays vs weekends
table(tickets$Citation.Issue.Weekday)
## 
## Weekday Weekend 
##   12779    4025
ggplot(tickets, aes(x = Citation.Issue.Weekday)) +
    geom_bar()

# ******* Let's look at how we coded the Day of Week variable. We would expect fewer citations on Sunday ******

# Hour: total citations issued in each hour of the day
table(tickets$Citation.Issue.Hour)
## 
##    2    4    5    6    7    8    9   10   11   12   13   14   15   16   17 
##    1    6    2    4  214 1142  619  754  812  837  673  575  966 6190 3669 
##   18   19   20   21   22 
##  183   99   52    4    2
ggplot(tickets, aes(x = Citation.Issue.Hour)) +
    geom_bar(fill = "skyblue3", color = "gray29") +
    labs(title = "Citations By Hour", x = "Hour of Day", y = "Total Citations")

# Most citations are issued during the evening rush hour (4-5 pm), when people are on their way home


# Same graph but with Rush Hours in blue and Off Hours in red.
# Rush hours are 7-9 and 16-18. The colour comes from a fill aesthetic
# computed from the hour itself, so it stays correct whatever range of hours
# is present in the data, and the "Rush Hour" legend is actually drawn.
rush.hours <- c(7:9, 16:18)
bar.color <- c("No" = "indianred2", "Yes" = "royalblue3")
ggplot(tickets, aes(x = Citation.Issue.Hour,
        fill = ifelse(Citation.Issue.Hour %in% rush.hours, "Yes", "No"))) +
    geom_histogram(binwidth = 1, color = "black") +
    scale_fill_manual(values = bar.color) +
    scale_x_continuous(breaks = seq(0, 24, 1)) +
    labs(title = "Citations by Hour", x = "Hour of Day (24 Hour Scale)",
        y = "Total Citations", fill = "Rush Hour")

# Summary statistics for every column of the tickets data
summary(object = tickets)
##  Ticket.Number       Citation.Issue.Date      Violation     Fine.Amount   
##  Min.   :100000476   2012-09-25:   58    BUS ZONE  :2338   Min.   : 70.0  
##  1st Qu.:794817169   2010-07-19:   53    DBL PARK  :2876   1st Qu.: 83.0  
##  Median :814718876   2012-08-20:   50    PK PHB OTD:1479   Median : 85.0  
##  Mean   :805965065   2012-09-06:   50    PRK PROHIB:2523   Mean   :112.3  
##  3rd Qu.:831587005   2012-09-20:   50    TWAWY ZN#1:5293   3rd Qu.: 98.0  
##  Max.   :977362606   2011-07-28:   48    TWAWY ZONE:2295   Max.   :279.0  
##                      (Other)   :16495                                     
##  Citation.Status  Amount.Paid       Amount.Due     Citation.Issue.Month
##  Closed:15110    Min.   :  1.00   Min.   :  0.00   Jul    :1865        
##  Open  : 1694    1st Qu.:  8.00   1st Qu.:  0.00   Sep    :1862        
##                  Median : 96.00   Median :  0.00   Mar    :1736        
##                  Mean   : 88.65   Mean   : 19.02   Aug    :1652        
##                  3rd Qu.:159.00   3rd Qu.:  0.00   Oct    :1627        
##                  Max.   :168.00   Max.   :392.00   Apr    :1533        
##                                                    (Other):6529        
##   Paid.On.Time       Latitude       Longitude      Citation.Issue.Year
##  Min.   :0.0000   Min.   :37.71   Min.   :-122.5   Min.   :2009       
##  1st Qu.:1.0000   1st Qu.:37.79   1st Qu.:-122.4   1st Qu.:2011       
##  Median :1.0000   Median :37.79   Median :-122.4   Median :2012       
##  Mean   :0.9015   Mean   :37.79   Mean   :-122.4   Mean   :2012       
##  3rd Qu.:1.0000   3rd Qu.:37.79   3rd Qu.:-122.4   3rd Qu.:2013       
##  Max.   :1.0000   Max.   :37.80   Max.   :-122.4   Max.   :2014       
##                                                                       
##  Citation.Issue.DayOfWeek Citation.Issue.Weekday Citation.Issue.Hour
##  Sun  :3216               Weekday:12779          Min.   : 2.00      
##  Mon  :3558               Weekend: 4025          1st Qu.:12.00      
##  Tues :3077                                      Median :16.00      
##  Wed  :2620                                      Mean   :14.39      
##  Thurs:2978                                      3rd Qu.:16.00      
##  Fri  : 546                                      Max.   :22.00      
##  Sat  : 809                                                         
##  Citation.Issue.MonthYear Fine.Level  
##  Sep-2012:  528           Low :14466  
##  Jul-2014:  471           High: 2338  
##  Sep-2014:  428                       
##  Aug-2012:  410                       
##  Mar-2011:  402                       
##  Apr-2013:  365                       
##  (Other) :14200
# summary(tickets_to2014)
# summary(tickets)

How have the prices of TOLE violations changed over time?

# Counts and bar chart of tickets per violation type
summary(tickets$Violation)
##   BUS ZONE   DBL PARK PK PHB OTD PRK PROHIB TWAWY ZN#1 TWAWY ZONE 
##       2338       2876       1479       2523       5293       2295
ggplot(tickets, aes(x = Violation)) +
    geom_bar()

# Violations broken down by year (one dodged bar per citation year)
ggplot(tickets, aes(x = Violation, fill = as.factor(Citation.Issue.Year))) +
    geom_bar(position = "dodge")

# Because the distribution of fine amount has several high-end outliers, we
# should be using the median as our measure of central tendency. This is
# even more clear when comparing the difference between the mean and median
# (112.3 and 85).
median(tickets$Fine.Amount)
## [1] 85
mean(tickets$Fine.Amount)
## [1] 112.3348
# Median fine amount for each violation type
fine_by_viol <- summarise(
    group_by(tickets, Violation),
    median = median(Fine.Amount)
)
fine_by_viol
## # A tibble: 6 × 2
##    Violation median
##       <fctr>  <dbl>
## 1   BUS ZONE    255
## 2   DBL PARK     80
## 3 PK PHB OTD     85
## 4 PRK PROHIB     95
## 5 TWAWY ZN#1     85
## 6 TWAWY ZONE     83
# Maybe put these results in a table?

# Median fine amount for each citation year
fine_by_year <- summarise(
    group_by(tickets, Citation.Issue.Year),
    median = median(Fine.Amount)
)
head(fine_by_year)
## # A tibble: 6 × 2
##   Citation.Issue.Year median
##                 <int>  <dbl>
## 1                2009     78
## 2                2010     85
## 3                2011     85
## 4                2012     85
## 5                2013     95
## 6                2014     95
# Line-and-point chart of the median fine per year, to show the trend in
# prices over time
fine.plot.year <- ggplot(
    data = fine_by_year,
    mapping = aes(x = Citation.Issue.Year, y = median)
) +
    geom_line() +
    geom_point()
fine.plot.year

# *******Median fine amount per year seems to have increased significantly
# over time. Index with inflation to see if this is really the case?*******

# Median fine amount for each combination of violation type and year
fine_by_year_viol <- summarise(
    group_by(tickets, Violation, Citation.Issue.Year),
    median = median(Fine.Amount)
)
head(fine_by_year_viol)
## Source: local data frame [6 x 3]
## Groups: Violation [1]
## 
##   Violation Citation.Issue.Year median
##      <fctr>               <int>  <dbl>
## 1  BUS ZONE                2009    253
## 2  BUS ZONE                2010    255
## 3  BUS ZONE                2011    255
## 4  BUS ZONE                2012    267
## 5  BUS ZONE                2013    271
## 6  BUS ZONE                2014    279
# Median fine per year, one panel per violation type, to compare trends
fine.plot.yv <- ggplot(
    data = fine_by_year_viol,
    mapping = aes(x = Citation.Issue.Year, y = median)
) +
    geom_line() +
    geom_point() +
    facet_wrap(~Violation)
fine.plot.yv

# ********** Looks like we should look at violations and the years they
# occur in and maybe get more specific on type of violation*********

Are more citations issued on certain days of the week? Are more tickets issued on weekdays than weekends?

Jack EDA

# Scatter plots of Fine Amount against hour, day of the week, month, and year
# of citation, each split into one panel per Violation type

ggplot(data = tickets, mapping = aes(x = Citation.Issue.Hour, y = Fine.Amount)) +
    geom_point() +
    facet_wrap(~Violation)

ggplot(data = tickets, mapping = aes(x = Citation.Issue.DayOfWeek, y = Fine.Amount)) +
    geom_point() +
    facet_wrap(~Violation)

ggplot(data = tickets, mapping = aes(x = Citation.Issue.Month, y = Fine.Amount)) +
    geom_point() +
    facet_wrap(~Violation)

ggplot(data = tickets, mapping = aes(x = Citation.Issue.Year, y = Fine.Amount)) +
    geom_point() +
    facet_wrap(~Violation)

# There appears to be no change over Hour, Day, or Month, with a slight increase in prices each year and a larger spike in 2012.
#Analysis of price changes over the years by Violation type

# Number of tickets for each type of Violation
ViolationCount <- summarise(
    group_by(tickets, Violation),
    count = n()
)
ViolationCount
## # A tibble: 6 × 2
##    Violation count
##       <fctr> <int>
## 1   BUS ZONE  2338
## 2   DBL PARK  2876
## 3 PK PHB OTD  1479
## 4 PRK PROHIB  2523
## 5 TWAWY ZN#1  5293
## 6 TWAWY ZONE  2295
# One subset of `tickets` per Violation code
Violation.DBLPRK <- filter(tickets, Violation == "DBL PARK")
Violation.BUSZONE <- filter(tickets, Violation == "BUS ZONE")
Violation.PKPHBOTD <- filter(tickets, Violation == "PK PHB OTD")
Violation.PRKPROHIB <- filter(tickets, Violation == "PRK PROHIB")
Violation.TOWAWAY1 <- filter(tickets, Violation == "TWAWY ZN#1")
Violation.TOWAWAYzONE <- filter(tickets, Violation == "TWAWY ZONE")

#Graphing the change in fine for each Violation type from 2010-2015

# Fine amount by citation year, one scatter plot per violation subset
ggplot(Violation.DBLPRK, aes(Citation.Issue.Year, Fine.Amount)) +
    geom_point() +
    labs(title = "Double Parking")

ggplot(Violation.BUSZONE, aes(Citation.Issue.Year, Fine.Amount)) +
    geom_point() +
    labs(title = "Bus Zone")

# NOTE(review): the titles of the next two plots read "Tow-Away Zone ..." while
# the subsets hold the "PK PHB OTD" and "PRK PROHIB" codes -- confirm the
# intended descriptions for these codes.
ggplot(Violation.PKPHBOTD, aes(Citation.Issue.Year, Fine.Amount)) +
    geom_point() +
    labs(title = "Tow-Away Zone - Outside Downtown Core")

ggplot(Violation.PRKPROHIB, aes(Citation.Issue.Year, Fine.Amount)) +
    geom_point() +
    labs(title = "Tow-Away Zone - Downtown Core")

ggplot(Violation.TOWAWAY1, aes(Citation.Issue.Year, Fine.Amount)) +
    geom_point() +
    labs(title = "Tow Away Zone 1")

ggplot(Violation.TOWAWAYzONE, aes(Citation.Issue.Year, Fine.Amount)) +
    geom_point() +
    labs(title = "Tow Away Zone 2")

# General increases in fines over the interval, with a noticeable jump in prices in 2012

What Hours have the most Violations

# Histogram of Citations per Hour
# Share of all tickets issued in each citation hour, as a percentage rounded
# to 5 decimal places
ByHour <- summarise(
    group_by(tickets, Citation.Issue.Hour),
    Percentage = round(n() / nrow(tickets) * 100, digits = 5)
)
ByHour
## # A tibble: 20 × 2
##    Citation.Issue.Hour Percentage
##                  <int>      <dbl>
## 1                    2    0.00595
## 2                    4    0.03571
## 3                    5    0.01190
## 4                    6    0.02380
## 5                    7    1.27351
## 6                    8    6.79600
## 7                    9    3.68365
## 8                   10    4.48703
## 9                   11    4.83218
## 10                  12    4.98096
## 11                  13    4.00500
## 12                  14    3.42180
## 13                  15    5.74863
## 14                  16   36.83647
## 15                  17   21.83409
## 16                  18    1.08903
## 17                  19    0.58915
## 18                  20    0.30945
## 19                  21    0.02380
## 20                  22    0.01190
# Histogram of citations per hour with the rush-hour bars (hours 7-9 and
# 16-18) in blue and all other hours in red.
#
# With binwidth = 1, geom_histogram() draws one bar for every whole hour
# between the earliest and the latest observed hour (including hours with no
# tickets), and `fill` needs exactly one colour per bar. The colours are
# therefore derived from the observed hour range instead of a hand-counted
# vector, so the plot keeps working if the range of hours in the data changes.
rush.hours <- c(7:9, 16:18)
hour.bins <- seq(min(tickets$Citation.Issue.Hour, na.rm = TRUE), 
    max(tickets$Citation.Issue.Hour, na.rm = TRUE), by = 1)
bar.color <- ifelse(hour.bins %in% rush.hours, "royalblue3", "indianred2")

# The fill colours are set as constants rather than mapped through aes(), so
# no legend is drawn and a `fill` label would have no effect.
ggplot(tickets, aes(x = Citation.Issue.Hour)) + 
    geom_histogram(binwidth = 1, fill = bar.color, color = "black") + 
    scale_x_continuous(breaks = seq(0, 24, 1)) + 
    labs(title = "Citations by Hour", x = "Hour of Day (24 Hour Scale)", 
        y = "Total Citations")

Are more tickets issued during Rush Hour or Off Hours

# Add a day-of-month column to support the day-to-day calculations
tickets[["Citation.Issue.Day"]] <- day(tickets[["Citation.Issue.Date"]])


# Rush hour = citation hours 7-9 or 16-18 (inclusive); off hours = every
# other hour, written as the negation of the rush-hour condition
RushHour <- filter(
    tickets,
    (Citation.Issue.Hour >= 7 & Citation.Issue.Hour <= 9) |
        (Citation.Issue.Hour >= 16 & Citation.Issue.Hour <= 18)
)
NonRushHour <- filter(
    tickets,
    !((Citation.Issue.Hour >= 7 & Citation.Issue.Hour <= 9) |
        (Citation.Issue.Hour >= 16 & Citation.Issue.Hour <= 18))
)
nrow(RushHour)
## [1] 12017
nrow(NonRushHour)
## [1] 4787
# Percentage of all tickets that were issued during rush hour
PercentRush <- nrow(RushHour) / nrow(tickets) * 100
PercentRush
## [1] 71.51274
# Median fine per citation date, then the median of those per-date medians.
# Note: `t` has the same name as base R's transpose function t().
t <- summarise(group_by(tickets, Citation.Issue.Date), median = median(Fine.Amount))
median(t[["median"]])
## [1] 88
# Median fine for each day of the week
g <- summarise(group_by(tickets, Citation.Issue.DayOfWeek), median = median(Fine.Amount))
g
## # A tibble: 7 × 2
##   Citation.Issue.DayOfWeek median
##                     <fctr>  <dbl>
## 1                      Sun     85
## 2                      Mon     85
## 3                     Tues     85
## 4                      Wed     85
## 5                    Thurs     85
## 6                      Fri    250
## 7                      Sat    110

Comparing Distributions of Rush Hour and Off Hour Totals per Day

#EDA for Rushhour vs NonRushhour
# Dimensions (rows, columns) of the rush-hour subset
dim(RushHour)
## [1] 12017    18
# Column names, types and leading values of the rush-hour subset
str(RushHour)
## 'data.frame':    12017 obs. of  18 variables:
##  $ Ticket.Number           : int  975990164 975990175 975990186 975990190 100002550 975990223 100002551 975990271 975990282 100002557 ...
##  $ Citation.Issue.Date     : Factor w/ 1684 levels "2009-01-11","2009-01-19",..: 5 6 7 7 7 8 8 9 9 10 ...
##  $ Violation               : Factor w/ 6 levels "BUS ZONE","DBL PARK",..: 1 1 1 1 5 2 2 2 2 2 ...
##  $ Fine.Amount             : int  250 250 250 250 70 75 75 75 75 75 ...
##  $ Citation.Status         : Factor w/ 2 levels "Closed","Open": 1 2 1 1 1 1 1 1 1 1 ...
##  $ Amount.Paid             : int  89 3 89 89 153 155 155 3 155 155 ...
##  $ Amount.Due              : num  0 335 0 0 0 0 0 0 0 0 ...
##  $ Citation.Issue.Month    : Factor w/ 12 levels "Apr","Aug","Dec",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ Paid.On.Time            : int  1 0 1 1 1 1 1 1 1 1 ...
##  $ Latitude                : num  37.8 37.8 37.8 37.8 37.8 ...
##  $ Longitude               : num  -122 -122 -122 -122 -122 ...
##  $ Citation.Issue.Year     : int  2009 2009 2009 2009 2009 2009 2009 2009 2009 2009 ...
##  $ Citation.Issue.DayOfWeek: Factor w/ 7 levels "Sun","Mon","Tues",..: 3 1 2 2 2 3 3 4 4 5 ...
##  $ Citation.Issue.Weekday  : Factor w/ 2 levels "Weekday","Weekend": 1 2 1 1 1 1 1 1 1 1 ...
##  $ Citation.Issue.Hour     : int  17 16 18 9 8 9 16 17 17 9 ...
##  $ Citation.Issue.MonthYear: Factor w/ 72 levels "Apr-2009","Apr-2010",..: 19 19 19 19 19 19 19 19 19 19 ...
##  $ Fine.Level              : Factor w/ 2 levels "Low","High": 2 2 2 2 1 1 1 1 1 1 ...
##  $ Citation.Issue.Day      : int  10 15 16 16 16 17 17 18 18 19 ...
# Row count of the rush-hour subset
nrow(RushHour)
## [1] 12017
# First rows of the rush-hour subset
head(RushHour)
##   Ticket.Number Citation.Issue.Date  Violation Fine.Amount Citation.Status
## 1     975990164          2009-02-10   BUS ZONE         250          Closed
## 2     975990175          2009-02-15   BUS ZONE         250            Open
## 3     975990186          2009-02-16   BUS ZONE         250          Closed
## 4     975990190          2009-02-16   BUS ZONE         250          Closed
## 5     100002550          2009-02-16 TWAWY ZN#1          70          Closed
## 6     975990223          2009-02-17   DBL PARK          75          Closed
##   Amount.Paid Amount.Due Citation.Issue.Month Paid.On.Time Latitude
## 1          89          0                  Feb            1 37.78562
## 2           3        335                  Feb            0 37.78701
## 3          89          0                  Feb            1 37.78665
## 4          89          0                  Feb            1 37.78699
## 5         153          0                  Feb            1 37.78717
## 6         155          0                  Feb            1 37.78883
##   Longitude Citation.Issue.Year Citation.Issue.DayOfWeek
## 1 -122.4150                2009                     Tues
## 2 -122.4116                2009                      Sun
## 3 -122.4294                2009                      Mon
## 4 -122.4117                2009                      Mon
## 5 -122.4008                2009                      Mon
## 6 -122.4112                2009                     Tues
##   Citation.Issue.Weekday Citation.Issue.Hour Citation.Issue.MonthYear
## 1                Weekday                  17                 Feb-2009
## 2                Weekend                  16                 Feb-2009
## 3                Weekday                  18                 Feb-2009
## 4                Weekday                   9                 Feb-2009
## 5                Weekday                   8                 Feb-2009
## 6                Weekday                   9                 Feb-2009
##   Fine.Level Citation.Issue.Day
## 1       High                 10
## 2       High                 15
## 3       High                 16
## 4       High                 16
## 5        Low                 16
## 6        Low                 17
# Number of rush-hour citations on each calendar day, identified by
# day of month, month and year
Rush <- summarise(
    group_by(RushHour, Citation.Issue.Day, Citation.Issue.Month, Citation.Issue.Year),
    count = n()
)
RushTotals <- Rush[["count"]]

#EDA of Statistics of Rushhour
# Centre and spread of the daily rush-hour citation counts
mean(RushTotals)
## [1] 8.474612
median(RushTotals)
## [1] 6
var(RushTotals)
## [1] 64.75624
sd(RushTotals)
## [1] 8.047126
# Minimum, deciles, 95th percentile and maximum of the daily counts
quantile(RushTotals, c(0, .1, .2, .3, .4, .5, .6, .7, .8, .9, .95, 1))
##   0%  10%  20%  30%  40%  50%  60%  70%  80%  90%  95% 100% 
##    1    1    2    3    4    6    8   11   14   20   24   52
# Shapiro-Wilk test of normality on the daily counts
shapiro.test(RushTotals)
## 
##  Shapiro-Wilk normality test
## 
## data:  RushTotals
## W = 0.83909, p-value < 2.2e-16
#EDA Graphs for Rushhour
# Normal Q-Q plot of the daily rush-hour counts with its reference line
qqnorm(RushTotals)
qqline(RushTotals)

# Box plot of the daily rush-hour counts
boxplot(RushTotals)

# Binomial probability mass (size = 30, prob = 0.25) evaluated at each daily
# count, drawn as vertical lines (type = "h")
plot(RushTotals, dbinom(RushTotals, 30, 0.25), type = "h")

#EDA of NonRushhour
# Column names, types and leading values of the off-hours subset
str(NonRushHour)
## 'data.frame':    4787 obs. of  18 variables:
##  $ Ticket.Number           : int  100013644 100013645 100013646 100013648 100013649 975990153 975990212 975990245 100002552 975990256 ...
##  $ Citation.Issue.Date     : Factor w/ 1684 levels "2009-01-11","2009-01-19",..: 1 1 1 2 3 4 7 8 8 9 ...
##  $ Violation               : Factor w/ 6 levels "BUS ZONE","DBL PARK",..: 2 2 2 1 5 1 2 2 1 2 ...
##  $ Fine.Amount             : int  75 75 75 250 70 250 75 75 250 75 ...
##  $ Citation.Status         : Factor w/ 2 levels "Closed","Open": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Amount.Paid             : int  155 155 3 97 39 114 155 155 3 155 ...
##  $ Amount.Due              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Citation.Issue.Month    : Factor w/ 12 levels "Apr","Aug","Dec",..: 5 5 5 5 4 4 4 4 4 4 ...
##  $ Paid.On.Time            : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Latitude                : num  37.8 37.8 37.8 37.8 37.8 ...
##  $ Longitude               : num  -122 -122 -122 -122 -122 ...
##  $ Citation.Issue.Year     : int  2009 2009 2009 2009 2009 2009 2009 2009 2009 2009 ...
##  $ Citation.Issue.DayOfWeek: Factor w/ 7 levels "Sun","Mon","Tues",..: 1 1 1 2 1 4 2 3 3 4 ...
##  $ Citation.Issue.Weekday  : Factor w/ 2 levels "Weekday","Weekend": 2 2 2 1 2 1 1 1 1 1 ...
##  $ Citation.Issue.Hour     : int  10 12 12 12 13 14 11 15 10 14 ...
##  $ Citation.Issue.MonthYear: Factor w/ 72 levels "Apr-2009","Apr-2010",..: 25 25 25 25 19 19 19 19 19 19 ...
##  $ Fine.Level              : Factor w/ 2 levels "Low","High": 1 1 1 2 1 2 1 1 2 1 ...
##  $ Citation.Issue.Day      : int  11 11 11 19 1 4 16 17 17 18 ...
# Dimensions (rows, columns) of the off-hours subset
dim(NonRushHour)
## [1] 4787   18
# Row count of the off-hours subset
nrow(NonRushHour)
## [1] 4787
# Number of off-hours citations on each calendar day, identified by
# day of month, month and year
NonRush <- summarise(
    group_by(NonRushHour, Citation.Issue.Day, Citation.Issue.Month, Citation.Issue.Year),
    count = n()
)
NonRushTotals <- NonRush[["count"]]

# Centre and spread of the daily off-hours citation counts
mean(NonRushTotals)
## [1] 3.481455
median(NonRushTotals)
## [1] 3
sd(NonRushTotals)
## [1] 2.724261
var(NonRushTotals)
## [1] 7.421599
# Minimum, deciles, 95th percentile and maximum of the daily counts
quantile(NonRushTotals, c(0, .1, .2, .3, .4, .5, .6, .7, .8, .9, .95, 1))
##   0%  10%  20%  30%  40%  50%  60%  70%  80%  90%  95% 100% 
##    1    1    1    2    2    3    3    4    5    7    9   30
# Shapiro-Wilk test of normality on the daily counts
shapiro.test(NonRushTotals)
## 
##  Shapiro-Wilk normality test
## 
## data:  NonRushTotals
## W = 0.80712, p-value < 2.2e-16
# Normal Q-Q plot of the daily off-hours counts with its reference line
qqnorm(NonRushTotals)
qqline(NonRushTotals)

# Box plot of the daily off-hours counts
boxplot(NonRushTotals)

# Overlay the distributions of daily citation totals: rush hour in red, with
# off hours drawn on top in semi-transparent blue.
# The x scale is declared once; declaring it twice made ggplot2 report
# "Scale for 'x' is already present" and replace the first scale.
# The fill colours are constants rather than aes() mappings, so no legend is
# drawn and a `fill` label would have no effect.
ggplot(Rush, aes(x = count)) + 
    geom_histogram(binwidth = 1, fill = "red", color = "black") + 
    geom_histogram(data = NonRush, aes(x = count), binwidth = 1, fill = "blue", 
        color = "black", alpha = 0.7) + 
    scale_x_continuous(breaks = seq(0, 130, 10)) + 
    labs(title = "Rush Hour vs Off Hour Citation Distribution", 
        x = "Total Citations", y = "Number of Days")

# Binomial probability mass (size = 30, prob = 0.25) evaluated at each daily
# off-hours count, drawn as vertical lines (type = "h")
plot(NonRushTotals, dbinom(NonRushTotals, 30, 0.25), type = "h")

What Violations occur more during Rush Hour and what occur more during Off Hours?

# Proportion of Violation Type per Hour

# Ticket count for every citation hour
Tickets.per.Hour <- summarise(group_by(tickets, Citation.Issue.Hour), count = n())
Tickets.per.Hour
## # A tibble: 20 × 2
##    Citation.Issue.Hour count
##                  <int> <int>
## 1                    2     1
## 2                    4     6
## 3                    5     2
## 4                    6     4
## 5                    7   214
## 6                    8  1142
## 7                    9   619
## 8                   10   754
## 9                   11   812
## 10                  12   837
## 11                  13   673
## 12                  14   575
## 13                  15   966
## 14                  16  6190
## 15                  17  3669
## 16                  18   183
## 17                  19    99
## 18                  20    52
## 19                  21     4
## 20                  22     2
# Stacked bars scaled to 100% per hour: the mix of violation types in each hour
ggplot(tickets, aes(Citation.Issue.Hour, fill = Violation)) +
    geom_bar(position = "fill") +
    scale_x_continuous(breaks = seq(0, 24, 1)) +
    labs(
        title = "Proportion of Violation Types by Hour",
        x = "Citation Issue Hour",
        y = "Violation Type Proportion"
    )

# The graph shows almost entirely Double Parking and Bus Zone violations during off hours, and almost entirely Tow-Away and Prohibited Parking violations during rush hour

# Rush-hour ticket count per violation type
Violation.Rush <- summarise(group_by(RushHour, Violation), Total_During_Rush = n())
Violation.Rush
## # A tibble: 6 × 2
##    Violation Total_During_Rush
##       <fctr>             <int>
## 1   BUS ZONE               717
## 2   DBL PARK               786
## 3 PK PHB OTD              1469
## 4 PRK PROHIB              2259
## 5 TWAWY ZN#1              4593
## 6 TWAWY ZONE              2193
# Off-hours ticket count per violation type
Violation.Off <- summarise(group_by(NonRushHour, Violation), Total_During_Off = n())
Violation.Off
## # A tibble: 6 × 2
##    Violation Total_During_Off
##       <fctr>            <int>
## 1   BUS ZONE             1621
## 2   DBL PARK             2090
## 3 PK PHB OTD               10
## 4 PRK PROHIB              264
## 5 TWAWY ZN#1              700
## 6 TWAWY ZONE              102
# Share of each violation's tickets that were issued during rush hour, one
# value per row of Violation.Rush.
# Off-hours totals are looked up by Violation instead of by row position, so
# the shares stay correct if the two summaries differ in row order or if a
# violation has no off-hours tickets (its off-hours total is then taken as 0).
off.total <- Violation.Off$Total_During_Off[match(Violation.Rush$Violation, 
    Violation.Off$Violation)]
off.total[is.na(off.total)] <- 0L
a <- Violation.Rush$Total_During_Rush / (Violation.Rush$Total_During_Rush + off.total)
#a <- round(a, 3)