#Attaching the dplyr package
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the CSV file.
# Some JURISDICTION values carry stray whitespace (e.g. "Allegany County "),
# which splits one county across two factor levels and distorts every
# per-county count and summary. Trim the names right after loading so each
# county is represented by exactly one level.
MDcrime <- read.csv(file = "MDcrime.csv")
MDcrime$JURISDICTION <- factor(trimws(as.character(MDcrime$JURISDICTION)))
# Check the size of the dataset: the number of rows (observations) ...
nrow(MDcrime)
## [1] 1008
# ... the number of columns (variables) ...
ncol(MDcrime)
## [1] 38
# ... and the total count of missing (NA) cells across all columns.
sum(is.na(MDcrime))
## [1] 312
### Tabulate the number of rows per jurisdiction. Every county should appear
### exactly 42 times (one row per year, 1975-2016); any other count points to
### missing years or inconsistently written names (e.g. stray whitespace).
table(MDcrime$JURISDICTION)
## 
##        Allegany County       Allegany County     Anne Arundel County 
##                     40                      2                     40 
##   Anne Arundel County          Baltimore City       Baltimore County 
##                      2                     42                     42 
##         Calvert County        Caroline County         Carroll County 
##                     42                     42                     42 
##           Cecil County         Charles County      Dorchester County 
##                     42                     42                     42 
##       Frederick County         Garrett County         Harford County 
##                     42                     42                     42 
##          Howard County            Kent County      Montgomery County 
##                     42                     42                     42 
## Prince George's County    Queen Anne's County        Somerset County 
##                     42                     42                     42 
##      St. Mary's County          Talbot County      Washington County 
##                     42                     42                     42 
##        Wicomico County       Worcester County 
##                     42                     42
# Inspect the structure of the dataset: each column's type and first values.
str(MDcrime)
## 'data.frame':    1008 obs. of  38 variables:
##  $ JURISDICTION                                         : Factor w/ 26 levels "Allegany County",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ YEAR                                                 : Factor w/ 42 levels "1/1/1975","1/1/1976",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ POPULATION                                           : int  79655 83923 82102 79966 79721 80461 81781 81858 82605 80835 ...
##  $ MURDER                                               : int  3 2 3 1 1 2 11 1 5 2 ...
##  $ RAPE                                                 : int  5 2 7 2 7 12 13 18 9 15 ...
##  $ ROBBERY                                              : int  20 24 32 18 18 26 24 18 19 6 ...
##  $ AGGASSAULT                                           : int  114 59 85 81 84 79 101 80 89 67 ...
##  $ BE                                                   : int  669 581 592 539 502 541 539 447 347 361 ...
##  $ LARCENY.THEFT                                        : int  1425 1384 1390 1390 1611 1706 1697 1570 1412 1338 ...
##  $ MotorvehicleTHEFT                                    : int  93 73 102 100 99 108 88 55 67 68 ...
##  $ GRAND.TOTAL                                          : int  2329 2125 2211 2131 2322 2474 2473 2189 1948 1857 ...
##  $ PERCENT.CHANGE                                       : num  NA -8.8 4 -3.6 9 6.5 0 -11.5 -11 -4.7 ...
##  $ violentcrimetotal                                    : int  142 87 127 102 110 119 149 117 122 90 ...
##  $ VIOLENT.CRIME.PERCENT                                : num  6.1 4.1 5.7 4.8 4.7 4.8 6 5.3 6.3 4.8 ...
##  $ VIOLENT.CRIME.PERCENT.CHANGE                         : num  NA -38.7 46 -19.7 7.8 8.2 25.2 -21.5 4.3 -26.2 ...
##  $ PROPERTY.CRIME.TOTALS                                : int  2187 2038 2084 2029 2212 2355 2324 2072 1826 1767 ...
##  $ PROPERTY.CRIME.PERCENT                               : num  93.9 95.9 94.3 95.2 95.3 95.2 94 94.7 93.7 95.2 ...
##  $ PROPERTY.CRIME.PERCENT.CHANGE                        : num  NA -6.8 2.3 -2.6 9 6.5 -1.3 -10.8 -11.9 -3.2 ...
##  $ OVERALL.CRIME.RATE.PER.100.000.PEOPLE                : num  2924 2532 2693 2665 2913 ...
##  $ OVERALL.PERCENT.CHANGE.PER.100.000.PEOPLE            : num  NA -13.4 6.4 -1 9.3 5.6 -1.7 -11.6 -11.8 -2.6 ...
##  $ violentcrimerateper_100000                           : num  178 104 155 128 138 ...
##  $ VIOLENT.CRIME.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE : num  NA -41.8 49.2 -17.5 8.2 7.2 23.2 -21.6 3.3 -24.6 ...
##  $ PROPERTY.CRIME.RATE.PER.100.000.PEOPLE               : num  2746 2428 2538 2537 2775 ...
##  $ PROPERTY.CRIME.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE: num  NA -11.6 4.5 0 9.4 5.5 -2.9 -10.9 -12.7 -1.1 ...
##  $ murderper100000                                      : num  3.8 2.4 3.7 1.3 1.3 2.5 13.5 1.2 6.1 2.5 ...
##  $ RAPE.PER.100.000.PEOPLE                              : num  6.3 2.4 8.5 2.5 8.8 14.9 15.9 22 10.9 18.6 ...
##  $ robberyper100000                                     : num  25.1 28.6 39 22.5 22.6 32.3 29.3 22 23 7.4 ...
##  $ AGG..ASSAULT.PER.100.000.PEOPLE                      : num  143.1 70.3 103.5 101.3 105.4 ...
##  $ B...E.PER.100.000.PEOPLE                             : num  840 692 721 674 630 ...
##  $ LARCENY.THEFT.PER.100.000.PEOPLE                     : num  1789 1649 1693 1738 2021 ...
##  $ M.V.THEFT.PER.100.000.PEOPLE                         : num  117 87 124 125 124 ...
##  $ MURDER..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE       : num  NA -36.7 53.3 -65.8 0.3 ...
##  $ RAPE.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE          : num  NA -62 257.8 -70.7 251.1 ...
##  $ ROBBERY.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE       : num  NA 13.9 36.3 -42.2 0.3 43.1 -9.2 -25.1 4.6 -67.7 ...
##  $ AGG..ASSAULT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE : num  NA -50.9 47.3 -2.2 4 -6.8 25.8 -20.9 10.2 -23.1 ...
##  $ B...E.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE         : num  NA -17.6 4.2 -6.5 -6.6 6.8 -2 -17.1 -23.1 6.3 ...
##  $ LARCENY.THEFT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE: num  NA -7.8 2.7 2.7 16.3 4.9 -2.1 -7.6 -10.9 -3.2 ...
##  $ M.V.THEFT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE    : num  NA -25.5 42.8 0.7 -0.7 8.1 -19.8 -37.6 20.7 3.7 ...
# Preview the first rows of the dataset (the last rows are shown further below).
head(MDcrime)
##      JURISDICTION     YEAR POPULATION MURDER RAPE ROBBERY AGGASSAULT  BE
## 1 Allegany County 1/1/1975      79655      3    5      20        114 669
## 2 Allegany County 1/1/1976      83923      2    2      24         59 581
## 3 Allegany County 1/1/1977      82102      3    7      32         85 592
## 4 Allegany County 1/1/1978      79966      1    2      18         81 539
## 5 Allegany County 1/1/1979      79721      1    7      18         84 502
## 6 Allegany County 1/1/1980      80461      2   12      26         79 541
##   LARCENY.THEFT MotorvehicleTHEFT GRAND.TOTAL PERCENT.CHANGE
## 1          1425                93        2329             NA
## 2          1384                73        2125           -8.8
## 3          1390               102        2211            4.0
## 4          1390               100        2131           -3.6
## 5          1611                99        2322            9.0
## 6          1706               108        2474            6.5
##   violentcrimetotal VIOLENT.CRIME.PERCENT VIOLENT.CRIME.PERCENT.CHANGE
## 1               142                   6.1                           NA
## 2                87                   4.1                        -38.7
## 3               127                   5.7                         46.0
## 4               102                   4.8                        -19.7
## 5               110                   4.7                          7.8
## 6               119                   4.8                          8.2
##   PROPERTY.CRIME.TOTALS PROPERTY.CRIME.PERCENT
## 1                  2187                   93.9
## 2                  2038                   95.9
## 3                  2084                   94.3
## 4                  2029                   95.2
## 5                  2212                   95.3
## 6                  2355                   95.2
##   PROPERTY.CRIME.PERCENT.CHANGE OVERALL.CRIME.RATE.PER.100.000.PEOPLE
## 1                            NA                                2923.9
## 2                          -6.8                                2532.1
## 3                           2.3                                2693.0
## 4                          -2.6                                2664.9
## 5                           9.0                                2912.7
## 6                           6.5                                3074.8
##   OVERALL.PERCENT.CHANGE.PER.100.000.PEOPLE violentcrimerateper_100000
## 1                                        NA                      178.3
## 2                                     -13.4                      103.7
## 3                                       6.4                      154.7
## 4                                      -1.0                      127.6
## 5                                       9.3                      138.0
## 6                                       5.6                      147.9
##   VIOLENT.CRIME.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1                                                   NA
## 2                                                -41.8
## 3                                                 49.2
## 4                                                -17.5
## 5                                                  8.2
## 6                                                  7.2
##   PROPERTY.CRIME.RATE.PER.100.000.PEOPLE
## 1                                 2745.6
## 2                                 2428.4
## 3                                 2538.3
## 4                                 2537.3
## 5                                 2774.7
## 6                                 2926.9
##   PROPERTY.CRIME.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE murderper100000
## 1                                                    NA             3.8
## 2                                                 -11.6             2.4
## 3                                                   4.5             3.7
## 4                                                   0.0             1.3
## 5                                                   9.4             1.3
## 6                                                   5.5             2.5
##   RAPE.PER.100.000.PEOPLE robberyper100000 AGG..ASSAULT.PER.100.000.PEOPLE
## 1                     6.3             25.1                           143.1
## 2                     2.4             28.6                            70.3
## 3                     8.5             39.0                           103.5
## 4                     2.5             22.5                           101.3
## 5                     8.8             22.6                           105.4
## 6                    14.9             32.3                            98.2
##   B...E.PER.100.000.PEOPLE LARCENY.THEFT.PER.100.000.PEOPLE
## 1                    839.9                           1789.0
## 2                    692.3                           1649.1
## 3                    721.1                           1693.0
## 4                    674.0                           1738.2
## 5                    629.7                           2020.8
## 6                    672.4                           2120.3
##   M.V.THEFT.PER.100.000.PEOPLE
## 1                        116.8
## 2                         87.0
## 3                        124.2
## 4                        125.1
## 5                        124.2
## 6                        134.2
##   MURDER..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1                                             NA
## 2                                          -36.7
## 3                                           53.3
## 4                                          -65.8
## 5                                            0.3
## 6                                           98.2
##   RAPE.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1                                          NA
## 2                                       -62.0
## 3                                       257.8
## 4                                       -70.7
## 5                                       251.1
## 6                                        69.9
##   ROBBERY.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1                                             NA
## 2                                           13.9
## 3                                           36.3
## 4                                          -42.2
## 5                                            0.3
## 6                                           43.1
##   AGG..ASSAULT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1                                                   NA
## 2                                                -50.9
## 3                                                 47.3
## 4                                                 -2.2
## 5                                                  4.0
## 6                                                 -6.8
##   B...E.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1                                           NA
## 2                                        -17.6
## 3                                          4.2
## 4                                         -6.5
## 5                                         -6.6
## 6                                          6.8
##   LARCENY.THEFT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1                                                    NA
## 2                                                  -7.8
## 3                                                   2.7
## 4                                                   2.7
## 5                                                  16.3
## 6                                                   4.9
##   M.V.THEFT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1                                                NA
## 2                                             -25.5
## 3                                              42.8
## 4                                               0.7
## 5                                              -0.7
## 6                                               8.1

# Preview the last rows of the dataset (printed once; the call was duplicated).
tail(MDcrime)
##          JURISDICTION     YEAR POPULATION MURDER RAPE ROBBERY AGGASSAULT
## 1003 Worcester County 1/1/2011      51942      1    7      28        212
## 1004 Worcester County 1/1/2012      52011      1   16      31        138
## 1005 Worcester County 1/1/2013      51718      2   14      28        173
## 1006 Worcester County 1/1/2014      51756      1   14      35        140
## 1007 Worcester County 1/1/2015      51566      1   15      27        102
## 1008 Worcester County 1/1/2016      51255      3   17      39         93
##       BE LARCENY.THEFT MotorvehicleTHEFT GRAND.TOTAL PERCENT.CHANGE
## 1003 423          1852                41        2564           -2.0
## 1004 432          1751                33        2402           -6.3
## 1005 356          1705                55        2333           -2.9
## 1006 445          1754                38        2427            4.0
## 1007 273          1562                28        2008          -17.3
## 1008 289          1514                32        1987           -1.0
##      violentcrimetotal VIOLENT.CRIME.PERCENT VIOLENT.CRIME.PERCENT.CHANGE
## 1003               248                   9.7                         -5.0
## 1004               186                   7.7                        -25.0
## 1005               217                   9.3                         16.7
## 1006               190                   7.8                        -12.4
## 1007               145                   7.2                        -23.7
## 1008               152                   7.6                          4.8
##      PROPERTY.CRIME.TOTALS PROPERTY.CRIME.PERCENT
## 1003                  2316                   90.3
## 1004                  2216                   92.3
## 1005                  2116                   90.7
## 1006                  2237                   92.2
## 1007                  1863                   92.8
## 1008                  1835                   92.4
##      PROPERTY.CRIME.PERCENT.CHANGE OVERALL.CRIME.RATE.PER.100.000.PEOPLE
## 1003                          -1.7                                4936.3
## 1004                          -4.3                                4618.3
## 1005                          -4.5                                4511.0
## 1006                           5.7                                4689.3
## 1007                         -16.7                                3894.0
## 1008                          -1.5                                3876.7
##      OVERALL.PERCENT.CHANGE.PER.100.000.PEOPLE violentcrimerateper_100000
## 1003                                      -6.4                      477.5
## 1004                                      -6.4                      357.6
## 1005                                      -2.3                      419.6
## 1006                                       4.0                      367.1
## 1007                                     -17.0                      281.2
## 1008                                      -0.4                      296.6
##      VIOLENT.CRIME.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1003                                                 -9.2
## 1004                                                -25.1
## 1005                                                 17.3
## 1006                                                -12.5
## 1007                                                -23.4
## 1008                                                  5.5
##      PROPERTY.CRIME.RATE.PER.100.000.PEOPLE
## 1003                                 4458.8
## 1004                                 4260.6
## 1005                                 4091.4
## 1006                                 4322.2
## 1007                                 3612.8
## 1008                                 3580.1
##      PROPERTY.CRIME.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE murderper100000
## 1003                                                  -6.0             1.9
## 1004                                                  -4.4             1.9
## 1005                                                  -4.0             3.9
## 1006                                                   5.6             1.9
## 1007                                                 -16.4             1.9
## 1008                                                 -16.4             5.9
##      RAPE.PER.100.000.PEOPLE robberyper100000
## 1003                    13.5             53.9
## 1004                    30.8             59.6
## 1005                    27.1             54.1
## 1006                    27.1             67.6
## 1007                    29.1             52.4
## 1008                    33.2             76.1
##      AGG..ASSAULT.PER.100.000.PEOPLE B...E.PER.100.000.PEOPLE
## 1003                           408.1                    814.4
## 1004                           265.3                    830.6
## 1005                           334.5                    688.3
## 1006                           270.5                    859.8
## 1007                           197.8                    529.4
## 1008                           181.4                    563.8
##      LARCENY.THEFT.PER.100.000.PEOPLE M.V.THEFT.PER.100.000.PEOPLE
## 1003                           3565.5                         78.9
## 1004                           3366.6                         63.4
## 1005                           3296.7                        106.3
## 1006                           3389.0                         73.4
## 1007                           3029.1                         54.3
## 1008                           2953.9                         62.4
##      MURDER..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1003                                          -76.1
## 1004                                           -0.1
## 1005                                          101.1
## 1006                                          -50.0
## 1007                                            0.4
## 1008                                          201.8
##      RAPE.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1003                                       -48.5
## 1004                                       128.3
## 1005                                       -12.0
## 1006                                        -0.1
## 1007                                         7.5
## 1008                                        14.0
##      ROBBERY.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1003                                          -40.5
## 1004                                           10.6
## 1005                                           -9.2
## 1006                                           24.9
## 1007                                          -22.6
## 1008                                           45.3
##      AGG..ASSAULT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1003                                                  1.8
## 1004                                                -35.0
## 1005                                                 26.1
## 1006                                                -19.1
## 1007                                                -26.9
## 1008                                                 -8.3
##      B...E.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1003                                        -17.7
## 1004                                          2.0
## 1005                                        -17.1
## 1006                                         24.9
## 1007                                        -38.4
## 1008                                          6.5
##      LARCENY.THEFT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1003                                                  -2.6
## 1004                                                  -5.6
## 1005                                                  -2.1
## 1006                                                   2.8
## 1007                                                 -10.6
## 1008                                                  -2.5
##      M.V.THEFT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1003                                             -18.4
## 1004                                             -19.6
## 1005                                              67.6
## 1006                                             -31.0
## 1007                                             -26.0
## 1008                                              15.0
# We will keep the jurisdiction, year, population, and overall crime rate per
# 100,000 people. First, list all variable names to find the exact columns.
names(MDcrime)
##  [1] "JURISDICTION"                                         
##  [2] "YEAR"                                                 
##  [3] "POPULATION"                                           
##  [4] "MURDER"                                               
##  [5] "RAPE"                                                 
##  [6] "ROBBERY"                                              
##  [7] "AGGASSAULT"                                           
##  [8] "BE"                                                   
##  [9] "LARCENY.THEFT"                                        
## [10] "MotorvehicleTHEFT"                                    
## [11] "GRAND.TOTAL"                                          
## [12] "PERCENT.CHANGE"                                       
## [13] "violentcrimetotal"                                    
## [14] "VIOLENT.CRIME.PERCENT"                                
## [15] "VIOLENT.CRIME.PERCENT.CHANGE"                         
## [16] "PROPERTY.CRIME.TOTALS"                                
## [17] "PROPERTY.CRIME.PERCENT"                               
## [18] "PROPERTY.CRIME.PERCENT.CHANGE"                        
## [19] "OVERALL.CRIME.RATE.PER.100.000.PEOPLE"                
## [20] "OVERALL.PERCENT.CHANGE.PER.100.000.PEOPLE"            
## [21] "violentcrimerateper_100000"                           
## [22] "VIOLENT.CRIME.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE" 
## [23] "PROPERTY.CRIME.RATE.PER.100.000.PEOPLE"               
## [24] "PROPERTY.CRIME.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE"
## [25] "murderper100000"                                      
## [26] "RAPE.PER.100.000.PEOPLE"                              
## [27] "robberyper100000"                                     
## [28] "AGG..ASSAULT.PER.100.000.PEOPLE"                      
## [29] "B...E.PER.100.000.PEOPLE"                             
## [30] "LARCENY.THEFT.PER.100.000.PEOPLE"                     
## [31] "M.V.THEFT.PER.100.000.PEOPLE"                         
## [32] "MURDER..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE"       
## [33] "RAPE.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE"          
## [34] "ROBBERY.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE"       
## [35] "AGG..ASSAULT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE" 
## [36] "B...E.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE"         
## [37] "LARCENY.THEFT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE"
## [38] "M.V.THEFT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE"
# Keep only the four columns needed for the rest of the analysis.
MDcrime_df <- MDcrime %>%
  select(
    JURISDICTION,
    YEAR,
    POPULATION,
    OVERALL.CRIME.RATE.PER.100.000.PEOPLE
  )
## Review the structure of the reduced data frame (MDcrime_df).
str(MDcrime_df)
## 'data.frame':    1008 obs. of  4 variables:
##  $ JURISDICTION                         : Factor w/ 26 levels "Allegany County",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ YEAR                                 : Factor w/ 42 levels "1/1/1975","1/1/1976",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ POPULATION                           : int  79655 83923 82102 79966 79721 80461 81781 81858 82605 80835 ...
##  $ OVERALL.CRIME.RATE.PER.100.000.PEOPLE: num  2924 2532 2693 2665 2913 ...
## Expose the overall crime rate per 100,000 people under the shorter name
## `crime_rate`. mutate() adds it as a new column, so the original long-named
## column stays in the data frame alongside it.
MDcrime_df <- MDcrime_df %>%
  mutate(crime_rate = OVERALL.CRIME.RATE.PER.100.000.PEOPLE)
head(MDcrime_df)
##      JURISDICTION     YEAR POPULATION
## 1 Allegany County 1/1/1975      79655
## 2 Allegany County 1/1/1976      83923
## 3 Allegany County 1/1/1977      82102
## 4 Allegany County 1/1/1978      79966
## 5 Allegany County 1/1/1979      79721
## 6 Allegany County 1/1/1980      80461
##   OVERALL.CRIME.RATE.PER.100.000.PEOPLE crime_rate
## 1                                2923.9     2923.9
## 2                                2532.1     2532.1
## 3                                2693.0     2693.0
## 4                                2664.9     2664.9
## 5                                2912.7     2912.7
## 6                                3074.8     3074.8
# Parse YEAR (text such as "1/1/1975") into a proper Date, stored in a new
# column `year_new` so the original column is left untouched.
# YEAR is referenced by bare name: inside mutate() the data frame's columns are
# already in scope, whereas `MDcrime_df$YEAR` would bypass data masking and
# ignore any grouping or earlier step applied to the input.
MDcrime_df <- mutate(
  MDcrime_df,
  year_new = as.Date(YEAR, format = "%m/%d/%Y")
)
head(MDcrime_df)
##      JURISDICTION     YEAR POPULATION
## 1 Allegany County 1/1/1975      79655
## 2 Allegany County 1/1/1976      83923
## 3 Allegany County 1/1/1977      82102
## 4 Allegany County 1/1/1978      79966
## 5 Allegany County 1/1/1979      79721
## 6 Allegany County 1/1/1980      80461
##   OVERALL.CRIME.RATE.PER.100.000.PEOPLE crime_rate   year_new
## 1                                2923.9     2923.9 1975-01-01
## 2                                2532.1     2532.1 1976-01-01
## 3                                2693.0     2693.0 1977-01-01
## 4                                2664.9     2664.9 1978-01-01
## 5                                2912.7     2912.7 1979-01-01
## 6                                3074.8     3074.8 1980-01-01
# Which county has the highest crime rate per 100,000?
# Average the annual rate within each jurisdiction, then sort from highest to
# lowest. Note that the summary column is itself named `MDcrime_df`.
Highest_crime_rates_by_county <- MDcrime_df %>%
  group_by(JURISDICTION) %>%
  summarize(MDcrime_df = mean(crime_rate)) %>%
  as.data.frame() %>%
  arrange(desc(MDcrime_df))
head(Highest_crime_rates_by_county, n = 26)
##              JURISDICTION MDcrime_df
## 1          Baltimore City   8917.145
## 2        Worcester County   7148.333
## 3  Prince George's County   6429.619
## 4        Baltimore County   5273.274
## 5         Wicomico County   5206.381
## 6     Anne Arundel County   4484.002
## 7       Dorchester County   4208.743
## 8           Howard County   3942.710
## 9          Charles County   3846.110
## 10           Cecil County   3642.740
## 11      Montgomery County   3588.581
## 12       Allegany County    3342.550
## 13          Talbot County   3295.355
## 14        Allegany County   3136.213
## 15        Somerset County   3115.286
## 16         Harford County   2933.545
## 17      St. Mary's County   2898.819
## 18        Caroline County   2892.738
## 19       Frederick County   2787.195
## 20      Washington County   2710.171
## 21   Anne Arundel County    2659.350
## 22    Queen Anne's County   2592.698
## 23            Kent County   2542.771
## 24         Calvert County   2432.902
## 25         Carroll County   2195.419
## 26         Garrett County   2063.914
### Baltimore City has the highest average annual crime rate per 100,000 people over 1975-2016.
# Let's create a boxplot of the crime rate per 100,000 people in Maryland. But first, let's load ggplot2.
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
 # One box per county showing the spread of its annual crime rates; the red
 # dot marks each county's mean. The legend is hidden because the x axis
 # already names the counties.
 # NOTE(review): `fun.y` is deprecated in ggplot2 >= 3.3.0 in favour of `fun`;
 # it is kept here because this script appears to target an older ggplot2.
 MDboxplot <- ggplot(
   data = MDcrime_df,
   mapping = aes(x = JURISDICTION, y = crime_rate, fill = JURISDICTION)
 ) +
   geom_boxplot(alpha = 0.3) +
   stat_summary(
     fun.y = mean, geom = "point",
     shape = 20, size = 4, color = "red", fill = "red"
   ) +
   labs(
     title = "Boxplot Of Maryland Crime Rate per 100,000",
     x = "County",
     y = "Crime Rate"
   ) +
   theme(
     legend.position = "none",
     axis.text.x = element_text(angle = 45, vjust = 1, size = 8, hjust = 1),
     plot.title = element_text(hjust = 0.5)
   )
 MDboxplot

# Total population by year
## Sum the population of all jurisdictions within each year and order the
## result chronologically.
population_trends <- MDcrime_df %>%
  group_by(year_new) %>%
  summarize(total_population = sum(POPULATION)) %>%
  as.data.frame() %>%
  arrange(year_new)
head(population_trends, n = 47)
##      year_new total_population
## 1  1975-01-01          4098000
## 2  1976-01-01          4144000
## 3  1977-01-01          4139000
## 4  1978-01-01          4143000
## 5  1979-01-01          4149000
## 6  1980-01-01          4192211
## 7  1981-01-01          4261000
## 8  1982-01-01          4265000
## 9  1983-01-01          4304000
## 10 1984-01-01          4349000
## 11 1985-01-01          4392000
## 12 1986-01-01          4463000
## 13 1987-01-01          4535000
## 14 1988-01-01          4644000
## 15 1989-01-01          4694000
## 16 1990-01-01          4781468
## 17 1991-01-01          4860000
## 18 1992-01-01          4908000
## 19 1993-01-01          4965000
## 20 1994-01-01          5005640
## 21 1995-01-01          5042000
## 22 1996-01-01          5072000
## 23 1997-01-01          5093990
## 24 1998-01-01          5135000
## 25 1999-01-01          5172000
## 26 2000-01-01          5296486
## 27 2001-01-01          5375156
## 28 2002-01-01          5458137
## 29 2003-01-01          5508909
## 30 2004-01-01          5558058
## 31 2005-01-01          5600388
## 32 2006-01-01          5615727
## 33 2007-01-01          5618344
## 34 2008-01-01          5633597
## 35 2009-01-01          5699478
## 36 2010-01-01          5773550
## 37 2011-01-01          5828289
## 38 2012-01-01          5884563
## 39 2013-01-01          5928814
## 40 2014-01-01          5976407
## 41 2015-01-01          6006401
## 42 2016-01-01          6016537
### Visualize the 42-year population trend as a line chart.
ggplot(
  data = population_trends,
  mapping = aes(x = year_new, y = total_population)
) +
  geom_line()

#Does increase in population affect crime rate in Maryland?
###First, let's load the tidyverse package
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.2.1 --
## v tibble  2.1.3     v purrr   0.3.2
## v tidyr   0.8.3     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
###Scatter plot of crime rate against population for every row of MDcrime_df, with a fitted linear regression line
ggplot(data = MDcrime_df, mapping = aes(x = POPULATION, y = crime_rate)) +
  stat_smooth(method = "lm") +
  geom_point()

###Now let's look at the relationship between population and crime rate in Montgomery County
Montgomerycounty <- MDcrime_df %>%
  filter(JURISDICTION == "Montgomery County")
###Preview the first six rows
head(Montgomerycounty, n = 6)
##        JURISDICTION     YEAR POPULATION
## 1 Montgomery County 1/1/1975     571436
## 2 Montgomery County 1/1/1976     570275
## 3 Montgomery County 1/1/1977     575310
## 4 Montgomery County 1/1/1978     582458
## 5 Montgomery County 1/1/1979     576776
## 6 Montgomery County 1/1/1980     574093
##   OVERALL.CRIME.RATE.PER.100.000.PEOPLE crime_rate   year_new
## 1                                4563.9     4563.9 1975-01-01
## 2                                4271.6     4271.6 1976-01-01
## 3                                4311.8     4311.8 1977-01-01
## 4                                4469.7     4469.7 1978-01-01
## 5                                5243.3     5243.3 1979-01-01
## 6                                5482.4     5482.4 1980-01-01
###Preview the last six rows
tail(Montgomerycounty, n = 6)
##         JURISDICTION     YEAR POPULATION
## 37 Montgomery County 1/1/2011     980991
## 38 Montgomery County 1/1/2012     999353
## 39 Montgomery County 1/1/2013    1016455
## 40 Montgomery County 1/1/2014    1029182
## 41 Montgomery County 1/1/2015    1041345
## 42 Montgomery County 1/1/2016    1047303
##    OVERALL.CRIME.RATE.PER.100.000.PEOPLE crime_rate   year_new
## 37                                2060.8     2060.8 2011-01-01
## 38                                1928.0     1928.0 2012-01-01
## 39                                1774.6     1774.6 2013-01-01
## 40                                1795.1     1795.1 2014-01-01
## 41                                1782.9     1782.9 2015-01-01
## 42                                1665.5     1665.5 2016-01-01
###Scatter plot of Montgomery County's crime rate against its population, with a fitted linear regression line
ggplot(data = Montgomerycounty, mapping = aes(x = POPULATION, y = crime_rate)) +
  stat_smooth(method = "lm") +
  geom_point()

###The graph above shows that the crime rate per 100,000 people decreases as the population increases.
###Let's look at the relationship between population and crime rate in Baltimore City
Baltimorecity <- MDcrime_df %>%
  filter(JURISDICTION == "Baltimore City")
ggplot(data = Baltimorecity, mapping = aes(x = POPULATION, y = crime_rate)) +
  stat_smooth(method = "lm") +
  geom_point()

###The graph shows that the crime rate increases as the population increases but then decreases as the population continues to grow
###Let's look at Howard County
howardcounty <- MDcrime_df %>%
  filter(JURISDICTION == "Howard County")
ggplot(data = howardcounty, mapping = aes(x = POPULATION, y = crime_rate)) +
  stat_smooth(method = "lm") +
  geom_point()

#The graph shows that crime rates decrease as the population increases in Howard County.
###Let's look at Calvert County
calvertcounty <- MDcrime_df %>%
  filter(JURISDICTION == "Calvert County")
ggplot(data = calvertcounty, mapping = aes(x = POPULATION, y = crime_rate)) +
  stat_smooth(method = "lm") +
  geom_point()

###Let's look at Worcester County
worcestercounty <- MDcrime_df %>%
  filter(JURISDICTION == "Worcester County")
ggplot(data = worcestercounty, mapping = aes(x = POPULATION, y = crime_rate)) +
  stat_smooth(method = "lm") +
  geom_point()

#Worcester County has a population of only approximately 52,000 residents, yet it has one of the highest crime rates compared to other counties in Maryland. This is due to the high property crime rate in Ocean City. Ocean City is a popular summer vacation spot; therefore, it's a target for criminals. The crime rate per 100,000 people has been decreasing as the population increases.
#Now lets use the T-test to answer whether the average crime rate in Baltimore City is higher than the average crime rate in the whole state of Maryland. 
###Null hypothesis: the average crime rate for Baltimore City is no different from the average crime rate for the state of Maryland (excluding Baltimore City). Rejecting this hypothesis, together with a higher sample mean for Baltimore City, indicates that the crime rate in Baltimore City is higher than the crime rate in the rest of Maryland.
#First, we will filter and select the Baltimore City crime rates (one value per row of MDcrime_df for that jurisdiction) and then calculate their variance.
#The rows are used as-is: grouping by crime_rate before averaging would make mean() a no-op and would silently drop observations that share the same rate.
#The column keeps the name Baltimore_average because the t-test further down refers to it.
Baltimore_df <- MDcrime_df %>%
  filter(JURISDICTION == "Baltimore City") %>%
  select(JURISDICTION, crime_rate) %>%
  mutate(Baltimore_average = crime_rate)
#Sample variance of the Baltimore City crime rates
var(Baltimore_df$Baltimore_average)
## [1] 4551424
#We will do the same for the crime rates of every Maryland jurisdiction except Baltimore City.
#Every row is kept: grouping by crime_rate before averaging would make mean() a no-op and would silently drop observations that share the same rate within a jurisdiction.
#The column keeps the name MDcrimerate_average because the t-test further down refers to it.
MDcrimerate_df <- MDcrime_df %>%
  filter(JURISDICTION != "Baltimore City") %>%
  select(JURISDICTION, crime_rate) %>%
  mutate(MDcrimerate_average = crime_rate)
#Sample variance of the crime rates outside Baltimore City
var(MDcrimerate_df$MDcrimerate_average)
## [1] 2356237

#Now let's compute the two-sample t-test (variances not assumed equal) to test our hypothesis above.

t.test(
  x = Baltimore_df$Baltimore_average,
  y = MDcrimerate_df$MDcrimerate_average,
  var.equal = FALSE
)
## 
##  Welch Two Sample t-test
## 
## data:  Baltimore_df$Baltimore_average and MDcrimerate_df$MDcrimerate_average
## t = 15.909, df = 42.866, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  4624.452 5967.190
## sample estimates:
## mean of x mean of y 
##  8917.145  3621.324
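#Since the p-value is very small, we can comfortably reject the null hypothesis. Because the sample mean for Baltimore City (8917.1) is well above the sample mean for the other jurisdictions (3621.3), we conclude that the mean crime rate for Baltimore City is higher than the mean crime rate of the rest of Maryland.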