#Attaching the dplyr package
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the Maryland crime data set from the CSV file in the working directory.
MDcrime <- read.csv("MDcrime.csv")
# Quick size checks: row count, column count, and total number of NA cells.
nrow(MDcrime)
## [1] 1008
ncol(MDcrime)
## [1] 38
sum(is.na(MDcrime))
## [1] 312
# Frequency of each JURISDICTION label. Every county should have 42 rows,
# one per year from 1975 through 2016.
# NOTE(review): the table printed below lists "Allegany County" and
# "Anne Arundel County" twice each, with 40 and 2 rows, so the labels are not
# fully consistent -- presumably stray whitespace in the CSV; confirm and
# clean the labels before grouping by JURISDICTION.
table(MDcrime$JURISDICTION)
##
## Allegany County Allegany County Anne Arundel County
## 40 2 40
## Anne Arundel County Baltimore City Baltimore County
## 2 42 42
## Calvert County Caroline County Carroll County
## 42 42 42
## Cecil County Charles County Dorchester County
## 42 42 42
## Frederick County Garrett County Harford County
## 42 42 42
## Howard County Kent County Montgomery County
## 42 42 42
## Prince George's County Queen Anne's County Somerset County
## 42 42 42
## St. Mary's County Talbot County Washington County
## 42 42 42
## Wicomico County Worcester County
## 42 42
# Inspect the structure of the data set: column names, types, and the first
# few values of each column.
str(MDcrime)
## 'data.frame': 1008 obs. of 38 variables:
## $ JURISDICTION : Factor w/ 26 levels "Allegany County",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ YEAR : Factor w/ 42 levels "1/1/1975","1/1/1976",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ POPULATION : int 79655 83923 82102 79966 79721 80461 81781 81858 82605 80835 ...
## $ MURDER : int 3 2 3 1 1 2 11 1 5 2 ...
## $ RAPE : int 5 2 7 2 7 12 13 18 9 15 ...
## $ ROBBERY : int 20 24 32 18 18 26 24 18 19 6 ...
## $ AGGASSAULT : int 114 59 85 81 84 79 101 80 89 67 ...
## $ BE : int 669 581 592 539 502 541 539 447 347 361 ...
## $ LARCENY.THEFT : int 1425 1384 1390 1390 1611 1706 1697 1570 1412 1338 ...
## $ MotorvehicleTHEFT : int 93 73 102 100 99 108 88 55 67 68 ...
## $ GRAND.TOTAL : int 2329 2125 2211 2131 2322 2474 2473 2189 1948 1857 ...
## $ PERCENT.CHANGE : num NA -8.8 4 -3.6 9 6.5 0 -11.5 -11 -4.7 ...
## $ violentcrimetotal : int 142 87 127 102 110 119 149 117 122 90 ...
## $ VIOLENT.CRIME.PERCENT : num 6.1 4.1 5.7 4.8 4.7 4.8 6 5.3 6.3 4.8 ...
## $ VIOLENT.CRIME.PERCENT.CHANGE : num NA -38.7 46 -19.7 7.8 8.2 25.2 -21.5 4.3 -26.2 ...
## $ PROPERTY.CRIME.TOTALS : int 2187 2038 2084 2029 2212 2355 2324 2072 1826 1767 ...
## $ PROPERTY.CRIME.PERCENT : num 93.9 95.9 94.3 95.2 95.3 95.2 94 94.7 93.7 95.2 ...
## $ PROPERTY.CRIME.PERCENT.CHANGE : num NA -6.8 2.3 -2.6 9 6.5 -1.3 -10.8 -11.9 -3.2 ...
## $ OVERALL.CRIME.RATE.PER.100.000.PEOPLE : num 2924 2532 2693 2665 2913 ...
## $ OVERALL.PERCENT.CHANGE.PER.100.000.PEOPLE : num NA -13.4 6.4 -1 9.3 5.6 -1.7 -11.6 -11.8 -2.6 ...
## $ violentcrimerateper_100000 : num 178 104 155 128 138 ...
## $ VIOLENT.CRIME.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE : num NA -41.8 49.2 -17.5 8.2 7.2 23.2 -21.6 3.3 -24.6 ...
## $ PROPERTY.CRIME.RATE.PER.100.000.PEOPLE : num 2746 2428 2538 2537 2775 ...
## $ PROPERTY.CRIME.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE: num NA -11.6 4.5 0 9.4 5.5 -2.9 -10.9 -12.7 -1.1 ...
## $ murderper100000 : num 3.8 2.4 3.7 1.3 1.3 2.5 13.5 1.2 6.1 2.5 ...
## $ RAPE.PER.100.000.PEOPLE : num 6.3 2.4 8.5 2.5 8.8 14.9 15.9 22 10.9 18.6 ...
## $ robberyper100000 : num 25.1 28.6 39 22.5 22.6 32.3 29.3 22 23 7.4 ...
## $ AGG..ASSAULT.PER.100.000.PEOPLE : num 143.1 70.3 103.5 101.3 105.4 ...
## $ B...E.PER.100.000.PEOPLE : num 840 692 721 674 630 ...
## $ LARCENY.THEFT.PER.100.000.PEOPLE : num 1789 1649 1693 1738 2021 ...
## $ M.V.THEFT.PER.100.000.PEOPLE : num 117 87 124 125 124 ...
## $ MURDER..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE : num NA -36.7 53.3 -65.8 0.3 ...
## $ RAPE.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE : num NA -62 257.8 -70.7 251.1 ...
## $ ROBBERY.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE : num NA 13.9 36.3 -42.2 0.3 43.1 -9.2 -25.1 4.6 -67.7 ...
## $ AGG..ASSAULT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE : num NA -50.9 47.3 -2.2 4 -6.8 25.8 -20.9 10.2 -23.1 ...
## $ B...E.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE : num NA -17.6 4.2 -6.5 -6.6 6.8 -2 -17.1 -23.1 6.3 ...
## $ LARCENY.THEFT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE: num NA -7.8 2.7 2.7 16.3 4.9 -2.1 -7.6 -10.9 -3.2 ...
## $ M.V.THEFT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE : num NA -25.5 42.8 0.7 -0.7 8.1 -19.8 -37.6 20.7 3.7 ...
# Look at the first rows of the data set; the last rows are printed by tail()
# further below.
head(MDcrime)
## JURISDICTION YEAR POPULATION MURDER RAPE ROBBERY AGGASSAULT BE
## 1 Allegany County 1/1/1975 79655 3 5 20 114 669
## 2 Allegany County 1/1/1976 83923 2 2 24 59 581
## 3 Allegany County 1/1/1977 82102 3 7 32 85 592
## 4 Allegany County 1/1/1978 79966 1 2 18 81 539
## 5 Allegany County 1/1/1979 79721 1 7 18 84 502
## 6 Allegany County 1/1/1980 80461 2 12 26 79 541
## LARCENY.THEFT MotorvehicleTHEFT GRAND.TOTAL PERCENT.CHANGE
## 1 1425 93 2329 NA
## 2 1384 73 2125 -8.8
## 3 1390 102 2211 4.0
## 4 1390 100 2131 -3.6
## 5 1611 99 2322 9.0
## 6 1706 108 2474 6.5
## violentcrimetotal VIOLENT.CRIME.PERCENT VIOLENT.CRIME.PERCENT.CHANGE
## 1 142 6.1 NA
## 2 87 4.1 -38.7
## 3 127 5.7 46.0
## 4 102 4.8 -19.7
## 5 110 4.7 7.8
## 6 119 4.8 8.2
## PROPERTY.CRIME.TOTALS PROPERTY.CRIME.PERCENT
## 1 2187 93.9
## 2 2038 95.9
## 3 2084 94.3
## 4 2029 95.2
## 5 2212 95.3
## 6 2355 95.2
## PROPERTY.CRIME.PERCENT.CHANGE OVERALL.CRIME.RATE.PER.100.000.PEOPLE
## 1 NA 2923.9
## 2 -6.8 2532.1
## 3 2.3 2693.0
## 4 -2.6 2664.9
## 5 9.0 2912.7
## 6 6.5 3074.8
## OVERALL.PERCENT.CHANGE.PER.100.000.PEOPLE violentcrimerateper_100000
## 1 NA 178.3
## 2 -13.4 103.7
## 3 6.4 154.7
## 4 -1.0 127.6
## 5 9.3 138.0
## 6 5.6 147.9
## VIOLENT.CRIME.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1 NA
## 2 -41.8
## 3 49.2
## 4 -17.5
## 5 8.2
## 6 7.2
## PROPERTY.CRIME.RATE.PER.100.000.PEOPLE
## 1 2745.6
## 2 2428.4
## 3 2538.3
## 4 2537.3
## 5 2774.7
## 6 2926.9
## PROPERTY.CRIME.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE murderper100000
## 1 NA 3.8
## 2 -11.6 2.4
## 3 4.5 3.7
## 4 0.0 1.3
## 5 9.4 1.3
## 6 5.5 2.5
## RAPE.PER.100.000.PEOPLE robberyper100000 AGG..ASSAULT.PER.100.000.PEOPLE
## 1 6.3 25.1 143.1
## 2 2.4 28.6 70.3
## 3 8.5 39.0 103.5
## 4 2.5 22.5 101.3
## 5 8.8 22.6 105.4
## 6 14.9 32.3 98.2
## B...E.PER.100.000.PEOPLE LARCENY.THEFT.PER.100.000.PEOPLE
## 1 839.9 1789.0
## 2 692.3 1649.1
## 3 721.1 1693.0
## 4 674.0 1738.2
## 5 629.7 2020.8
## 6 672.4 2120.3
## M.V.THEFT.PER.100.000.PEOPLE
## 1 116.8
## 2 87.0
## 3 124.2
## 4 125.1
## 5 124.2
## 6 134.2
## MURDER..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1 NA
## 2 -36.7
## 3 53.3
## 4 -65.8
## 5 0.3
## 6 98.2
## RAPE.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1 NA
## 2 -62.0
## 3 257.8
## 4 -70.7
## 5 251.1
## 6 69.9
## ROBBERY.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1 NA
## 2 13.9
## 3 36.3
## 4 -42.2
## 5 0.3
## 6 43.1
## AGG..ASSAULT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1 NA
## 2 -50.9
## 3 47.3
## 4 -2.2
## 5 4.0
## 6 -6.8
## B...E.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1 NA
## 2 -17.6
## 3 4.2
## 4 -6.5
## 5 -6.6
## 6 6.8
## LARCENY.THEFT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1 NA
## 2 -7.8
## 3 2.7
## 4 2.7
## 5 16.3
## 6 4.9
## M.V.THEFT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1 NA
## 2 -25.5
## 3 42.8
## 4 0.7
## 5 -0.7
## 6 8.1
# Show the last rows of the data set (called once; the repeated call only
# printed the same rows a second time).
tail(MDcrime)
## JURISDICTION YEAR POPULATION MURDER RAPE ROBBERY AGGASSAULT
## 1003 Worcester County 1/1/2011 51942 1 7 28 212
## 1004 Worcester County 1/1/2012 52011 1 16 31 138
## 1005 Worcester County 1/1/2013 51718 2 14 28 173
## 1006 Worcester County 1/1/2014 51756 1 14 35 140
## 1007 Worcester County 1/1/2015 51566 1 15 27 102
## 1008 Worcester County 1/1/2016 51255 3 17 39 93
## BE LARCENY.THEFT MotorvehicleTHEFT GRAND.TOTAL PERCENT.CHANGE
## 1003 423 1852 41 2564 -2.0
## 1004 432 1751 33 2402 -6.3
## 1005 356 1705 55 2333 -2.9
## 1006 445 1754 38 2427 4.0
## 1007 273 1562 28 2008 -17.3
## 1008 289 1514 32 1987 -1.0
## violentcrimetotal VIOLENT.CRIME.PERCENT VIOLENT.CRIME.PERCENT.CHANGE
## 1003 248 9.7 -5.0
## 1004 186 7.7 -25.0
## 1005 217 9.3 16.7
## 1006 190 7.8 -12.4
## 1007 145 7.2 -23.7
## 1008 152 7.6 4.8
## PROPERTY.CRIME.TOTALS PROPERTY.CRIME.PERCENT
## 1003 2316 90.3
## 1004 2216 92.3
## 1005 2116 90.7
## 1006 2237 92.2
## 1007 1863 92.8
## 1008 1835 92.4
## PROPERTY.CRIME.PERCENT.CHANGE OVERALL.CRIME.RATE.PER.100.000.PEOPLE
## 1003 -1.7 4936.3
## 1004 -4.3 4618.3
## 1005 -4.5 4511.0
## 1006 5.7 4689.3
## 1007 -16.7 3894.0
## 1008 -1.5 3876.7
## OVERALL.PERCENT.CHANGE.PER.100.000.PEOPLE violentcrimerateper_100000
## 1003 -6.4 477.5
## 1004 -6.4 357.6
## 1005 -2.3 419.6
## 1006 4.0 367.1
## 1007 -17.0 281.2
## 1008 -0.4 296.6
## VIOLENT.CRIME.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1003 -9.2
## 1004 -25.1
## 1005 17.3
## 1006 -12.5
## 1007 -23.4
## 1008 5.5
## PROPERTY.CRIME.RATE.PER.100.000.PEOPLE
## 1003 4458.8
## 1004 4260.6
## 1005 4091.4
## 1006 4322.2
## 1007 3612.8
## 1008 3580.1
## PROPERTY.CRIME.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE murderper100000
## 1003 -6.0 1.9
## 1004 -4.4 1.9
## 1005 -4.0 3.9
## 1006 5.6 1.9
## 1007 -16.4 1.9
## 1008 -16.4 5.9
## RAPE.PER.100.000.PEOPLE robberyper100000
## 1003 13.5 53.9
## 1004 30.8 59.6
## 1005 27.1 54.1
## 1006 27.1 67.6
## 1007 29.1 52.4
## 1008 33.2 76.1
## AGG..ASSAULT.PER.100.000.PEOPLE B...E.PER.100.000.PEOPLE
## 1003 408.1 814.4
## 1004 265.3 830.6
## 1005 334.5 688.3
## 1006 270.5 859.8
## 1007 197.8 529.4
## 1008 181.4 563.8
## LARCENY.THEFT.PER.100.000.PEOPLE M.V.THEFT.PER.100.000.PEOPLE
## 1003 3565.5 78.9
## 1004 3366.6 63.4
## 1005 3296.7 106.3
## 1006 3389.0 73.4
## 1007 3029.1 54.3
## 1008 2953.9 62.4
## MURDER..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1003 -76.1
## 1004 -0.1
## 1005 101.1
## 1006 -50.0
## 1007 0.4
## 1008 201.8
## RAPE.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1003 -48.5
## 1004 128.3
## 1005 -12.0
## 1006 -0.1
## 1007 7.5
## 1008 14.0
## ROBBERY.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1003 -40.5
## 1004 10.6
## 1005 -9.2
## 1006 24.9
## 1007 -22.6
## 1008 45.3
## AGG..ASSAULT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1003 1.8
## 1004 -35.0
## 1005 26.1
## 1006 -19.1
## 1007 -26.9
## 1008 -8.3
## B...E.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1003 -17.7
## 1004 2.0
## 1005 -17.1
## 1006 24.9
## 1007 -38.4
## 1008 6.5
## LARCENY.THEFT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1003 -2.6
## 1004 -5.6
## 1005 -2.1
## 1006 2.8
## 1007 -10.6
## 1008 -2.5
## M.V.THEFT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE
## 1003 -18.4
## 1004 -19.6
## 1005 67.6
## 1006 -31.0
## 1007 -26.0
## 1008 15.0
# Next we keep only the jurisdiction, year, population, and overall crime rate
# per 100,000 people. First, list all variable names to find the exact columns.
names(MDcrime)
## [1] "JURISDICTION"
## [2] "YEAR"
## [3] "POPULATION"
## [4] "MURDER"
## [5] "RAPE"
## [6] "ROBBERY"
## [7] "AGGASSAULT"
## [8] "BE"
## [9] "LARCENY.THEFT"
## [10] "MotorvehicleTHEFT"
## [11] "GRAND.TOTAL"
## [12] "PERCENT.CHANGE"
## [13] "violentcrimetotal"
## [14] "VIOLENT.CRIME.PERCENT"
## [15] "VIOLENT.CRIME.PERCENT.CHANGE"
## [16] "PROPERTY.CRIME.TOTALS"
## [17] "PROPERTY.CRIME.PERCENT"
## [18] "PROPERTY.CRIME.PERCENT.CHANGE"
## [19] "OVERALL.CRIME.RATE.PER.100.000.PEOPLE"
## [20] "OVERALL.PERCENT.CHANGE.PER.100.000.PEOPLE"
## [21] "violentcrimerateper_100000"
## [22] "VIOLENT.CRIME.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE"
## [23] "PROPERTY.CRIME.RATE.PER.100.000.PEOPLE"
## [24] "PROPERTY.CRIME.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE"
## [25] "murderper100000"
## [26] "RAPE.PER.100.000.PEOPLE"
## [27] "robberyper100000"
## [28] "AGG..ASSAULT.PER.100.000.PEOPLE"
## [29] "B...E.PER.100.000.PEOPLE"
## [30] "LARCENY.THEFT.PER.100.000.PEOPLE"
## [31] "M.V.THEFT.PER.100.000.PEOPLE"
## [32] "MURDER..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE"
## [33] "RAPE.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE"
## [34] "ROBBERY.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE"
## [35] "AGG..ASSAULT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE"
## [36] "B...E.RATE.PERCENT.CHANGE.PER.100.000.PEOPLE"
## [37] "LARCENY.THEFT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE"
## [38] "M.V.THEFT..RATE.PERCENT.CHANGE.PER.100.000.PEOPLE"
# Keep only the four columns needed for the rest of the analysis.
MDcrime_df <- MDcrime %>%
  select(
    JURISDICTION,
    YEAR,
    POPULATION,
    OVERALL.CRIME.RATE.PER.100.000.PEOPLE
  )
# Check the structure of the reduced data frame (MDcrime_df).
str(MDcrime_df)
## 'data.frame': 1008 obs. of 4 variables:
## $ JURISDICTION : Factor w/ 26 levels "Allegany County",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ YEAR : Factor w/ 42 levels "1/1/1975","1/1/1976",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ POPULATION : int 79655 83923 82102 79966 79721 80461 81781 81858 82605 80835 ...
## $ OVERALL.CRIME.RATE.PER.100.000.PEOPLE: num 2924 2532 2693 2665 2913 ...
# Add crime_rate as a shorter-named copy of the overall crime rate per 100,000
# people; mutate() keeps the original column alongside the new one.
MDcrime_df <- MDcrime_df %>%
  mutate(crime_rate = OVERALL.CRIME.RATE.PER.100.000.PEOPLE)
head(MDcrime_df)
## JURISDICTION YEAR POPULATION
## 1 Allegany County 1/1/1975 79655
## 2 Allegany County 1/1/1976 83923
## 3 Allegany County 1/1/1977 82102
## 4 Allegany County 1/1/1978 79966
## 5 Allegany County 1/1/1979 79721
## 6 Allegany County 1/1/1980 80461
## OVERALL.CRIME.RATE.PER.100.000.PEOPLE crime_rate
## 1 2923.9 2923.9
## 2 2532.1 2532.1
## 3 2693.0 2693.0
## 4 2664.9 2664.9
## 5 2912.7 2912.7
## 6 3074.8 3074.8
# Parse the YEAR labels (e.g. "1/1/1975") into Date values in a new column,
# year_new, leaving the original YEAR column untouched.
# YEAR is referenced as a bare column name so mutate() works on the data it
# was given rather than on the global MDcrime_df, and as.character() makes the
# conversion explicit whether YEAR was read as a factor or as character.
MDcrime_df <- MDcrime_df %>%
  mutate(year_new = as.Date(as.character(YEAR), format = "%m/%d/%Y"))
head(MDcrime_df)
## JURISDICTION YEAR POPULATION
## 1 Allegany County 1/1/1975 79655
## 2 Allegany County 1/1/1976 83923
## 3 Allegany County 1/1/1977 82102
## 4 Allegany County 1/1/1978 79966
## 5 Allegany County 1/1/1979 79721
## 6 Allegany County 1/1/1980 80461
## OVERALL.CRIME.RATE.PER.100.000.PEOPLE crime_rate year_new
## 1 2923.9 2923.9 1975-01-01
## 2 2532.1 2532.1 1976-01-01
## 3 2693.0 2693.0 1977-01-01
## 4 2664.9 2664.9 1978-01-01
## 5 2912.7 2912.7 1979-01-01
## 6 3074.8 3074.8 1980-01-01
# Which county has the highest crime rate per 100,000?
# Average crime_rate per JURISDICTION label, sorted from highest to lowest.
# Note that the column holding the mean is itself named MDcrime_df.
Highest_crime_rates_by_county <- MDcrime_df %>%
  group_by(JURISDICTION) %>%
  summarize(MDcrime_df = mean(crime_rate)) %>%
  as.data.frame() %>%
  arrange(desc(MDcrime_df))
# Print up to 26 rows so that every jurisdiction label is shown.
head(Highest_crime_rates_by_county, 26)
## JURISDICTION MDcrime_df
## 1 Baltimore City 8917.145
## 2 Worcester County 7148.333
## 3 Prince George's County 6429.619
## 4 Baltimore County 5273.274
## 5 Wicomico County 5206.381
## 6 Anne Arundel County 4484.002
## 7 Dorchester County 4208.743
## 8 Howard County 3942.710
## 9 Charles County 3846.110
## 10 Cecil County 3642.740
## 11 Montgomery County 3588.581
## 12 Allegany County 3342.550
## 13 Talbot County 3295.355
## 14 Allegany County 3136.213
## 15 Somerset County 3115.286
## 16 Harford County 2933.545
## 17 St. Mary's County 2898.819
## 18 Caroline County 2892.738
## 19 Frederick County 2787.195
## 20 Washington County 2710.171
## 21 Anne Arundel County 2659.350
## 22 Queen Anne's County 2592.698
## 23 Kent County 2542.771
## 24 Calvert County 2432.902
## 25 Carroll County 2195.419
## 26 Garrett County 2063.914
###Baltimore City has the highest crime rate per 100,000
#Lets create a boxplot of the crime rate per 100,000 people in Maryland. But first, lets load ggplot2
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
# Boxplot of crime_rate for each jurisdiction; a red point marks each mean.
# The legend is hidden, the x-axis labels are rotated 45 degrees, and the
# title is centred.
MDboxplot <- ggplot(
  data = MDcrime_df,
  aes(x = JURISDICTION, y = crime_rate, fill = JURISDICTION)
) +
  geom_boxplot(alpha = 0.3) +
  stat_summary(
    fun.y = mean, geom = "point",
    shape = 20, size = 4, color = "red", fill = "red"
  ) +
  labs(
    title = "Boxplot Of Maryland Crime Rate per 100,000",
    x = "County",
    y = "Crime Rate"
  ) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, vjust = 1, size = 8, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
MDboxplot
# Total population by year
# Sum POPULATION over all jurisdictions for each year, ordered by year.
population_trends <- MDcrime_df %>%
  group_by(year_new) %>%
  summarize(total_population = sum(POPULATION)) %>%
  as.data.frame() %>%
  arrange(year_new)
# Print up to 47 rows, which is enough to show every year in the table.
head(population_trends, 47)
## year_new total_population
## 1 1975-01-01 4098000
## 2 1976-01-01 4144000
## 3 1977-01-01 4139000
## 4 1978-01-01 4143000
## 5 1979-01-01 4149000
## 6 1980-01-01 4192211
## 7 1981-01-01 4261000
## 8 1982-01-01 4265000
## 9 1983-01-01 4304000
## 10 1984-01-01 4349000
## 11 1985-01-01 4392000
## 12 1986-01-01 4463000
## 13 1987-01-01 4535000
## 14 1988-01-01 4644000
## 15 1989-01-01 4694000
## 16 1990-01-01 4781468
## 17 1991-01-01 4860000
## 18 1992-01-01 4908000
## 19 1993-01-01 4965000
## 20 1994-01-01 5005640
## 21 1995-01-01 5042000
## 22 1996-01-01 5072000
## 23 1997-01-01 5093990
## 24 1998-01-01 5135000
## 25 1999-01-01 5172000
## 26 2000-01-01 5296486
## 27 2001-01-01 5375156
## 28 2002-01-01 5458137
## 29 2003-01-01 5508909
## 30 2004-01-01 5558058
## 31 2005-01-01 5600388
## 32 2006-01-01 5615727
## 33 2007-01-01 5618344
## 34 2008-01-01 5633597
## 35 2009-01-01 5699478
## 36 2010-01-01 5773550
## 37 2011-01-01 5828289
## 38 2012-01-01 5884563
## 39 2013-01-01 5928814
## 40 2014-01-01 5976407
## 41 2015-01-01 6006401
## 42 2016-01-01 6016537
# Line chart of the 42-year total population trend.
ggplot(data = population_trends, mapping = aes(x = year_new, y = total_population)) +
  geom_line()
#Does increase in population affect crime rate in Maryland?
###First, lets load the tidyverse package
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.2.1 --
## v tibble 2.1.3 v purrr 0.3.2
## v tidyr 0.8.3 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Scatter plot of crime_rate against POPULATION for every row, with a fitted
# linear-model line; the points are drawn on top of the line.
ggplot(data = MDcrime_df, mapping = aes(x = POPULATION, y = crime_rate)) +
  stat_smooth(method = lm) +
  geom_point()
# Relationship between population and crime rate in Montgomery County:
# start by keeping only the Montgomery County rows.
Montgomerycounty <- MDcrime_df %>%
  filter(JURISDICTION == "Montgomery County")
head(Montgomerycounty)
## JURISDICTION YEAR POPULATION
## 1 Montgomery County 1/1/1975 571436
## 2 Montgomery County 1/1/1976 570275
## 3 Montgomery County 1/1/1977 575310
## 4 Montgomery County 1/1/1978 582458
## 5 Montgomery County 1/1/1979 576776
## 6 Montgomery County 1/1/1980 574093
## OVERALL.CRIME.RATE.PER.100.000.PEOPLE crime_rate year_new
## 1 4563.9 4563.9 1975-01-01
## 2 4271.6 4271.6 1976-01-01
## 3 4311.8 4311.8 1977-01-01
## 4 4469.7 4469.7 1978-01-01
## 5 5243.3 5243.3 1979-01-01
## 6 5482.4 5482.4 1980-01-01
# Show the last rows of the Montgomery County subset.
tail(Montgomerycounty)
## JURISDICTION YEAR POPULATION
## 37 Montgomery County 1/1/2011 980991
## 38 Montgomery County 1/1/2012 999353
## 39 Montgomery County 1/1/2013 1016455
## 40 Montgomery County 1/1/2014 1029182
## 41 Montgomery County 1/1/2015 1041345
## 42 Montgomery County 1/1/2016 1047303
## OVERALL.CRIME.RATE.PER.100.000.PEOPLE crime_rate year_new
## 37 2060.8 2060.8 2011-01-01
## 38 1928.0 1928.0 2012-01-01
## 39 1774.6 1774.6 2013-01-01
## 40 1795.1 1795.1 2014-01-01
## 41 1782.9 1782.9 2015-01-01
## 42 1665.5 1665.5 2016-01-01
# Montgomery County: crime_rate against POPULATION with a linear-model fit.
ggplot(data = Montgomerycounty, mapping = aes(x = POPULATION, y = crime_rate)) +
  stat_smooth(method = lm) +
  geom_point()
###The graph above shows that the crime rate per 100,000 people decreases as the population increases.
# Relationship between population and crime rate in Baltimore City:
# subset the Baltimore City rows, then plot crime_rate against POPULATION
# with a linear-model fit.
Baltimorecity <- MDcrime_df %>%
  filter(JURISDICTION == "Baltimore City")
ggplot(data = Baltimorecity, mapping = aes(x = POPULATION, y = crime_rate)) +
  stat_smooth(method = lm) +
  geom_point()
###The graph shows that the crime rate increases as the population increases but then decreases as the population continues to grow
# Howard County: subset its rows, then plot crime_rate against POPULATION
# with a linear-model fit.
howardcounty <- MDcrime_df %>%
  filter(JURISDICTION == "Howard County")
ggplot(data = howardcounty, mapping = aes(x = POPULATION, y = crime_rate)) +
  stat_smooth(method = lm) +
  geom_point()
#The graph shows that crime rates decrease as the population increases in Howard County.
# Calvert County: subset its rows, then plot crime_rate against POPULATION
# with a linear-model fit.
calvertcounty <- MDcrime_df %>%
  filter(JURISDICTION == "Calvert County")
ggplot(data = calvertcounty, mapping = aes(x = POPULATION, y = crime_rate)) +
  stat_smooth(method = lm) +
  geom_point()
# Worcester County: subset its rows, then plot crime_rate against POPULATION
# with a linear-model fit.
worcestercounty <- MDcrime_df %>%
  filter(JURISDICTION == "Worcester County")
ggplot(data = worcestercounty, mapping = aes(x = POPULATION, y = crime_rate)) +
  stat_smooth(method = lm) +
  geom_point()
#Worcester County has a population of only approximately 52,000 residents, yet it has one of the highest crime rates of any county in Maryland. This is due to the high property crime rate in Ocean City. Ocean City is a popular summer vacation spot and is therefore a target for criminals. Crime rates per 100,000 people have been decreasing as the population increases.
#Now lets use the T-test to answer whether the average crime rate in Baltimore City is higher than the average crime rate in the whole state of Maryland.
###Test for the hypothesis: The average crime rate for Baltimore City is no different than the average crime rate for the state of Maryland. A rejection of this hypothesis indicates that the crime rate in Baltimore City is higher than the crime rate in the whole state of Maryland.
# Collect Baltimore City's crime rates, one observation per row, and compute
# their variance.
# The rows are deliberately NOT grouped by crime_rate: grouping by the measured
# value yields one row per distinct rate, so the "mean" of each group is just
# that rate and any repeated rate would silently drop out of the sample.
# The Baltimore_average column keeps its name because the t-test further down
# reads Baltimore_df$Baltimore_average.
Baltimore_df <- MDcrime_df %>%
  select(JURISDICTION, crime_rate) %>%
  filter(JURISDICTION == "Baltimore City") %>%
  mutate(Baltimore_average = crime_rate)
var(Baltimore_df$Baltimore_average)
## [1] 4551424
# Collect the crime rates of every jurisdiction except Baltimore City, one
# observation per row, and compute their variance.
# The rows are deliberately NOT grouped by crime_rate: grouping by the measured
# value yields one row per distinct rate within a jurisdiction, so the "mean"
# of each group is just that rate and repeated rates would silently drop out
# of the sample.
# The MDcrimerate_average column keeps its name because the t-test further
# down reads MDcrimerate_df$MDcrimerate_average.
MDcrimerate_df <- MDcrime_df %>%
  select(JURISDICTION, crime_rate) %>%
  filter(JURISDICTION != "Baltimore City") %>%
  mutate(MDcrimerate_average = crime_rate)
var(MDcrimerate_df$MDcrimerate_average)
## [1] 2356237
# Welch two-sample t-test (variances not assumed equal) comparing Baltimore
# City's crime rates with those of the rest of Maryland.
t.test(
  Baltimore_df$Baltimore_average,
  MDcrimerate_df$MDcrimerate_average,
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: Baltimore_df$Baltimore_average and MDcrimerate_df$MDcrimerate_average
## t = 15.909, df = 42.866, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 4624.452 5967.190
## sample estimates:
## mean of x mean of y
## 8917.145 3621.324
#Since the p-value is very small, we can comfortably reject the null hypothesis. Therefore, the mean crime rate for Baltimore City is higher than the mean crime rate of the state of Maryland.