Load Libraries

## Warning: package 'ggplot2' was built under R version 3.4.4
## Warning: package 'dplyr' was built under R version 3.4.4
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Warning: package 'tidyr' was built under R version 3.4.3
## Warning: package 'tidyverse' was built under R version 3.4.4
## -- Attaching packages ---------------------------------- tidyverse 1.2.1 --
## v tibble  1.4.2     v purrr   0.2.4
## v readr   1.1.1     v stringr 1.2.0
## v tibble  1.4.2     v forcats 0.3.0
## Warning: package 'tibble' was built under R version 3.4.4
## Warning: package 'readr' was built under R version 3.4.3
## Warning: package 'purrr' was built under R version 3.4.3
## Warning: package 'stringr' was built under R version 3.4.2
## Warning: package 'forcats' was built under R version 3.4.4
## -- Conflicts ------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## Google Maps API Terms of Service: http://developers.google.com/maps/terms.
## Please cite ggmap if you use it: see citation("ggmap") for details.
## Warning: package 'gridExtra' was built under R version 3.4.3
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
## Warning: package 'googleway' was built under R version 3.4.4
## Warning: package 'devtools' was built under R version 3.4.4

Load Data

# Read the crime CSV from the user's home directory. Empty strings and the
# literal text "NA" are both treated as missing values.
blt_crime <- read.csv(
  "~/BPD_Part_1_Victim_Based_Crime_Data.csv",
  header = TRUE, # spelled out: `T` is an ordinary variable and can be reassigned
  na.strings = c("", "NA")
)
# Print the column types and a preview of each column.
str(blt_crime)
## 'data.frame':    276529 obs. of  15 variables:
##  $ CrimeDate      : Factor w/ 2072 levels "01/01/2012","01/01/2013",..: 1472 1472 1472 1472 1472 1472 1472 1472 1472 1472 ...
##  $ CrimeTime      : Factor w/ 3017 levels "00:00:00","00:01:00",..: 2950 2880 2865 2857 2812 2748 2642 2691 2610 2610 ...
##  $ CrimeCode      : Factor w/ 81 levels "1F","1K","1O",..: 27 62 81 41 43 45 1 12 41 43 ...
##  $ Location       : Factor w/ 26008 levels "& E BIDDLE ST",..: 15895 22873 20664 5390 966 24217 12555 15164 7696 2697 ...
##  $ Description    : Factor w/ 15 levels "AGG. ASSAULT",..: 13 4 15 1 6 5 7 14 1 6 ...
##  $ Inside.Outside : Factor w/ 4 levels "I","Inside","O",..: 1 3 4 1 3 1 4 3 3 1 ...
##  $ Weapon         : Factor w/ 4 levels "FIREARM","HANDS",..: 3 NA 1 4 2 NA 1 NA 4 2 ...
##  $ Post           : int  913 133 524 934 113 922 232 123 641 332 ...
##  $ District       : Factor w/ 9 levels "CENTRAL","EASTERN",..: 7 1 4 7 1 7 6 1 5 2 ...
##  $ Neighborhood   : Factor w/ 278 levels "Abell","Allendale",..: 27 215 269 38 64 46 32 250 268 16 ...
##  $ Longitude      : num  -76.6 -76.6 -76.6 -76.6 -76.6 ...
##  $ Latitude       : num  39.2 39.3 39.3 39.3 39.3 ...
##  $ Location.1     : Factor w/ 95497 levels "(39.2004100000, -76.5560200000)",..: 929 53780 81980 15696 22054 3585 14102 42340 53909 45268 ...
##  $ Premise        : Factor w/ 123 levels "Alley","ALLEY",..: 96 107 106 96 107 96 106 107 107 96 ...
##  $ Total.Incidents: int  1 1 1 1 1 1 1 1 1 1 ...

Sample Data

# Draw 2000 rows at random, without replacement.
# NOTE(review): no seed is set, so each run samples different rows.
sampledata <- blt_crime[sample(seq_len(nrow(blt_crime)), 2000, replace = FALSE), ]

# Parse the "mm/dd/yyyy" text into Date objects, then derive the calendar parts.
sampledata$CrimeDate <- as.Date(sampledata$CrimeDate, "%m/%d/%Y")
sampledata$Day <- format(sampledata$CrimeDate, "%d")

# Year is stored as a factor so that each year gets its own discrete colour.
sampledata$Year <- as.factor(format(sampledata$CrimeDate, "%Y"))

# Build the month label from the month number instead of format(..., "%b"):
# "%b" follows the session locale, while the monthly plot matches these labels
# against the English abbreviations in month.abb.
sampledata$Month <- month.abb[as.integer(format(sampledata$CrimeDate, "%m"))]

Distribution of Incidents

# Terrain basemap for Baltimore; get_map() fetches it over the network.
map <- get_map(
  location = "baltimore",
  zoom = 12,
  maptype = "terrain"
)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=baltimore&zoom=12&size=640x640&scale=2&maptype=terrain&language=en-EN
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=baltimore
# Overlay the sampled incidents on the basemap, one colour per year.
# Point size and transparency are constants, so they are set on the layer only;
# a constant inside aes() is ignored when the layer sets the same aesthetic.
plot <- ggmap(map) +
  geom_point(
    aes(x = Longitude, y = Latitude, color = Year),
    data = sampledata, alpha = 0.4, size = 2
  )

print(plot + ggtitle("Distribution of total Number of Incidents"))
## Warning: Removed 15 rows containing missing values (geom_point).

Here we can observe that, across all years, the majority of incidents took place in the heart of downtown rather than in the suburbs.

Analyze weapon types

Structure the data

# Two-column working copy: the weapon used and where the crime happened.
weapons <- data.frame(
  Weapon = sampledata$Weapon,
  Inside_Outside = sampledata$Inside.Outside
)

# Fold the one-letter location codes into their spelled-out labels.
# which() keeps only the matching positions, so missing values stay untouched.
short_inside <- which(weapons$Inside_Outside == "I")
weapons$Inside_Outside[short_inside] <- "Inside"
short_outside <- which(weapons$Inside_Outside == "O")
weapons$Inside_Outside[short_outside] <- "Outside"

See Unique values

# List the distinct location labels that remain after the recode. In the
# recorded output below, "I" and "O" still appear in the Levels line but not
# among the values: they are factor levels no row uses any more.
unique(weapons$Inside_Outside)
## [1] Inside  <NA>    Outside
## Levels: I Inside O Outside

Plot of Weapon Type against Crime Location

# Bar chart of weapon counts, one panel per crime location. Rows with a missing
# weapon or location are dropped first.
# Built with ggplot() and a single geom_bar() layer: qplot() already adds a bar
# layer for a discrete x, so the extra geom_bar() drew every bar twice.
ggplot(weapons[complete.cases(weapons), ], aes(x = Weapon)) +
  geom_bar(fill = "darkred", color = "blue") +
  facet_grid(~Inside_Outside) +
  ggtitle("Weapons used by Location")

We can observe that hands are the most common weapon, whether the crime occurs inside or outside.

Analyze number of crimes by time

Create date granularity for date field

# Derive day / year / month columns from the crime date. The date is parsed
# only if it is not a Date yet, so running this step again is harmless.
if (!inherits(sampledata$CrimeDate, "Date")) {
  sampledata$CrimeDate <- as.Date(sampledata$CrimeDate, "%m/%d/%Y")
}
sampledata$Day <- format(sampledata$CrimeDate, "%d")

# Keep Year as a factor; a bare format() call would leave it as character.
sampledata$Year <- factor(format(sampledata$CrimeDate, "%Y"))

# Month label from the month number rather than "%b": "%b" depends on the
# session locale, while the monthly plot matches against English month.abb.
sampledata$Month <- month.abb[as.integer(format(sampledata$CrimeDate, "%m"))]

Analyze Crime by Month

# One fill colour per crime description: interpolate the 9-colour "Set1"
# palette to as many colours as there are distinct descriptions in the sample.
colourCount <- length(unique(sampledata$Description))
getPalette <- colorRampPalette(brewer.pal(9, "Set1"))

# Base plot object shared by the month and year bar charts.
plt <- ggplot(data = sampledata)

# Stacked bars: incident count per month, split by crime type.
# Months are ordered by calendar position via month.abb instead of alphabetically.
# The y axis is a count, so it is labelled as one; "Crime Type" names the fill.
plt +
  geom_bar(aes(factor(Month, levels = month.abb), fill = Description)) +
  scale_fill_manual(values = getPalette(colourCount)) +
  labs(
    title = "Display of Crime Type by Months",
    x = "Months",
    y = "Number of Incidents",
    fill = "Crime Type"
  )

There is an even distribution of crimes across all years with Assault, Larceny being most common and shooting being the least common.

Crimes are more frequent in the summer months.

Analyze crime by Year

# Stacked bars: incident count per year, split by crime type.
# The y axis is a count, so it is labelled as one; "Crime Type" names the fill.
plt +
  geom_bar(aes(x = Year, fill = Description)) +
  scale_fill_manual(values = getPalette(colourCount)) +
  labs(
    title = "Display of Crime Type by Year",
    x = "Year",
    y = "Number of Incidents",
    fill = "Crime Type"
  )

We can observe that the frequency of crime per year has remained more or less the same. There was a drop in 2014, but it was not significant. Data for 2017 is not fully representative, as we only have three quarters of data for that year.

Density plots

# Satellite basemap for the density plots; replaces the earlier terrain map.
map <- get_map(
  location = "Baltimore",
  maptype = "satellite",
  zoom = 12
)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=Baltimore&zoom=12&size=640x640&scale=2&maptype=satellite&language=en-EN
## Warning: package 'bindrcpp' was built under R version 3.4.4
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Baltimore
# 2-D density contours of incident locations, one set of contours per year.
# Columns are referenced by bare name inside aes() so they are looked up in the
# layer's `data`; `sampledata$Year` bypassed that lookup.
ggmap(map, extent = "normal") +
  geom_density2d(
    data = sampledata,
    aes(x = Longitude, y = Latitude, group = Year, col = Year)
  )
## Warning: Removed 15 rows containing non-finite values (stat_density2d).

# Density polygons per year on a watercolor basemap.
# - source = "stamen" is stated explicitly: the recorded output shows ggmap
#   resetting to it anyway, because "watercolor" is only available there.
# - Transparency is a constant, so it is set once on the layer; the extra
#   alpha inside aes() was overridden by it.
# - Year is referenced by bare name so it is looked up in the layer's data.
ggmap(get_map(
  location = "Baltimore", zoom = 11,
  maptype = "watercolor", source = "stamen"
)) +
  stat_density2d(
    aes(Longitude, Latitude, group = Year, col = Year),
    geom = "polygon", data = sampledata, alpha = 0.4
  ) +
  labs(title = "Total Crime Incidents grouped by Year")
## maptype = "watercolor" is only available with source = "stamen".
## resetting to source = "stamen"...
## Source : https://maps.googleapis.com/maps/api/staticmap?center=Baltimore&zoom=11&size=640x640&scale=2&maptype=terrain
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Baltimore
## Source : http://tile.stamen.com/watercolor/11/586/779.jpg
## Source : http://tile.stamen.com/watercolor/11/587/779.jpg
## Source : http://tile.stamen.com/watercolor/11/588/779.jpg
## Source : http://tile.stamen.com/watercolor/11/589/779.jpg
## Source : http://tile.stamen.com/watercolor/11/586/780.jpg
## Source : http://tile.stamen.com/watercolor/11/587/780.jpg
## Source : http://tile.stamen.com/watercolor/11/588/780.jpg
## Source : http://tile.stamen.com/watercolor/11/589/780.jpg
## Source : http://tile.stamen.com/watercolor/11/586/781.jpg
## Source : http://tile.stamen.com/watercolor/11/587/781.jpg
## Source : http://tile.stamen.com/watercolor/11/588/781.jpg
## Source : http://tile.stamen.com/watercolor/11/589/781.jpg
## Warning: Removed 15 rows containing non-finite values (stat_density2d).

We can observe that crimes are spread across the whole city of Baltimore.

Registering a Google API key to avoid OVER_QUERY_LIMIT errors

# Register the Google Maps API key with ggmap.
# The key is read from the GOOGLE_MAPS_API_KEY environment variable so that no
# secret is stored in the document.
# NOTE(review): the key that was embedded here before should be revoked.
google_api_key <- Sys.getenv("GOOGLE_MAPS_API_KEY")
if (!nzchar(google_api_key)) {
  stop("Environment variable GOOGLE_MAPS_API_KEY is not set.", call. = FALSE)
}
register_google(key = google_api_key)

Usage of Weapons by each year

# One data frame per year, used by the per-year weapon maps.
blt_2012 <- subset(sampledata, Year == "2012")
blt_2013 <- subset(sampledata, Year == "2013")
blt_2014 <- subset(sampledata, Year == "2014")
blt_2015 <- subset(sampledata, Year == "2015")
blt_2016 <- subset(sampledata, Year == "2016")
blt_2017 <- subset(sampledata, Year == "2017")

# API key for googleway, read from the GOOGLE_MAPS_API_KEY environment variable
# so that no secret is stored in the document.
# NOTE(review): the key that was embedded here before should be revoked.
key <- Sys.getenv("GOOGLE_MAPS_API_KEY")
if (!nzchar(key)) {
  stop("Environment variable GOOGLE_MAPS_API_KEY is not set.", call. = FALSE)
}

# Geocode the city name; the result is printed below.
google_geocode(address = "Baltimore", key = key)
## $results
##                                                                                                                       address_components
## 1 Baltimore, Maryland, United States, Baltimore, MD, US, locality, political, administrative_area_level_1, political, country, political
##    formatted_address geometry.bounds.northeast.lat
## 1 Baltimore, MD, USA                      39.37221
##   geometry.bounds.northeast.lng geometry.bounds.southwest.lat
## 1                     -76.52945                      39.19721
##   geometry.bounds.southwest.lng geometry.location.lat
## 1                     -76.71152              39.29038
##   geometry.location.lng geometry.location_type
## 1             -76.61219            APPROXIMATE
##   geometry.viewport.northeast.lat geometry.viewport.northeast.lng
## 1                        39.37221                       -76.52945
##   geometry.viewport.southwest.lat geometry.viewport.southwest.lng
## 1                        39.19721                       -76.71152
##                      place_id               types
## 1 ChIJt4P01q4DyIkRWOcjQqiWSAQ locality, political
## 
## $status
## [1] "OK"
# Roadmap basemap for the per-year weapon maps. It is downloaded once and
# shared by all six plots instead of being requested again for every year.
baltimore_roadmap <- get_map(location = "Baltimore", zoom = 12, maptype = "roadmap")

# Plot one year's incidents on a basemap, coloured by weapon.
#   crimes  : data frame with Longitude, Latitude and Weapon columns
#   year    : year shown in the plot title
#   basemap : map object returned by get_map()
# Returns the plot object without printing it.
# Transparency is a constant, so it is set on the layer only.
weapon_map <- function(crimes, year, basemap = baltimore_roadmap) {
  ggmap(basemap) +
    geom_point(
      aes(Longitude, Latitude, col = Weapon),
      data = crimes, alpha = 0.4
    ) +
    labs(title = paste("Total Crime Incidents in", year))
}

map2012 <- weapon_map(blt_2012, 2012)
map2013 <- weapon_map(blt_2013, 2013)
map2014 <- weapon_map(blt_2014, 2014)
map2015 <- weapon_map(blt_2015, 2015)
map2016 <- weapon_map(blt_2016, 2016)
map2017 <- weapon_map(blt_2017, 2017)
# Draw the yearly weapon maps side by side, two years per call (nrow = 1).
# The warnings recorded after each call report points that geom_point dropped
# because of missing values.
grid.arrange(map2012, map2013, nrow = 1)
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_point).

grid.arrange(map2014, map2015, nrow = 1)
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

grid.arrange(map2016, map2017, nrow = 1)
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).

##grid.arrange(map2012, map2013, map2014, map2015, map2016, map2017, nrow = 2,ncol=3)

From the above visualization we can compare the types of weapons involved over the years. Most of the crimes are concentrated in the suburbs around Baltimore.

Weapons Vs Assault Types

# Keep only the two assault categories.
blt_desc <- subset(sampledata, Description %in% c("AGG. ASSAULT", "COMMON ASSAULT"))

# Export the subset. The file is written relative to the working directory;
# the previous absolute path pointed into one user's Windows profile and
# would fail on any other machine.
write.csv(blt_desc, file = "blt_desc.csv")

# Columns are referenced by bare name so aes() looks them up in blt_desc.
d <- ggplot(blt_desc, aes(Weapon, Description, group = Total.Incidents))

# Count plot: the area of each point is proportional to the number of
# incidents for that weapon / assault-type pair.
final <- d +
  xlab("Weapon") +
  ylab("Assault Type") +
  labs(title = "Weapons vs Assault types") +
  geom_count() +
  scale_size_area() +
  # NOTE(review): the label positions are hard-coded, so they only point at
  # the largest counts if the sample happens to rank them that way - confirm.
  annotate("text", x = 2, y = 2.15, label = "Most number of Assaults") +
  annotate("text", x = 3, y = 1.15, label = "Second Most number of Assaults")

print(final)

Here we used a subset of the dataset, filtered on assault types, to see which weapons are involved; the second-highest number of assaults can be seen where a KNIFE is involved.