Montgomery County Data (https://data.montgomerycountymd.gov/browse) - This county has collected wide data points including Public Safety, Human Resources, Education etc, for this project, picked the 3 most common public safety wide datasets as below
In each of the datasets listed above, the objective is to find atleast 2 insights, the data analysis has been found in the respective tabs below
library(RCurl)
library(tidyverse)
library(ggplot2)
library(lubridate)
library(knitr)
library(kableExtra)
Please navigate each tab for data insights abour each datasets
Insight 1 - Which Year vehicle is causing most traffic voilation
traffic_data <- read.csv("https://raw.githubusercontent.com/thasleem1/DATA607/master/d607_p2/Traffic_Violations_MD_Motorcycle.csv",na.strings ="")
#list the data
kable(data.frame(head(traffic_data, n=5))) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
row_spec(0, bold = T, color = "white", background = "#89c5f9") %>%
scroll_box(width = "100%", height = "200px")
| X | Date.Of.Stop | Time.Of.Stop | Agency | SubAgency | Description | Location | Latitude | Longitude | Accident | Belts | Personal.Injury | Property.Damage | Fatal | Commercial.License | HAZMAT | Commercial.Vehicle | Alcohol | Work.Zone | State | VehicleType | Year | Make | Model | Color | Violation.Type | Charge | Article | Contributed.To.Accident | Race | Gender | Driver.City | Driver.State | DL.State | Arrest.Type | Geolocation |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 01/08/2013 | 16:33:00 | MCP | 6th district, Gaithersburg / Montgomery Village | DRIVING MOTOR VEHICLE ON HIGHWAY WITHOUT REQUIRED LICENSE AND AUTHORIZATION | AIRPARK @ STRATOS | 38.9964879833333 | -77.1093180333333 | No | No | Yes | No | No | No | No | No | No | No | MD | 01 - Motorcycle | 2012 | KAWASAKI | NINJA | GREEN | Citation | 16-101(a) | Transportation Article | No | WHITE | F | GAITHERSBURG | MD | MD | A - Marked Patrol | (38.9964879833333, -77.1093180333333) |
| 2 | 11/09/2013 | 22:32:00 | MCP | 3rd district, Silver Spring | FAILURE OF INDIVIDUAL DRIVING ON HIGHWAY TO DISPLAY LICENSE TO UNIFORMED POLICE ON DEMAND | ROBEY RD/PALMER HOUSE WAY | 39.0835716666667 | -76.9375816666667 | No | No | No | No | No | No | No | No | No | No | MD | 01 - Motorcycle | 1995 | TOYOTA | 4RUNNER | TAN | Citation | 16-112(c) | Transportation Article | No | BLACK | F | BURTONSVILLE | MD | MD | A - Marked Patrol | (39.0835716666667, -76.9375816666667) |
| 3 | 08/29/2017 | 08:30:00 | MCP | 3rd district, Silver Spring | PARKING WHERE PROHIBITED BY OFFICIAL SIGNS | BARRON / DOMER | 38.9966466666667 | -76.99728 | No | No | No | No | No | No | No | No | No | No | MD | 01 - Motorcycle | 1998 | DODGE | DAKOTA | RED | Citation | 21-1003(aa) | Transportation Article | No | BLACK | M | TAKOMA PARK | MD | MD | A - Marked Patrol | (38.9966466666667, -76.99728) |
| 4 | 07/27/2014 | 03:01:00 | MCP | 2nd district, Bethesda | DRIVING WHILE IMPAIRED BY ALCOHOL | GLENBROOK RD AT WESSLING LANE | NA | NA | No | No | No | No | No | No | No | No | No | No | MD | 01 - Motorcycle | 2013 | KIA | RIO | SILVER | Citation | 21-902(b1) | Transportation Article | No | WHITE | M | BETHESDA | MD | MD | A - Marked Patrol | NA |
| 5 | 06/28/2015 | 22:53:00 | MCP | 4th district, Wheaton | FAILURE TO CONTROL VEHICLE SPEED ON HIGHWAY TO AVOID COLLISION | NB GEORGIA AVE/PLYERS MILL RD | 38.99747 | -77.1097783333333 | No | No | No | No | No | No | No | No | No | No | MD | 01 - Motorcycle | 2013 | KAW | MOTORCYCLE | BLACK | Citation | 21-801(b) | Transportation Article | No | WHITE | M | SILVER SPRING | MD | MD | A - Marked Patrol | (38.99747, -77.1097783333333) |
#traffic_data <- traffic_data %>% filter(State == "MD" & VehicleType == "01 - Motorcycle")
#filter and grouping based on year (remove invalid years using Year>2006 & Year < 2008)
traffic_data_year <- traffic_data %>% select(Year) %>% group_by(Year) %>% tally() %>% filter(Year > 1950 & Year <2020)
#list the data
kable(data.frame(traffic_data_year)) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
row_spec(0, bold = T, color = "white", background = "#89c5f9") %>%
scroll_box(width = "100%", height = "200px")
| Year | n |
|---|---|
| 1952 | 2 |
| 1966 | 3 |
| 1969 | 9 |
| 1970 | 1 |
| 1972 | 1 |
| 1975 | 1 |
| 1976 | 1 |
| 1977 | 20 |
| 1978 | 6 |
| 1979 | 3 |
| 1980 | 4 |
| 1981 | 5 |
| 1982 | 17 |
| 1983 | 1 |
| 1984 | 3 |
| 1985 | 10 |
| 1986 | 11 |
| 1987 | 5 |
| 1988 | 12 |
| 1989 | 18 |
| 1990 | 31 |
| 1991 | 32 |
| 1992 | 78 |
| 1993 | 81 |
| 1994 | 82 |
| 1995 | 132 |
| 1996 | 117 |
| 1997 | 163 |
| 1998 | 221 |
| 1999 | 277 |
| 2000 | 496 |
| 2001 | 547 |
| 2002 | 485 |
| 2003 | 757 |
| 2004 | 663 |
| 2005 | 831 |
| 2006 | 827 |
| 2007 | 919 |
| 2008 | 636 |
| 2009 | 536 |
| 2010 | 450 |
| 2011 | 495 |
| 2012 | 622 |
| 2013 | 487 |
| 2014 | 483 |
| 2015 | 401 |
| 2016 | 231 |
| 2017 | 181 |
| 2018 | 52 |
| 2019 | 3 |
#Year based graph
ggplot(traffic_data_year, aes(Year, n)) + geom_line() + geom_line(color='orange', size=1) + xlab("Vehicle Year") + ylab("Incidents") + geom_point(aes(colour = Year)) +geom_text(aes(label=ifelse(Year>2006 & Year < 2008 ,as.character(Year),'')),hjust=0, vjust=0) + xlab("Year") + ylab("Incidents") + ggtitle("Traffic Voilations vs Vehicle Model Year") + theme_bw() + theme(plot.title = element_text(hjust=0.5))
Insight 2 - Traffic voilation by gender
#Only the gender based data
traffic_data_gender <- traffic_data %>% select(Gender) %>% group_by(Gender) %>% tally()
#list the data
kable(data.frame(traffic_data_gender)) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
row_spec(0, bold = T, color = "white", background = "#89c5f9")
| Gender | n |
|---|---|
| F | 2557 |
| M | 8957 |
| U | 5 |
#plot the chart
p<-ggplot(data=traffic_data_gender, aes(x=Gender, y=n, color=Gender)) +
geom_bar(stat="identity", fill="gray")+
geom_text(aes(label=n), vjust=-0.3, color="black", size=3.5)+
theme_minimal() + xlab("Gender") + ylab("Incidents") + ggtitle("Traffic Voilations vs Gender") + theme_bw() + theme(plot.title = element_text(hjust=0.5))
p
#Please change the directory accordingly to your file system
setwd("C:/Users/aisha/Dropbox/CUNY/Semester1/DATA607_Data_Acquisition_and_Management/Week6")
write.csv(traffic_data_year,"traffic_data_year.csv")
write.csv(traffic_data_gender,"traffic_data_gender.csv")
Insight 1 - In Which Weather along wiith Light condition causing more crash
Insight 2 - Top 5 Crash Collision Type
Insight 3 - How Road Alignment and Road Condition is responsible for Crash
#Fetch Crash data from github repository
crash_data <- read.csv("https://raw.githubusercontent.com/thasleem1/DATA607/master/d607_p2/Crash_Reporting_-_Incidents_Data.csv",na.strings ="")
#List the data
kable(data.frame(head(crash_data))) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
row_spec(0, bold = T, color = "white", background = "#89c5f9") %>%
scroll_box(width = "100%", height = "200px")
| Report.Number | Local.Case.Number | Agency.Name | ACRS.Report.Type | Crash.Date.Time | Route.Type | Road.Name | Cross.Street.Type | Cross.Street.Name | Off.Road.Description | Municipality | Related.Non.Motorist | At.Fault | Collision.Type | Weather | Surface.Condition | Light | Traffic.Control | Driver.Substance.Abuse | Non.Motorist.Substance.Abuse | First.Harmful.Event | Second.Harmful.Event | Fixed.Oject.Struck | Junction | Intersection.Type | Road.Alignment | Road.Condition | Road.Division | Latitude | Longitude | Location |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| MCP26890028 | 16046213 | Montgomery County Police | Property Damage Crash | 09/09/2016 01:53:00 PM | County | E JEFFERSON ST | Unknown | CALIFORNIA CIR | NA | N/A | NA | DRIVER | SAME DIR REAR END | CLEAR | DRY | DAYLIGHT | NO CONTROLS | NONE DETECTED | NA | OTHER VEHICLE | N/A | N/A | NON INTERSECTION | N/A | STRAIGHT | NO DEFECTS | TWO-WAY, NOT DIVIDED WITH A CONTINUOUS LEFT TURN | 39.05685 | -77.12622 | (39.05685333, -77.126215) |
| MCP11520005 | 15000008 | Montgomery County Police | Property Damage Crash | 01/01/2015 12:30:00 AM | Maryland (State) | ROCKVILLE PIKE | County | GROSVENOR LA | NA | N/A | NA | DRIVER | SAME DIR REAR END | CLEAR | DRY | DARK LIGHTS ON | TRAFFIC SIGNAL | ALCOHOL CONTRIBUTED, N/A | NA | OTHER VEHICLE | N/A | N/A | INTERSECTION RELATED | FOUR-WAY INTERSECTION | STRAIGHT | NO DEFECTS | TWO-WAY, DIVIDED, POSITIVE MEDIAN BARRIER | 39.02200 | -77.10267 | (39.02200107, -77.10267305) |
| MCP2916001N | 170521681 | Montgomery County Police | Property Damage Crash | 07/28/2017 02:00:00 PM | NA | NA | NA | NA | 14620 PALMER HOUSE WAY | NA | NA | DRIVER | OTHER | RAINING | NA | DAYLIGHT | NO CONTROLS | NONE DETECTED | NA | OTHER VEHICLE | N/A | N/A | NA | NA | NA | NA | NA | 39.05121 | -76.98840 | (39.05121333, -76.98839667) |
| DD56040016 | 15051858 | ROCKVILLE | Property Damage Crash | 10/12/2015 11:59:00 PM | Municipality | EDMONSTON DR | Municipality | CRAWFORD DR | NA | ROCKVILLE | NA | DRIVER | OPPOSITE DIRECTION SIDESWIPE | CLEAR | DRY | DARK LIGHTS ON | NO CONTROLS | NONE DETECTED | NA | PARKED VEHICLE | N/A | N/A | NON INTERSECTION | N/A | CURVE LEFT | NO DEFECTS | TWO-WAY, DIVIDED, UNPROTECTED PAINTED MIN 4 FEET | 39.07750 | -77.13099 | (39.07749833, -77.13099333) |
| MCP1502000L | 15058824 | MONTGOMERY | Injury Crash | 11/18/2015 05:54:00 AM | County | SNOUFFERS SCHOOL RD | County | GOSHEN RD | NA | N/A | NA | DRIVER | STRAIGHT MOVEMENT ANGLE | CLEAR | DRY | DARK NO LIGHTS | STOP SIGN | NONE DETECTED | NA | OTHER VEHICLE | N/A | N/A | INTERSECTION | T-INTERSECTION | STRAIGHT | NO DEFECTS | TWO-WAY, NOT DIVIDED | 39.16101 | -77.16339 | (39.161005, -77.163395) |
| EJ7858000C | 15052805 | GAITHERSBURG | Property Damage Crash | 10/18/2015 11:45:00 PM | Maryland (State) | MONTGOMERY VILLAGE AVE | Maryland (State) | FREDERICK RD | NA | N/A | NA | DRIVER | SAME DIRECTION LEFT TURN | CLEAR | DRY | DARK LIGHTS ON | NO CONTROLS | NONE DETECTED, UNKNOWN | NA | OTHER VEHICLE | N/A | N/A | N/A | FOUR-WAY INTERSECTION | STRAIGHT | NO DEFECTS | TWO-WAY, DIVIDED, POSITIVE MEDIAN BARRIER | 39.15277 | -77.21159 | (39.15276875, -77.2115922) |
Insight 1 - Which weather and light combination causes more Crash Incident
#Fetch data, apply filter on weather
crash_data_weather <- crash_data %>% select(Weather,Light) %>% group_by(Weather,Light) %>% tally() %>% na.exclude() %>%
filter(Light != 'N/A' & Light != 'OTHER' & Light != 'UNKNOWN' & Light != 'DARK -- UNKNOWN LIGHTING') %>%
filter(Weather != 'N/A' & Weather != 'OTHER' & Weather != 'UNKNOWN') %>% rename(incidents = n)
#List the data
kable(data.frame(crash_data_weather)) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
row_spec(0, bold = T, color = "white", background = "#89c5f9") %>%
scroll_box(width = "100%", height = "200px")
| Weather | Light | incidents |
|---|---|---|
| BLOWING SAND, SOIL, DIRT | DARK LIGHTS ON | 2 |
| BLOWING SAND, SOIL, DIRT | DARK NO LIGHTS | 1 |
| BLOWING SAND, SOIL, DIRT | DAYLIGHT | 4 |
| BLOWING SNOW | DARK LIGHTS ON | 14 |
| BLOWING SNOW | DARK NO LIGHTS | 3 |
| BLOWING SNOW | DAWN | 2 |
| BLOWING SNOW | DAYLIGHT | 28 |
| BLOWING SNOW | DUSK | 3 |
| CLEAR | DARK LIGHTS ON | 7248 |
| CLEAR | DARK NO LIGHTS | 1138 |
| CLEAR | DAWN | 563 |
| CLEAR | DAYLIGHT | 21161 |
| CLEAR | DUSK | 733 |
| CLOUDY | DARK LIGHTS ON | 545 |
| CLOUDY | DARK NO LIGHTS | 80 |
| CLOUDY | DAWN | 143 |
| CLOUDY | DAYLIGHT | 4079 |
| CLOUDY | DUSK | 106 |
| FOGGY | DARK LIGHTS ON | 92 |
| FOGGY | DARK NO LIGHTS | 25 |
| FOGGY | DAWN | 20 |
| FOGGY | DAYLIGHT | 66 |
| FOGGY | DUSK | 3 |
| RAINING | DARK LIGHTS ON | 1885 |
| RAINING | DARK NO LIGHTS | 317 |
| RAINING | DAWN | 148 |
| RAINING | DAYLIGHT | 3420 |
| RAINING | DUSK | 174 |
| SEVERE WINDS | DARK LIGHTS ON | 13 |
| SEVERE WINDS | DARK NO LIGHTS | 5 |
| SEVERE WINDS | DAWN | 1 |
| SEVERE WINDS | DAYLIGHT | 36 |
| SEVERE WINDS | DUSK | 2 |
| SLEET | DARK LIGHTS ON | 29 |
| SLEET | DARK NO LIGHTS | 10 |
| SLEET | DAWN | 2 |
| SLEET | DAYLIGHT | 29 |
| SLEET | DUSK | 3 |
| SNOW | DARK LIGHTS ON | 198 |
| SNOW | DARK NO LIGHTS | 23 |
| SNOW | DAWN | 10 |
| SNOW | DAYLIGHT | 371 |
| SNOW | DUSK | 9 |
| WINTRY MIX | DARK LIGHTS ON | 61 |
| WINTRY MIX | DARK NO LIGHTS | 14 |
| WINTRY MIX | DAWN | 12 |
| WINTRY MIX | DAYLIGHT | 68 |
| WINTRY MIX | DUSK | 1 |
#Stacked Graph
ggplot(crash_data_weather, aes(fill=Weather, y=incidents, x=Light)) +
geom_bar( stat="identity", position="fill") + theme(axis.text.x = element_text(angle=45)
) + xlab("Light") + ylab("Incidents") + ggtitle("Weather and Light vs Incidents") + theme(plot.title = element_text(hjust=0.5))
Insight 2 - What is most common Collison Type
#Collison data
crash_data_Collison <- crash_data %>% select(Collision.Type) %>% group_by(Collision.Type) %>% tally() %>% na.exclude() %>% rename(incidents = n) %>%
filter(Collision.Type != 'N/A' & Collision.Type != 'OTHER' & Collision.Type != 'UNKNOWN') %>% arrange(desc(incidents)) %>% top_n(5)
#List Collison data
kable(data.frame(crash_data_Collison)) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
row_spec(0, bold = T, color = "white", background = "#89c5f9")
| Collision.Type | incidents |
|---|---|
| SAME DIR REAR END | 13732 |
| SINGLE VEHICLE | 7900 |
| STRAIGHT MOVEMENT ANGLE | 7090 |
| SAME DIRECTION SIDESWIPE | 4106 |
| HEAD ON LEFT TURN | 3292 |
#Plot Pie Chart
ggplot(crash_data_Collison, aes(x="", y=incidents, fill=Collision.Type)) +
geom_bar(width = 1, stat = "identity") +
coord_polar("y", start=0) +
geom_text(aes(label = incidents),
position = position_stack(vjust = 0.5)) +
labs(x = NULL, y = NULL, fill = NULL,
title = "Top 5 Crash Collision Type") +
guides(fill = guide_legend(reverse = TRUE)) +
scale_colour_gradientn(colours=rainbow(5)) +
theme_classic() +
theme(axis.line = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(hjust = 0.5, color = "#666666"))
Insight 3 - Which Road Alignment with Road Condition is more exposed to Crash Incident
#Get the Alignment and Road Condition data
crash_data_Alignment <- crash_data %>% select(Road.Alignment,Road.Condition) %>% group_by(Road.Alignment,Road.Condition) %>% tally() %>% na.exclude() %>% rename(incidents = n) %>%
filter(Road.Alignment != 'N/A' & Road.Alignment != 'UNKNOWN' & Road.Alignment != 'OTHER' & Road.Alignment != '') %>% filter(Road.Condition != 'N/A' & Road.Condition != 'OTHER' & Road.Condition != 'UNKNOWN') %>% arrange(desc(incidents))
## Warning: Factor `Road.Alignment` contains implicit NA, consider using
## `forcats::fct_explicit_na`
## Warning: Factor `Road.Condition` contains implicit NA, consider using
## `forcats::fct_explicit_na`
#list the data
kable(data.frame(crash_data_Alignment)) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
row_spec(0, bold = T, color = "white", background = "#89c5f9") %>%
scroll_box(width = "100%", height = "200px")
| Road.Alignment | Road.Condition | incidents |
|---|---|---|
| STRAIGHT | NO DEFECTS | 34716 |
| CURVE RIGHT | NO DEFECTS | 2518 |
| CURVE LEFT | NO DEFECTS | 2219 |
| STRAIGHT | HOLES RUTS ETC | 121 |
| STRAIGHT | LOOSE SURFACE MATERIAL | 74 |
| STRAIGHT | FOREIGN MATERIAL | 47 |
| STRAIGHT | VIEW OBSTRUCTED | 26 |
| CURVE LEFT | FOREIGN MATERIAL | 18 |
| STRAIGHT | SHOULDER DEFECT | 17 |
| CURVE LEFT | LOOSE SURFACE MATERIAL | 16 |
| CURVE RIGHT | HOLES RUTS ETC | 16 |
| CURVE RIGHT | LOOSE SURFACE MATERIAL | 16 |
| CURVE LEFT | HOLES RUTS ETC | 13 |
| CURVE LEFT | VIEW OBSTRUCTED | 8 |
| CURVE LEFT | SHOULDER DEFECT | 7 |
| CURVE RIGHT | SHOULDER DEFECT | 7 |
| CURVE RIGHT | FOREIGN MATERIAL | 6 |
| STRAIGHT | OBSTRUCTION NOT LIGHTED | 5 |
| CURVE RIGHT | OBSTRUCTION NOT LIGHTED | 4 |
| CURVE RIGHT | VIEW OBSTRUCTED | 4 |
| STRAIGHT | OBSTRUCTION NOT SIGNALED | 4 |
| CURVE LEFT | OBSTRUCTION NOT SIGNALED | 1 |
#plot Stacked Graph
ggplot(crash_data_Alignment, aes(fill=Road.Alignment, y=incidents, x=Road.Condition)) +
geom_bar( stat="identity", position="fill") + theme(axis.text.x = element_text(angle=90)
) + xlab("Road Condition") + ylab("Incidents") + ggtitle("Road Alignment with Road Condition vs Incidents") + theme(plot.title = element_text(hjust=0.5))
#Please change the directory accordingly to your file system
setwd("C:/Users/aisha/Dropbox/CUNY/Semester1/DATA607_Data_Acquisition_and_Management/Week6")
write.csv(crash_data_weather,"crash_data_weather.csv")
write.csv(crash_data_Collison,"crash_data_Collison.csv")
write.csv(crash_data_Alignment,"crash_data_Alignment.csv")
Insight 1 : When is the most favorable time for Crime Incidents
#fetch the zip code file
all_zip <- read.csv("https://raw.githubusercontent.com/thasleem1/DATA607/master/d607_p2/zip_codes.csv",na.strings ="")
names(all_zip)[1] <- "zip_code"
#fetch the zip code file
crime_data <- read.csv("https://raw.githubusercontent.com/thasleem1/DATA607/master/d607_p2/Crime.csv",na.strings ="")
#Transform Hour time
names(crime_data)[21] <- "Time"
#list the data
kable(data.frame(head(crime_data))) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
row_spec(0, bold = T, color = "white", background = "#89c5f9") %>%
scroll_box(width = "100%", height = "200px")
| Incident.ID | Offence.Code | CR.Number | Dispatch.Date.Time | NIBRS.Code | Victims | Crime.Name1 | Crime.Name2 | Crime.Name3 | Police.District.Name | Block_Address | City | State | Zip.Code | Agency | Place | Sector | Beat | PRA | Address.Number | Time | End.Date.Time | Latitude | Longitude | Police.District.Number | Location |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 201090098 | 2203 | 16036935 | 7/21/2016 13:23 | 220 | 1 | Crime Against Property | Burglary/Breaking and Entering | BURGLARY - FORCED ENTRY-NONRESIDENTIAL | MONTGOMERY VILLAGE | 8000 SNOUFFER SCHOOL RD | GAITHERSBURG | MD | 20879 | MCPD | Rental Storage Facility | R | 6R3 | 421 | 8000 | 7/20/2016 19:00 | 7/21/2016 9:00 | 39.15910 | -77.16044 | 6D | (39.159095928510695, -77.160441128692796) |
| 201090098 | 2901 | 16036935 | 7/21/2016 13:23 | 290 | 1 | Crime Against Property | Destruction/Damage/Vandalism of Property | DAMAGE PROPERTY - BUSINESS | MONTGOMERY VILLAGE | 8000 SNOUFFER SCHOOL RD | GAITHERSBURG | MD | 20879 | MCPD | Rental Storage Facility | R | 6R3 | 421 | 8000 | 7/20/2016 19:00 | 7/21/2016 9:00 | 39.15910 | -77.16044 | 6D | (39.159095928510695, -77.160441128692796) |
| 201089849 | 3522 | 16036571 | 7/19/2016 16:35 | 35A | 1 | Crime Against Society | Drug/Narcotic Violations | DRUGS - OPIUM OR DERIVATIVE - POSSESS | GERMANTOWN | 19900 LOCBURY DR | GERMANTOWN | MD | 20874 | MCPD | Street - Residential | N | 5N1 | 702 | 19900 | 7/19/2016 16:35 | NA | 39.18292 | -77.27078 | 5D | (39.182916491931458, -77.270775053585496) |
| 201089849 | 3550 | 16036571 | 7/19/2016 16:35 | 35B | 1 | Crime Against Society | Drug Equipment Violations | DRUGS - NARCOTIC EQUIP - POSSESS | GERMANTOWN | 19900 LOCBURY DR | GERMANTOWN | MD | 20874 | MCPD | Street - Residential | N | 5N1 | 702 | 19900 | 7/19/2016 16:35 | NA | 39.18292 | -77.27078 | 5D | (39.182916491931458, -77.270775053585496) |
| 201090376 | 2203 | 16037277 | 7/23/2016 9:25 | 220 | 1 | Crime Against Property | Burglary/Breaking and Entering | BURGLARY - FORCED ENTRY-NONRESIDENTIAL | MONTGOMERY VILLAGE | 8000 SNOUFFER SCHOOL RD | GAITHERSBURG | MD | 20879 | MCPD | Rental Storage Facility | R | 6R3 | 421 | 8000 | 7/11/2016 14:00 | 7/12/2016 11:00 | 39.15910 | -77.16044 | 6D | (39.159095928510695, -77.160441128692796) |
| 201100302 | 1103 | 16049613 | 9/26/2016 22:50 | 11A | 1 | Crime Against Person | Forcible Rape | RAPE - STRONG-ARM | ROCKVILLE | 13200 ATLANTIC AVE | ROCKVILLE | MD | 20851 | MCPD | Park | A | 1A2 | 522 | 13200 | 9/24/2016 23:50 | 9/25/2016 0:00 | 39.07355 | -77.11571 | 1D | (39.073548626973007, -77.115714724003979) |
#Seperate Hours
Hours <- crime_data %>% mutate(
cleanTime = ifelse(grepl(":[0-9][0-9]:", Time)
, Time
, paste0(Time, ":00")) %>% mdy_hms
, hour = format(cleanTime, "%H:00:00")
)
#group by Hour
group_hours <- Hours %>% select(hour) %>% group_by(hour) %>% tally() %>% na.exclude()
#plot Graph
ggplot(group_hours, aes(x=hour, y=n)) +
geom_segment( aes(x=hour, xend=hour, y=0, yend=n), color="grey") +
geom_point( color="orange", size=4) +
theme_light() + coord_flip() +
theme(
panel.grid.major.x = element_blank(),
panel.border = element_blank(),
axis.ticks.x = element_blank()
) +
xlab("Hour in EST") +
ylab("Number of Crime Incidents") + ggtitle("Hour vs Incident") + theme(plot.title = element_text(hjust=0.5))
Insight 2 : Which City are most vulnerable to Crime Incidents
#Using join and aggregate the data
crime_data_zip <- crime_data %>%
filter(State == 'MD') %>% select(Zip.Code) %>% rename(zip_code = Zip.Code) %>% group_by(zip_code) %>% tally() %>% rename(incidents = n) %>% left_join(all_zip,crime_data1,by='zip_code') %>% na.exclude() %>% filter(incidents > 0)
#list the data
kable(data.frame(head(crime_data_zip))) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
row_spec(0, bold = T, color = "white", background = "#89c5f9") %>%
scroll_box(width = "100%", height = "200px")
| zip_code | incidents | City | County | Type |
|---|---|---|---|---|
| 20705 | 4 | Beltsville | Prince Georges | Standard |
| 20706 | 4 | Lanham | Prince Georges | Standard |
| 20707 | 7 | Laurel | Prince Georges | Standard |
| 20770 | 1 | Greenbelt | Prince Georges | Standard |
| 20777 | 1 | Highland | Howard | Standard |
| 20782 | 2 | Hyattsville | Prince Georges | Standard |
#plot the graph
ggplot(crime_data_zip, aes(x=City, y=incidents, fill=City)) + geom_bar(stat = "identity") + theme_light() + coord_flip() +
xlab("City") + ylab("Number of Crime Incidents") +
theme(legend.position="none") +
scale_fill_grey(start = 0.80, end = 0.15) + ggtitle("City vs Incidents") + theme(plot.title = element_text(hjust=0.5))
#Please change the directory accordingly to your file system
setwd("C:/Users/aisha/Dropbox/CUNY/Semester1/DATA607_Data_Acquisition_and_Management/Week6")
write.csv(crime_data_zip,"crime_data_zip.csv")
write.csv(group_hours,"group_hours.csv")