library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(ggplot2)
library(openintro)
## Please visit openintro.org for free statistics materials
##
## Attaching package: 'openintro'
## The following object is masked from 'package:ggplot2':
##
## diamonds
## The following objects are masked from 'package:datasets':
##
## cars, trees
library(usmap)
# Load the school gun-violence incident data and prepare it for analysis.
# NOTE(review): setwd() ties this script to one machine's directory layout.
# It is kept because other steps may depend on the working directory;
# consider a project-relative path instead.
setwd("~/Data_Science/R/Projects/School Gun Violence")

# stringsAsFactors = TRUE is spelled out because the School recoding below
# (levels() / droplevels()) requires a factor. Since R 4.0.0, read.csv()
# leaves text columns as character by default, which makes droplevels() fail.
gun <- read.csv(
  "pah_us_school_gun_violence.csv",
  sep = ",",
  header = TRUE,
  stringsAsFactors = TRUE
)

# Parse the month-day-year text into Date values (as.character() guards
# against the column having been read as a factor).
gun$Date <- mdy(as.character(gun$Date))

# Recode the blank school type as "PS" (pre-school). The new level is
# appended so the existing level order is preserved; unique() avoids a
# duplicated-level error if "PS" is already present, and %in% never yields
# NA in the assignment mask.
levels(gun$School) <- unique(c(levels(gun$School), "PS"))
gun$School[gun$School %in% ""] <- "PS"
gun$School <- droplevels(gun$School) # drops the now-unused "" level

# Two-letter state abbreviation derived from the State column.
gun$state_code <- state2abbr(gun$State)

summary(gun)
## X Date City State
## Min. : 0 Min. :1990-05-20 Chicago : 10 California : 56
## 1st Qu.: 95 1st Qu.:1996-02-08 Houston : 9 Texas : 35
## Median :190 Median :2003-06-04 Detroit : 7 Florida : 22
## Mean :190 Mean :2003-01-14 Los Angeles: 7 Pennsylvania: 17
## 3rd Qu.:285 3rd Qu.:2009-05-05 Memphis : 7 Tennessee : 17
## Max. :380 Max. :2013-12-19 Seattle : 5 Illinois : 16
## (Other) :336 (Other) :218
## AreaType School Fatalities state_code
## rural : 23 C : 68 Min. : 0.000 Length:381
## suburban:102 ES: 35 1st Qu.: 0.000 Class :character
## urban :256 HS:226 Median : 1.000 Mode :character
## MS: 51 Mean : 1.089
## PS: 1 3rd Qu.: 1.000
## Max. :32.000
##
# Yearly fatality trend: derive the calendar year of each incident, total
# the fatalities within each year, and draw the totals as a line.
gun %>%
  mutate(Year = year(Date)) %>%
  group_by(Year) %>%
  summarise(Total_Fatalities = sum(Fatalities)) %>%
  ggplot(mapping = aes(x = Year, y = Total_Fatalities)) +
  geom_line()
# Incidents per year as bars, annotated with the average number of
# fatalities per incident (rounded to two decimals) for that year.
gun %>%
  group_by(Year = year(Date)) %>%
  summarize(
    Total_School_Shooting_Incidents = n(),
    Avg_Fatalities_per_Incident = round(sum(Fatalities) / n(), digits = 2)
  ) %>%
  ggplot(aes(x = Year, y = Total_School_Shooting_Incidents)) +
  # geom_col() is the direct form of geom_bar(stat = "identity").
  geom_col() +
  # Only the label is data-driven; the fixed justification and rotation are
  # set as constants outside aes() rather than mapped as aesthetics.
  geom_text(
    aes(label = Avg_Fatalities_per_Incident),
    hjust = -0.3,
    vjust = 0.25,
    angle = 90
  ) +
  theme(panel.background = element_rect(colour = "black")) +
  labs(
    title = "School shootings per year in USA from 1990 to 2013",
    caption = "Numbers above the bar represent average fatality per shooting"
  )
2007 appears to be the worst year: on average there were at least 3 fatalities per shooting incident, resulting in 30 deaths. The number of incidents decreased fairly consistently from 1993 to 2005 and then rose again.
# Share of each year's incidents by school type. position = "fill" stacks
# the per-year counts and rescales every bar to a total of 1, so the y axis
# is a proportion rather than a raw count; the axis labels say so and give
# the x axis a readable name instead of the expression year(Date).
ggplot(gun, aes(x = year(Date), fill = School)) +
  geom_bar(position = "fill") +
  labs(x = "Year", y = "Share of Shooting Incidents")
High schools accounted for the most shooting incidents among all school types in every year except 2002.
# Number of incidents per state, ordered from most to fewest.
mydata <- gun %>%
  group_by(state = state_code) %>%
  summarize(Number_Of_Incidents = n()) %>%
  arrange(desc(Number_Of_Incidents))

# Choropleth of incident counts by state. The data column is passed through
# the fully spelled `values` argument instead of relying on partial matching
# of `value`.
plot_usmap(
  regions = "states",
  data = mydata,
  values = "Number_Of_Incidents",
  color = "black",
  labels = FALSE
) +
  scale_fill_continuous(
    low = "white",
    high = "red",
    name = "Total Number of Incidents"
  ) +
  theme(
    legend.position = "right",
    panel.background = element_rect(color = "black")
  ) +
  labs(title = "School shooting incidents in USA from 1990 to 2013")
# Total fatalities per state, ordered from most to fewest.
mydata <- gun %>%
  group_by(state = state_code) %>%
  summarize(Number_Of_Fatalities = sum(Fatalities)) %>%
  arrange(desc(Number_Of_Fatalities))

# Choropleth of fatalities by state. The data column is passed through the
# fully spelled `values` argument instead of relying on partial matching of
# `value`.
plot_usmap(
  regions = "states",
  data = mydata,
  values = "Number_Of_Fatalities",
  color = "black",
  labels = FALSE
) +
  scale_fill_continuous(
    low = "white",
    high = "red",
    name = "Total Fatalities"
  ) +
  theme(
    legend.position = "right",
    panel.background = element_rect(colour = "black")
  ) +
  labs(title = "School shooting fatalities in USA from 1990 to 2013")
From the plots above, it is clear that California has the highest number of school shooting incidents and the highest number of fatalities. Texas ranks second in number of incidents, but Virginia (VA) has more fatalities than Texas (TX). The states shown in grey did not have any reported incidents.