library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(ggplot2)
library(openintro)
## Please visit openintro.org for free statistics materials
## 
## Attaching package: 'openintro'
## The following object is masked from 'package:ggplot2':
## 
##     diamonds
## The following objects are masked from 'package:datasets':
## 
##     cars, trees
library(usmap)
# Load the school gun violence records and prepare the columns used below.
# The file path is built explicitly instead of calling setwd(), so the
# session's working directory is left untouched.
# stringsAsFactors = TRUE is required: the level handling on School below
# (and the per-level counts printed by summary()) need factor columns, which
# is no longer the read.csv() default since R 4.0.
gun <- read.csv(
  file.path(
    "~/Data_Science/R/Projects/School Gun Violence",
    "pah_us_school_gun_violence.csv"
  ),
  sep = ",",
  header = TRUE,
  stringsAsFactors = TRUE
)
# Date is read as a factor; convert to character before parsing it as
# month-day-year.
gun$Date <- mdy(as.character(gun$Date))
# Records with an empty School value are relabelled "PS" (Pre-School):
# add the new level first, recode, then drop the now-unused empty level.
levels(gun$School) <- c(levels(gun$School), "PS")
gun$School[gun$School == ""] <- "PS"
gun$School <- droplevels(gun$School)
# State abbreviation derived from the State column.
gun$state_code <- state2abbr(gun$State)
summary(gun)
##        X            Date                     City              State    
##  Min.   :  0   Min.   :1990-05-20   Chicago    : 10   California  : 56  
##  1st Qu.: 95   1st Qu.:1996-02-08   Houston    :  9   Texas       : 35  
##  Median :190   Median :2003-06-04   Detroit    :  7   Florida     : 22  
##  Mean   :190   Mean   :2003-01-14   Los Angeles:  7   Pennsylvania: 17  
##  3rd Qu.:285   3rd Qu.:2009-05-05   Memphis    :  7   Tennessee   : 17  
##  Max.   :380   Max.   :2013-12-19   Seattle    :  5   Illinois    : 16  
##                                     (Other)    :336   (Other)     :218  
##      AreaType   School     Fatalities      state_code       
##  rural   : 23   C : 68   Min.   : 0.000   Length:381        
##  suburban:102   ES: 35   1st Qu.: 0.000   Class :character  
##  urban   :256   HS:226   Median : 1.000   Mode  :character  
##                 MS: 51   Mean   : 1.089                     
##                 PS:  1   3rd Qu.: 1.000                     
##                          Max.   :32.000                     
## 
# Line chart of the total number of fatalities in each calendar year.
gun %>%
  mutate(Year = year(Date)) %>%
  group_by(Year) %>%
  summarise(Total_Fatalities = sum(Fatalities)) %>%
  ggplot(mapping = aes(x = Year, y = Total_Fatalities)) +
  geom_line()

# Bar chart of the number of incidents per year. Each bar is annotated with
# the average number of fatalities per incident, rounded to 2 digits.
gun %>%
  mutate(Year = year(Date)) %>%
  group_by(Year) %>%
  summarise(
    Total_School_Shooting_Incidents = n(),
    Avg_Fatalities_per_Incident = round(sum(Fatalities) / n(), digits = 2)
  ) %>%
  ggplot(aes(x = Year, y = Total_School_Shooting_Incidents)) +
  geom_col() +
  # Only the label text varies per bar; rotation and justification are
  # constants, so they are set outside aes().
  geom_text(
    aes(label = Avg_Fatalities_per_Incident),
    hjust = -0.3,
    vjust = 0.25,
    angle = 90
  ) +
  theme(panel.background = element_rect(colour = "black")) +
  labs(
    title = "School shootings per year in USA from 1990 to 2013",
    caption = "Numbers above the bar represent average fatality per shooting"
  )


2007 appears to be the worst year: there were, on average, at least 3 fatalities per shooting incident, resulting in 30 deaths. The number of incidents decreased in a roughly consistent way from 1993 to 2005 and then went back up.

# Bars are normalised to 1 (position = "fill"), so each year's bar shows the
# share of that year's incidents by school type rather than raw counts; the
# y-axis is labelled accordingly. The x-axis gets a readable label instead of
# the default expression text "year(Date)".
ggplot(gun, aes(x = year(Date), fill = School)) +
  geom_bar(stat = "count", position = "fill") +
  labs(x = "Year", y = "Share of Shooting Incidents")


Among all school types, high schools have consistently accounted for the largest share of shooting incidents, except in 2002.

# Number of incidents per state, sorted with the most affected states first.
# The grouping column is named `state` because the map call below receives
# this table as its `data` argument.
mydata <- gun %>%
  group_by(state = state_code) %>%
  summarize(Number_Of_Incidents = n()) %>%
  arrange(desc(Number_Of_Incidents))

# State-level map shaded from white to red by incident count.
# `values` is spelled out in full instead of relying on partial matching of
# the abbreviated argument name `value`.
plot_usmap(
  regions = "states",
  data = mydata,
  values = "Number_Of_Incidents",
  color = "black",
  labels = FALSE
) +
  scale_fill_continuous(
    low = "white",
    high = "red",
    name = "Total Number of Incidents"
  ) +
  theme(
    legend.position = "right",
    panel.background = element_rect(color = "black")
  ) +
  labs(title = "School shooting incidents in USA from 1990 to 2013")

# Total fatalities per state, sorted with the most affected states first.
# The grouping column is named `state` because the map call below receives
# this table as its `data` argument.
mydata <- gun %>%
  group_by(state = state_code) %>%
  summarize(Number_Of_Fatalities = sum(Fatalities)) %>%
  arrange(desc(Number_Of_Fatalities))

# State-level map shaded from white to red by total fatalities.
# `values` is spelled out in full instead of relying on partial matching of
# the abbreviated argument name `value`.
plot_usmap(
  regions = "states",
  data = mydata,
  values = "Number_Of_Fatalities",
  color = "black",
  labels = FALSE
) +
  scale_fill_continuous(
    low = "white",
    high = "red",
    name = "Total Fatalities"
  ) +
  theme(
    legend.position = "right",
    panel.background = element_rect(colour = "black")
  ) +
  labs(title = "School shooting fatalities in USA from 1990 to 2013")


From the plots above, it is clear that California has the highest number of school shooting incidents and the highest number of fatalities. Texas comes in second in number of incidents, but Virginia has a higher number of fatalities than Texas. The states in grey did not have any reported incidents.