require(lubridate)
## Loading required package: lubridate
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:lubridate':
##
## intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
require(tidyr)
## Loading required package: tidyr
require(stringr)
## Loading required package: stringr
require(RMySQL)
## Loading required package: RMySQL
require(doBy)
## Loading required package: doBy
require(knitr)
## Loading required package: knitr
require(ggplot2)
## Loading required package: ggplot2
require(reshape)
## Loading required package: reshape
##
## Attaching package: 'reshape'
## The following objects are masked from 'package:tidyr':
##
## expand, smiths
## The following object is masked from 'package:dplyr':
##
## rename
## The following object is masked from 'package:lubridate':
##
## stamp
knitr::opts_chunk$set(echo = TRUE)
yr_2013 <- read.csv(file = ("https://raw.githubusercontent.com/raghu74us/606/master/2013_shooter.csv"),sep =",")
#not including the columns that are not needed for analysis.
yr_2013 <- yr_2013[ , c('date', 'killed', 'wounded', 'location')]
#split state and city based on location
str(yr_2013)
## 'data.frame': 363 obs. of 4 variables:
## $ date : Factor w/ 210 levels "1/1/13","1/10/13",..: 1 1 1 1 10 11 11 11 2 3 ...
## $ killed : int 4 1 0 1 4 5 2 3 3 1 ...
## $ wounded : int 0 3 4 4 0 0 2 1 2 4 ...
## $ location: Factor w/ 249 levels "Aguas Buenas, PR",..: 199 87 135 122 9 228 76 50 154 229 ...
head(yr_2013,5)
## date killed wounded location
## 1 1/1/13 4 0 Sacramento, CA
## 2 1/1/13 1 3 Hawthorne, CA
## 3 1/1/13 0 4 McKeesport, PA
## 4 1/1/13 1 4 Lorain, OH
## 5 1/5/13 4 0 Aurora, CO
cit_st <- strsplit( as.character(yr_2013$location), ", " , fixed=TRUE)
mat <- matrix(unlist(cit_st), ncol=2, byrow=TRUE)
df <- as.data.frame(mat)
yr_2013_cit_st <- cbind(yr_2013, df)
colnames(yr_2013_cit_st) <- c('date', 'killed', 'wounded', 'location', "City", "State")
#Add month and year based on date.
yr_2013_cit_st$date <- mdy(yr_2013_cit_st$date)
str(yr_2013_cit_st)
## 'data.frame': 363 obs. of 6 variables:
## $ date : Date, format: "2013-01-01" "2013-01-01" ...
## $ killed : int 4 1 0 1 4 5 2 3 3 1 ...
## $ wounded : int 0 3 4 4 0 0 2 1 2 4 ...
## $ location: Factor w/ 249 levels "Aguas Buenas, PR",..: 199 87 135 122 9 228 76 50 154 229 ...
## $ City : Factor w/ 244 levels "Aguas Buenas",..: 195 87 131 119 9 223 76 50 150 224 ...
## $ State : Factor w/ 49 levels "AL","AZ","CA",..: 3 3 37 34 4 36 28 46 18 1 ...
y1<-year(yr_2013_cit_st$date)
m1<-as.character(yr_2013_cit_st$date, format="%b")
d1<-as.character(yr_2013_cit_st$date, format="%d")
df1 <- cbind(d1,m1,y1)
colnames(df1) <- c('day','month','year')
yr_2013_df2 <- cbind(yr_2013_cit_st , df1)
#str(yr_2013_df2)
#summarize by total killed and wounded in each state
yr_2013_df2_kw <- yr_2013_df2 %>%
select(year,State,month,killed,wounded) %>%
filter( str_length(yr_2013_df2$State) == 2) %>%
group_by(year,State,month ) %>%
summarise_each(funs(sum) ) %>%
arrange(year,State,month)
#summarize by total incidents in each state
yr_2013_df2_inc <- yr_2013_df2 %>%
select(year,State,month,killed,wounded) %>%
filter( str_length(yr_2013_df2$State) == 2) %>%
group_by(year,State,month ) %>%
summarise( total_incidents= n() ) %>%
arrange(year,State,month)
kable(yr_2013_df2_kw)
year | State | month | killed | wounded |
---|---|---|---|---|
2013 | AL | Dec | 6 | 6 |
2013 | AL | Jan | 1 | 4 |
2013 | AL | Jul | 1 | 7 |
2013 | AL | Mar | 3 | 1 |
2013 | AL | May | 0 | 4 |
2013 | AL | Oct | 4 | 0 |
2013 | AZ | Apr | 2 | 4 |
2013 | AZ | Jan | 3 | 1 |
2013 | AZ | May | 5 | 4 |
2013 | AZ | Nov | 5 | 3 |
2013 | AZ | Oct | 5 | 4 |
2013 | CA | Apr | 3 | 13 |
2013 | CA | Aug | 3 | 12 |
2013 | CA | Dec | 4 | 0 |
2013 | CA | Feb | 12 | 25 |
2013 | CA | Jan | 5 | 7 |
2013 | CA | Jul | 6 | 15 |
2013 | CA | Jun | 10 | 12 |
2013 | CA | Mar | 7 | 23 |
2013 | CA | May | 6 | 16 |
2013 | CA | Nov | 2 | 23 |
2013 | CA | Oct | 4 | 35 |
2013 | CA | Sep | 6 | 19 |
2013 | CO | Feb | 3 | 1 |
2013 | CO | Jan | 4 | 0 |
2013 | CO | Jun | 0 | 4 |
2013 | CO | Sep | 0 | 5 |
2013 | CT | Dec | 4 | 0 |
2013 | CT | Jul | 0 | 4 |
2013 | CT | Oct | 1 | 5 |
2013 | CT | Sep | 1 | 4 |
2013 | DC | Jan | 0 | 5 |
2013 | DC | Jul | 0 | 4 |
2013 | DC | Mar | 0 | 13 |
2013 | DC | Nov | 0 | 4 |
2013 | DC | Sep | 14 | 11 |
2013 | DE | Aug | 0 | 4 |
2013 | DE | Dec | 0 | 4 |
2013 | DE | Feb | 3 | 2 |
2013 | DE | Sep | 0 | 4 |
2013 | FL | Apr | 1 | 8 |
2013 | FL | Aug | 3 | 2 |
2013 | FL | Dec | 7 | 10 |
2013 | FL | Feb | 2 | 2 |
2013 | FL | Jul | 12 | 17 |
2013 | FL | Mar | 0 | 9 |
2013 | FL | Nov | 7 | 1 |
2013 | FL | Oct | 2 | 10 |
2013 | FL | Sep | 4 | 13 |
2013 | GA | Feb | 0 | 8 |
2013 | GA | Jul | 1 | 3 |
2013 | GA | Jun | 0 | 7 |
2013 | GA | Mar | 1 | 3 |
2013 | GA | May | 0 | 4 |
2013 | GA | Nov | 0 | 4 |
2013 | GA | Sep | 3 | 1 |
2013 | IA | Mar | 1 | 3 |
2013 | IL | Apr | 6 | 1 |
2013 | IL | Aug | 2 | 7 |
2013 | IL | Feb | 0 | 4 |
2013 | IL | Jul | 5 | 21 |
2013 | IL | Jun | 8 | 31 |
2013 | IL | Mar | 0 | 7 |
2013 | IL | May | 0 | 8 |
2013 | IL | Sep | 1 | 21 |
2013 | IN | Aug | 0 | 8 |
2013 | IN | Dec | 1 | 3 |
2013 | IN | Jun | 0 | 4 |
2013 | IN | Mar | 1 | 3 |
2013 | IN | May | 4 | 0 |
2013 | IN | Nov | 0 | 4 |
2013 | IN | Sep | 2 | 4 |
2013 | KA | Apr | 4 | 0 |
2013 | KA | Dec | 4 | 0 |
2013 | KA | Nov | 4 | 0 |
2013 | KA | Sep | 1 | 6 |
2013 | KS | Jul | 1 | 8 |
2013 | KY | Apr | 1 | 4 |
2013 | KY | Aug | 4 | 0 |
2013 | KY | Dec | 0 | 4 |
2013 | KY | Jul | 2 | 2 |
2013 | KY | Jun | 3 | 1 |
2013 | KY | Nov | 1 | 3 |
2013 | LA | Dec | 5 | 10 |
2013 | LA | Feb | 0 | 4 |
2013 | LA | Jan | 4 | 10 |
2013 | LA | Jun | 0 | 4 |
2013 | LA | Mar | 1 | 3 |
2013 | LA | May | 0 | 19 |
2013 | MA | Apr | 2 | 2 |
2013 | MD | Aug | 3 | 8 |
2013 | MD | Jul | 0 | 4 |
2013 | MD | Jun | 2 | 7 |
2013 | ME | Jul | 1 | 3 |
2013 | MI | Aug | 0 | 5 |
2013 | MI | Dec | 1 | 3 |
2013 | MI | Feb | 0 | 4 |
2013 | MI | Jul | 1 | 15 |
2013 | MI | Mar | 0 | 4 |
2013 | MI | May | 3 | 14 |
2013 | MI | Nov | 3 | 12 |
2013 | MI | Sep | 3 | 5 |
2013 | MN | Aug | 2 | 2 |
2013 | MN | Feb | 1 | 4 |
2013 | MN | Jun | 2 | 2 |
2013 | MN | Nov | 1 | 3 |
2013 | Mo | Aug | 1 | 3 |
2013 | MO | Aug | 2 | 6 |
2013 | MO | Jan | 1 | 4 |
2013 | MO | Jun | 7 | 20 |
2013 | MO | Mar | 1 | 4 |
2013 | MO | Sep | 2 | 6 |
2013 | MS | Jul | 0 | 4 |
2013 | MS | Mar | 2 | 2 |
2013 | NC | Apr | 0 | 5 |
2013 | NC | Aug | 0 | 4 |
2013 | NC | Dec | 0 | 8 |
2013 | NC | Feb | 3 | 5 |
2013 | NC | Jan | 2 | 2 |
2013 | NC | Jul | 1 | 3 |
2013 | NC | Jun | 0 | 9 |
2013 | NC | Mar | 0 | 4 |
2013 | NC | May | 4 | 4 |
2013 | NC | Oct | 3 | 6 |
2013 | NC | Sep | 0 | 12 |
2013 | NE | Aug | 4 | 0 |
2013 | NE | Oct | 0 | 4 |
2013 | NJ | Aug | 2 | 11 |
2013 | NJ | Dec | 4 | 6 |
2013 | NJ | Jul | 2 | 7 |
2013 | NJ | Jun | 1 | 4 |
2013 | NJ | May | 0 | 18 |
2013 | NM | Jan | 5 | 0 |
2013 | NM | Jul | 1 | 7 |
2013 | NM | Oct | 0 | 4 |
2013 | NV | Dec | 2 | 2 |
2013 | NV | Jun | 2 | 2 |
2013 | NV | May | 5 | 0 |
2013 | NV | Oct | 3 | 6 |
2013 | NV | Sep | 0 | 4 |
2013 | NY | Aug | 2 | 6 |
2013 | NY | Dec | 5 | 8 |
2013 | NY | Jul | 1 | 12 |
2013 | NY | Jun | 0 | 9 |
2013 | NY | Mar | 7 | 8 |
2013 | NY | Nov | 4 | 5 |
2013 | NY | Oct | 0 | 4 |
2013 | NY | Sep | 0 | 4 |
2013 | OH | Apr | 4 | 0 |
2013 | OH | Aug | 3 | 10 |
2013 | OH | Dec | 2 | 2 |
2013 | OH | Jan | 2 | 7 |
2013 | OH | Jul | 0 | 5 |
2013 | OH | May | 0 | 5 |
2013 | OH | Nov | 3 | 5 |
2013 | OH | Sep | 0 | 4 |
2013 | OK | Aug | 4 | 0 |
2013 | OK | Feb | 1 | 3 |
2013 | OK | Jan | 5 | 0 |
2013 | OK | Jul | 1 | 3 |
2013 | OK | Nov | 4 | 1 |
2013 | OK | Oct | 2 | 7 |
2013 | PA | Apr | 4 | 12 |
2013 | PA | Aug | 5 | 10 |
2013 | PA | Jan | 0 | 4 |
2013 | PA | Jun | 0 | 4 |
2013 | PA | May | 0 | 12 |
2013 | PA | Nov | 0 | 4 |
2013 | PA | Oct | 3 | 17 |
2013 | PA | Sep | 4 | 0 |
2013 | PR | May | 7 | 9 |
2013 | RI | Jun | 0 | 4 |
2013 | SC | Apr | 1 | 4 |
2013 | SC | Aug | 2 | 8 |
2013 | SC | Jul | 2 | 2 |
2013 | SC | Jun | 3 | 1 |
2013 | SC | Oct | 6 | 0 |
2013 | TN | Dec | 4 | 0 |
2013 | TN | Feb | 2 | 7 |
2013 | TN | Jun | 1 | 7 |
2013 | TN | May | 5 | 7 |
2013 | TN | Sep | 5 | 4 |
2013 | TX | Aug | 4 | 4 |
2013 | TX | Dec | 0 | 5 |
2013 | TX | Jul | 4 | 5 |
2013 | TX | Jun | 1 | 3 |
2013 | TX | Mar | 4 | 6 |
2013 | TX | May | 3 | 5 |
2013 | TX | Nov | 6 | 21 |
2013 | TX | Oct | 9 | 4 |
2013 | TX | Sep | 5 | 0 |
2013 | UT | Feb | 3 | 1 |
2013 | VA | Apr | 2 | 7 |
2013 | VA | Aug | 1 | 4 |
2013 | VA | Jan | 3 | 1 |
2013 | VA | Jun | 1 | 19 |
2013 | VA | May | 5 | 8 |
2013 | WA | Apr | 5 | 2 |
2013 | WA | Jul | 0 | 4 |
2013 | WA | Mar | 3 | 1 |
2013 | WA | Sep | 0 | 6 |
2013 | WI | Jun | 0 | 4 |
2013 | WV | Dec | 1 | 3 |
2013 | WV | Jul | 4 | 0 |
2013 | WV | Sep | 0 | 6 |
kable(yr_2013_df2_inc)
year | State | month | total_incidents |
---|---|---|---|
2013 | AL | Dec | 2 |
2013 | AL | Jan | 1 |
2013 | AL | Jul | 2 |
2013 | AL | Mar | 1 |
2013 | AL | May | 1 |
2013 | AL | Oct | 1 |
2013 | AZ | Apr | 1 |
2013 | AZ | Jan | 1 |
2013 | AZ | May | 2 |
2013 | AZ | Nov | 2 |
2013 | AZ | Oct | 2 |
2013 | CA | Apr | 4 |
2013 | CA | Aug | 3 |
2013 | CA | Dec | 1 |
2013 | CA | Feb | 7 |
2013 | CA | Jan | 3 |
2013 | CA | Jul | 5 |
2013 | CA | Jun | 4 |
2013 | CA | Mar | 6 |
2013 | CA | May | 5 |
2013 | CA | Nov | 5 |
2013 | CA | Oct | 5 |
2013 | CA | Sep | 5 |
2013 | CO | Feb | 1 |
2013 | CO | Jan | 1 |
2013 | CO | Jun | 1 |
2013 | CO | Sep | 1 |
2013 | CT | Dec | 1 |
2013 | CT | Jul | 1 |
2013 | CT | Oct | 1 |
2013 | CT | Sep | 1 |
2013 | DC | Jan | 1 |
2013 | DC | Jul | 1 |
2013 | DC | Mar | 1 |
2013 | DC | Nov | 1 |
2013 | DC | Sep | 2 |
2013 | DE | Aug | 1 |
2013 | DE | Dec | 1 |
2013 | DE | Feb | 1 |
2013 | DE | Sep | 1 |
2013 | FL | Apr | 2 |
2013 | FL | Aug | 1 |
2013 | FL | Dec | 4 |
2013 | FL | Feb | 1 |
2013 | FL | Jul | 4 |
2013 | FL | Mar | 2 |
2013 | FL | Nov | 2 |
2013 | FL | Oct | 3 |
2013 | FL | Sep | 4 |
2013 | GA | Feb | 1 |
2013 | GA | Jul | 1 |
2013 | GA | Jun | 1 |
2013 | GA | Mar | 1 |
2013 | GA | May | 1 |
2013 | GA | Nov | 1 |
2013 | GA | Sep | 1 |
2013 | IA | Mar | 1 |
2013 | IL | Apr | 1 |
2013 | IL | Aug | 2 |
2013 | IL | Feb | 1 |
2013 | IL | Jul | 5 |
2013 | IL | Jun | 7 |
2013 | IL | Mar | 1 |
2013 | IL | May | 2 |
2013 | IL | Sep | 3 |
2013 | IN | Aug | 2 |
2013 | IN | Dec | 1 |
2013 | IN | Jun | 1 |
2013 | IN | Mar | 1 |
2013 | IN | May | 1 |
2013 | IN | Nov | 1 |
2013 | IN | Sep | 1 |
2013 | KA | Apr | 1 |
2013 | KA | Dec | 1 |
2013 | KA | Nov | 1 |
2013 | KA | Sep | 1 |
2013 | KS | Jul | 2 |
2013 | KY | Apr | 1 |
2013 | KY | Aug | 1 |
2013 | KY | Dec | 1 |
2013 | KY | Jul | 1 |
2013 | KY | Jun | 1 |
2013 | KY | Nov | 1 |
2013 | LA | Dec | 2 |
2013 | LA | Feb | 1 |
2013 | LA | Jan | 3 |
2013 | LA | Jun | 1 |
2013 | LA | Mar | 1 |
2013 | LA | May | 1 |
2013 | MA | Apr | 1 |
2013 | MD | Aug | 2 |
2013 | MD | Jul | 1 |
2013 | MD | Jun | 2 |
2013 | ME | Jul | 1 |
2013 | MI | Aug | 1 |
2013 | MI | Dec | 1 |
2013 | MI | Feb | 1 |
2013 | MI | Jul | 3 |
2013 | MI | Mar | 1 |
2013 | MI | May | 4 |
2013 | MI | Nov | 2 |
2013 | MI | Sep | 2 |
2013 | MN | Aug | 1 |
2013 | MN | Feb | 1 |
2013 | MN | Jun | 1 |
2013 | MN | Nov | 1 |
2013 | Mo | Aug | 1 |
2013 | MO | Aug | 2 |
2013 | MO | Jan | 1 |
2013 | MO | Jun | 5 |
2013 | MO | Mar | 1 |
2013 | MO | Sep | 2 |
2013 | MS | Jul | 1 |
2013 | MS | Mar | 1 |
2013 | NC | Apr | 1 |
2013 | NC | Aug | 1 |
2013 | NC | Dec | 2 |
2013 | NC | Feb | 2 |
2013 | NC | Jan | 1 |
2013 | NC | Jul | 1 |
2013 | NC | Jun | 2 |
2013 | NC | Mar | 1 |
2013 | NC | May | 2 |
2013 | NC | Oct | 2 |
2013 | NC | Sep | 3 |
2013 | NE | Aug | 1 |
2013 | NE | Oct | 1 |
2013 | NJ | Aug | 3 |
2013 | NJ | Dec | 2 |
2013 | NJ | Jul | 2 |
2013 | NJ | Jun | 1 |
2013 | NJ | May | 4 |
2013 | NM | Jan | 1 |
2013 | NM | Jul | 2 |
2013 | NM | Oct | 1 |
2013 | NV | Dec | 1 |
2013 | NV | Jun | 1 |
2013 | NV | May | 1 |
2013 | NV | Oct | 2 |
2013 | NV | Sep | 1 |
2013 | NY | Aug | 2 |
2013 | NY | Dec | 3 |
2013 | NY | Jul | 3 |
2013 | NY | Jun | 1 |
2013 | NY | Mar | 3 |
2013 | NY | Nov | 2 |
2013 | NY | Oct | 1 |
2013 | NY | Sep | 1 |
2013 | OH | Apr | 1 |
2013 | OH | Aug | 3 |
2013 | OH | Dec | 1 |
2013 | OH | Jan | 2 |
2013 | OH | Jul | 1 |
2013 | OH | May | 1 |
2013 | OH | Nov | 2 |
2013 | OH | Sep | 1 |
2013 | OK | Aug | 1 |
2013 | OK | Feb | 1 |
2013 | OK | Jan | 1 |
2013 | OK | Jul | 1 |
2013 | OK | Nov | 1 |
2013 | OK | Oct | 2 |
2013 | PA | Apr | 4 |
2013 | PA | Aug | 3 |
2013 | PA | Jan | 1 |
2013 | PA | Jun | 1 |
2013 | PA | May | 3 |
2013 | PA | Nov | 1 |
2013 | PA | Oct | 3 |
2013 | PA | Sep | 1 |
2013 | PR | May | 2 |
2013 | RI | Jun | 1 |
2013 | SC | Apr | 1 |
2013 | SC | Aug | 2 |
2013 | SC | Jul | 1 |
2013 | SC | Jun | 1 |
2013 | SC | Oct | 1 |
2013 | TN | Dec | 1 |
2013 | TN | Feb | 2 |
2013 | TN | Jun | 2 |
2013 | TN | May | 3 |
2013 | TN | Sep | 2 |
2013 | TX | Aug | 1 |
2013 | TX | Dec | 1 |
2013 | TX | Jul | 2 |
2013 | TX | Jun | 1 |
2013 | TX | Mar | 2 |
2013 | TX | May | 1 |
2013 | TX | Nov | 3 |
2013 | TX | Oct | 3 |
2013 | TX | Sep | 1 |
2013 | UT | Feb | 1 |
2013 | VA | Apr | 2 |
2013 | VA | Aug | 1 |
2013 | VA | Jan | 1 |
2013 | VA | Jun | 4 |
2013 | VA | May | 3 |
2013 | WA | Apr | 1 |
2013 | WA | Jul | 1 |
2013 | WA | Mar | 1 |
2013 | WA | Sep | 1 |
2013 | WI | Jun | 1 |
2013 | WV | Dec | 1 |
2013 | WV | Jul | 1 |
2013 | WV | Sep | 1 |
yr_2013_df2_inc %>%
ggplot( aes(x=total_incidents, y=State)) +
geom_segment(aes(yend=State), xend=0, color='blue') +
geom_point(size=4, aes(color=month)) +
geom_text(aes(label=total_incidents), vjust=-1, hjust=.5,color='black') +
scale_color_brewer(palette="Set2", limits=c('Jan', 'Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec')) +
ggtitle("Total Incidents by State and Month") +
xlab("Incidents by month") + ylab("City")
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning: Removed 68 rows containing missing values (geom_point).
knitr::opts_chunk$set(echo = TRUE)
Is mass shooting predictable in a State based on the prior data ?. I will be using 2013 data for analysis.
What are the cases, and how many are there?
Each case represents a City and State in the united states. There observations in the given data set.
Data is available in Buzfeed News. There are data files for each year from 2013 to 2015.
This study is observational.
Data is available under the below link. https://github.com/BuzzFeedNews/2015-12-mass-shooting-intervals/tree/master/data
Response variable is the number of incidents in a month in a city, State. It is numerical.
Number of incidents each month. Has the incidents happened each month what is the count of it. This need to be done for atleast 3 years to decide which city has highest incident and the reoccurrence chances each month.
Provide summary statistics relevant to your research question. For example, if you’re comparing means across groups provide means, SDs, sample sizes of each group. This step requires the use of R, hence a code chunk is provided below. Insert more code chunks as needed.