require(lubridate)
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
require(dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
require(tidyr)
## Loading required package: tidyr
require(stringr)
## Loading required package: stringr
require(RMySQL)
## Loading required package: RMySQL
require(doBy)
## Loading required package: doBy
require(knitr)
## Loading required package: knitr
require(ggplot2)
## Loading required package: ggplot2
require(reshape)
## Loading required package: reshape
## 
## Attaching package: 'reshape'
## The following objects are masked from 'package:tidyr':
## 
##     expand, smiths
## The following object is masked from 'package:dplyr':
## 
##     rename
## The following object is masked from 'package:lubridate':
## 
##     stamp
# Set the knitr chunk default echo = TRUE so code chunks are echoed in the
# rendered report.
knitr::opts_chunk$set(echo = TRUE)

Data Preparation

# Download the 2013 shooter data set (comma-separated) from GitHub.
yr_2013 <- read.csv(
  "https://raw.githubusercontent.com/raghu74us/606/master/2013_shooter.csv",
  sep = ","
)

# Retain only the four columns used in the analysis.
yr_2013 <- yr_2013[c("date", "killed", "wounded", "location")]

# Inspect the structure before the location is split into city and state.
str(yr_2013)
## 'data.frame':    363 obs. of  4 variables:
##  $ date    : Factor w/ 210 levels "1/1/13","1/10/13",..: 1 1 1 1 10 11 11 11 2 3 ...
##  $ killed  : int  4 1 0 1 4 5 2 3 3 1 ...
##  $ wounded : int  0 3 4 4 0 0 2 1 2 4 ...
##  $ location: Factor w/ 249 levels "Aguas Buenas, PR",..: 199 87 135 122 9 228 76 50 154 229 ...
     # Preview the first five rows.
     head(yr_2013, n = 5)
##     date killed wounded       location
## 1 1/1/13      4       0 Sacramento, CA
## 2 1/1/13      1       3  Hawthorne, CA
## 3 1/1/13      0       4 McKeesport, PA
## 4 1/1/13      1       4     Lorain, OH
## 5 1/5/13      4       0     Aurora, CO
     # Split each "City, ST" location into its pieces.
     cit_st <- strsplit(as.character(yr_2013$location), ", ", fixed = TRUE)

     # Flag locations that are not exactly "City, ST". Feeding unlist(cit_st)
     # straight into a two-column matrix would silently shift every following
     # city/state pair as soon as one location had no separator or more than one.
     n_bad <- sum(lengths(cit_st) != 2L)
     if (n_bad > 0L) {
       warning(n_bad, " location value(s) are not of the form 'City, ST'",
               call. = FALSE)
     }

     # Build the (city, state) pair row by row: the last piece is the state and
     # everything before it is the city; a value without a separator keeps its
     # text as the city and gets an NA state. Well-formed rows are unaffected.
     mat <- t(vapply(
       cit_st,
       function(parts) {
         k <- length(parts)
         if (k >= 2L) {
           c(paste(parts[-k], collapse = ", "), parts[k])
         } else {
           c(if (k == 1L) parts else NA_character_, NA_character_)
         }
       },
       character(2)
     ))
     df <- as.data.frame(mat)
     yr_2013_cit_st <- cbind(yr_2013, df)
     colnames(yr_2013_cit_st) <- c("date", "killed", "wounded", "location",
                                   "City", "State")

     # Parse the month/day/year text into Date values so that day, month and
     # year can be derived from it.
     yr_2013_cit_st$date <- mdy(yr_2013_cit_st$date)

     str(yr_2013_cit_st)
## 'data.frame':    363 obs. of  6 variables:
##  $ date    : Date, format: "2013-01-01" "2013-01-01" ...
##  $ killed  : int  4 1 0 1 4 5 2 3 3 1 ...
##  $ wounded : int  0 3 4 4 0 0 2 1 2 4 ...
##  $ location: Factor w/ 249 levels "Aguas Buenas, PR",..: 199 87 135 122 9 228 76 50 154 229 ...
##  $ City    : Factor w/ 244 levels "Aguas Buenas",..: 195 87 131 119 9 223 76 50 150 224 ...
##  $ State   : Factor w/ 49 levels "AL","AZ","CA",..: 3 3 37 34 4 36 28 46 18 1 ...
     # Derive year, month and day columns from the parsed date.
     y1 <- year(yr_2013_cit_st$date)
     # month.abb always yields the English abbreviations ("Jan", "Feb", ...).
     # format(date, "%b") follows the session locale, which would not match the
     # English month names the plot uses as colour-scale limits.
     m1 <- month.abb[month(yr_2013_cit_st$date)]
     d1 <- format(yr_2013_cit_st$date, "%d")

     # Assemble the new columns as a data frame rather than with cbind() on bare
     # vectors: cbind(d1, m1, y1) produces a character matrix, which silently
     # turns the numeric year into text.
     df1 <- data.frame(day = d1, month = m1, year = y1)

     yr_2013_df2 <- cbind(yr_2013_cit_st, df1)
     #str(yr_2013_df2)
    
    # Total killed and wounded per year, state and month.
    # Only rows whose State is a two-character code are kept. The filter refers
    # to the piped column (State) instead of indexing yr_2013_df2 from outside
    # the pipeline, so it stays correct if earlier steps ever change the rows.
    # summarise_each(funs(sum)) is deprecated; the explicit sums below produce
    # the same killed / wounded columns.
    # As in the original pipeline, the result stays grouped by year and State.
    yr_2013_df2_kw <- yr_2013_df2 %>%
      filter(str_length(State) == 2) %>%
      group_by(year, State, month) %>%
      summarise(killed = sum(killed), wounded = sum(wounded)) %>%
      arrange(year, State, month)

    # Number of incidents (rows) per year, state and month, with the same
    # two-character State filter.
    yr_2013_df2_inc <- yr_2013_df2 %>%
      filter(str_length(State) == 2) %>%
      group_by(year, State, month) %>%
      summarise(total_incidents = n()) %>%
      arrange(year, State, month)

    kable(yr_2013_df2_kw)
year State month killed wounded
2013 AL Dec 6 6
2013 AL Jan 1 4
2013 AL Jul 1 7
2013 AL Mar 3 1
2013 AL May 0 4
2013 AL Oct 4 0
2013 AZ Apr 2 4
2013 AZ Jan 3 1
2013 AZ May 5 4
2013 AZ Nov 5 3
2013 AZ Oct 5 4
2013 CA Apr 3 13
2013 CA Aug 3 12
2013 CA Dec 4 0
2013 CA Feb 12 25
2013 CA Jan 5 7
2013 CA Jul 6 15
2013 CA Jun 10 12
2013 CA Mar 7 23
2013 CA May 6 16
2013 CA Nov 2 23
2013 CA Oct 4 35
2013 CA Sep 6 19
2013 CO Feb 3 1
2013 CO Jan 4 0
2013 CO Jun 0 4
2013 CO Sep 0 5
2013 CT Dec 4 0
2013 CT Jul 0 4
2013 CT Oct 1 5
2013 CT Sep 1 4
2013 DC Jan 0 5
2013 DC Jul 0 4
2013 DC Mar 0 13
2013 DC Nov 0 4
2013 DC Sep 14 11
2013 DE Aug 0 4
2013 DE Dec 0 4
2013 DE Feb 3 2
2013 DE Sep 0 4
2013 FL Apr 1 8
2013 FL Aug 3 2
2013 FL Dec 7 10
2013 FL Feb 2 2
2013 FL Jul 12 17
2013 FL Mar 0 9
2013 FL Nov 7 1
2013 FL Oct 2 10
2013 FL Sep 4 13
2013 GA Feb 0 8
2013 GA Jul 1 3
2013 GA Jun 0 7
2013 GA Mar 1 3
2013 GA May 0 4
2013 GA Nov 0 4
2013 GA Sep 3 1
2013 IA Mar 1 3
2013 IL Apr 6 1
2013 IL Aug 2 7
2013 IL Feb 0 4
2013 IL Jul 5 21
2013 IL Jun 8 31
2013 IL Mar 0 7
2013 IL May 0 8
2013 IL Sep 1 21
2013 IN Aug 0 8
2013 IN Dec 1 3
2013 IN Jun 0 4
2013 IN Mar 1 3
2013 IN May 4 0
2013 IN Nov 0 4
2013 IN Sep 2 4
2013 KA Apr 4 0
2013 KA Dec 4 0
2013 KA Nov 4 0
2013 KA Sep 1 6
2013 KS Jul 1 8
2013 KY Apr 1 4
2013 KY Aug 4 0
2013 KY Dec 0 4
2013 KY Jul 2 2
2013 KY Jun 3 1
2013 KY Nov 1 3
2013 LA Dec 5 10
2013 LA Feb 0 4
2013 LA Jan 4 10
2013 LA Jun 0 4
2013 LA Mar 1 3
2013 LA May 0 19
2013 MA Apr 2 2
2013 MD Aug 3 8
2013 MD Jul 0 4
2013 MD Jun 2 7
2013 ME Jul 1 3
2013 MI Aug 0 5
2013 MI Dec 1 3
2013 MI Feb 0 4
2013 MI Jul 1 15
2013 MI Mar 0 4
2013 MI May 3 14
2013 MI Nov 3 12
2013 MI Sep 3 5
2013 MN Aug 2 2
2013 MN Feb 1 4
2013 MN Jun 2 2
2013 MN Nov 1 3
2013 Mo Aug 1 3
2013 MO Aug 2 6
2013 MO Jan 1 4
2013 MO Jun 7 20
2013 MO Mar 1 4
2013 MO Sep 2 6
2013 MS Jul 0 4
2013 MS Mar 2 2
2013 NC Apr 0 5
2013 NC Aug 0 4
2013 NC Dec 0 8
2013 NC Feb 3 5
2013 NC Jan 2 2
2013 NC Jul 1 3
2013 NC Jun 0 9
2013 NC Mar 0 4
2013 NC May 4 4
2013 NC Oct 3 6
2013 NC Sep 0 12
2013 NE Aug 4 0
2013 NE Oct 0 4
2013 NJ Aug 2 11
2013 NJ Dec 4 6
2013 NJ Jul 2 7
2013 NJ Jun 1 4
2013 NJ May 0 18
2013 NM Jan 5 0
2013 NM Jul 1 7
2013 NM Oct 0 4
2013 NV Dec 2 2
2013 NV Jun 2 2
2013 NV May 5 0
2013 NV Oct 3 6
2013 NV Sep 0 4
2013 NY Aug 2 6
2013 NY Dec 5 8
2013 NY Jul 1 12
2013 NY Jun 0 9
2013 NY Mar 7 8
2013 NY Nov 4 5
2013 NY Oct 0 4
2013 NY Sep 0 4
2013 OH Apr 4 0
2013 OH Aug 3 10
2013 OH Dec 2 2
2013 OH Jan 2 7
2013 OH Jul 0 5
2013 OH May 0 5
2013 OH Nov 3 5
2013 OH Sep 0 4
2013 OK Aug 4 0
2013 OK Feb 1 3
2013 OK Jan 5 0
2013 OK Jul 1 3
2013 OK Nov 4 1
2013 OK Oct 2 7
2013 PA Apr 4 12
2013 PA Aug 5 10
2013 PA Jan 0 4
2013 PA Jun 0 4
2013 PA May 0 12
2013 PA Nov 0 4
2013 PA Oct 3 17
2013 PA Sep 4 0
2013 PR May 7 9
2013 RI Jun 0 4
2013 SC Apr 1 4
2013 SC Aug 2 8
2013 SC Jul 2 2
2013 SC Jun 3 1
2013 SC Oct 6 0
2013 TN Dec 4 0
2013 TN Feb 2 7
2013 TN Jun 1 7
2013 TN May 5 7
2013 TN Sep 5 4
2013 TX Aug 4 4
2013 TX Dec 0 5
2013 TX Jul 4 5
2013 TX Jun 1 3
2013 TX Mar 4 6
2013 TX May 3 5
2013 TX Nov 6 21
2013 TX Oct 9 4
2013 TX Sep 5 0
2013 UT Feb 3 1
2013 VA Apr 2 7
2013 VA Aug 1 4
2013 VA Jan 3 1
2013 VA Jun 1 19
2013 VA May 5 8
2013 WA Apr 5 2
2013 WA Jul 0 4
2013 WA Mar 3 1
2013 WA Sep 0 6
2013 WI Jun 0 4
2013 WV Dec 1 3
2013 WV Jul 4 0
2013 WV Sep 0 6
     # Render the incident counts per year, state and month as a table.
     knitr::kable(yr_2013_df2_inc)
year State month total_incidents
2013 AL Dec 2
2013 AL Jan 1
2013 AL Jul 2
2013 AL Mar 1
2013 AL May 1
2013 AL Oct 1
2013 AZ Apr 1
2013 AZ Jan 1
2013 AZ May 2
2013 AZ Nov 2
2013 AZ Oct 2
2013 CA Apr 4
2013 CA Aug 3
2013 CA Dec 1
2013 CA Feb 7
2013 CA Jan 3
2013 CA Jul 5
2013 CA Jun 4
2013 CA Mar 6
2013 CA May 5
2013 CA Nov 5
2013 CA Oct 5
2013 CA Sep 5
2013 CO Feb 1
2013 CO Jan 1
2013 CO Jun 1
2013 CO Sep 1
2013 CT Dec 1
2013 CT Jul 1
2013 CT Oct 1
2013 CT Sep 1
2013 DC Jan 1
2013 DC Jul 1
2013 DC Mar 1
2013 DC Nov 1
2013 DC Sep 2
2013 DE Aug 1
2013 DE Dec 1
2013 DE Feb 1
2013 DE Sep 1
2013 FL Apr 2
2013 FL Aug 1
2013 FL Dec 4
2013 FL Feb 1
2013 FL Jul 4
2013 FL Mar 2
2013 FL Nov 2
2013 FL Oct 3
2013 FL Sep 4
2013 GA Feb 1
2013 GA Jul 1
2013 GA Jun 1
2013 GA Mar 1
2013 GA May 1
2013 GA Nov 1
2013 GA Sep 1
2013 IA Mar 1
2013 IL Apr 1
2013 IL Aug 2
2013 IL Feb 1
2013 IL Jul 5
2013 IL Jun 7
2013 IL Mar 1
2013 IL May 2
2013 IL Sep 3
2013 IN Aug 2
2013 IN Dec 1
2013 IN Jun 1
2013 IN Mar 1
2013 IN May 1
2013 IN Nov 1
2013 IN Sep 1
2013 KA Apr 1
2013 KA Dec 1
2013 KA Nov 1
2013 KA Sep 1
2013 KS Jul 2
2013 KY Apr 1
2013 KY Aug 1
2013 KY Dec 1
2013 KY Jul 1
2013 KY Jun 1
2013 KY Nov 1
2013 LA Dec 2
2013 LA Feb 1
2013 LA Jan 3
2013 LA Jun 1
2013 LA Mar 1
2013 LA May 1
2013 MA Apr 1
2013 MD Aug 2
2013 MD Jul 1
2013 MD Jun 2
2013 ME Jul 1
2013 MI Aug 1
2013 MI Dec 1
2013 MI Feb 1
2013 MI Jul 3
2013 MI Mar 1
2013 MI May 4
2013 MI Nov 2
2013 MI Sep 2
2013 MN Aug 1
2013 MN Feb 1
2013 MN Jun 1
2013 MN Nov 1
2013 Mo Aug 1
2013 MO Aug 2
2013 MO Jan 1
2013 MO Jun 5
2013 MO Mar 1
2013 MO Sep 2
2013 MS Jul 1
2013 MS Mar 1
2013 NC Apr 1
2013 NC Aug 1
2013 NC Dec 2
2013 NC Feb 2
2013 NC Jan 1
2013 NC Jul 1
2013 NC Jun 2
2013 NC Mar 1
2013 NC May 2
2013 NC Oct 2
2013 NC Sep 3
2013 NE Aug 1
2013 NE Oct 1
2013 NJ Aug 3
2013 NJ Dec 2
2013 NJ Jul 2
2013 NJ Jun 1
2013 NJ May 4
2013 NM Jan 1
2013 NM Jul 2
2013 NM Oct 1
2013 NV Dec 1
2013 NV Jun 1
2013 NV May 1
2013 NV Oct 2
2013 NV Sep 1
2013 NY Aug 2
2013 NY Dec 3
2013 NY Jul 3
2013 NY Jun 1
2013 NY Mar 3
2013 NY Nov 2
2013 NY Oct 1
2013 NY Sep 1
2013 OH Apr 1
2013 OH Aug 3
2013 OH Dec 1
2013 OH Jan 2
2013 OH Jul 1
2013 OH May 1
2013 OH Nov 2
2013 OH Sep 1
2013 OK Aug 1
2013 OK Feb 1
2013 OK Jan 1
2013 OK Jul 1
2013 OK Nov 1
2013 OK Oct 2
2013 PA Apr 4
2013 PA Aug 3
2013 PA Jan 1
2013 PA Jun 1
2013 PA May 3
2013 PA Nov 1
2013 PA Oct 3
2013 PA Sep 1
2013 PR May 2
2013 RI Jun 1
2013 SC Apr 1
2013 SC Aug 2
2013 SC Jul 1
2013 SC Jun 1
2013 SC Oct 1
2013 TN Dec 1
2013 TN Feb 2
2013 TN Jun 2
2013 TN May 3
2013 TN Sep 2
2013 TX Aug 1
2013 TX Dec 1
2013 TX Jul 2
2013 TX Jun 1
2013 TX Mar 2
2013 TX May 1
2013 TX Nov 3
2013 TX Oct 3
2013 TX Sep 1
2013 UT Feb 1
2013 VA Apr 2
2013 VA Aug 1
2013 VA Jan 1
2013 VA Jun 4
2013 VA May 3
2013 WA Apr 1
2013 WA Jul 1
2013 WA Mar 1
2013 WA Sep 1
2013 WI Jun 1
2013 WV Dec 1
2013 WV Jul 1
2013 WV Sep 1
   # Dot plot of monthly incident counts per state, one point per state/month,
   # coloured by month.
   # The colour scale needs 12 distinct colours. "Set2" only provides 8, which
   # left Sep-Dec without a colour and dropped those points from the plot;
   # "Set3" provides 12.
   month_levels <- c('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
   yr_2013_df2_inc %>%
     ggplot(aes(x = total_incidents, y = State)) +
     geom_segment(aes(yend = State), xend = 0, color = 'blue') +
     geom_point(size = 4, aes(color = month)) +
     geom_text(aes(label = total_incidents), vjust = -1, hjust = .5,
               color = 'black') +
     scale_color_brewer(palette = "Set3", limits = month_levels) +
     ggtitle("Total Incidents by State and Month") +
     # The y axis shows State, not City.
     xlab("Incidents by month") + ylab("State")

# Same chunk option as set at the top of the document (echo = TRUE).
knitr::opts_chunk$set(echo = TRUE)

Research question

Are mass shootings in a state predictable from prior data? I will use the 2013 data for this analysis.

Cases

What are the cases, and how many are there?

Each case represents a city and state in the United States. There are 363 observations in the given data set.

Data collection

Describe the method of data collection.

The data is available from BuzzFeed News. There is a data file for each year from 2013 to 2015.

Type of study

What type of study is this (observational/experiment)?

This study is observational.

Data Source

The data is available at the link below: https://github.com/BuzzFeedNews/2015-12-mass-shooting-intervals/tree/master/data

Response

What is the response variable, and what type is it (numerical/categorical)?

The response variable is the number of incidents per month in a given city and state. It is numerical.

Explanatory

What is the explanatory variable, and what type is it (numerical/categorical)?

The explanatory variable is the number of incidents in each month: did incidents occur in a given month, and if so, how many? This needs to be tracked for at least three years to determine which city has the most incidents and how likely they are to recur each month.

Relevant summary statistics

Provide summary statistics relevant to your research question. For example, if you’re comparing means across groups provide means, SDs, sample sizes of each group. This step requires the use of R, hence a code chunk is provided below. Insert more code chunks as needed.