library(tidyverse)
library(dplyr)
library(ggplot2)
# Preview the first 10 rows of the dataset (run in the R console).
head(terror, n = 10)
## # A tibble: 10 × 135
## eventid iyear imonth iday approxdate extended resolution country
## <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dttm> <dbl>
## 1 1.97e11 1970 7 2 <NA> 0 NA 58
## 2 1.97e11 1970 0 0 <NA> 0 NA 130
## 3 1.97e11 1970 1 0 <NA> 0 NA 160
## 4 1.97e11 1970 1 0 <NA> 0 NA 78
## 5 1.97e11 1970 1 0 <NA> 0 NA 101
## 6 1.97e11 1970 1 1 <NA> 0 NA 217
## 7 1.97e11 1970 1 2 <NA> 0 NA 218
## 8 1.97e11 1970 1 2 <NA> 0 NA 217
## 9 1.97e11 1970 1 2 <NA> 0 NA 217
## 10 1.97e11 1970 1 3 <NA> 0 NA 217
## # ℹ 127 more variables: country_txt <chr>, region <dbl>, region_txt <chr>,
## # provstate <chr>, city <chr>, latitude <dbl>, longitude <dbl>,
## # specificity <dbl>, vicinity <dbl>, location <chr>, summary <chr>,
## # crit1 <dbl>, crit2 <dbl>, crit3 <dbl>, doubtterr <dbl>, alternative <dbl>,
## # alternative_txt <chr>, multiple <dbl>, success <dbl>, suicide <dbl>,
## # attacktype1 <dbl>, attacktype1_txt <chr>, attacktype2 <dbl>,
## # attacktype2_txt <chr>, attacktype3 <lgl>, attacktype3_txt <lgl>, …
# Size of the full dataset: row count first, then column count.
terror %>% nrow()
## [1] 209706
terror %>% ncol()
## [1] 135
The terror dataset contains 209,706 rows (records) and 135 columns.
# Collect every column name, then look up the one in third position.
variables <- colnames(terror)
variables[[3]]
## [1] "imonth"
# Rename the eventid column to event_id; all other columns are unchanged.
terror <- terror %>%
  rename(event_id = eventid)
# Keep the first four records whose country_txt is "France".
terror1 <- terror %>%
  dplyr::filter(country_txt == "France") %>%
  head(n = 4)

# Display only the leading identification, date and country columns.
terror1 %>%
  select(
    event_id, iyear, imonth, iday, approxdate,
    extended, resolution, country, country_txt
  )
## # A tibble: 4 × 9
## event_id iyear imonth iday approxdate extended resolution country
## <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dttm> <dbl>
## 1 1.97e11 1972 5 25 <NA> 0 NA 69
## 2 1.97e11 1972 5 25 <NA> 0 NA 69
## 3 1.97e11 1972 5 25 <NA> 0 NA 69
## 4 1.97e11 1972 5 25 <NA> 0 NA 69
## # ℹ 1 more variable: country_txt <chr>
Use filter() to extract all data between the years 1995 and 2011 (inclusive).
# Call this new dataset terror_short. (4pt)
# Keep only the records whose iyear falls in 1995-2011, both ends included.
terror_short <- terror %>%
  dplyr::filter(iyear >= 1995 & iyear <= 2011)
terror_short
## # A tibble: 46,626 × 135
## event_id iyear imonth iday approxdate extended resolution country
## <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dttm> <dbl>
## 1 2.00e11 1995 1 0 <NA> 0 NA 217
## 2 2.00e11 1995 1 0 <NA> 0 NA 217
## 3 2.00e11 1995 1 0 <NA> 0 NA 217
## 4 2.00e11 1995 1 1 <NA> 0 NA 202
## 5 2.00e11 1995 1 1 <NA> 0 NA 209
## 6 2.00e11 1995 1 1 <NA> 0 NA 55
## 7 2.00e11 1995 1 1 <NA> 0 NA 55
## 8 2.00e11 1995 1 1 <NA> 0 NA 55
## 9 2.00e11 1995 1 1 <NA> 0 NA 55
## 10 2.00e11 1995 1 1 <NA> 0 NA 83
## # ℹ 46,616 more rows
## # ℹ 127 more variables: country_txt <chr>, region <dbl>, region_txt <chr>,
## # provstate <chr>, city <chr>, latitude <dbl>, longitude <dbl>,
## # specificity <dbl>, vicinity <dbl>, location <chr>, summary <chr>,
## # crit1 <dbl>, crit2 <dbl>, crit3 <dbl>, doubtterr <dbl>, alternative <dbl>,
## # alternative_txt <chr>, multiple <dbl>, success <dbl>, suicide <dbl>,
## # attacktype1 <dbl>, attacktype1_txt <chr>, attacktype2 <dbl>, …
# count or nrow to complete this.
# Size of the filtered dataset: row count first, then column count.
nrow(terror_short)
## [1] 46626
ncol(terror_short)
## [1] 135
The terror_short dataset contains 46,626 rows (records) and 135 columns.
# Five countries with the most records in terror_short, largest count first.
terror_short %>%
  count(country_txt, name = "ncount", sort = TRUE) %>%
  head(n = 5)
## # A tibble: 5 × 2
## country_txt ncount
## <chr> <int>
## 1 Iraq 7742
## 2 Pakistan 4833
## 3 India 4638
## 4 Afghanistan 2935
## 5 Colombia 2585
# Country with the most records among rows whose region_txt is
# "Western Europe".
terror_short %>%
  filter(region_txt == "Western Europe") %>%
  count(country_txt, name = "ncount", sort = TRUE) %>%
  head(n = 1)
## # A tibble: 1 × 2
## country_txt ncount
## <chr> <int>
## 1 France 771
# Three most frequent attacktype1_txt values among rows whose region_txt is
# "Eastern Europe".
terror_short %>%
  filter(region_txt == "Eastern Europe") %>%
  count(attacktype1_txt, name = "ncount", sort = TRUE) %>%
  head(n = 3)
## # A tibble: 3 × 2
## attacktype1_txt ncount
## <chr> <int>
## 1 Bombing/Explosion 1370
## 2 Armed Assault 624
## 3 Assassination 259
# Count the records for every (iyear, imonth) pair, largest count first.
# `.groups = "drop"` returns an ungrouped tibble explicitly, so summarise()
# no longer emits its grouping message and no grouping by iyear lingers on
# the result.
terror_each_month <- terror_short %>%
  group_by(iyear, imonth) %>%
  summarise(number_of_attack = n(), .groups = "drop") %>%
  arrange(desc(number_of_attack))
terror_each_month
## # A tibble: 204 × 3
## iyear imonth number_of_attack
## <dbl> <dbl> <int>
## 1 2011 11 661
## 2 2007 6 588
## 3 2001 8 568
## 4 2008 7 554
## 5 2007 11 534
## 6 2010 9 528
## 7 2008 6 510
## 8 2008 4 507
## 9 2008 5 500
## 10 2009 7 467
## # ℹ 194 more rows
# Mean and standard deviation of number_of_attack for each imonth value,
# ordered from the largest to the smallest standard deviation.
terror_each_month %>%
  group_by(imonth) %>%
  summarise(
    mean = mean(number_of_attack),
    sd = sd(number_of_attack)
  ) %>%
  arrange(desc(sd))
## # A tibble: 12 × 3
## imonth mean sd
## <dbl> <dbl> <dbl>
## 1 11 242. 178.
## 2 6 244. 166.
## 3 8 257. 145.
## 4 4 227. 145.
## 5 5 239. 144.
## 6 7 255. 143.
## 7 12 200. 133.
## 8 9 208. 127.
## 9 2 204. 126.
## 10 10 229. 120.
## 11 3 219. 117.
## 12 1 219. 103.
# Box plot of number_of_attack with one box (and one colour) per imonth value.
ggplot(
  data = terror_each_month,
  mapping = aes(
    x = factor(imonth),
    y = number_of_attack,
    color = factor(imonth)
  )
) +
  geom_boxplot()
Based on the ggplot boxplot above, the month with the largest interquartile range in the number of attacks is either June or November.
# First and third quartiles of number_of_attack for each imonth value,
# ordered by the third quartile from highest to lowest.
terror_each_month %>%
  group_by(imonth) %>%
  summarise(
    q1 = quantile(number_of_attack, probs = 0.25),
    q3 = quantile(number_of_attack, probs = 0.75)
  ) %>%
  arrange(desc(q3))
## # A tibble: 12 × 3
## imonth q1 q3
## <dbl> <dbl> <dbl>
## 1 6 119 376
## 2 4 104 358
## 3 8 125 347
## 4 7 117 337
## 5 11 107 337
## 6 10 119 326
## 7 5 130 323
## 8 9 105 287
## 9 1 164 282
## 10 3 120 270
## 11 12 83 268
## 12 2 112 267
After validating this with the gap between Q3 and Q1, we can conclude that the largest interquartile range in the number of attacks occurs in June (376 − 119 = 257).
**Mostly, the number of attacks in each month throughout the year is around 200, but in certain periods, especially July and August, the frequency of attacks tends to increase. Therefore, the median numbers for these periods were high.**
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Print summary statistics for each column of the cars data set.
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.