This data was obtained from the Global Terrorism Database (GTD), an open-source database that contains information about terrorist attacks around the world from 1970 to 2017. The GTD includes information about domestic and international terrorist incidents that occurred during this period and now includes more than 180,000 attacks. The database is maintained by researchers at the National Consortium for the Study of Terrorism and Responses to Terrorism, headquartered at the University of Maryland. The dataset contains many missing observations; because it is an official record of a sensitive topic, we do not alter or impute them and leave the data as it is.
library(tidyverse)
library(leaflet)
library(treemap)
library(rmarkdown)
library(highcharter)
library(viridis)
library(ggplot2)
library(tidyr)
library(dplyr)
library(viridisLite)
library(RColorBrewer)
library(countrycode)
library(wordcloud)
library(tidytext)
library(tm)
library(plotly)
# Read the GTD extract from the working directory, then print a structural
# overview (column names, types and leading values) of the loaded table.
data <- read.csv(file = "gtd.csv")
str(object = data)
## 'data.frame': 181691 obs. of 135 variables:
## $ eventid : num 1.97e+11 1.97e+11 1.97e+11 1.97e+11 1.97e+11 ...
## $ iyear : int 1970 1970 1970 1970 1970 1970 1970 1970 1970 1970 ...
## $ imonth : int 7 0 1 1 1 1 1 1 1 1 ...
## $ iday : int 2 0 0 0 0 1 2 2 2 3 ...
## $ approxdate : Factor w/ 2245 levels "","01/04/2000",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ extended : int 0 0 0 0 0 0 0 0 0 0 ...
## $ resolution : Factor w/ 1860 levels "","1/1/1978",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ country : int 58 130 160 78 101 217 218 217 217 217 ...
## $ country_txt : Factor w/ 205 levels "Afghanistan",..: 48 117 141 69 91 190 191 190 190 190 ...
## $ region : int 2 1 5 8 4 1 3 1 1 1 ...
## $ region_txt : Factor w/ 12 levels "Australasia & Oceania",..: 2 7 10 12 4 7 8 7 7 7 ...
## $ provstate : Factor w/ 2856 levels "","(Region) of Republican Subordination (Province)",..: 1 784 2505 188 810 1016 1648 441 2766 2766 ...
## $ city : Factor w/ 36675 levels "","62 miles South of Dushanbe",..: 29944 22265 34673 2533 11312 6349 22807 24828 20416 20416 ...
## $ latitude : num 18.5 19.4 15.5 38 33.6 ...
## $ longitude : num -70 -99.1 120.6 23.8 130.4 ...
## $ specificity : int 1 1 4 1 1 1 1 1 1 1 ...
## $ vicinity : int 0 0 0 0 0 0 0 0 0 0 ...
## $ location : Factor w/ 44110 levels "","\"Colony 39\" settlement in uncleared areas of Ampara district, in eastern Sri Lanka",..: 1 1 1 1 1 1 1 3743 1 1 ...
## $ summary : Factor w/ 112493 levels "","00/00/2014: Sometime between February 23 and March 1, 2014, assailants attacked the 115th Infantry Brigade in L"| __truncated__,..: 1 1 1 1 1 82767 1 82870 82869 82949 ...
## $ crit1 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ crit2 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ crit3 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ doubtterr : int 0 0 0 0 -9 0 0 1 0 0 ...
## $ alternative : int NA NA NA NA NA NA NA 2 NA NA ...
## $ alternative_txt : Factor w/ 6 levels "","Insurgency/Guerilla Action",..: 1 1 1 1 1 1 1 5 1 1 ...
## $ multiple : int 0 0 0 0 0 0 0 0 0 0 ...
## $ success : int 1 1 1 1 1 1 0 1 1 1 ...
## $ suicide : int 0 0 0 0 0 0 0 0 0 0 ...
## $ attacktype1 : int 1 6 1 3 7 2 1 3 7 7 ...
## $ attacktype1_txt : Factor w/ 9 levels "Armed Assault",..: 2 7 2 3 4 1 2 3 4 4 ...
## $ attacktype2 : int NA NA NA NA NA NA NA NA NA NA ...
## $ attacktype2_txt : Factor w/ 10 levels "","Armed Assault",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ attacktype3 : int NA NA NA NA NA NA NA NA NA NA ...
## $ attacktype3_txt : Factor w/ 9 levels "","Armed Assault",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ targtype1 : int 14 7 10 7 7 3 3 21 4 2 ...
## $ targtype1_txt : Factor w/ 22 levels "Abortion Related",..: 14 6 8 6 6 13 13 21 10 7 ...
## $ targsubtype1 : int 68 45 54 46 46 22 25 107 28 21 ...
## $ targsubtype1_txt : Factor w/ 113 levels "","Affiliated Institution",..: 60 17 89 21 21 75 78 20 52 29 ...
## $ corp1 : Factor w/ 33241 levels "","'Adayah Sector",..: 1 4451 32419 1 1 5454 31976 22914 25208 27187 ...
## $ target1 : Factor w/ 86008 levels "","'Add al-Fattah Isma'il, secretery general",..: 42245 50785 28617 80408 80382 16055 42032 27456 60433 65946 ...
## $ natlty1 : int 58 21 217 217 217 217 218 217 217 217 ...
## $ natlty1_txt : Factor w/ 216 levels "","Afghanistan",..: 52 18 201 201 201 201 202 201 201 201 ...
## $ targtype2 : int NA NA NA NA NA NA NA NA NA NA ...
## $ targtype2_txt : Factor w/ 23 levels "","Abortion Related",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ targsubtype2 : int NA NA NA NA NA NA NA NA NA NA ...
## $ targsubtype2_txt : Factor w/ 108 levels "","Affiliated Institution",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ corp2 : Factor w/ 2692 levels "","\"L'evenement Du Jeudi\", a magazine",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ target2 : Factor w/ 5044 levels "","(District) Survey Office",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ natlty2 : int NA NA NA NA NA NA NA NA NA NA ...
## $ natlty2_txt : Factor w/ 159 levels "","Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ targtype3 : int NA NA NA NA NA NA NA NA NA NA ...
## $ targtype3_txt : Factor w/ 21 levels "","Airports & Aircraft",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ targsubtype3 : int NA NA NA NA NA NA NA NA NA NA ...
## $ targsubtype3_txt : Factor w/ 93 levels "","Affiliated Institution",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ corp3 : Factor w/ 423 levels "","A bus terminal",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ target3 : Factor w/ 721 levels "","(District) Prison",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ natlty3 : int NA NA NA NA NA NA NA NA NA NA ...
## $ natlty3_txt : Factor w/ 111 levels "","Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ gname : Factor w/ 3537 levels "1 May","14 K Triad",..: 1952 14 3410 3410 3410 682 3282 3410 2273 2273 ...
## $ gsubname : Factor w/ 1184 levels "","103rd Base Command",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ gname2 : Factor w/ 434 levels "","1 May","8 March Coalition",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ gsubname2 : Factor w/ 61 levels "","101st Battalion",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ gname3 : Factor w/ 117 levels "","313 Brigade",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ gsubname3 : Factor w/ 15 levels "","Ahrar al-Sham",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ motive : Factor w/ 14491 levels "","\"Disgruntled tribesmen have frequently targeted the pipeline in the past few years to try to force the governm"| __truncated__,..: 1 1 1 1 1 14016 1 1 14149 14149 ...
## $ guncertain1 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ guncertain2 : int NA NA NA NA NA NA NA NA NA NA ...
## $ guncertain3 : int NA NA NA NA NA NA NA NA NA NA ...
## $ individual : int 0 0 0 0 0 0 0 0 0 0 ...
## $ nperps : int NA 7 NA NA NA -99 3 -99 1 1 ...
## $ nperpcap : num NA NA NA NA NA -99 NA -99 1 1 ...
## $ claimed : int NA NA NA NA NA 0 NA 0 1 0 ...
## $ claimmode : int NA NA NA NA NA NA NA NA 1 NA ...
## $ claimmode_txt : Factor w/ 11 levels "","Call (post-incident)",..: 1 1 1 1 1 1 1 1 5 1 ...
## $ claim2 : int NA NA NA NA NA NA NA NA NA NA ...
## $ claimmode2 : int NA NA NA NA NA NA NA NA NA NA ...
## $ claimmode2_txt : Factor w/ 10 levels "","Call (post-incident)",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ claim3 : int NA NA NA NA NA NA NA NA NA NA ...
## $ claimmode3 : int NA NA NA NA NA NA NA NA NA NA ...
## $ claimmode3_txt : Factor w/ 9 levels "","Call (post-incident)",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ compclaim : int NA NA NA NA NA NA NA NA NA NA ...
## $ weaptype1 : int 13 13 13 6 8 5 5 6 8 8 ...
## $ weaptype1_txt : Factor w/ 12 levels "Biological","Chemical",..: 11 11 11 3 6 5 5 3 6 6 ...
## $ weapsubtype1 : int NA NA NA 16 NA 5 2 16 19 20 ...
## $ weapsubtype1_txt : Factor w/ 31 levels "","Arson/Fire",..: 1 1 1 28 1 29 3 28 14 7 ...
## $ weaptype2 : int NA NA NA NA NA NA NA NA NA NA ...
## $ weaptype2_txt : Factor w/ 12 levels "","Biological",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ weapsubtype2 : int NA NA NA NA NA NA NA NA NA NA ...
## $ weapsubtype2_txt : Factor w/ 29 levels "","Arson/Fire",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ weaptype3 : int NA NA NA NA NA NA NA NA NA NA ...
## $ weaptype3_txt : Factor w/ 11 levels "","Chemical",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ weapsubtype3 : int NA NA NA NA NA NA NA NA NA NA ...
## $ weapsubtype3_txt : Factor w/ 23 levels "","Arson/Fire",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ weaptype4 : int NA NA NA NA NA NA NA NA NA NA ...
## $ weaptype4_txt : Factor w/ 6 levels "","Explosives",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ weapsubtype4 : int NA NA NA NA NA NA NA NA NA NA ...
## $ weapsubtype4_txt : Factor w/ 17 levels "","Arson/Fire",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ weapdetail : Factor w/ 19149 levels "","\"A magnetic explosive device\"",..: 1 1 1 9899 11843 14591 8798 1 10602 13763 ...
## $ nkill : int 1 0 1 NA NA 0 0 0 0 0 ...
## [list output truncated]
# Incidents with more than 100 recorded fatalities. filter() keeps only rows
# where the condition is TRUE, so incidents with a missing `nkill` are
# excluded as well.
Major_attacks <- data %>%
  filter(nkill > 100)

# Interactive map with one circle per major attack. Clicking a circle opens
# an HTML popup describing the incident.
mymap <- leaflet() %>%
  # Tiles are requested over HTTPS so browsers do not block them as mixed
  # content when the rendered page itself is served over HTTPS; the
  # attribution credits the tile and map-data providers.
  addTiles(
    "https://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}.png",
    attribution = paste0(
      "&copy; <a href=\"https://www.openstreetmap.org/copyright\">",
      "OpenStreetMap</a> contributors &copy; ",
      "<a href=\"https://carto.com/attributions\">CARTO</a>"
    )
  ) %>%
  setView(12, 38, zoom = 3) %>%
  addCircles(
    data = Major_attacks,
    lat = ~latitude,
    lng = ~longitude,
    color = "brown",
    fillOpacity = 0.8,
    fillColor = "black",
    # The formula is evaluated against Major_attacks, like lat/lng above,
    # so every popup is built from the columns of its own row.
    popup = ~paste(
      "<strong>Year: </strong>", iyear,
      "<br><strong>City: </strong>", city,
      "<br><strong>Country: </strong>", country_txt,
      "<br><strong>Attack type: </strong>", attacktype1_txt,
      "<br><strong>Target: </strong>", targtype1_txt,
      "<br><strong>Group: </strong>", gname,
      "<br><strong>Summary: </strong>", summary,
      "<br><strong>Killed: </strong>", nkill
    )
  )
mymap
This map shows the major attacks in which more than 100 people were killed. For more information about an attack, zoom in and click on its point; the information related to that event will pop up.
# Number of recorded incidents per country, with the columns named
# `country.name` and `total`.
countries <- data %>%
  group_by(country.name = country_txt) %>%
  summarise(total = n())

# Attach ISO 3166-1 alpha-3 codes, which the choropleth uses as its join key.
countries[["iso3"]] <- countrycode(
  countries[["country.name"]],
  origin = "country.name",
  destination = "iso3c"
)
## Warning in countrycode(countries$country.name, origin = "country.name", : Some values were not matched unambiguously: Czechoslovakia, East Germany (GDR), International, Kosovo, North Yemen, Serbia-Montenegro, South Yemen, Yugoslavia
# Colour stops for the choropleth: `q` is the position on the colour axis
# (0 to 1, spaced exponentially) and `c` is the viridis colour at that
# position. substring(..., 1, 7) keeps "#RRGGBB" and drops the trailing
# alpha channel that viridis() appends.
stops <- data.frame(
  q = c(0, exp(1:10) / exp(10)),
  c = substring(viridis(10 + 1, option = "D"), 1, 7)
) %>%
  list_parse2()

# World geometry shipped with highcharter.
data(worldgeojson, package = "highcharter")

# World map coloured by the total number of incidents per country, joined
# to the geometry through the ISO3 code.
hc_plot <- highchart() %>%
  hc_add_series_map(worldgeojson, countries, value = "total", joinBy = "iso3") %>%
  hc_colorAxis(stops = stops) %>%
  hc_legend(enabled = TRUE) %>%
  hc_mapNavigation(enabled = TRUE) %>%
  hc_title(text = "Total Number of Attacks From 1970 to 2017") %>%
  # Only one theme is applied: a second hc_add_theme() call replaces the
  # theme set by an earlier one, so stacking themes has no visual effect.
  hc_add_theme(hc_theme_google())
hc_plot
The map above shows the total number of incidents in each country. Iraq, Afghanistan, Pakistan and India were the most affected by terrorism. The USA had around 2800 reported incidents.
# Total fatalities per year. The filter keeps only incidents with at least
# one recorded death, which also removes rows where `nkill` is missing.
year_killing <- data %>%
  group_by(Year = iyear) %>%
  filter(nkill > 0) %>%
  summarise(total_killing = sum(nkill)) %>%
  ungroup()

# Yearly average, used for both the reference line and its label so that the
# text can never disagree with the position of the line.
average_killing <- mean(year_killing$total_killing)

# Line chart of fatalities per year with a dashed line at the yearly average.
hc_plot <- highchart() %>%
  hc_add_series(year_killing, hcaes(x = Year, y = total_killing), type = "line") %>%
  hc_tooltip(
    crosshairs = TRUE,
    borderWidth = 1.5,
    headerFormat = "",
    pointFormat = paste("Year: <b>{point.Year}</b> <br>","Killing: <b>{point.total_killing}</b><br>")
  ) %>%
  hc_title(text = "Terrorism over the years") %>%
  hc_subtitle(text = "1970-2017") %>%
  hc_xAxis(title = list(text = "Year")) %>%
  hc_yAxis(
    title = list(text = "Number of people killed"),
    allowDecimals = FALSE,
    plotLines = list(list(
      color = "black", width = 1, dashStyle = "Dash",
      value = average_killing,
      label = list(
        # "%.0f" rounds to a whole number and never switches to scientific
        # notation.
        text = sprintf("Average people killed = %.0f", average_killing),
        style = list(color = 'red', fontSize = 20)
      )
    ))
  ) %>%
  hc_add_theme(hc_theme_ffx())
## Warning: `parse_quosure()` is deprecated as of rlang 0.2.0.
## Please use `parse_quo()` instead.
## This warning is displayed once per session.
# Render the yearly-fatalities line chart stored in hc_plot.
hc_plot
The graph above shows how rapidly terrorism-related killings increased after 2011, peaking in 2014, when around 44000 people were killed. After 2014 the number started declining, but the number of people killed each year is still well above the average for the period from 1970 to 2017. Let's explore other options to visualize the result.
# Number of incidents for every (year, region) combination.
regional_terrorism <- data %>%
  group_by(iyear, region_txt) %>%
  summarise(Events = n())

# Animated bubble chart: one frame per year and one bubble per region, with
# the bubble size following the number of incidents in that year.
plot_ly(
  data = regional_terrorism,
  x = ~iyear,
  y = ~Events,
  hoverinfo = "text",
  text = ~region_txt
) %>%
  add_markers(
    size = ~Events,
    color = ~region_txt,
    frame = ~iyear,
    ids = ~region_txt,
    marker = list(sizemode = "diameter", sizeref = 2)
  ) %>%
  animation_opts(frame = 100, transition = 30, easing = "elastic") %>%
  animation_slider(
    currentvalue = list(
      prefix = NULL,
      font = list(color = "white", size = 50)
    )
  ) %>%
  layout(
    paper_bgcolor = 'black',
    plot_bgcolor = 'black',
    xaxis = list(title = "Year"),
    yaxis = list(title = "Events")
  )
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in p$x$data[firstFrame] <- p$x$frames[[1]]$data: number of items to
## replace is not a multiple of replacement length
This animated graph shows how terrorism has changed over the years in different regions. It is fully interactive: you can focus on a particular region by deselecting the other regions. I made this to compare the regions with each other.
# Incident count per country, largest first.
country1 <- data %>%
  group_by(country_txt) %>%
  summarise(incidents = n()) %>%
  arrange(desc(incidents))

# Treemap with one rectangle per country, sized by its number of incidents.
treemap(
  dtf = country1,
  index = "country_txt",
  vSize = "incidents",
  palette = "RdYlBu",
  title = "Number of events - Terrorism",
  fontsize.title = 16
)
As expected, Iraq, Pakistan, Afghanistan and India are the countries most affected by terrorism. In the graph, the size of each rectangle shows the number of incidents that happened in that country.
# The 15 cities with the highest number of recorded fatalities.
# Every fatal incident counts towards its city's total, including incidents
# with a single death. filter() keeps only rows where the condition is TRUE,
# so incidents with a missing `nkill` are dropped without a separate
# drop_na() step.
killings_city <- data %>%
  filter(nkill > 0) %>%
  group_by(city) %>%
  summarise(total_killing = sum(nkill)) %>%
  arrange(desc(total_killing)) %>%
  head(n = 15)

# Horizontal bar chart, cities ordered by their fatality total.
ggplot(killings_city, aes(x = reorder(city, total_killing), y = total_killing)) +
  geom_bar(stat = "identity", fill = "red", col = "black") +
  coord_flip() +
  labs(x = "City", y = "Killings", title = "Number of people killed - CITY")
As mentioned in the introduction, this dataset has many observations filled with NA or Unknown, and we cannot change the data because it is an official record of a sensitive topic. Looking at the graph, we can see that the majority of the (REPORTED) terrorism-related killings happened in Baghdad and Mosul. The Unknown entry shows the number of killings that were recorded without a city name. We should explore further whether these Unknown records all belong to the same country or not.
# Fatalities from incidents whose city is recorded as "Unknown", totalled
# per country; only the 15 countries with the highest totals are kept.
unknown <- data %>%
  filter(city == "Unknown", nkill > 0) %>%
  group_by(country_txt) %>%
  summarise(total_killings = sum(nkill)) %>%
  arrange(desc(total_killings)) %>%
  head(n = 15)

# The categorical axis shows countries (country_txt), so it is labelled
# "Country".
ggplot(unknown, aes(x = reorder(country_txt, total_killings), y = total_killings)) +
  geom_bar(stat = "identity", fill = "red", col = "black") +
  coord_flip() +
  labs(x = "Country", y = "Number of people killed", title = "Number of people killed in the country Where city was not mentioned")
The Unknown value in the city field is not a single city; it actually covers a number of cities that were recorded as unknown, so when we accumulated the number of killings by city, Unknown appeared as if it were a single city. According to these findings, Baghdad suffered the most terrorism-related killings throughout these years.
# Perpetrator fatalities per region and year (incidents with a missing or
# zero `nkillter` are excluded by the filter).
killter <- data %>%
  filter(nkillter > 0) %>%
  group_by(region = region_txt, year = iyear) %>%
  summarise(terrorist_killed = sum(nkillter))

# Collapse to one row per year before plotting. With several rows per year,
# reorder() would rank the years by the mean of the per-region values rather
# than by the yearly total that the bar length shows.
killter_by_year <- killter %>%
  group_by(year) %>%
  summarise(terrorist_killed = sum(terrorist_killed))

# Horizontal bar chart, years ordered by the number of perpetrators killed.
ggplot(killter_by_year, aes(x = reorder(year, terrorist_killed), y = terrorist_killed)) +
  geom_bar(stat = "identity", fill = "brown") +
  coord_flip() +
  labs(x = "Year", y = "Terrorist killed", title = "Number of terrorist killed from 1970 - 2017")
This graph shows the number of terrorists killed throughout these years. As terrorism increased after 2011, the concerned authorities started operations against the terrorists, which led to an increased number of terrorists killed after 2011, and we can see the results of such operations in the fall of terrorism-related incidents after 2014.
# Total fatalities per region, largest first. Only incidents with at least
# one recorded death contribute.
region <- data %>%
  filter(nkill > 0) %>%
  group_by(region = region_txt) %>%
  summarise(killings = sum(nkill)) %>%
  arrange(desc(killings))

# Seven-step red palette; its two ends bound the colour axis.
reds_palette <- brewer.pal(7, "Reds")

# Treemap with one tile per region, sized and coloured by fatalities.
hc_plot <- hctreemap2(
  region,
  group_vars = "region",
  size_var = "killings",
  color_var = "killings",
  layoutAlgorithm = "squarified",
  levelIsConstant = FALSE,
  levels = list(
    list(level = 1, dataLabels = list(enabled = TRUE)),
    list(level = 2, dataLabels = list(enabled = FALSE)),
    list(level = 3, dataLabels = list(enabled = FALSE))
  )
)
hc_plot <- hc_plot %>%
  hc_colorAxis(minColor = reds_palette[1], maxColor = reds_palette[7]) %>%
  hc_tooltip(pointFormat = "<b>{point.region}</b>:<br>
killings: {point.colorValue:,.0f}")
hc_plot
The treemap above shows that the MENA (Middle East & North Africa) region has been the most affected by terrorism over the years, followed by South Asia and South America.
# The 20 perpetrator groups with the highest number of recorded fatalities.
# na.rm = TRUE is essential: without it, a single incident with a missing
# `nkill` turns the whole group's total into NA, and arrange(desc()) then
# sorts that group to the bottom, out of the top 20.
gang_killing <- data %>%
  group_by(gang = gname) %>%
  summarise(killing = sum(nkill, na.rm = TRUE)) %>%
  arrange(desc(killing)) %>%
  head(n = 20)

# Horizontal bar chart, groups ordered by their fatality total.
ggplot(gang_killing, aes(x = reorder(gang, killing), y = killing)) +
  geom_bar(fill = 'red', col = "black", stat = "identity") +
  theme_classic() +
  labs(x = "Gangs", y = "Number of people killed") +
  coord_flip()
According to the graph above, Al-Qaida killed the largest number of people.
# Fatalities grouped by `natlty1_txt`, keeping the 10 largest totals.
# NOTE(review): in the column listing `natlty1`/`natlty1_txt` sit with the
# target fields (targtype1, corp1, target1), and the GTD codebook describes
# them as the nationality of the target/victim, not of the perpetrators;
# the axis is labelled accordingly.
terrorist <- data %>%
  filter(nkill > 0) %>%
  group_by(natlty1_txt) %>%
  summarise(killing = sum(nkill)) %>%
  arrange(desc(killing)) %>%
  head(n = 10)

# Horizontal bar chart, nationalities ordered by their fatality total.
ggplot(terrorist, aes(x = reorder(natlty1_txt, killing), y = killing)) +
  geom_bar(fill = 'red', col = "black", stat = "identity") +
  theme_classic() +
  labs(x = "Nationality of Target", y = "Number of people killed") +
  coord_flip()
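Attacks against Iraqi targets killed almost 78000 people on record, the highest figure for any nationality, followed by Afghanistan. Attacks against targets from Pakistan, Nigeria and India each killed around 20000+ people on record. (Note that the `natlty1_txt` field records the nationality of the target of the attack, not the nationality of the terrorists.)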
# Fatalities per alternative designation (`alternative_txt`), largest first.
# Incidents without a recorded death are left out by the filter.
crime <- data %>%
  filter(nkill > 0) %>%
  group_by(alternative_txt) %>%
  summarise(killing = sum(nkill)) %>%
  arrange(desc(killing))

# Horizontal bar chart, categories ordered by their fatality total.
ggplot(
  data = crime,
  mapping = aes(x = reorder(alternative_txt, killing), y = killing)
) +
  geom_col(fill = 'red', colour = "black") +
  coord_flip() +
  labs(x = "Crime Type", y = "Number of people killed") +
  theme_classic()
This graph does not show much information, as the majority of the fields are filled with an empty string (""). If we remove those fields, Insurgency/Guerilla Action killed the majority of the people.
# Incidents per primary weapon type, leaving out the "Unknown" category.
weapon <- data %>%
  filter(weaptype1_txt != "Unknown") %>%
  group_by(weaptype1_txt) %>%
  summarise(Events = n())

# Horizontal bar chart of weapon types, ordered by frequency.
plot1 <- ggplot(
  data = weapon,
  mapping = aes(x = reorder(weaptype1_txt, Events), y = Events)
) +
  geom_col(fill = "Red", colour = "black") +
  coord_flip() +
  theme_classic() +
  labs(title = "Weapon Type", x = "Weapon Used", y = "Number of times")

# Incidents per primary weapon sub-type (no category is filtered out here).
weapon_subtype <- data %>%
  group_by(weapsubtype1_txt) %>%
  summarise(Events = n())

# Horizontal bar chart of weapon sub-types, ordered by frequency.
plot2 <- ggplot(
  data = weapon_subtype,
  mapping = aes(x = reorder(weapsubtype1_txt, Events), y = Events)
) +
  geom_col(fill = "Red", colour = "black") +
  coord_flip() +
  theme_classic() +
  labs(title = "Weapon Sub-Type", x = "Weapon Sub-type", y = "Number of times")

plot1
Explosives and firearms were used most frequently.
# Render the weapon sub-type chart stored in plot2.
plot2

# Incidents per primary attack type.
attack <- data %>%
  group_by(attacktype1_txt) %>%
  summarise(Events = n())

# Horizontal bar chart, one colour per attack type, ordered by frequency.
plot3 <- ggplot(
  data = attack,
  mapping = aes(
    x = reorder(attacktype1_txt, Events),
    y = Events,
    fill = attacktype1_txt
  )
) +
  geom_col(colour = "black") +
  labs(
    title = "Number of Attacks based on Attack Type",
    x = "Attack Type",
    y = "Events"
  ) +
  coord_flip()
plot3
Bombings/explosions were the attack type most frequently used by the terrorists, followed by armed assault.
# Incidents per primary target type.
target <- data %>%
  group_by(targtype1_txt) %>%
  summarise(Events = n())

# Horizontal bar chart of target types, ordered by frequency.
plot4 <- ggplot(
  data = target,
  mapping = aes(x = reorder(targtype1_txt, Events), y = Events)
) +
  geom_col(colour = "black", fill = "darkblue") +
  labs(title = "Target", x = "Target", y = "Events") +
  coord_flip()

# The 15 most frequent primary target sub-types.
target_subtype <- data %>%
  group_by(targsubtype1_txt) %>%
  summarise(Events = n()) %>%
  arrange(desc(Events)) %>%
  head(n = 15)

# Horizontal bar chart of those sub-types, ordered by frequency.
plot5 <- ggplot(
  data = target_subtype,
  mapping = aes(x = reorder(targsubtype1_txt, Events), y = Events)
) +
  geom_col(colour = "black", fill = "darkblue") +
  labs(title = "Target-Subtype", x = "Target-Subtype", y = "Events") +
  coord_flip()

plot4
Private citizens and property were targeted most often by the terrorists, followed by the military and the police.
# Render the target sub-type chart stored in plot5.
plot5
The dataset is available at:
https://www.kaggle.com/START-UMD/gtd