Marie Daras LLC
Marie Daras LLC

Global Terrorism Database Analysis

Counter Terrorism
Counter Terrorism

These are EDAs (exploratory data analyses) and an analysis run on the Global Terrorism Database (GTD), which is publicly available and maintained by the University of Maryland: GTD Univ of MD

Here is the codebook for the dataset: GTD Codebook

This is our Roadmap for our analysis:

Roadmap
Roadmap
library(dplyr) # for data manipulation
library(stringr) # for data manipulation
library(ggplot2) # for data visualization
library(corrplot) # for correlations
library(ggplot2)
library(RColorBrewer)
library(nnet)
library(tidyr)
library(plyr)
library(leaflet)
library(sf)
library(tidyverse)

1) Bring in Our Data

# Load the raw GTD export; text columns stay character instead of factor.
df <- read.csv(
  file = "/cloud/project/gtd.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Show the first record to check which columns were read.
head(df, n = 1)
##    eventid iyear imonth iday approxdate extended resolution country
## 1 1.97e+11  1970      7    2                   0                 58
##          country_txt region                  region_txt provstate          city
## 1 Dominican Republic      2 Central America & Caribbean           Santo Domingo
##   latitude longitude specificity vicinity location summary crit1 crit2 crit3
## 1 18.45679 -69.95116           1        0                      1     1     1
##   doubtterr alternative alternative_txt multiple success suicide attacktype1
## 1         0          NA                        0       1       0           1
##   attacktype1_txt attacktype2 attacktype2_txt attacktype3 attacktype3_txt
## 1   Assassination          NA                          NA                
##   targtype1               targtype1_txt targsubtype1 targsubtype1_txt corp1
## 1        14 Private Citizens & Property           68   Named Civilian      
##        target1 natlty1        natlty1_txt targtype2 targtype2_txt targsubtype2
## 1 Julio Guzman      58 Dominican Republic        NA                         NA
##   targsubtype2_txt corp2 target2 natlty2 natlty2_txt targtype3 targtype3_txt
## 1                                     NA                    NA              
##   targsubtype3 targsubtype3_txt corp3 target3 natlty3 natlty3_txt  gname
## 1           NA                                     NA             MANO-D
##   gsubname gname2 gsubname2 gname3 gsubname3 motive guncertain1 guncertain2
## 1                                                             0          NA
##   guncertain3 individual nperps nperpcap claimed claimmode claimmode_txt claim2
## 1          NA          0     NA       NA      NA        NA                   NA
##   claimmode2 claimmode2_txt claim3 claimmode3 claimmode3_txt compclaim
## 1         NA                    NA         NA                       NA
##   weaptype1 weaptype1_txt weapsubtype1 weapsubtype1_txt weaptype2 weaptype2_txt
## 1        13       Unknown           NA                         NA              
##   weapsubtype2 weapsubtype2_txt weaptype3 weaptype3_txt weapsubtype3
## 1           NA                         NA                         NA
##   weapsubtype3_txt weaptype4 weaptype4_txt weapsubtype4 weapsubtype4_txt
## 1                         NA                         NA                 
##   weapdetail nkill nkillus nkillter nwound nwoundus nwoundte property
## 1                1      NA       NA      0       NA       NA        0
##   propextent propextent_txt propvalue propcomment ishostkid nhostkid nhostkidus
## 1         NA                       NA                     0       NA         NA
##   nhours ndays divert kidhijcountry ransom ransomamt ransomamtus ransompaid
## 1     NA    NA                           0        NA          NA         NA
##   ransompaidus ransomnote hostkidoutcome hostkidoutcome_txt nreleased addnotes
## 1           NA                        NA                           NA         
##   scite1 scite2 scite3 dbsource INT_LOG INT_IDEO INT_MISC INT_ANY related
## 1                          PGIS       0        0        0       0

2) EDAs - Exploratory Data Analysis & Data Cleaning

Selecting only the variables that you wish to keep

After dropping the variables we don’t want, these are the ones we are left with:

Data Dictionary
Data Dictionary
# Keep only the columns used in the rest of this analysis.
df2 <- df %>%
  select(
    eventid, iyear, success, imonth, iday,
    country_txt, region_txt, suicide,
    latitude, longitude,
    attacktype1_txt, targtype1_txt, target1, nkill
  )

# Show the first record of the reduced data frame.
head(df2, n = 1)
##    eventid iyear success imonth iday        country_txt
## 1 1.97e+11  1970       1      7    2 Dominican Republic
##                    region_txt suicide latitude longitude attacktype1_txt
## 1 Central America & Caribbean       0 18.45679 -69.95116   Assassination
##                 targtype1_txt      target1 nkill
## 1 Private Citizens & Property Julio Guzman     1

Change eventid from numeric to factor

# Store the event identifier as a factor instead of a number.
df2[["eventid"]] <- as.factor(df2[["eventid"]])

Checking that it went well

# Report the class of eventid after the conversion.
class(df2$eventid)
## [1] "factor"

Datatypes for all vars

# Print the structure of df2: one line per column with its type and first values.
str(df2)
## 'data.frame':    181691 obs. of  14 variables:
##  $ eventid        : Factor w/ 181691 levels "197000000001",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ iyear          : int  1970 1970 1970 1970 1970 1970 1970 1970 1970 1970 ...
##  $ success        : int  1 1 1 1 1 1 0 1 1 1 ...
##  $ imonth         : int  7 0 1 1 1 1 1 1 1 1 ...
##  $ iday           : int  2 0 0 0 0 1 2 2 2 3 ...
##  $ country_txt    : chr  "Dominican Republic" "Mexico" "Philippines" "Greece" ...
##  $ region_txt     : chr  "Central America & Caribbean" "North America" "Southeast Asia" "Western Europe" ...
##  $ suicide        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ latitude       : num  18.5 19.4 15.5 38 33.6 ...
##  $ longitude      : num  -70 -99.1 120.6 23.8 130.4 ...
##  $ attacktype1_txt: chr  "Assassination" "Hostage Taking (Kidnapping)" "Assassination" "Bombing/Explosion" ...
##  $ targtype1_txt  : chr  "Private Citizens & Property" "Government (Diplomatic)" "Journalists & Media" "Government (Diplomatic)" ...
##  $ target1        : chr  "Julio Guzman" "Nadine Chaval, daughter" "Employee" "U.S. Embassy" ...
##  $ nkill          : int  1 0 1 NA NA 0 0 0 0 0 ...

EDAs

# Per-column summary of df2 (quantiles and NA counts for numeric columns).
summary(df2)
##          eventid           iyear         success           imonth      
##  197000000001:     1   Min.   :1970   Min.   :0.0000   Min.   : 0.000  
##  197000000002:     1   1st Qu.:1991   1st Qu.:1.0000   1st Qu.: 4.000  
##  197001000001:     1   Median :2009   Median :1.0000   Median : 6.000  
##  197001000002:     1   Mean   :2003   Mean   :0.8896   Mean   : 6.467  
##  197001000003:     1   3rd Qu.:2014   3rd Qu.:1.0000   3rd Qu.: 9.000  
##  197001010002:     1   Max.   :2017   Max.   :1.0000   Max.   :12.000  
##  (Other)     :181685                                                   
##       iday       country_txt         region_txt           suicide       
##  Min.   : 0.00   Length:181691      Length:181691      Min.   :0.00000  
##  1st Qu.: 8.00   Class :character   Class :character   1st Qu.:0.00000  
##  Median :15.00   Mode  :character   Mode  :character   Median :0.00000  
##  Mean   :15.51                                         Mean   :0.03651  
##  3rd Qu.:23.00                                         3rd Qu.:0.00000  
##  Max.   :31.00                                         Max.   :1.00000  
##                                                                         
##     latitude        longitude         attacktype1_txt    targtype1_txt     
##  Min.   :-53.16   Min.   :-86185896   Length:181691      Length:181691     
##  1st Qu.: 11.51   1st Qu.:        5   Class :character   Class :character  
##  Median : 31.47   Median :       43   Mode  :character   Mode  :character  
##  Mean   : 23.50   Mean   :     -459                                        
##  3rd Qu.: 34.69   3rd Qu.:       69                                        
##  Max.   : 74.63   Max.   :      179                                        
##  NA's   :4556     NA's   :4557                                             
##    target1              nkill         
##  Length:181691      Min.   :   0.000  
##  Class :character   1st Qu.:   0.000  
##  Mode  :character   Median :   0.000  
##                     Mean   :   2.403  
##                     3rd Qu.:   2.000  
##                     Max.   :1570.000  
##                     NA's   :10313

3) Plot Terror Attacks By Region & Year

# Count incidents for every year/region pair.
attacks <- tally(group_by(df2, iyear, region_txt))

Let’s look at the number of terror attacks by year, binned

# Count incidents per region and display the resulting table.
region <- tally(group_by(df2, region_txt))
print(region)
## # A tibble: 12 × 2
##    region_txt                      n
##    <chr>                       <int>
##  1 Australasia & Oceania         282
##  2 Central America & Caribbean 10344
##  3 Central Asia                  563
##  4 East Asia                     802
##  5 Eastern Europe               5144
##  6 Middle East & North Africa  50474
##  7 North America                3456
##  8 South America               18978
##  9 South Asia                  44974
## 10 Southeast Asia              12485
## 11 Sub-Saharan Africa          17550
## 12 Western Europe              16639

Binning years by decade

# Label each incident with its decade, e.g. 1987 -> "1980's".
# The label is derived arithmetically from the integer year instead of
# matching against hand-typed lists of year strings: the lists contained
# duplicated entries and put 2020 into "2010's". Missing years stay NA.
df2$decade <- ifelse(
  is.na(df2$iyear),
  NA_character_,
  paste0((df2$iyear %/% 10L) * 10L, "'s")
)
library(ggplot2)

# Yearly incident counts, drawn as one line per region.
ggplot(
  data = attacks,
  mapping = aes(x = iyear, y = n, color = region_txt)
) +
  geom_line()

The trend lines show a sharp increase in global terrorist attacks after 2000, with an especially sharp increase after 2010.

4) Interactive Table of Attacks By Year & Regions

# Count incidents for every region / decade / attack-type combination.
attacks2 <- tally(group_by(df2, region_txt, decade, attacktype1_txt))

# Preview the first five rows.
head(attacks2, n = 5)
## # A tibble: 5 × 4
## # Groups:   region_txt, decade [2]
##   region_txt            decade attacktype1_txt                    n
##   <chr>                 <chr>  <chr>                          <int>
## 1 Australasia & Oceania 1970's Armed Assault                      1
## 2 Australasia & Oceania 1970's Assassination                      6
## 3 Australasia & Oceania 1970's Bombing/Explosion                  8
## 4 Australasia & Oceania 1970's Facility/Infrastructure Attack     1
## 5 Australasia & Oceania 1980's Armed Assault                     15

Rename Columns

# Give the first three columns presentation-friendly names in one assignment.
colnames(attacks2)[1:3] <- c("Region", "Decade", "Attack Type")

# Preview the renamed table.
head(attacks2, n = 5)
## # A tibble: 5 × 4
## # Groups:   Region, Decade [2]
##   Region                Decade `Attack Type`                      n
##   <chr>                 <chr>  <chr>                          <int>
## 1 Australasia & Oceania 1970's Armed Assault                      1
## 2 Australasia & Oceania 1970's Assassination                      6
## 3 Australasia & Oceania 1970's Bombing/Explosion                  8
## 4 Australasia & Oceania 1970's Facility/Infrastructure Attack     1
## 5 Australasia & Oceania 1980's Armed Assault                     15
 # Encode every character column for display.
 # The table is ungrouped first: on a grouped tibble, mutate_if() skips the
 # grouping columns, so the first two columns were left unencoded.
 # dplyr:: is spelled out because plyr is attached after dplyr at the top of
 # this file and masks mutate().
 attacks2 <- attacks2 %>%
   dplyr::ungroup() %>%
   dplyr::mutate(dplyr::across(where(is.character), utf8::utf8_encode))

Building an interactive data table with export buttons for attacks by Region, Decade and Attack Type

library(data.table)
library(DT)
# Render attacks2 as an interactive table with the Buttons extension enabled
# and copy / csv / excel / print / pdf buttons configured.
datatable(
  attacks2,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "csv", "excel", "print", "pdf")
  )
)

5) Map of Attacks By Region

Building our map dataframe

We’re doing it by region because country borders have changed over the decades as politics and wars have redrawn them

We are going to use the libraries for mapping

library(tidyverse)
library(rnaturalearth)
library(sf)

Get world data

# Medium-scale world country polygons as an sf object, without Antarctica.
# The filter previously compared against the misspelling "Anarctica", which
# matches no row, so Antarctica was never removed.
world <- ne_countries(scale = "medium", returnclass = "sf") %>%
  filter(admin != "Antarctica")

Build our dataframe to join

Terrorist Attacks Per Country/Region

# Count incidents for every country/decade pair.
terrorattacks <- tally(group_by(df2, country_txt, decade))

Rename Columns for Joining

# Rename the three columns in one assignment; the country column becomes
# "region", the key name used when joining later on.
colnames(terrorattacks)[1:3] <- c("region", "Decade", "Freq")

# Preview the renamed table.
head(terrorattacks, n = 5)
## # A tibble: 5 × 3
## # Groups:   region [1]
##   region      Decade  Freq
##   <chr>       <chr>  <int>
## 1 Afghanistan 1970's     4
## 2 Afghanistan 1980's    22
## 3 Afghanistan 1990's    98
## 4 Afghanistan 2000's  1949
## 5 Afghanistan 2010's 10658

Add lat and long

# Load the lookup table of country names with a latitude/longitude pair.
latlong <- read.csv(
  file = "/cloud/project/countrieslatlong.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Show the first record to check the column names.
head(latlong, n = 1)
##          name lat long
## 1 Afghanistan  33   65

Rename name to region

# Name the key column "region" so it matches the key in terrorattacks.
names(latlong)[1] <- "region"

# Left outer merge: keep every terrorattacks row, attaching lat/long where
# the country name is found in latlong.
worldattacks <- merge(
  x = terrorattacks,
  y = latlong,
  by = "region",
  all.x = TRUE
)

Plot World Map

# World outline as a data frame of polygon vertices.
world_map <- map_data(map = "world")

Retrieve the world data and merge with terrorattacks data

# Collapse the per-decade rows to one total per country before joining.
# Joining the per-decade table directly repeats every polygon vertex once per
# decade, and a polygon can carry only a single fill value, so the map did
# not show a well-defined count.
# dplyr:: is spelled out because plyr is attached after dplyr at the top of
# this file and masks summarise().
attacks_per_country <- worldattacks %>%
  dplyr::group_by(region) %>%
  dplyr::summarise(
    Freq = sum(Freq),
    lat = lat[1],
    long = long[1],
    .groups = "drop"
  )

# Attach the totals to the polygon vertices; countries without a matching
# name keep Freq = NA.
terrorattacks_map <- left_join(world_map, attacks_per_country, by = "region")

# Create the map. long.x / lat.x are the polygon coordinates from world_map;
# the .x suffix appears because the totals table also has lat/long columns.
ggplot(terrorattacks_map, aes(long.x, lat.x, group = group)) +
  geom_polygon(aes(fill = Freq), color = "lavender") +
  scale_fill_viridis_c(option = "C")