Global Terrorism Database Analysis & Plots

##Global Terrorism Database Analysis by Dawn Daras MS

##This is an exploratory data analysis (EDA) of the Global Terrorism Database (GTD), which is publicly available and maintained by the University of Maryland: GTD Univ of MD

Here is the codebook for the dataset: GTD Codebook

Downloadable shapefile for the world map: World shapefile

Counter Terrorism

library(dplyr) # for data manipulation

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(stringr) # for data manipulation
library(ggplot2) # for data visualization
library(corrplot) # for correlations

## corrplot 0.92 loaded

library(Rtsne) # for tsne plotting
library(geomtextpath)
library(hrbrthemes)
library(ggplot2)
library(repr)
library(RColorBrewer)
library(e1071)
library(C50)
library(DMwR)

## Loading required package: lattice

## Loading required package: grid

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(factoextra)

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

library(pROC)

## Type 'citation("pROC")' for a citation.

## 
## Attaching package: 'pROC'

## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var

library(nnet)
library(tidyr)
library(lares)

## 
## Attaching package: 'lares'

## The following object is masked from 'package:e1071':
## 
##     impute

## The following objects are masked from 'package:hrbrthemes':
## 
##     scale_x_comma, scale_x_percent, scale_y_comma, scale_y_percent

library(Hmisc)

## 
## Attaching package: 'Hmisc'

## The following object is masked from 'package:lares':
## 
##     impute

## The following object is masked from 'package:e1071':
## 
##     impute

## The following objects are masked from 'package:dplyr':
## 
##     src, summarize

## The following objects are masked from 'package:base':
## 
##     format.pval, units

library(plyr)

## ------------------------------------------------------------------------------

## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)

## ------------------------------------------------------------------------------

## 
## Attaching package: 'plyr'

## The following objects are masked from 'package:Hmisc':
## 
##     is.discrete, summarize

## The following object is masked from 'package:DMwR':
## 
##     join

## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize

library(leaflet)
library(sf)

## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1; sf_use_s2() is TRUE

library(DT)
library(descr)

## 
## Attaching package: 'descr'

## The following object is masked from 'package:lares':
## 
##     crosstab

library(gt)

## 
## Attaching package: 'gt'

## The following object is masked from 'package:Hmisc':
## 
##     html

library(htmltools)
library(plotly)

## 
## Attaching package: 'plotly'

## The following objects are masked from 'package:plyr':
## 
##     arrange, mutate, rename, summarise

## The following object is masked from 'package:Hmisc':
## 
##     subplot

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(WDI)
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ plotly::arrange()   masks plyr::arrange(), dplyr::arrange()
## ✖ purrr::compact()    masks plyr::compact()
## ✖ plyr::count()       masks dplyr::count()
## ✖ plyr::desc()        masks dplyr::desc()
## ✖ plyr::failwith()    masks dplyr::failwith()
## ✖ plotly::filter()    masks dplyr::filter(), stats::filter()
## ✖ plyr::id()          masks dplyr::id()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ plotly::mutate()    masks plyr::mutate(), dplyr::mutate()
## ✖ plotly::rename()    masks plyr::rename(), dplyr::rename()
## ✖ Hmisc::src()        masks dplyr::src()
## ✖ plotly::summarise() masks plyr::summarise(), dplyr::summarise()
## ✖ plyr::summarize()   masks Hmisc::summarize(), dplyr::summarize()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(vembedr)

## 
## Attaching package: 'vembedr'
## 
## The following object is masked from 'package:lubridate':
## 
##     hms

# Load the raw GTD export, keeping text columns as character vectors.
df <- read.csv(
  file = "/cloud/project/gtd.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

#EDAs - Exploratory Data Analysis

##Explanation of Selected Variables:

##success - Success of a terrorist strike
##suicide - 1 = “Yes” The incident was a suicide attack. 0 = “No” There is no indication that the incident was a suicide attack
##attacktype1 - The general method of attack
##attacktype1_txt - The general method of attack and broad class of tactics used
##targtype1_txt - The general type of target/victim
##targsubtype1_txt - The more specific target category
##target1 - The specific person, building, or installation that was targeted and/or victimized
##natlty1_txt - The nationality of the target that was attacked
##gname - The name of the group that carried out the attack
##gsubname - Additional details about the group that carried out the attack, such as factions
##nperps - The total number of terrorists participating in the incident
##weaptype1_txt - General type of weapon used in the incident
##weapsubtype1_txt - More specific value for most of the Weapon Types
##nkill - The total number of confirmed fatalities for the incident
##nkillus - The number of U.S. citizens who died as a result of the incident

##Selecting only the variables that you wish to keep

# Reduce the raw table to the columns used in the rest of this analysis:
# event identity and date, location, attack/target descriptors and fatalities.
df2 <- df %>%
  select(
    eventid, iyear, success, imonth, iday,
    country_txt, region_txt, suicide,
    latitude, longitude,
    attacktype1_txt, targtype1_txt, target1, nkill
  )

head(df2, 1)

##    eventid iyear success imonth iday        country_txt
## 1 1.97e+11  1970       1      7    2 Dominican Republic
##                    region_txt suicide latitude longitude attacktype1_txt
## 1 Central America & Caribbean       0 18.45679 -69.95116   Assassination
##                 targtype1_txt      target1 nkill
## 1 Private Citizens & Property Julio Guzman     1

# eventid is read from the CSV as a number; store it as a factor so it is
# handled as an identifier rather than a quantity.
df2[["eventid"]] <- as.factor(df2[["eventid"]])

class(df2[["eventid"]])

## [1] "factor"

# Print the column names, types and first few values of the reduced table.
str(df2)

## 'data.frame':    181691 obs. of  14 variables:
##  $ eventid        : Factor w/ 181691 levels "197000000001",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ iyear          : int  1970 1970 1970 1970 1970 1970 1970 1970 1970 1970 ...
##  $ success        : int  1 1 1 1 1 1 0 1 1 1 ...
##  $ imonth         : int  7 0 1 1 1 1 1 1 1 1 ...
##  $ iday           : int  2 0 0 0 0 1 2 2 2 3 ...
##  $ country_txt    : chr  "Dominican Republic" "Mexico" "Philippines" "Greece" ...
##  $ region_txt     : chr  "Central America & Caribbean" "North America" "Southeast Asia" "Western Europe" ...
##  $ suicide        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ latitude       : num  18.5 19.4 15.5 38 33.6 ...
##  $ longitude      : num  -70 -99.1 120.6 23.8 130.4 ...
##  $ attacktype1_txt: chr  "Assassination" "Hostage Taking (Kidnapping)" "Assassination" "Bombing/Explosion" ...
##  $ targtype1_txt  : chr  "Private Citizens & Property" "Government (Diplomatic)" "Journalists & Media" "Government (Diplomatic)" ...
##  $ target1        : chr  "Julio Guzman" "Nadine Chaval, daughter" "Employee" "U.S. Embassy" ...
##  $ nkill          : int  1 0 1 NA NA 0 0 0 0 0 ...

# Per-column summary statistics and NA counts.
# NOTE(review): the printed summary shows a longitude minimum of about -8.6e7,
# far outside the -180..180 range of valid longitudes -- presumably a data
# entry error that should be cleaned before any coordinate-based plotting.
summary(df2)

##          eventid           iyear         success           imonth      
##  197000000001:     1   Min.   :1970   Min.   :0.0000   Min.   : 0.000  
##  197000000002:     1   1st Qu.:1991   1st Qu.:1.0000   1st Qu.: 4.000  
##  197001000001:     1   Median :2009   Median :1.0000   Median : 6.000  
##  197001000002:     1   Mean   :2003   Mean   :0.8896   Mean   : 6.467  
##  197001000003:     1   3rd Qu.:2014   3rd Qu.:1.0000   3rd Qu.: 9.000  
##  197001010002:     1   Max.   :2017   Max.   :1.0000   Max.   :12.000  
##  (Other)     :181685                                                   
##       iday       country_txt         region_txt           suicide       
##  Min.   : 0.00   Length:181691      Length:181691      Min.   :0.00000  
##  1st Qu.: 8.00   Class :character   Class :character   1st Qu.:0.00000  
##  Median :15.00   Mode  :character   Mode  :character   Median :0.00000  
##  Mean   :15.51                                         Mean   :0.03651  
##  3rd Qu.:23.00                                         3rd Qu.:0.00000  
##  Max.   :31.00                                         Max.   :1.00000  
##                                                                         
##     latitude        longitude         attacktype1_txt    targtype1_txt     
##  Min.   :-53.16   Min.   :-86185896   Length:181691      Length:181691     
##  1st Qu.: 11.51   1st Qu.:        5   Class :character   Class :character  
##  Median : 31.47   Median :       43   Mode  :character   Mode  :character  
##  Mean   : 23.50   Mean   :     -459                                        
##  3rd Qu.: 34.69   3rd Qu.:       69                                        
##  Max.   : 74.63   Max.   :      179                                        
##  NA's   :4556     NA's   :4557                                             
##    target1              nkill         
##  Length:181691      Min.   :   0.000  
##  Class :character   1st Qu.:   0.000  
##  Mode  :character   Median :   0.000  
##                     Mean   :   2.403  
##                     3rd Qu.:   2.000  
##                     Max.   :1570.000  
##                     NA's   :10313

##Let’s look at the number of terror attacks by year

##Then we’ll look at attacks by region and decade

# Number of recorded incidents in each world region (one row per region,
# count in column `n`). dplyr is named explicitly because plyr is attached
# later in the search path and masks several dplyr verbs.
region <- dplyr::tally(dplyr::group_by(df2, region_txt))

print(region)

## # A tibble: 12 × 2
##    region_txt                      n
##    <chr>                       <int>
##  1 Australasia & Oceania         282
##  2 Central America & Caribbean 10344
##  3 Central Asia                  563
##  4 East Asia                     802
##  5 Eastern Europe               5144
##  6 Middle East & North Africa  50474
##  7 North America                3456
##  8 South America               18978
##  9 South Asia                  44974
## 10 Southeast Asia              12485
## 11 Sub-Saharan Africa          17550
## 12 Western Europe              16639

# Label every incident with its decade, e.g. 1970-1979 -> "1970's".
# The decade is computed arithmetically from the integer year instead of being
# matched against hand-typed lists of year strings, which contained duplicate
# entries and placed 2020 in "2010's". A missing year stays NA rather than
# becoming the string "NA's".
# Note: the column keeps the name `year` because later code groups on it,
# even though it holds a decade label.
df2$year <- dplyr::if_else(
  is.na(df2$iyear),
  NA_character_,
  paste0((df2$iyear %/% 10L) * 10L, "'s")
)

# Count incidents for every (success, year) combination. aggregate() names the
# result columns Group.1 (success flag), Group.2 (year) and x (count).
agg_df <- with(df2, aggregate(iyear, by = list(success, iyear), FUN = length))

# Keep only the rows that describe successful attacks (success == 1).
agg_df <- agg_df[which(agg_df$Group.1 == 1), ]

head(agg_df)

##    Group.1 Group.2   x
## 2        1    1970 549
## 4        1    1971 420
## 6        1    1972 452
## 8        1    1973 433
## 10       1    1974 545
## 12       1    1975 705

# Give the three aggregate() columns descriptive names in one assignment.
names(agg_df) <- c("Group", "Year", "Attacks")
head(agg_df, 5)

##    Group Year Attacks
## 2      1 1970     549
## 4      1 1971     420
## 6      1 1972     452
## 8      1 1973     433
## 10     1 1974     545

##Building a trendline of successful attacks per year from 1970 to 2017 (the last year in this dataset)

# Line chart of successful attacks per year.
# The colour is a fixed property of the line, so it is set as a geom_line()
# argument. Inside aes() the string 'red' is treated as a one-level data
# variable, which adds a meaningless legend and draws the line in the default
# palette colour instead of literal red.
ggplot(agg_df, aes(x = Year, y = Attacks, group = 1)) +
  geom_line(colour = "red")

##The trendline shows that global terrorist attacks have risen sharply since 2000, with an especially steep increase after 2010. Let’s drill in and see where these are occurring.

# Incident counts for every region/decade pair. tally() drops only the last
# grouping level, so the result is still grouped by region_txt.
attacks <- dplyr::tally(dplyr::group_by(df2, region_txt, year))

head(attacks, 5)

## # A tibble: 5 × 3
## # Groups:   region_txt [1]
##   region_txt            year       n
##   <chr>                 <chr>  <int>
## 1 Australasia & Oceania 1970's    16
## 2 Australasia & Oceania 1980's    78
## 3 Australasia & Oceania 1990's   113
## 4 Australasia & Oceania 2000's    28
## 5 Australasia & Oceania 2010's    47

#Rename columns 
colnames(attacks)[1] <- "Region"               # Rename first column
colnames(attacks)[2] <- "Decade"              # Rename the 2nd column
colnames(attacks)[3] <- "Attacks"
head(attacks, 5)

## # A tibble: 5 × 3
## # Groups:   Region [1]
##   Region                Decade Attacks
##   <chr>                 <chr>    <int>
## 1 Australasia & Oceania 1970's      16
## 2 Australasia & Oceania 1980's      78
## 3 Australasia & Oceania 1990's     113
## 4 Australasia & Oceania 2000's      28
## 5 Australasia & Oceania 2010's      47

##Building an interactive data table with exportable buttons for attacks by decades and regions

# Interactive table of attacks per region and decade. The Buttons extension
# provides the copy / CSV / Excel / print / PDF export controls.
datatable(
  attacks,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "csv", "excel", "print", "pdf")
  )
)

##Building a map

##We are going to use the world map polygon data returned by map_data() from ggplot2

# World polygon outlines (long/lat vertices keyed by `region` and `group`).
mapdata <- ggplot2::map_data(map = "world")

# One row per country with the sum of the 0/1 success flag, i.e. the number
# of successful attacks recorded for that country. dplyr is named explicitly
# because plyr also exports a summarise() that does not understand `.groups`.
df3 <- dplyr::summarise(
  dplyr::group_by(df2, country_txt),
  sum_attacks = sum(success),
  .groups = "drop"
)

head(df3, 5)

## # A tibble: 5 × 2
##   country_txt sum_attacks
##   <chr>             <int>
## 1 Afghanistan       11141
## 2 Albania              64
## 3 Algeria            2561
## 4 Andorra               1
## 5 Angola              486

# The map data keys countries by a column called `region`, so rename the
# country column to match before joining.
df3 <- dplyr::rename(df3, region = country_txt)

head(df3, 1)

## # A tibble: 1 × 2
##   region      sum_attacks
##   <chr>             <int>
## 1 Afghanistan       11141

# Attach the polygon vertices to each country's attack total (one output row
# per polygon vertex).
# NOTE(review): this keeps only countries present in df3, and a country whose
# GTD name differs from the map's `region` spelling gets NA coordinates --
# presumably e.g. "United States" vs "USA"; confirm and recode if needed.
mapjoin <- dplyr::left_join(x = df3, y = mapdata, by = "region")

head(mapjoin, 5)

## # A tibble: 5 × 7
##   region      sum_attacks  long   lat group order subregion
##   <chr>             <int> <dbl> <dbl> <dbl> <int> <chr>    
## 1 Afghanistan       11141  74.9  37.2     2    12 <NA>     
## 2 Afghanistan       11141  74.8  37.2     2    13 <NA>     
## 3 Afghanistan       11141  74.8  37.2     2    14 <NA>     
## 4 Afghanistan       11141  74.7  37.3     2    15 <NA>     
## 5 Afghanistan       11141  74.7  37.3     2    16 <NA>

# Choropleth of attack totals: each map polygon is filled by the country's
# sum_attacks value, and axis titles, labels and ticks are hidden because
# longitude/latitude scales add nothing to a world map.
# `linewidth` replaces `size` for the polygon outline width; using `size` for
# line widths is deprecated since ggplot2 3.4.0 and triggers a warning.
ggplot(mapjoin, aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = sum_attacks), color = "grey", linewidth = 0.2) +
  labs(fill = "Terrorist Attacks per Region") +
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Embed the linked YouTube video in the rendered report (embed_url() comes
# from the vembedr package loaded above).
embed_url("https://youtu.be/gWK1sG3spiE?si=TDrkogh7SrbYkTMW")

Global Terrorism Database Analysis & Plots

Dawn Daras MS

2024-05-30