Here is the codebook for the dataset: GTD Codebook
This is the roadmap for our analysis:
library(dplyr) # for data manipulation
library(stringr) # for data manipulation
library(ggplot2) # for data visualization
library(corrplot) # for correlations
library(ggplot2)
library(RColorBrewer)
library(nnet)
library(tidyr)
library(plyr)
library(leaflet)
library(sf)
library(tidyverse)
# Load the raw GTD export, keeping text columns as plain character vectors.
df <- read.csv(
  file = "/cloud/project/gtd.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Print the first record to see which columns are available.
head(df, n = 1)
## eventid iyear imonth iday approxdate extended resolution country
## 1 1.97e+11 1970 7 2 0 58
## country_txt region region_txt provstate city
## 1 Dominican Republic 2 Central America & Caribbean Santo Domingo
## latitude longitude specificity vicinity location summary crit1 crit2 crit3
## 1 18.45679 -69.95116 1 0 1 1 1
## doubtterr alternative alternative_txt multiple success suicide attacktype1
## 1 0 NA 0 1 0 1
## attacktype1_txt attacktype2 attacktype2_txt attacktype3 attacktype3_txt
## 1 Assassination NA NA
## targtype1 targtype1_txt targsubtype1 targsubtype1_txt corp1
## 1 14 Private Citizens & Property 68 Named Civilian
## target1 natlty1 natlty1_txt targtype2 targtype2_txt targsubtype2
## 1 Julio Guzman 58 Dominican Republic NA NA
## targsubtype2_txt corp2 target2 natlty2 natlty2_txt targtype3 targtype3_txt
## 1 NA NA
## targsubtype3 targsubtype3_txt corp3 target3 natlty3 natlty3_txt gname
## 1 NA NA MANO-D
## gsubname gname2 gsubname2 gname3 gsubname3 motive guncertain1 guncertain2
## 1 0 NA
## guncertain3 individual nperps nperpcap claimed claimmode claimmode_txt claim2
## 1 NA 0 NA NA NA NA NA
## claimmode2 claimmode2_txt claim3 claimmode3 claimmode3_txt compclaim
## 1 NA NA NA NA
## weaptype1 weaptype1_txt weapsubtype1 weapsubtype1_txt weaptype2 weaptype2_txt
## 1 13 Unknown NA NA
## weapsubtype2 weapsubtype2_txt weaptype3 weaptype3_txt weapsubtype3
## 1 NA NA NA
## weapsubtype3_txt weaptype4 weaptype4_txt weapsubtype4 weapsubtype4_txt
## 1 NA NA
## weapdetail nkill nkillus nkillter nwound nwoundus nwoundte property
## 1 1 NA NA 0 NA NA 0
## propextent propextent_txt propvalue propcomment ishostkid nhostkid nhostkidus
## 1 NA NA 0 NA NA
## nhours ndays divert kidhijcountry ransom ransomamt ransomamtus ransompaid
## 1 NA NA 0 NA NA NA
## ransompaidus ransomnote hostkidoutcome hostkidoutcome_txt nreleased addnotes
## 1 NA NA NA
## scite1 scite2 scite3 dbsource INT_LOG INT_IDEO INT_MISC INT_ANY related
## 1 PGIS 0 0 0 0
After dropping the variables we don’t need, these are the ones we are left with:
# Keep only the columns used in the rest of the analysis.
df2 <- select(
  df,
  eventid, iyear, success, imonth, iday,
  country_txt, region_txt, suicide,
  latitude, longitude,
  attacktype1_txt, targtype1_txt, target1, nkill
)
# Print the first record of the reduced data set.
head(df2, n = 1)
## eventid iyear success imonth iday country_txt
## 1 1.97e+11 1970 1 7 2 Dominican Republic
## region_txt suicide latitude longitude attacktype1_txt
## 1 Central America & Caribbean 0 18.45679 -69.95116 Assassination
## targtype1_txt target1 nkill
## 1 Private Citizens & Property Julio Guzman 1
# Treat the event id as a categorical label rather than a number.
df2[["eventid"]] <- as.factor(df2[["eventid"]])
# Confirm the conversion.
class(df2[["eventid"]])
## [1] "factor"
# Show the structure of df2: each column's type and its first few values.
str(df2)
## 'data.frame': 181691 obs. of 14 variables:
## $ eventid : Factor w/ 181691 levels "197000000001",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ iyear : int 1970 1970 1970 1970 1970 1970 1970 1970 1970 1970 ...
## $ success : int 1 1 1 1 1 1 0 1 1 1 ...
## $ imonth : int 7 0 1 1 1 1 1 1 1 1 ...
## $ iday : int 2 0 0 0 0 1 2 2 2 3 ...
## $ country_txt : chr "Dominican Republic" "Mexico" "Philippines" "Greece" ...
## $ region_txt : chr "Central America & Caribbean" "North America" "Southeast Asia" "Western Europe" ...
## $ suicide : int 0 0 0 0 0 0 0 0 0 0 ...
## $ latitude : num 18.5 19.4 15.5 38 33.6 ...
## $ longitude : num -70 -99.1 120.6 23.8 130.4 ...
## $ attacktype1_txt: chr "Assassination" "Hostage Taking (Kidnapping)" "Assassination" "Bombing/Explosion" ...
## $ targtype1_txt : chr "Private Citizens & Property" "Government (Diplomatic)" "Journalists & Media" "Government (Diplomatic)" ...
## $ target1 : chr "Julio Guzman" "Nadine Chaval, daughter" "Employee" "U.S. Embassy" ...
## $ nkill : int 1 0 1 NA NA 0 0 0 0 0 ...
# Per-column summary of df2 (ranges and quartiles for numeric columns, NA counts).
# NOTE(review): the longitude minimum reported below lies far outside
# [-180, 180], so at least one coordinate is invalid and should be cleaned
# before any point-level mapping.
summary(df2)
## eventid iyear success imonth
## 197000000001: 1 Min. :1970 Min. :0.0000 Min. : 0.000
## 197000000002: 1 1st Qu.:1991 1st Qu.:1.0000 1st Qu.: 4.000
## 197001000001: 1 Median :2009 Median :1.0000 Median : 6.000
## 197001000002: 1 Mean :2003 Mean :0.8896 Mean : 6.467
## 197001000003: 1 3rd Qu.:2014 3rd Qu.:1.0000 3rd Qu.: 9.000
## 197001010002: 1 Max. :2017 Max. :1.0000 Max. :12.000
## (Other) :181685
## iday country_txt region_txt suicide
## Min. : 0.00 Length:181691 Length:181691 Min. :0.00000
## 1st Qu.: 8.00 Class :character Class :character 1st Qu.:0.00000
## Median :15.00 Mode :character Mode :character Median :0.00000
## Mean :15.51 Mean :0.03651
## 3rd Qu.:23.00 3rd Qu.:0.00000
## Max. :31.00 Max. :1.00000
##
## latitude longitude attacktype1_txt targtype1_txt
## Min. :-53.16 Min. :-86185896 Length:181691 Length:181691
## 1st Qu.: 11.51 1st Qu.: 5 Class :character Class :character
## Median : 31.47 Median : 43 Mode :character Mode :character
## Mean : 23.50 Mean : -459
## 3rd Qu.: 34.69 3rd Qu.: 69
## Max. : 74.63 Max. : 179
## NA's :4556 NA's :4557
## target1 nkill
## Length:181691 Min. : 0.000
## Class :character 1st Qu.: 0.000
## Mode :character Median : 0.000
## Mean : 2.403
## 3rd Qu.: 2.000
## Max. :1570.000
## NA's :10313
# Number of attacks for every year/region combination.
attacks <- tally(group_by(df2, iyear, region_txt))
## Let’s look at the number of terror attacks, with years binned into decades
# Total number of attacks recorded in each region.
region <- tally(group_by(df2, region_txt))
print(region)
## # A tibble: 12 × 2
## region_txt n
## <chr> <int>
## 1 Australasia & Oceania 282
## 2 Central America & Caribbean 10344
## 3 Central Asia 563
## 4 East Asia 802
## 5 Eastern Europe 5144
## 6 Middle East & North Africa 50474
## 7 North America 3456
## 8 South America 18978
## 9 South Asia 44974
## 10 Southeast Asia 12485
## 11 Sub-Saharan Africa 17550
## 12 Western Europe 16639
# Label every event with its decade, e.g. 1994 -> "1990's".
#
# The label is derived arithmetically from the year instead of from hand-typed
# lists of year strings. The lists contained duplicated entries, compared an
# integer column against character values, placed 2020 in "2010's", and
# silently produced NA for any year outside 1970-2020. For the years 1970-2019
# the result is identical to the previous labels. Missing years stay NA.
df2$decade <- ifelse(
  is.na(df2$iyear),
  NA_character_,
  paste0((df2$iyear %/% 10L) * 10L, "'s")
)
library(ggplot2)
# Yearly attack counts, one coloured line per region.
ggplot(
  data = attacks,
  mapping = aes(x = iyear, y = n, colour = region_txt)
) +
  geom_line()
# Number of attacks for every region/decade/attack-type combination.
attacks2 <- tally(group_by(df2, region_txt, decade, attacktype1_txt))
# Print the first five combinations.
head(attacks2, n = 5)
## # A tibble: 5 × 4
## # Groups: region_txt, decade [2]
## region_txt decade attacktype1_txt n
## <chr> <chr> <chr> <int>
## 1 Australasia & Oceania 1970's Armed Assault 1
## 2 Australasia & Oceania 1970's Assassination 6
## 3 Australasia & Oceania 1970's Bombing/Explosion 8
## 4 Australasia & Oceania 1970's Facility/Infrastructure Attack 1
## 5 Australasia & Oceania 1980's Armed Assault 15
# Give the first three columns presentation-friendly names in one step.
colnames(attacks2)[1:3] <- c("Region", "Decade", "Attack Type")
head(attacks2, n = 5)
## # A tibble: 5 × 4
## # Groups: Region, Decade [2]
## Region Decade `Attack Type` n
## <chr> <chr> <chr> <int>
## 1 Australasia & Oceania 1970's Armed Assault 1
## 2 Australasia & Oceania 1970's Assassination 6
## 3 Australasia & Oceania 1970's Bombing/Explosion 8
## 4 Australasia & Oceania 1970's Facility/Infrastructure Attack 1
## 5 Australasia & Oceania 1980's Armed Assault 15
# Run every character column that mutate_if() selects through
# utf8::utf8_encode().
attacks2 <- mutate_if(attacks2, is.character, utf8::utf8_encode)
library(tidyverse)
library(rnaturalearth)
library(sf)
# Load medium-scale country polygons as an sf object and drop Antarctica.
# The filter previously compared against the misspelt "Anarctica", which
# matches no row, so Antarctica was never actually removed.
world <- ne_countries(scale = "medium", returnclass = "sf") %>%
  filter(admin != "Antarctica")
# Number of attacks for every country/decade combination.
terrorattacks <- tally(group_by(df2, country_txt, decade))
# Rename the columns; "region" is the key used for the later joins.
colnames(terrorattacks)[1:3] <- c("region", "Decade", "Freq")
head(terrorattacks, n = 5)
## # A tibble: 5 × 3
## # Groups: region [1]
## region Decade Freq
## <chr> <chr> <int>
## 1 Afghanistan 1970's 4
## 2 Afghanistan 1980's 22
## 3 Afghanistan 1990's 98
## 4 Afghanistan 2000's 1949
## 5 Afghanistan 2010's 10658
# Load the lookup table of country names with a latitude/longitude per country.
latlong <- read.csv(
  file = "/cloud/project/countrieslatlong.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Print the first record of the lookup table.
head(latlong, n = 1)
## name lat long
## 1 Afghanistan 33 65
# Align the key column name with terrorattacks, then attach coordinates.
# all.x = TRUE keeps countries that have no entry in the lookup table.
names(latlong)[1] <- "region"
worldattacks <- merge(
  x = terrorattacks,
  y = latlong,
  by = "region",
  all.x = TRUE
)
# Choropleth of the total number of attacks per country (all decades combined).
#
# `worldattacks` holds one row per country AND decade. Joining it directly onto
# the polygon vertices repeats every vertex once per decade, and geom_polygon()
# then fills each polygon with whichever decade happens to come first.
# Collapsing to one row per country first gives each polygon a single,
# well-defined fill value.
#
# The dplyr:: prefixes are required: plyr is attached after dplyr and masks
# summarise(). `lat` and `long` are kept so the joined table still exposes the
# long.x / lat.x (map) and long.y / lat.y (lookup) columns.
country_totals <- worldattacks %>%
  dplyr::group_by(region, lat, long) %>%
  dplyr::summarise(Freq = sum(Freq)) %>%
  dplyr::ungroup()
world_map <- map_data("world")
# NOTE(review): map_data("world") uses short names such as "USA" and "UK";
# confirm the GTD country names match, otherwise those countries stay unfilled.
terrorattacks_map <- left_join(world_map, country_totals, by = "region")
# Create the map
ggplot(terrorattacks_map, aes(long.x, lat.x, group = group)) +
  geom_polygon(aes(fill = Freq), color = "lavender") +
  scale_fill_viridis_c(option = "C")