library(ggmap); library(ggplot2); library(dplyr)
## Loading required package: ggplot2
## Warning: package 'dplyr' was built under R version 3.1.2
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the worldwide incident file and wrap it as a dplyr table.
trr <- read.csv(
  "~/Documents/NYDSA/DATA SET/Idea5-InternationalTerrirism/enigma-terrirism.csv",
  sep = ",",
  stringsAsFactors = FALSE
)
trr <- tbl_df(trr)
# Bucket every incident year into a 5-year period: the year is truncated down
# to a multiple of 5 and stored as a factor.
trr$iyear.lvl <- as.factor(trunc(trr$iyear / 5) * 5)
# Later sections of this script read the same bucket under the name
# `iyear.category`; create it when the input file does not already supply it.
if (!"iyear.category" %in% names(trr)) {
  trr$iyear.category <- trr$iyear.lvl
}
## Most active gangs
# One row per group name: the number of distinct regions it appears in and the
# number of calendar years its incidents span (first and last year inclusive).
# Sorted by span, then by region count, both descending.
topGangs <- trr %>%
  group_by(gname) %>%
  summarise(
    # unique() has no na.rm argument, so none is passed here.
    Total_Regions = length(unique(region_txt)),
    # The + 1 sits outside diff(); added to the range itself it cancels out.
    Years_Active = diff(range(iyear, na.rm = TRUE)) + 1
  ) %>%
  arrange(desc(Years_Active), desc(Total_Regions))
# Top 10 gangs: drop the first-ranked row and keep the next ten rows
# (group name and region count only).
# NOTE(review): row 1 is excluded purely by position - presumably a catch-all
# group name; confirm against the data.
topGangs10 <- head(topGangs[-1, 1:2], 10)
# Bar chart of region counts; x labels are tilted so long names stay readable.
(topGangs10.barchart <- ggplot(topGangs10, aes(x = gname, y = Total_Regions)) +
  geom_bar(stat = "identity", fill = "grey") +
  ggtitle("Top10 Gangs Worldwide") + xlab("Gang Name") + ylab("# of Regions") +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 10, hjust = 1)))

## Global reach for top 10 gangs
# By region
topGangs10
## Source: local data frame [10 x 2]
##
## gname Total_Regions
## 1 Individual 12
## 2 Irish Republican Army (IRA) 3
## 3 Basque Fatherland and Freedom (ETA) 2
## 4 New People's Army (NPA) 2
## 5 Irish Republican Extremists 1
## 6 Popular Front for the Liberation of Palestine (PFLP) 4
## 7 Neo-Nazi Group 5
## 8 al-Fatah 3
## 9 National Liberation Army of Colombia (ELN) 2
## 10 Ku Klux Klan 1
# By country: for every group, the number of distinct `country` values and the
# year of its latest incident. Only groups whose latest incident is 1990 or
# later are kept, ranked by country count and then by recency.
gangs.reach.recent <- trr %>%
  group_by(gname) %>%
  summarise(
    # unique() has no na.rm argument, so none is passed here.
    Countries = length(unique(country)),
    last_year = max(iyear)
  ) %>%
  arrange(desc(Countries), desc(last_year)) %>%
  filter(last_year >= 1990)
# Top 10: drop the first-ranked row and keep the next ten rows. head() never
# pads with NA rows when fewer than ten groups remain.
# NOTE(review): row 1 is excluded purely by position - presumably a catch-all
# group name; confirm against the data.
gangs.reach.recent10 <- head(gangs.reach.recent[-1, ], 10)
# Country list: for each selected group, the values of column 9 of `trr`
# across all of that group's incidents.
# NOTE(review): column 9 is picked by position - presumably the country name
# column; confirm against the CSV header.
CountryList <- lapply(
  gangs.reach.recent10[[1]],
  function(gang) trr[which(trr$gname == gang), 9]
)
CountryListUnique <- lapply(CountryList, unique)
# Add the gang name to each of its countries.
CountryListUnique.wGang <- lapply(
  seq_along(CountryListUnique),
  function(i) cbind(CountryListUnique[[i]], gname = gangs.reach.recent10[[1]][i])
)
CountryListUnique.wGang.df <- do.call("rbind", CountryListUnique.wGang) # list to data frame
# CountryListUnique.wGang.df
# Top 10 type of attacks: heat map of attack counts per attack type (rows)
# and 5-year period (columns).
atk.type <- table(trr$attacktype1_txt, trr$iyear.category) # simple two-way count table
# as.data.frame() on the table yields the columns Var1, Var2 and Freq used below.
atk.type.df <- tbl_df(as.data.frame(atk.type))
(atk.type.df.sqplot <- ggplot(atk.type.df, aes(Var2, Var1)) +
  geom_tile(aes(fill = Freq), color = "white") +
  # `limits` is spelled out in full instead of relying on partial matching.
  scale_fill_gradient2(
    low = "blue", high = "red", mid = "white", midpoint = 2000,
    limits = c(500, 11000), name = "# of\nAttacks"
  ) +
  ggtitle("Top 10 Type of Attacks:\n1970-2010 Trend") + xlab("Years") + ylab("Type of Attack") +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 14, hjust = 1)) +
  coord_equal())

## Number of attacks per attack type and year
atkType.byYears <- trr %>%
  group_by(attacktype1_txt, iyear) %>%
  summarise(Attack_byYears = n())
# Worldwide bubble plot: one jittered point per (attack type, year) pair,
# coloured by attack type and sized by the number of attacks.
atkType.byYears.bubblePlot <- ggplot(
  data = atkType.byYears,
  aes(x = as.factor(iyear), y = Attack_byYears)
) +
  geom_point(
    aes(color = attacktype1_txt, size = Attack_byYears),
    alpha = 0.6,
    position = "jitter"
  ) +
  scale_size_continuous(range = c(4, 10)) +
  labs(
    title = "Type of Attacks (by color)\nNumber of Attacks (by size)\nFrom; 1970 To:2010",
    x = "Year",
    y = " Number of Attacks"
  ) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1))
atkType.byYears.bubblePlot

# Number of attacks per attack type, region and year.
atkType.byRegion.byYears <- trr %>%
  group_by(attacktype1_txt, region_txt, iyear) %>%
  summarise(Attack_byYears = n())
atkType.byRegion.byYears
## Source: local data frame [2,569 x 4]
## Groups: attacktype1_txt, region_txt
##
## attacktype1_txt region_txt iyear Attack_byYears
## 1 Armed Assault Australasia & Oceania 1973 1
## 2 Armed Assault Australasia & Oceania 1980 1
## 3 Armed Assault Australasia & Oceania 1988 3
## 4 Armed Assault Australasia & Oceania 1989 11
## 5 Armed Assault Australasia & Oceania 1990 8
## 6 Armed Assault Australasia & Oceania 1991 1
## 7 Armed Assault Australasia & Oceania 1992 6
## 8 Armed Assault Australasia & Oceania 1994 2
## 9 Armed Assault Australasia & Oceania 1995 6
## 10 Armed Assault Australasia & Oceania 1996 2
## .. ... ... ... ...
# By region: the same bubble plot, one panel per region on a shared scale,
# with axis text, ticks and titles hidden.
atkType.byRegion.byYears.bubblePlot <- ggplot(
  data = atkType.byRegion.byYears,
  aes(x = as.factor(iyear), y = Attack_byYears)
) +
  geom_point(
    aes(color = attacktype1_txt, size = Attack_byYears),
    alpha = 0.7,
    position = "jitter"
  ) +
  scale_size_continuous(range = c(4, 10)) +
  facet_wrap(~region_txt, ncol = 2, scales = "fixed") +
  theme(
    panel.background = element_rect(fill = "white"),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank()
  ) +
  labs(title = "Type of Attacks (by color)\nNumber of Attacks (by size)\nFrom; 1970 To:2010")
atkType.byRegion.byYears.bubblePlot

# Top 10 countries by number of rows in the terrorism report
Top10CountryList <- trr %>%
  group_by(country_txt) %>%
  summarise(n = n()) %>%
  arrange(desc(n))
# Keep only the country name of the first ten ranked rows, then print it.
Top10CountryList <- Top10CountryList[1:10, "country_txt"]
Top10CountryList
## Source: local data frame [10 x 1]
##
## country_txt
## 1 Colombia
## 2 Iraq
## 3 India
## 4 Peru
## 5 El Salvador
## 6 Pakistan
## 7 Northern Ireland
## 8 Spain
## 9 Philippines
## 10 Sri Lanka
# More about the #1 country in the list.
# filter() keeps only rows where the comparison is TRUE, so rows with a
# missing country name are not carried along.
Colombia <- filter(trr, country_txt == "Colombia")
# Total killed per 5-year period (missing kill counts are ignored).
(Colombia.year <- Colombia %>%
  group_by(iyear.category) %>%
  summarise(Casualities = sum(nkill, na.rm = TRUE)))
## Source: local data frame [9 x 2]
##
## iyear.category Casualities
## 1 1970 1
## 2 1975 282
## 3 1980 1810
## 4 1985 4200
## 5 1990 2285
## 6 1995 3184
## 7 2000 980
## 8 2005 473
## 9 2010 56
# Total killed per weapon type, largest first; weapon types with no recorded
# deaths are dropped.
(Colombia.weapon <- Colombia %>%
  group_by(weaptype1_txt) %>%
  summarise(Casualities = sum(nkill, na.rm = TRUE)) %>%
  arrange(desc(Casualities)) %>%
  filter(Casualities > 0))
## Source: local data frame [7 x 2]
##
## weaptype1_txt
## 1 Firearms
## 2 Explosives/Bombs/Dynamite
## 3 Unknown
## 4 Melee
## 5 Incendiary
## 6 Chemical
## 7 Vehicle (not to include vehicle-borne explosives, i.e., car or truck bombs)
## Variables not shown: Casualities (dbl)
# Colombia bar charts, drawn side by side on one page: casualties per 5-year
# period (left) and per weapon type (right).
# par(mfrow = ...) only lays out base graphics and has no effect on ggplot2,
# so the two plots are placed in a 1 x 2 grid layout instead.
grid::grid.newpage()
grid::pushViewport(grid::viewport(layout = grid::grid.layout(nrow = 1, ncol = 2)))
print(
  ggplot(Colombia.year, aes(x = iyear.category, y = Casualities)) +
    geom_bar(stat = "identity", fill = "grey") +
    ggtitle("Colombia - Causualities"),
  vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 1)
)
print(
  ggplot(Colombia.weapon, aes(x = weaptype1_txt, y = Casualities)) +
    geom_bar(stat = "identity", fill = "grey") +
    ggtitle("Colombia - Causualities"),
  vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 2)
)
# Leave the layout viewport so later grid drawing starts from the top level.
grid::popViewport()
