library(ggmap); library(ggplot2); library(dplyr)
## Loading required package: ggplot2
## Warning: package 'dplyr' was built under R version 3.1.2
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load files - World ----
# NOTE(review): absolute, machine-specific path; the script only runs where
# the CSV exists at exactly this location.
trr <- read.csv(
  "~/Documents/NYDSA/DATA SET/Idea5-InternationalTerrirism/enigma-terrirism.csv",
  sep = ",",
  # Spelled out in full; the original `stringsAsFactor` only worked through
  # partial argument matching.
  stringsAsFactors = FALSE
)
trr <- tbl_df(trr)

# Bucket every incident year into the 5-year period it falls in
# (e.g. 1973 -> 1970) and store the period as a factor.
trr$iyear.lvl <- as.factor(trunc(trr$iyear / 5) * 5)

# Later sections of this script read the period under the name
# `iyear.category`, which no visible statement creates. Provide it as an alias
# of `iyear.lvl` unless the CSV already supplies a column with that name.
if (!"iyear.category" %in% names(trr)) {
  trr$iyear.category <- trr$iyear.lvl
}

## Most active gangs ----
# One row per group name (`gname`) with:
#   Total_Regions - number of distinct, non-missing regions it appears in
#   Years_Active  - inclusive span of years between its first and last incident
# `unique()` has no `na.rm` argument (the original one was silently ignored),
# so missing regions are dropped explicitly. The `+ 1` sits outside `diff()`:
# inside, it was added to both ends of the range and cancelled out.
topGangs <- trr %>%
  group_by(gname) %>%
  summarise(
    Total_Regions = length(unique(region_txt[!is.na(region_txt)])),
    Years_Active = diff(range(iyear, na.rm = TRUE)) + 1
  ) %>%
  arrange(desc(Years_Active), desc(Total_Regions))

# Top gangs: skip the first-ranked row and keep the following rows (2 to 12),
# restricted to the name and region-count columns. `head()` simply returns
# fewer rows when the ranking is short instead of padding with NA rows.
# NOTE(review): this keeps 11 rows although the section and the plot title say
# "Top 10"; the recorded output lists "Individual" first, so the extra row may
# be deliberate - confirm before changing the count.
topGangs10 <- head(topGangs[-1, 1:2], 11)

# Bar chart of region count per gang. The original passed `color = 'blue'` to
# `ggplot()`, where it has no effect, so it is omitted here.
topGangs10.barchart <- ggplot(topGangs10, aes(x = gname, y = Total_Regions)) +
  geom_bar(stat = "identity", fill = "grey") +
  ggtitle("Top10 Gangs Worldwide") +
  xlab("Gang Name") +
  ylab("# of Regions") +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 10, hjust = 1))
topGangs10.barchart

## Global reach for top 10 gangs
# By REGION
topGangs10
## Source: local data frame [11 x 2]
## 
##                                                   gname Total_Regions
## 1                                            Individual            12
## 2                           Irish Republican Army (IRA)             3
## 3                   Basque Fatherland and Freedom (ETA)             2
## 4                               New People's Army (NPA)             2
## 5                           Irish Republican Extremists             1
## 6  Popular Front for the Liberation of Palestine (PFLP)             4
## 7                                        Neo-Nazi Group             5
## 8                                              al-Fatah             3
## 9            National Liberation Army of Colombia (ELN)             2
## 10                                         Ku Klux Klan             1
## 11            Corsican National Liberation Front (FLNC)             1

# By COUNTRY
# One row per group name with the number of distinct, non-missing `country`
# values and the latest incident year, ranked by country count and limited to
# groups whose latest incident is in 1990 or later.
# The original ran this pipeline (and the row selection below) twice verbatim;
# it is computed once here. `unique()` ignores `na.rm`, so missing values are
# dropped explicitly.
gangs.reach.recent <- trr %>%
  group_by(gname) %>%
  summarise(
    Countries = length(unique(country[!is.na(country)])),
    last_year = max(iyear)
  ) %>%
  arrange(desc(Countries), desc(last_year)) %>%
  filter(last_year >= 1990)

# Rows 2 to 12 of the ranking (the first-ranked row is skipped); fewer rows
# are returned, rather than NA padding, when the ranking is short.
gangs.reach.recent10 <- head(gangs.reach.recent[-1, ], 11)

# Only the first ten of those rows are expanded into country lists, bounded by
# however many rows actually exist.
top_idx <- seq_len(min(10L, nrow(gangs.reach.recent10)))

# For each selected group: column 9 of `trr` over that group's incidents.
# NOTE(review): the column is addressed by position; presumably it is a
# country column - confirm against the CSV header and prefer its name.
CountryList <- lapply(top_idx, function(i) {
  trr[which(trr$gname == gangs.reach.recent10[[1]][i]), 9]
})
CountryListUnique <- lapply(CountryList, unique)

# Add the gang name next to each of its countries.
CountryListUnique.wGang <- lapply(top_idx, function(i) {
  cbind(CountryListUnique[[i]], gname = gangs.reach.recent10[[1]][i])
})

# Stack the per-gang pieces into a single data frame.
CountryListUnique.wGang.df <- do.call("rbind", CountryListUnique.wGang)
#CountryListUnique.wGang.df

# Top 10 Type of Attacks ----
# Cross-tabulate attack type against the `iyear.category` column of `trr`,
# then flatten the table to long form: Var1 = attack type,
# Var2 = `iyear.category` value, Freq = number of incidents.
atk.type <- table(trr$attacktype1_txt, trr$iyear.category)
atk.type.df <- tbl_df(as.data.frame(atk.type))

# Heat map: one tile per (period, attack type) pair, filled by incident count
# on a blue-white-red scale centred at 2000 and restricted to 500-11000.
# `limits` is spelled out in full (the original `limit` relied on partial
# argument matching), and the tile layer inherits the plot data instead of
# repeating it.
atk.type.df.sqplot <- ggplot(atk.type.df, aes(Var2, Var1)) +
  geom_tile(aes(fill = Freq), color = "white") +
  scale_fill_gradient2(
    low = "blue",
    mid = "white",
    high = "red",
    midpoint = 2000,
    limits = c(500, 11000),
    name = "# of\nAttacks"
  ) +
  ggtitle("Top 10 Type of Attacks:\n1970-2010 Trend") +
  xlab("Years") +
  ylab("Type of Attack") +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 14, hjust = 1)) +
  coord_equal()
atk.type.df.sqplot

## Type of attacks with number of attacks ----
# Incident count for every (attack type, year) combination.
atkType.byYears <- trr %>%
  group_by(attacktype1_txt, iyear) %>%
  summarise(Attack_byYears = n())

# Worldwide bubble plot: one jittered point per (attack type, year), coloured
# by attack type and sized by the number of attacks.
atkType.byYears.bubblePlot <-
  ggplot(data = atkType.byYears, aes(x = as.factor(iyear), y = Attack_byYears)) +
  geom_point(
    aes(color = attacktype1_txt, size = Attack_byYears),
    alpha = 0.6,
    position = "jitter"
  ) +
  scale_size_continuous(range = c(4, 10)) +
  labs(
    title = "Type of Attacks (by color)\nNumber of Attacks (by size)\nFrom; 1970 To:2010",
    x = "Year",
    y = " Number of Attacks"
  ) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1))
atkType.byYears.bubblePlot

# Incident count for every (attack type, region, year) combination.
atkType.byRegion.byYears <- trr %>%
  group_by(attacktype1_txt, region_txt, iyear) %>%
  summarise(Attack_byYears = n())
atkType.byRegion.byYears
## Source: local data frame [2,569 x 4]
## Groups: attacktype1_txt, region_txt
## 
##    attacktype1_txt            region_txt iyear Attack_byYears
## 1    Armed Assault Australasia & Oceania  1973              1
## 2    Armed Assault Australasia & Oceania  1980              1
## 3    Armed Assault Australasia & Oceania  1988              3
## 4    Armed Assault Australasia & Oceania  1989             11
## 5    Armed Assault Australasia & Oceania  1990              8
## 6    Armed Assault Australasia & Oceania  1991              1
## 7    Armed Assault Australasia & Oceania  1992              6
## 8    Armed Assault Australasia & Oceania  1994              2
## 9    Armed Assault Australasia & Oceania  1995              6
## 10   Armed Assault Australasia & Oceania  1996              2
## ..             ...                   ...   ...            ...

# By region: the same bubble plot, one panel per region in two columns with
# shared scales; axis text, ticks and titles are blanked on a white panel.
atkType.byRegion.byYears.bubblePlot <-
  ggplot(
    data = atkType.byRegion.byYears,
    aes(x = as.factor(iyear), y = Attack_byYears)
  ) +
  geom_point(
    aes(color = attacktype1_txt, size = Attack_byYears),
    alpha = 0.7,
    position = "jitter"
  ) +
  scale_size_continuous(range = c(4, 10)) +
  facet_wrap(~region_txt, ncol = 2, scales = "fixed") +
  labs(title = "Type of Attacks (by color)\nNumber of Attacks (by size)\nFrom; 1970 To:2010") +
  theme(
    panel.background = element_rect(fill = "white"),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank()
  )
atkType.byRegion.byYears.bubblePlot

# Top 10 countries most reported in the terrorism report ----
# Count incidents per country name and rank the countries by that count,
# highest first.
Top10CountryList <- trr %>%
  group_by(country_txt) %>%
  summarise(n = n()) %>%
  arrange(desc(n))

# Keep only the name column of the ten highest-ranked rows, then print it.
Top10CountryList <- Top10CountryList[seq_len(10), 1]
Top10CountryList
## Source: local data frame [10 x 1]
## 
##         country_txt
## 1          Colombia
## 2              Iraq
## 3             India
## 4              Peru
## 5       El Salvador
## 6          Pakistan
## 7  Northern Ireland
## 8             Spain
## 9       Philippines
## 10        Sri Lanka
# More about # 1 in the list ----
# filter() keeps only rows where the comparison is TRUE, so incidents with a
# missing country name cannot slip in as all-NA rows the way they do with
# logical `[` indexing.
Colombia <- filter(trr, country_txt == "Colombia")

# Sum of `nkill` (missing values ignored) per `iyear.category` value.
Colombia.year <- Colombia %>%
  group_by(iyear.category) %>%
  summarise(Casualities = sum(nkill, na.rm = TRUE))
Colombia.year
## Source: local data frame [9 x 2]
## 
##   iyear.category Casualities
## 1           1970           1
## 2           1975         282
## 3           1980        1810
## 4           1985        4200
## 5           1990        2285
## 6           1995        3184
## 7           2000         980
## 8           2005         473
## 9           2010          56

# Sum of `nkill` per weapon type, largest first; weapon types with a zero
# total are dropped.
Colombia.weapon <- Colombia %>%
  group_by(weaptype1_txt) %>%
  summarise(Casualities = sum(nkill, na.rm = TRUE)) %>%
  arrange(desc(Casualities)) %>%
  filter(Casualities > 0)
Colombia.weapon
## Source: local data frame [7 x 2]
## 
##                                                                 weaptype1_txt
## 1                                                                    Firearms
## 2                                                   Explosives/Bombs/Dynamite
## 3                                                                     Unknown
## 4                                                                       Melee
## 5                                                                  Incendiary
## 6                                                                    Chemical
## 7 Vehicle (not to include vehicle-borne explosives, i.e., car or truck bombs)
## Variables not shown: Casualities (dbl)

# Bar charts of the two summaries. The original passed `color = 'red4'` to
# `ggplot()`, where it has no effect, and gave both charts the same
# copy-pasted, misspelled title; each chart now carries its own title.
Colombia.year.barchart <- ggplot(Colombia.year, aes(x = iyear.category, y = Casualities)) +
  geom_bar(stat = "identity", fill = "grey") +
  ggtitle("Colombia - Casualties by Period")

Colombia.weapon.barchart <- ggplot(Colombia.weapon, aes(x = weaptype1_txt, y = Casualities)) +
  geom_bar(stat = "identity", fill = "grey") +
  ggtitle("Colombia - Casualties by Weapon Type")

# Draw the two charts side by side. ggplot2 renders through grid, so the base
# graphics setting `par(mfrow = c(1, 2))` used originally never applied to
# them; a 1 x 2 grid layout gives the intended arrangement without touching
# base graphics state.
grid::grid.newpage()
grid::pushViewport(grid::viewport(layout = grid::grid.layout(nrow = 1, ncol = 2)))
print(
  Colombia.year.barchart,
  vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 1)
)
print(
  Colombia.weapon.barchart,
  vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 2)
)
grid::popViewport()