# This is an in-depth exploratory analysis of robbery in the Bay Area.
library(ggplot2)
library(lubridate)
library(ggmap)
# Load the full training set, keeping text columns as character vectors,
# then keep only the rows whose Category is exactly "ROBBERY".
train <- read.csv(file = "train.csv", header = TRUE, stringsAsFactors = FALSE)
# %in% never yields NA, so rows with a missing Category are dropped.
rob <- train[train[["Category"]] %in% "ROBBERY", ]

# Collapse the free-text robbery description into a coarse `weapon` label:
# "gun", "knife", "bodily force", or "unspecified" when no keyword matches.
# The masks are applied in sequence, so when a description matches several
# keywords the later assignment wins (STRONGARM > BODILY > KNIFE > GUN).
rob[["weapon"]] <- "unspecified"
rob$weapon[grepl("GUN", rob[["Descript"]])] <- "gun"
rob$weapon[grepl("KNIFE", rob[["Descript"]])] <- "knife"
rob$weapon[grepl("BODILY", rob[["Descript"]])] <- "bodily force"
rob$weapon[grepl("STRONGARM", rob[["Descript"]])] <- "bodily force"

# Parse the date variable (first column) into a UTC date-time, then derive
# numeric year, month, and hour-of-day columns from it.
rob[[1]] <- parse_date_time(rob[[1]], orders = "%Y-%m-%d %H:%M:%S", tz = "UTC")
rob[["year"]] <- as.numeric(year(rob[[1]]))
rob[["month"]] <- as.numeric(month(rob[[1]]))
rob[["hour"]] <- as.numeric(hour(rob[[1]]))


# Plot the number of robbery cases per weapon type as a horizontal bar chart,
# with the case count printed next to each bar.
# Note: coord_flip() only swaps how the axes are drawn. xlab() still labels
# the `x` aesthetic (weapon) and ylab() still labels the computed count, so
# the labels are assigned by aesthetic, not by on-screen position.
ggplot(data = rob, aes(x = weapon, fill = weapon)) +
  geom_bar() +
  # Print each bar's count; the negative hjust nudges the text past the bar end.
  stat_count(aes(label = ..count..), geom = "text", hjust = -.1) +
  coord_flip() +
  xlab("Weapon Used") +
  ylab("Case Count") +
  ggtitle("Types of Weapon Used in Robbery")

# Robbery locations: fetch a black-and-white base map of San Francisco and
# overlay every robbery as a semi-transparent point, one panel per weapon.
map <- get_map(
  location = "sanfrancisco",
  zoom = 12,
  source = "osm",
  color = "bw"
)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=sanfrancisco&zoom=12&size=640x640&scale=2&maptype=terrain&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=sanfrancisco&sensor=false
ggmap(map) +
  geom_point(
    mapping = aes(x = X, y = Y, color = as.factor(weapon)),
    data = rob,
    alpha = I(1 / 20)
  ) +
  # One colour per weapon label.
  scale_color_manual(
    name = "Weapon",
    values = c("#f08080", "#22bb22", "#00ced1", "#9900cc")
  ) +
  facet_wrap(~weapon) +
  ggtitle("Robbery Locations by Weapon Type")
## Warning: Removed 1 rows containing missing values (geom_point).

# Robbery density by weapon type: draw 2D kernel-density contours as filled
# polygons on the base map, with both fill and transparency following the
# density level, one panel per weapon.
ggmap(map) +
  stat_density2d(
    mapping = aes(x = X, y = Y, fill = ..level.., alpha = ..level..),
    data = rob,
    geom = "polygon",
    n = 500
  ) +
  scale_fill_gradient(low = "#ff3333", high = "#b30000") +
  facet_wrap(~weapon) +
  ggtitle("Robbery Density by Weapon Type")
## Warning: Removed 1 rows containing non-finite values (stat_density2d).

# Plot the distribution of robberies over the hours of the day, with one
# density curve per weapon type.
# Gun violence peaked at 8pm.
ggplot(rob) +
  geom_density(aes(x = hour, color = weapon)) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    axis.line = element_line(color = "black")
  ) +
  # `hour` is the hour-of-day taken from the timestamp, so it runs 0-23;
  # the breaks cover exactly that range (midnight included, no hour 24).
  scale_x_continuous(breaks = 0:23) +
  ggtitle("Robbery by Hours")

# Data from 2015 are incomplete, so they are excluded from the month plot to
# reduce bias. Rows with a missing year are dropped as well.
monthplot <- subset(rob, year != 2015)
# Horizontal bars of case counts per calendar month, one panel per weapon.
ggplot(monthplot, aes(x = as.factor(month), fill = weapon)) +
  geom_bar(stat = "count") +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    axis.line = element_line(color = "black")
  ) +
  facet_grid(~weapon) +
  coord_flip() +
  ggtitle("Robbery by Months")

# Robbery by year: horizontal bars of case counts per year, one panel per
# weapon type.
ggplot(rob, aes(x = as.factor(year), fill = weapon)) +
  geom_bar(stat = "count") +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    axis.line = element_line(color = "black")
  ) +
  coord_flip() +
  facet_grid(~weapon) +
  ggtitle("Robbery by Year")

# Robbery by day of the week: horizontal bars of case counts per weekday,
# one panel per weapon type.
ggplot(rob) +
  geom_bar(
    stat = "count",
    aes(
      # Explicit levels put the days in calendar order, not alphabetical order.
      x = factor(
        DayOfWeek,
        levels = c(
          "Monday", "Tuesday", "Wednesday", "Thursday",
          "Friday", "Saturday", "Sunday"
        )
      ),
      fill = weapon
    )
  ) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    axis.line = element_line(color = "black")
  ) +
  coord_flip() +
  facet_grid(~weapon) +
  # Without an explicit label the axis title would be the deparsed factor(...)
  # expression. xlab() labels the `x` aesthetic even under coord_flip().
  xlab("Day of Week") +
  ggtitle("Robbery by Days of a Week")