結合機器學習模型,萃取資料關聯性,建構犯罪資料探勘平台

本中心將參照境外美國地區的犯罪資料,經由雲端運算之大數據分析技術,萃取資料關聯性,結合機器學習模型探索犯罪模型的特徵或關鍵條件規則(如在怎樣的境況下現行犯容易被逮補等,何種犯罪類型較不容易當場抓獲現行犯)等,進而建構犯罪資料探勘及分析平台,將各類分析方法、資料輸入及輸出介面、視覺化時空資料展示等功能整合於內,供相關單位使用。

繪製基本地圖

ggmap 套件中的 get_map 函數可以讓我們在 R 的環境中直接下載地圖,接著再呼叫 ggmap 函數就可以畫出來。地圖的位置是透過 location 參數來指定,直接輸入地名即可,而 zoom 則是控制地圖的大小。

Exploratory San Francisco Crime Analysis

The Key the for this exploratory analysis is in the ggmap.

map<-get_map(location=“sanfrancisco”,zoom=12,source=“osm”) taiwan_map <- get_map(location = ‘Taiwan’, zoom = 8, source=“osm”) taipei_map <- get_map(location = ‘taipei’, zoom = 12, source=“osm”)

get train dataset from: https://www.kaggle.com/c/sf-crime/data

# This is an in-depth exploratory analysis toward robbery in the Bay area. 

library(ggplot2)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(ggmap)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
train <- read.csv("C:/Users/Janpu/Documents/R_Files/train.csv", header = TRUE, stringsAsFactors = FALSE)
map<-get_map(location="sanfrancisco",zoom=12)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=sanfrancisco&zoom=12&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=sanfrancisco&sensor=false
rob   <- train[which(train[, "Category"] == "ROBBERY"), ]

# The description of robbery can be simplified with regards to weapons used in the criminal acts
# "gun", "bodily force", "knife", and "unspecified". Let's parse out these strings.
rob[, "weapon"] <- NA
rob[grep("GUN", rob[, "Descript"]), "weapon"]       <- "gun"
rob[grep("KNIFE", rob[, "Descript"]), "weapon"]     <- "knife"
rob[grep("BODILY", rob[, "Descript"]), "weapon"]    <- "bodily force"
rob[grep("STRONGARM", rob[, "Descript"]), "weapon"] <- "bodily force"
rob[, "weapon"][which(is.na(rob[, "weapon"]))]      <- "unspecified"

# pare the date variable, obatining more information
rob[, 1] <- parse_date_time(rob[, 1], "%Y-%m-%d %H:%M:%S", tz = "UTC")
rob[, "year"]  <- as.numeric(format(rob[, 1], "%Y"))
rob[, "month"] <- as.numeric(format(rob[, 1], "%m"))
rob[, "hour"]  <- as.numeric(format(rob[, 1], "%H"))


# plot robbery weapon 
ggplot(data = rob, aes(x = weapon, fill = weapon))+
  geom_bar()+
  stat_count(aes(label = ..count..), geom = "text", hjust = -.1)+
  coord_flip()+
  xlab("Case Count")+
  ylab("Weapon Used")+
  ggtitle("Types of Weapon Used in Robbery")

Robbery locations by weapon types

ggmap(map)+
  geom_point(data = rob, alpha = I(1/20), aes(x = X, y = Y, color = as.factor(weapon)))+
  scale_color_manual(name = "Weapon", values = c("#f08080","#22bb22", "#00ced1", "#9900cc"))+
  facet_wrap(~weapon)+
  ggtitle("Robbery Locations by Weapon Type")
## Warning: Removed 1 rows containing missing values (geom_point).

Robbery Density by Weapon types

ggmap(map)+
  stat_density2d(data = rob, geom = "polygon", n = 500, 
                 aes(x = X, y = Y, fill = ..level.., alpha = ..level..))+
  scale_fill_gradient(low = "#ff3333", high = "#b30000")+
  facet_wrap(~weapon)+
  ggtitle("Robbery Density by Weapon Type")
## Warning: Removed 1 rows containing non-finite values (stat_density2d).

## plot by hours in a day ## gun violence peaked at 8pm.

ggplot(rob)+
  geom_density(aes(x = hour, color = weapon))+
  theme_bw()+
  theme(panel.border = element_blank(),
        axis.line = element_line(color = "black"))+
  scale_x_continuous(breaks = c(1:24))+
  ggtitle("Robbery by Hours")

plot by years

# data from 2015 are incomplete, so they should be removed for month plot to reduce bias
monthplot <- rob[which(rob[, 'year'] != 2015), ]
ggplot(monthplot)+
  geom_bar(stat = 'count', aes(x = as.factor(month), fill = weapon))+
  theme_bw()+
  theme(panel.border = element_blank(),
        axis.line = element_line(color = "black"))+
  facet_grid(~weapon)+
  coord_flip()+
  ggtitle("Robbery by Months")

# Robbery by year
ggplot(rob)+
  geom_bar(stat = 'count', aes(x = as.factor(year), fill = weapon))+
  theme_bw()+
  theme(panel.border = element_blank(),
        axis.line = element_line(color = "black"))+
  coord_flip()+
  facet_grid(~weapon)+
  ggtitle("Robbery by Year")

plot by days in a week

# Robbery by days in a Week
ggplot(rob)+
  geom_bar(stat = 'count', aes(x = factor(as.factor(DayOfWeek), 
                                          c("Monday", "Tuesday", 
                                            "Wednesday","Thursday"
                                            ,"Friday", "Saturday","Sunday"))
                               , fill = weapon))+
  theme_bw()+
  theme(panel.border = element_blank(),
        axis.line = element_line(color = "black"))+
  coord_flip()+
  facet_grid(~weapon)+
  ggtitle("Robbery by Days of a Week")