本中心將參照境外美國地區的犯罪資料,經由雲端運算之大數據分析技術,萃取資料關聯性,結合機器學習模型探索犯罪模型的特徵或關鍵條件規則(如在怎樣的境況下現行犯容易被逮補等,何種犯罪類型較不容易當場抓獲現行犯)等,進而建構犯罪資料探勘及分析平台,將各類分析方法、資料輸入及輸出介面、視覺化時空資料展示等功能整合於內,供相關單位使用。
繪製基本地圖
ggmap 套件中的 get_map 函數可以讓我們在 R 的環境中直接下載地圖,接著再呼叫 ggmap 函數就可以畫出來。地圖的位置是透過 location 參數來指定,直接輸入地名即可,而 zoom 則是控制地圖的大小。
The Key the for this exploratory analysis is in the ggmap.
map<-get_map(location=“sanfrancisco”,zoom=12,source=“osm”) taiwan_map <- get_map(location = ‘Taiwan’, zoom = 8, source=“osm”) taipei_map <- get_map(location = ‘taipei’, zoom = 12, source=“osm”)
get train dataset from: https://www.kaggle.com/c/sf-crime/data
# This is an in-depth exploratory analysis toward robbery in the Bay area.
library(ggplot2)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(ggmap)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:lubridate':
##
## intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
train <- read.csv("C:/Users/Janpu/Documents/R_Files/train.csv", header = TRUE, stringsAsFactors = FALSE)
map<-get_map(location="sanfrancisco",zoom=12)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=sanfrancisco&zoom=12&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=sanfrancisco&sensor=false
rob <- train[which(train[, "Category"] == "ROBBERY"), ]
# The description of robbery can be simplified with regards to weapons used in the criminal acts
# "gun", "bodily force", "knife", and "unspecified". Let's parse out these strings.
rob[, "weapon"] <- NA
rob[grep("GUN", rob[, "Descript"]), "weapon"] <- "gun"
rob[grep("KNIFE", rob[, "Descript"]), "weapon"] <- "knife"
rob[grep("BODILY", rob[, "Descript"]), "weapon"] <- "bodily force"
rob[grep("STRONGARM", rob[, "Descript"]), "weapon"] <- "bodily force"
rob[, "weapon"][which(is.na(rob[, "weapon"]))] <- "unspecified"
# pare the date variable, obatining more information
rob[, 1] <- parse_date_time(rob[, 1], "%Y-%m-%d %H:%M:%S", tz = "UTC")
rob[, "year"] <- as.numeric(format(rob[, 1], "%Y"))
rob[, "month"] <- as.numeric(format(rob[, 1], "%m"))
rob[, "hour"] <- as.numeric(format(rob[, 1], "%H"))
# plot robbery weapon
ggplot(data = rob, aes(x = weapon, fill = weapon))+
geom_bar()+
stat_count(aes(label = ..count..), geom = "text", hjust = -.1)+
coord_flip()+
xlab("Case Count")+
ylab("Weapon Used")+
ggtitle("Types of Weapon Used in Robbery")
ggmap(map)+
geom_point(data = rob, alpha = I(1/20), aes(x = X, y = Y, color = as.factor(weapon)))+
scale_color_manual(name = "Weapon", values = c("#f08080","#22bb22", "#00ced1", "#9900cc"))+
facet_wrap(~weapon)+
ggtitle("Robbery Locations by Weapon Type")
## Warning: Removed 1 rows containing missing values (geom_point).
ggmap(map)+
stat_density2d(data = rob, geom = "polygon", n = 500,
aes(x = X, y = Y, fill = ..level.., alpha = ..level..))+
scale_fill_gradient(low = "#ff3333", high = "#b30000")+
facet_wrap(~weapon)+
ggtitle("Robbery Density by Weapon Type")
## Warning: Removed 1 rows containing non-finite values (stat_density2d).
## plot by hours in a day ## gun violence peaked at 8pm.
ggplot(rob)+
geom_density(aes(x = hour, color = weapon))+
theme_bw()+
theme(panel.border = element_blank(),
axis.line = element_line(color = "black"))+
scale_x_continuous(breaks = c(1:24))+
ggtitle("Robbery by Hours")
# data from 2015 are incomplete, so they should be removed for month plot to reduce bias
monthplot <- rob[which(rob[, 'year'] != 2015), ]
ggplot(monthplot)+
geom_bar(stat = 'count', aes(x = as.factor(month), fill = weapon))+
theme_bw()+
theme(panel.border = element_blank(),
axis.line = element_line(color = "black"))+
facet_grid(~weapon)+
coord_flip()+
ggtitle("Robbery by Months")
# Robbery by year
ggplot(rob)+
geom_bar(stat = 'count', aes(x = as.factor(year), fill = weapon))+
theme_bw()+
theme(panel.border = element_blank(),
axis.line = element_line(color = "black"))+
coord_flip()+
facet_grid(~weapon)+
ggtitle("Robbery by Year")
# Robbery by days in a Week
ggplot(rob)+
geom_bar(stat = 'count', aes(x = factor(as.factor(DayOfWeek),
c("Monday", "Tuesday",
"Wednesday","Thursday"
,"Friday", "Saturday","Sunday"))
, fill = weapon))+
theme_bw()+
theme(panel.border = element_blank(),
axis.line = element_line(color = "black"))+
coord_flip()+
facet_grid(~weapon)+
ggtitle("Robbery by Days of a Week")