# Restore the objects saved in SQF_clean.rda; the code below works on `dat`,
# presumably a data frame provided by this file.
load(file = "SQF_clean.rda")
In order to perform itemset mining, we need to convert our dataset to transactions.
library(arules)
## Warning: package 'arules' was built under R version 3.2.3
## Loading required package: Matrix
##
## Attaching package: 'arules'
##
## The following objects are masked from 'package:base':
##
## %in%, abbreviate, write
# First attempt at coercing the data frame to arules transactions. try()
# captures a failure instead of aborting, and print() shows the outcome.
print(try({
  trans <- as(dat, "transactions")
}))
## [1] "Error in asMethod(object) : \n column(s) 2, 3, 6, 7, 74, 79, 80 not logical or a factor. Discretize the columns first.\n"
## attr(,"class")
## [1] "try-error"
## attr(,"condition")
## <simpleError in asMethod(object): column(s) 2, 3, 6, 7, 74, 79, 80 not logical or a factor. Discretize the columns first.>
# Map the column positions reported in the error message back to their names.
names(dat)[c(2, 3, 6, 7, 74, 79, 80)]
## [1] "datestop" "timestop" "perobs" "perstop" "age" "xcoord"
## [7] "ycoord"
As suggested by the error message, we need to discretize those columns. “xcoord” and “ycoord” are not discretized; they are removed instead.
# Discretize the columns that blocked the conversion to transactions.

# Equal-width bins: 12 intervals for the stop date, 24 for the stop time.
datestop <- dat[, "datestop"]
dat[, "datestop"] <- discretize(datestop, method = "interval", categories = 12)
timestop <- dat[, "timestop"]
dat[, "timestop"] <- discretize(timestop, method = "interval", categories = 24)

# Equal-frequency bins (3 each) for perobs, perstop and age.
perobs <- dat[, "perobs"]
dat[, "perobs"] <- discretize(perobs, method = "frequency", categories = 3)
perstop <- dat[, "perstop"]
dat[, "perstop"] <- discretize(perstop, method = "frequency", categories = 3)
age <- dat[, "age"]
# Bin the age values themselves. The original passed `timestop` here, which
# silently filled the age column with binned stop times.
dat[, "age"] <- discretize(age, method = "frequency", categories = 3)

# The raw coordinates are dropped rather than discretized.
dat[, "xcoord"] <- NULL
dat[, "ycoord"] <- NULL
Let’s create a new column to represent whether a pedestrian is armed (carrying any weapon or contraband) and clean out some other columns.
# Collapse the individual indicator columns into a single `armed` flag by
# OR-ing them together row by row.
# NOTE(review): `contrabn` is folded into `armed` as well; confirm that
# contraband is meant to count as being armed.
armed_flags <- c("pistol", "riflshot", "asltweap", "knifcuti",
                 "machgun", "othrweap", "contrabn")
dat$armed <- Reduce(`|`, dat[armed_flags])

# Drop the indicator columns now summarised by `armed`, followed by the
# officer-related columns and `typeofid`.
for (col in c("contrabn", "pistol", "riflshot", "asltweap", "knifcuti",
              "machgun", "othrweap",
              "offverb", "offshld", "officrid", "offunif", "typeofid")) {
  dat[, col] <- NULL
}
Now let’s redo the transaction conversion.
# Retry the coercion to transactions after the column clean-up above. try()
# captures a failure instead of aborting, and print() shows the outcome.
print(try({
  trans <- as(dat, "transactions")
}))
## transactions in sparse format with
## 45787 transactions (rows) and
## 716 items (columns)
# Bar chart of the 20 most frequent items.
itemFrequencyPlot(trans, topN = 20)

# Line plot of the absolute support counts of all items, in decreasing order.
plot(
  sort(itemFrequency(trans, type = "absolute"), decreasing = TRUE),
  type = "l",
  xlab = "Items",
  ylab = "Support Count"
)
# Mine frequent itemsets of 2 to 5 items with a minimum support of 0.5.
freqs <- apriori(
  trans,
  parameter = list(
    target = "frequent",
    support = 0.5,
    minlen = 2,
    maxlen = 5
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport support minlen maxlen
## NA 0.1 1 none FALSE TRUE 0.5 2 5
## target ext
## frequent itemsets FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 22893
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[716 item(s), 45787 transaction(s)] done [0.07s].
## sorting and recoding items ... [12 item(s)] done [0.00s].
## creating transaction tree ... done [0.02s].
## checking subsets of size 1 2 3 4 5 done [0.00s].
## writing ... [127 set(s)] done [0.00s].
## creating S4 object ... done [0.01s].
# Order the itemsets by support and display the first ten of the ranking.
freqs <- sort(freqs, by = "support")
top_freqs <- head(freqs, n = 10)
inspect(top_freqs)
## items support
## 37 {explnstp,sex=M} 0.9274467
## 36 {explnstp,eyecolor=BR} 0.8911481
## 35 {sex=M,eyecolor=BR} 0.8309345
## 87 {explnstp,sex=M,eyecolor=BR} 0.8296678
## 34 {inout=O,explnstp} 0.8141612
## 31 {trhsloc=P,explnstp} 0.8102955
## 27 {explnstp,forceuse= } 0.7881713
## 33 {inout=O,sex=M} 0.7677944
## 86 {inout=O,explnstp,sex=M} 0.7667679
## 30 {trhsloc=P,sex=M} 0.7611549
# Mine association rules of 2 to 5 items with a minimum support of 0.5.
# No confidence threshold is passed, so apriori() uses its default.
rules <- apriori(
  trans,
  parameter = list(
    target = "rules",
    support = 0.5,
    minlen = 2,
    maxlen = 5
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport support minlen maxlen
## 0.8 0.1 1 none FALSE TRUE 0.5 2 5
## target ext
## rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 22893
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[716 item(s), 45787 transaction(s)] done [0.07s].
## sorting and recoding items ... [12 item(s)] done [0.01s].
## creating transaction tree ... done [0.02s].
## checking subsets of size 1 2 3 4 5 done [0.00s].
## writing ... [280 rule(s)] done [0.00s].
## creating S4 object ... done [0.01s].
# Order the rules by lift and display the first ten of the ranking.
rules <- sort(rules, by = "lift")
top_rules <- head(rules, n = 10)
inspect(top_rules)
## lhs rhs support confidence lift
## 1 {trhsloc=P,
## explnstp,
## frisked} => {inout=O} 0.5133116 0.9288989 1.139369
## 2 {trhsloc=P,
## frisked} => {inout=O} 0.5139887 0.9287659 1.139206
## 3 {trhsloc=P,
## explnstp,
## forceuse= ,
## sex=M} => {inout=O} 0.5520999 0.9273294 1.137444
## 4 {trhsloc=P,
## forceuse= ,
## sex=M} => {inout=O} 0.5525367 0.9272467 1.137342
## 5 {trhsloc=P,
## explnstp,
## forceuse= ,
## eyecolor=BR} => {inout=O} 0.5246686 0.9240682 1.133443
## 6 {trhsloc=P,
## forceuse= ,
## eyecolor=BR} => {inout=O} 0.5251491 0.9239903 1.133348
## 7 {trhsloc=P,
## explnstp,
## forceuse= } => {inout=O} 0.5868915 0.9227071 1.131774
## 8 {trhsloc=P,
## forceuse= } => {inout=O} 0.5873938 0.9226415 1.131694
## 9 {trhsloc=P,
## explnstp,
## sex=M,
## eyecolor=BR} => {inout=O} 0.6243694 0.9219854 1.130889
## 10 {trhsloc=P,
## sex=M,
## eyecolor=BR} => {inout=O} 0.6250901 0.9218307 1.130699