Load the Cleansed Dataset

# Restore the objects saved in SQF_clean.rda into the workspace
# (presumably this provides the `dat` data frame used below -- confirm).
load(file = "SQF_clean.rda")

Convert dataset to transactions

To perform itemset mining, we first need to convert our dataset to transactions.

library(arules)
## Warning: package 'arules' was built under R version 3.2.3
## Loading required package: Matrix
## 
## Attaching package: 'arules'
## 
## The following objects are masked from 'package:base':
## 
##     %in%, abbreviate, write
# First attempt at coercing the data frame to arules transactions. try()
# keeps the script running if the coercion fails; print() shows either the
# resulting transactions object or the captured error.
print(try({
  trans <- as(dat, "transactions")
}))
## [1] "Error in asMethod(object) : \n  column(s) 2, 3, 6, 7, 74, 79, 80 not logical or a factor. Discretize the columns first.\n"
## attr(,"class")
## [1] "try-error"
## attr(,"condition")
## <simpleError in asMethod(object): column(s) 2, 3, 6, 7, 74, 79, 80 not logical or a factor. Discretize the columns first.>
# Names of the columns that the coercion error reported as neither logical
# nor factor.
names(dat)[c(2, 3, 6, 7, 74, 79, 80)]
## [1] "datestop" "timestop" "perobs"   "perstop"  "age"      "xcoord"  
## [7] "ycoord"

As suggested by the error message, we need to discretize those columns. “xcoord” and “ycoord” are removed rather than discretized.

# Discretize the columns that are neither logical nor factor so the data
# frame can be coerced to transactions. Each original column is kept in a
# variable of the same name before being replaced by its binned version.

# datestop: 12 equal-width intervals.
datestop <- dat[, "datestop"]
dat[, "datestop"] <- discretize(datestop, method = "interval", categories = 12)

# timestop: 24 equal-width intervals.
timestop <- dat[, "timestop"]
dat[, "timestop"] <- discretize(timestop, method = "interval", categories = 24)

# perobs: 3 equal-frequency bins.
perobs <- dat[, "perobs"]
dat[, "perobs"] <- discretize(perobs, method = "frequency", categories = 3)

# perstop: 3 equal-frequency bins.
perstop <- dat[, "perstop"]
dat[, "perstop"] <- discretize(perstop, method = "frequency", categories = 3)

# age: 3 equal-frequency bins. Bin the age values themselves; passing
# `timestop` here would fill the age column with binned stop times.
age <- dat[, "age"]
dat[, "age"] <- discretize(age, method = "frequency", categories = 3)

# The coordinates are dropped rather than discretized.
dat[, "xcoord"] <- NULL
dat[, "ycoord"] <- NULL

Let’s create a new column to represent whether a pedestrian is armed and clean out some other columns.

# Build a single `armed` flag that is TRUE when any of the indicator columns
# below is TRUE, then drop the indicators that were folded into it.
# NOTE(review): `contrabn` is included in the flag alongside the weapon
# columns -- confirm that this is intended for "armed".
armed_cols <- c("pistol", "riflshot", "asltweap", "knifcuti",
                "machgun", "othrweap", "contrabn")
dat$armed <- Reduce(`|`, dat[armed_cols])
dat[armed_cols] <- NULL

# Remove further columns that are not wanted as items.
unused_cols <- c("offverb", "offshld", "officrid", "offunif", "typeofid")
dat[unused_cols] <- NULL

Now let’s redo the transaction conversion.

# Retry the coercion to transactions on the prepared data frame. try() still
# guards against a failure; print() shows a summary of the result.
print(try({
  trans <- as(dat, "transactions")
}))
## transactions in sparse format with
##  45787 transactions (rows) and
##  716 items (columns)
# Plot the 20 most frequent items.
itemFrequencyPlot(trans, topN = 20)

# Line plot of the absolute support count of every item, ordered from the
# most to the least frequent.
support_counts <- itemFrequency(trans, type = "absolute")
plot(sort(support_counts, decreasing = TRUE),
     type = "l", xlab = "Items", ylab = "Support Count")

# Mine frequent itemsets containing 2 to 5 items with a minimum support of 0.5.
freq_params <- list(target = "frequent", support = 0.5, minlen = 2, maxlen = 5)
freqs <- apriori(trans, parameter = freq_params)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport support minlen maxlen
##          NA    0.1    1 none FALSE            TRUE     0.5      2      5
##             target   ext
##  frequent itemsets FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 22893 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[716 item(s), 45787 transaction(s)] done [0.07s].
## sorting and recoding items ... [12 item(s)] done [0.00s].
## creating transaction tree ... done [0.02s].
## checking subsets of size 1 2 3 4 5 done [0.00s].
## writing ... [127 set(s)] done [0.00s].
## creating S4 object  ... done [0.01s].
# Order the itemsets by support and display the first ten.
freqs <- sort(freqs, by = "support")
top_itemsets <- head(freqs, n = 10)
inspect(top_itemsets)
##    items                        support  
## 37 {explnstp,sex=M}             0.9274467
## 36 {explnstp,eyecolor=BR}       0.8911481
## 35 {sex=M,eyecolor=BR}          0.8309345
## 87 {explnstp,sex=M,eyecolor=BR} 0.8296678
## 34 {inout=O,explnstp}           0.8141612
## 31 {trhsloc=P,explnstp}         0.8102955
## 27 {explnstp,forceuse= }        0.7881713
## 33 {inout=O,sex=M}              0.7677944
## 86 {inout=O,explnstp,sex=M}     0.7667679
## 30 {trhsloc=P,sex=M}            0.7611549
# Mine association rules with 2 to 5 items and a minimum support of 0.5.
# No confidence is given, so apriori() applies its default threshold.
rule_params <- list(target = "rules", support = 0.5, minlen = 2, maxlen = 5)
rules <- apriori(trans, parameter = rule_params)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport support minlen maxlen
##         0.8    0.1    1 none FALSE            TRUE     0.5      2      5
##  target   ext
##   rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 22893 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[716 item(s), 45787 transaction(s)] done [0.07s].
## sorting and recoding items ... [12 item(s)] done [0.01s].
## creating transaction tree ... done [0.02s].
## checking subsets of size 1 2 3 4 5 done [0.00s].
## writing ... [280 rule(s)] done [0.00s].
## creating S4 object  ... done [0.01s].
# Order the rules by lift and display the first ten.
rules <- sort(rules, by = "lift")
top_rules <- head(rules, n = 10)
inspect(top_rules)
##    lhs              rhs         support confidence     lift
## 1  {trhsloc=P,                                             
##     explnstp,                                              
##     frisked}     => {inout=O} 0.5133116  0.9288989 1.139369
## 2  {trhsloc=P,                                             
##     frisked}     => {inout=O} 0.5139887  0.9287659 1.139206
## 3  {trhsloc=P,                                             
##     explnstp,                                              
##     forceuse= ,                                            
##     sex=M}       => {inout=O} 0.5520999  0.9273294 1.137444
## 4  {trhsloc=P,                                             
##     forceuse= ,                                            
##     sex=M}       => {inout=O} 0.5525367  0.9272467 1.137342
## 5  {trhsloc=P,                                             
##     explnstp,                                              
##     forceuse= ,                                            
##     eyecolor=BR} => {inout=O} 0.5246686  0.9240682 1.133443
## 6  {trhsloc=P,                                             
##     forceuse= ,                                            
##     eyecolor=BR} => {inout=O} 0.5251491  0.9239903 1.133348
## 7  {trhsloc=P,                                             
##     explnstp,                                              
##     forceuse= }  => {inout=O} 0.5868915  0.9227071 1.131774
## 8  {trhsloc=P,                                             
##     forceuse= }  => {inout=O} 0.5873938  0.9226415 1.131694
## 9  {trhsloc=P,                                             
##     explnstp,                                              
##     sex=M,                                                 
##     eyecolor=BR} => {inout=O} 0.6243694  0.9219854 1.130889
## 10 {trhsloc=P,                                             
##     sex=M,                                                 
##     eyecolor=BR} => {inout=O} 0.6250901  0.9218307 1.130699