# Sequence Analysis

library(arulesSequences)

## Loading required package: arules

## Loading required package: Matrix

## Loading required package: lattice

## Attaching package: 'Matrix'

## The following object(s) are masked from 'package:stats':
## 
## toeplitz

## Attaching package: 'arules'

## The following object(s) are masked from 'package:base':
## 
## %in%, write

# Load the `zaki` example data set and print it as a data frame: one row per
# event, giving its sequence ID, event ID, item count (SIZE) and item set.
data("zaki")
as(zaki, "data.frame")

##    transactionID.sequenceID transactionID.eventID transactionID.SIZE
## 1                         1                    10                  2
## 2                         1                    15                  3
## 3                         1                    20                  3
## 4                         1                    25                  4
## 5                         2                    15                  3
## 6                         2                    20                  1
## 7                         3                    10                  3
## 8                         4                    10                  3
## 9                         4                    20                  2
## 10                        4                    25                  3
##        items
## 1      {C,D}
## 2    {A,B,C}
## 3    {A,B,F}
## 4  {A,C,D,F}
## 5    {A,B,F}
## 6        {E}
## 7    {A,B,F}
## 8    {D,G,H}
## 9      {B,F}
## 10   {A,G,H}

# Mine frequent sequences from `zaki` with cspade() at a support threshold of
# 0.4. verbose = TRUE makes the call print its parameter specification,
# algorithmic control settings, and per-phase timings.
s1 <- cspade(
  zaki,
  parameter = list(support = 0.4),
  control = list(verbose = TRUE)
)

## 
## parameter specification:

## Note: Method with signature "numeric#character" chosen for function
## "coerce", target signature "integer#character".  "ANY#character" would
## also be valid

## support : 0.4
## maxsize :  10
## maxlen  :  10
## 
## algorithmic control:
## bfstype : FALSE
## verbose :  TRUE
## summary : FALSE
## 
## preprocessing ... 1 partition(s), 0 MB [0.035s]
## mining transactions ... 0 MB [0.014s]
## reading sequences ... [0.19s]
## 
## total elapsed time: 0.234s

# Summarise the mined sequences: most frequent items and elements, size and
# length distributions, support statistics, and the mining info.
summary(s1)

## set of 18 sequences with
## 
## most frequent items:
##       A       B       F       D (Other) 
##      11      10      10       8      28 
## 
## most frequent elements:
##     {A}     {D}     {B}     {F}   {B,F} (Other) 
##       8       8       4       4       4       3 
## 
## element (sequence) size distribution:
## sizes
## 1 2 3 
## 8 7 3 
## 
## sequence length distribution:
## lengths
## 1 2 3 4 
## 4 8 5 1 
## 
## summary of quality measures:
##     support     
##  Min.   :0.500  
##  1st Qu.:0.500  
##  Median :0.500  
##  Mean   :0.653  
##  3rd Qu.:0.750  
##  Max.   :1.000  
## 
## mining info:
##  data ntransactions nsequences support
##  zaki            10          4     0.4

# List every mined sequence in `s1` together with its support.
as(s1, "data.frame")

##           sequence support
## 1            <{A}>    1.00
## 2            <{B}>    1.00
## 3            <{D}>    0.50
## 4            <{F}>    1.00
## 5          <{A,F}>    0.75
## 6          <{B,F}>    1.00
## 7        <{D},{F}>    0.50
## 8      <{D},{B,F}>    0.50
## 9        <{A,B,F}>    0.75
## 10         <{A,B}>    0.75
## 11       <{D},{B}>    0.50
## 12       <{B},{A}>    0.50
## 13       <{D},{A}>    0.50
## 14       <{F},{A}>    0.50
## 15   <{D},{F},{A}>    0.50
## 16     <{B,F},{A}>    0.50
## 17 <{D},{B,F},{A}>    0.50
## 18   <{D},{B},{A}>    0.50

# Mine again at the same support threshold, adding `maxwin = 5`, then list the
# resulting sequences with their support.
# NOTE(review): `maxwin` is presumably a maximum time window between sequence
# elements, and some arulesSequences releases reportedly disable it with a
# warning -- confirm against the installed version's documentation.
s2 <- cspade(
  zaki,
  parameter = list(support = 0.4, maxwin = 5)
)
as(s2, "data.frame")

##       sequence support
## 1        <{A}>    1.00
## 2        <{B}>    1.00
## 3        <{D}>    0.50
## 4        <{F}>    1.00
## 5      <{A,F}>    0.75
## 6      <{B,F}>    1.00
## 7    <{A,B,F}>    0.75
## 8      <{A,B}>    0.75
## 9    <{B},{A}>    0.50
## 10   <{F},{A}>    0.50
## 11 <{B,F},{A}>    0.50

You can also embed plots, for example: