library(tm)
library(arules)
# Sample data: three short documents made of space-separated tokens.
word_list <- c(
  "word1 word2",
  "word1 word2 word3",
  "word3 word5"
)
# Wrap the character vector as a corpus (one document per element), then
# count term occurrences per document; terms are rows, documents are columns.
my_corpus <- tm::Corpus(tm::VectorSource(word_list))
my_tdm <- tm::TermDocumentMatrix(my_corpus)
tm::inspect(my_tdm)
## <<TermDocumentMatrix (terms: 4, documents: 3)>>
## Non-/sparse entries: 7/5
## Sparsity : 42%
## Maximal term length: 5
## Weighting : term frequency (tf)
##
## Docs
## Terms 1 2 3
## word1 1 1 0
## word2 1 1 0
## word3 0 1 1
## word5 0 0 1
# Data processing
# Dense terms x documents count matrix taken from the term-document matrix.
my_matrix <- as.matrix(my_tdm)
my_df <- as.data.frame(my_matrix)
# t() on a data frame yields a matrix: documents become rows, terms columns.
my_df_t <- t(my_df) # transpose
# Converting to Transactions
# Hand over an explicit logical incidence matrix (term present / absent) so
# that term counts above 1 are not left to implicit numeric coercion.
my_transactions <- as(my_df_t > 0, "transactions")
# tm and arules both export inspect(); qualify it so the result does not
# depend on the order in which the packages were attached.
arules::inspect(my_transactions)
## items transactionID
## [1] {word1,word2} 1
## [2] {word1,word2,word3} 2
## [3] {word3,word5} 3
# Mine association rules; no parameter list is given, so apriori() applies
# its own default thresholds.
rules <- apriori(my_transactions)
# Qualified like the tm::inspect() call on the term-document matrix, because
# tm and arules both export inspect().
arules::inspect(rules)
## lhs rhs support confidence lift
## [1] {word5} => {word3} 0.3333333 1 1.5
## [2] {word2} => {word1} 0.6666667 1 1.5
## [3] {word1} => {word2} 0.6666667 1 1.5
## [4] {word2,word3} => {word1} 0.3333333 1 1.5
## [5] {word1,word3} => {word2} 0.3333333 1 1.5