laboratorio reglas de asociación

#Organizar los datos
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:arules':
## 
##     intersect, recode, setdiff, setequal, union

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Load the raw last.fm listening records (one row per record, with at least
# the `user` and `artist` columns used below).
music <- read.csv("D:/Curso_R_Social_data/data/lastfm - lastfm.csv")

# Force artist names to plain character strings (read.csv may return factors
# on R versions before 4.0).
music$artist <- as.character(music$artist)

# One row per user, with `n` = number of records that user has.
no_col <- summarise(group_by(music, user), n = n())

## `summarise()` ungrouping output (override with `.groups` argument)

# Reshape the long user/artist table into one row per user ("basket" layout):
# row i holds the artists of no_col$user[i], padded on the right with NA up to
# the largest basket size.
no_col <- as.data.frame(no_col)
n_users <- nrow(no_col)
# max() of an empty vector is -Inf (with a warning), so guard the empty case.
n_cols <- if (n_users > 0) max(no_col$n) else 0L

# Group the artists once instead of re-scanning `music` for every user, and
# fill a character matrix (element assignment on a data frame is far slower).
artistas <- split(music$artist, music$user)
basket_mat <- matrix(NA_character_, nrow = n_users, ncol = n_cols)
for (i in seq_len(n_users)) {
  artista <- artistas[[as.character(no_col$user[i])]]
  basket_mat[i, seq_along(artista)] <- artista
}
music_2 <- as.data.frame(basket_mat, stringsAsFactors = FALSE)

# Write a clean basket file: no header line, no row-name column, and empty
# cells instead of the literal text "NA". Any of those would otherwise be read
# back as items of a transaction. Names stay quoted so that commas or
# apostrophes inside an artist name do not split or merge fields.
# NOTE(review): read.transactions() is expected to drop the empty trailing
# fields; confirm with the installed arules version.
write.table(
  music_2,
  file = "D:/Curso_R_Social_data/data/lastfm.csv",
  sep = ",",
  na = "",
  quote = TRUE,
  qmethod = "double",
  row.names = FALSE,
  col.names = FALSE
)


# Cargar los datos organizados como transacciones

# Read the organized file (chosen interactively) as transactions: one line per
# user, comma-separated artist names.
# The encoding is declared explicitly because the recorded output shows UTF-8
# bytes being displayed as Latin-1 ("sigur rÃ³s" instead of "sigur rós").
# NOTE(review): assumes the chosen file is UTF-8 encoded; confirm.
data <- read.transactions(
  file = file.choose(),
  format = "basket",
  sep = ",",
  encoding = "UTF-8"
)

# Bar chart of the 20 most frequent items (artists).
itemFrequencyPlot(data, topN = 20)

# Mine association rules from the transactions with the Apriori algorithm.
datarules <- apriori(
  data,
  parameter = list(
    # Minimum support: an itemset must appear in at least 1% of transactions.
    support = 0.01,
    # Minimum confidence a rule must reach to be kept.
    confidence = 0.5,
    # Rules must contain at least 2 items in total (lhs + rhs).
    minlen = 2
  )
)

## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.5    0.1    1 none FALSE            TRUE       5    0.01      2
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 150 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[1004 item(s), 15000 transaction(s)] done [0.16s].
## sorting and recoding items ... [655 item(s)] done [0.01s].
## creating transaction tree ... done [0.02s].
## checking subsets of size 1 2 3 4 done [0.04s].
## writing ... [50 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].

# Overview of the mined rules: rule-length distribution and quality measures.
summary(object = datarules)

## set of 50 rules
## 
## rule length distribution (lhs + rhs):sizes
##  2  3 
## 15 35 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     2.0     2.0     3.0     2.7     3.0     3.0 
## 
## summary of quality measures:
##     support          confidence        coverage            lift       
##  Min.   :0.01000   Min.   :0.5013   Min.   :0.01587   Min.   : 2.781  
##  1st Qu.:0.01062   1st Qu.:0.5216   1st Qu.:0.01847   1st Qu.: 3.120  
##  Median :0.01147   Median :0.5430   Median :0.02150   Median : 3.283  
##  Mean   :0.01296   Mean   :0.5556   Mean   :0.02359   Mean   : 3.963  
##  3rd Qu.:0.01372   3rd Qu.:0.5858   3rd Qu.:0.02655   3rd Qu.: 3.711  
##  Max.   :0.02927   Max.   :0.6627   Max.   :0.05747   Max.   :13.416  
##      count      
##  Min.   :150.0  
##  1st Qu.:159.2  
##  Median :172.0  
##  Mean   :194.4  
##  3rd Qu.:205.8  
##  Max.   :439.0  
## 
## mining info:
##  data ntransactions support confidence
##  data         15000    0.01        0.5

# Se han creado 50 reglas: 15 de ellas con 2 ítems y 35 con 3 ítems.

# Reglas de asociación más importantes (las 5 con mayor confianza)
# Show the (up to) five rules with the highest confidence.
# head() with `by` sorts in decreasing order like sort() did, but unlike
# `[1:5]` it does not index out of range when fewer than five rules were mined.
importantes <- inspect(head(datarules, n = 5, by = "confidence"))

##     lhs                                 rhs         support    confidence
## [1] {oasis,the killers}              => {coldplay}  0.01113333 0.6626984 
## [2] {sigur rós,the beatles}          => {radiohead} 0.01046667 0.6434426 
## [3] {keane}                          => {coldplay}  0.02226667 0.6374046 
## [4] {radiohead,snow patrol}          => {coldplay}  0.01006667 0.6344538 
## [5] {coldplay,the smashing pumpkins} => {radiohead} 0.01093333 0.6283525 
##     coverage   lift     count
## [1] 0.01680000 4.180183 167  
## [2] 0.01626667 3.569393 157  
## [3] 0.03493333 4.020634 334  
## [4] 0.01586667 4.002021 151  
## [5] 0.01740000 3.485683 164

#(b) Considerando que se tenga un lift mayor a 5. ¿Qué recomendaría a un usuario que gusta de Judas Priest? ¿Y a los que gustan de the Pussycat Dolls?
# Keep only the rules whose lift exceeds 5 and print them.
datarules %>%
  subset(subset = lift > 5) %>%
  inspect()

##     lhs                         rhs            support    confidence
## [1] {t.i.}                   => {kanye west}   0.01040000 0.5672727 
## [2] {the pussycat dolls}     => {rihan}        0.01040000 0.5777778 
## [3] {sota arctica}           => {nightwish}    0.01346667 0.5101010 
## [4] {judas priest}           => {iron maiden}  0.01353333 0.5075000 
## [5] {led zeppelin,the doors} => {pink floyd}   0.01066667 0.5970149 
## [6] {pink floyd,the doors}   => {led zeppelin} 0.01066667 0.5387205 
##     coverage   lift      count
## [1] 0.01833333  8.854413 156  
## [2] 0.01800000 13.415893 156  
## [3] 0.02640000  8.236292 202  
## [4] 0.02666667  8.562992 203  
## [5] 0.01786667  5.689469 160  
## [6] 0.01980000  6.802027 160

laboratorio reglas de asociación

Maria Alejandra Molina Berbeo

12/6/2021