#Organizar los datos
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:arules':
##
## intersect, recode, setdiff, setequal, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Reshape the long (user, artist) listening log into one basket row per user
# and save it as a headerless CSV with one transaction per line, suitable for
# a basket-format reader such as read.transactions(format = "basket").
input_path <- "D:/Curso_R_Social_data/data/lastfm - lastfm.csv"
output_path <- "D:/Curso_R_Social_data/data/lastfm.csv"

music <- read.csv(input_path)
stopifnot(nrow(music) > 0)
music$artist <- as.character(music$artist)

# Number of artists per user; the largest count sets the basket width.
# `.groups = "drop"` makes the ungrouping explicit and silences the message.
no_col <- music %>%
  group_by(user) %>%
  summarise(n = n(), .groups = "drop")
no_col <- as.data.frame(no_col)

# Split the artists by user once instead of rescanning every row of `music`
# for each user, then reorder the groups to match the rows of `no_col`.
baskets <- split(music$artist, music$user)[as.character(no_col$user)]
stopifnot(!anyNA(names(baskets)))

# Pad every basket with NA up to the widest one so the rows can be stacked.
max_items <- max(no_col$n)
music_2 <- as.data.frame(
  do.call(rbind, lapply(baskets, `length<-`, max_items)),
  stringsAsFactors = FALSE
)
rownames(music_2) <- NULL

# Write only the items: no header row, no row names, and empty fields instead
# of the literal "NA", so none of those end up being read back as items.
write.table(
  music_2,
  file = output_path,
  sep = ",",
  row.names = FALSE,
  col.names = FALSE,
  na = "",
  qmethod = "double"
)
# Load the organised data: the user picks the basket file interactively and
# it is read as comma-separated transactions, one basket per line.
basket_file <- file.choose()
data <- read.transactions(basket_file, format = "basket", sep = ",")

# Bar chart of the 20 most frequent items.
itemFrequencyPlot(data, topN = 20)

# Mine association rules from the transactions with the Apriori algorithm.
datarules <- apriori(
  data,
  parameter = list(
    support = 0.01,    # minimum support a rule must reach
    confidence = 0.5,  # minimum confidence
    minlen = 2         # keep only rules with at least 2 items
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 5 0.01 2
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 150
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[1004 item(s), 15000 transaction(s)] done [0.16s].
## sorting and recoding items ... [655 item(s)] done [0.01s].
## creating transaction tree ... done [0.02s].
## checking subsets of size 1 2 3 4 done [0.04s].
## writing ... [50 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Print an overview of the mined rule set (rule length distribution, quality
# measures and mining info, as shown in the output below).
summary(datarules)
## set of 50 rules
##
## rule length distribution (lhs + rhs):sizes
## 2 3
## 15 35
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.0 2.0 3.0 2.7 3.0 3.0
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.01000 Min. :0.5013 Min. :0.01587 Min. : 2.781
## 1st Qu.:0.01062 1st Qu.:0.5216 1st Qu.:0.01847 1st Qu.: 3.120
## Median :0.01147 Median :0.5430 Median :0.02150 Median : 3.283
## Mean :0.01296 Mean :0.5556 Mean :0.02359 Mean : 3.963
## 3rd Qu.:0.01372 3rd Qu.:0.5858 3rd Qu.:0.02655 3rd Qu.: 3.711
## Max. :0.02927 Max. :0.6627 Max. :0.05747 Max. :13.416
## count
## Min. :150.0
## 1st Qu.:159.2
## Median :172.0
## Mean :194.4
## 3rd Qu.:205.8
## Max. :439.0
##
## mining info:
## data ntransactions support confidence
## data 15000 0.01 0.5
# Se han creado 50 reglas: 15 de ellas con 2 items y 35 con 3 items.
# Most important association rules: the five with the highest confidence.
# head() returns whatever is available when fewer than five rules were mined,
# which avoids indexing past the end of the rule set as `[1:5]` would.
importantes <- inspect(head(sort(datarules, by = "confidence"), n = 5))
## lhs rhs support confidence
## [1] {oasis,the killers} => {coldplay} 0.01113333 0.6626984
## [2] {sigur rós,the beatles} => {radiohead} 0.01046667 0.6434426
## [3] {keane} => {coldplay} 0.02226667 0.6374046
## [4] {radiohead,snow patrol} => {coldplay} 0.01006667 0.6344538
## [5] {coldplay,the smashing pumpkins} => {radiohead} 0.01093333 0.6283525
## coverage lift count
## [1] 0.01680000 4.180183 167
## [2] 0.01626667 3.569393 157
## [3] 0.03493333 4.020634 334
## [4] 0.01586667 4.002021 151
## [5] 0.01740000 3.485683 164
# (b) Considering only rules with a lift greater than 5: what would you
# recommend to a user who likes Judas Priest? And to those who like
# The Pussycat Dolls?
high_lift_rules <- subset(datarules, subset = lift > 5)
inspect(high_lift_rules)
## lhs rhs support confidence
## [1] {t.i.} => {kanye west} 0.01040000 0.5672727
## [2] {the pussycat dolls} => {rihan} 0.01040000 0.5777778
## [3] {sota arctica} => {nightwish} 0.01346667 0.5101010
## [4] {judas priest} => {iron maiden} 0.01353333 0.5075000
## [5] {led zeppelin,the doors} => {pink floyd} 0.01066667 0.5970149
## [6] {pink floyd,the doors} => {led zeppelin} 0.01066667 0.5387205
## coverage lift count
## [1] 0.01833333 8.854413 156
## [2] 0.01800000 13.415893 156
## [3] 0.02640000 8.236292 202
## [4] 0.02666667 8.562992 203
## [5] 0.01786667 5.689469 160
## [6] 0.01980000 6.802027 160