# Read the ratings table from the local copy of ml-100k/u.data. The rest of
# this script uses column 2 as the item id and column 3 as the rating value.
# NOTE(review): the absolute path is machine-specific; adjust it before
# running this script on another machine.
u <- read.table(file = "C:/Users/Zero/Desktop/学校课程学习/推荐系统/ml-100k/u.data")
# Build a logical mask marking which elements of `vect` occur in `values`.
#
# Args:
#   vect:   vector to test.
#   values: vector of values to look for.
#
# Returns:
#   A logical vector with one entry per element of `vect`: TRUE where the
#   element equals some entry of `values`, FALSE otherwise (all FALSE when
#   `values` is empty).
#
# `%in%` replaces the original loop that rescanned `vect` once per value.
# It is a single vectorised lookup and never yields NA, so a missing element
# in `vect` gives FALSE instead of an NA that would select a spurious NA row
# when the mask is used for subsetting.
findIndexByValues <- function(vect, values) {
  vect %in% values
}
# Count how many ratings each item (column 2 of `u`) received, ordered from
# the most-rated item down to the least-rated one.
res <- sort(table(u[, 2]), decreasing = TRUE)
# Draw a histogram of the ratings given to items whose rating count lies in
# [lower, upper).
#
# Args:
#   lower:   inclusive lower bound on an item's number of ratings.
#   upper:   exclusive upper bound on an item's number of ratings
#            (default Inf).
#   ratings: table of ratings; column 2 is used as the item id and column 3
#            as the rating value. Defaults to the script-level `u`.
#   counts:  per-item rating counts whose names are the item ids, as produced
#            by table(). Defaults to the script-level `res`.
#
# Returns:
#   The object returned by hist() (invisibly), or invisible(NULL) when no
#   rating falls in the requested range.
dwf <- function(lower, upper = Inf, ratings = u, counts = res) {
  in_range <- as.vector(counts >= lower & counts < upper)
  item_ids <- names(counts)[in_range]

  # Compare ids in their text form: names() of a table are always character,
  # so this works for any id type without a lossy as.integer() round trip.
  # `%in%` never returns NA, so rows with a missing item id are skipped.
  picked <- as.character(ratings[, 2]) %in% item_ids
  retrieved_ratings <- ratings[picked, 3]

  title_text <- paste("#Rating in [", lower, ",", upper, ")")

  if (length(retrieved_ratings) == 0L) {
    # hist() fails on empty input with an unhelpful message. Warn instead and
    # consume one panel so a par(mfrow = ...) grid keeps its layout.
    warning(
      "no items have a rating count in [", lower, ", ", upper,
      "); drawing an empty panel",
      call. = FALSE
    )
    plot.new()
    title(main = title_text)
    return(invisible(NULL))
  }

  axis_text <- paste(
    "std=", round(sd(retrieved_ratings), 2),
    "mean=", round(mean(retrieved_ratings), 2)
  )
  hist(
    retrieved_ratings,
    breaks = 10,
    labels = TRUE,
    main = title_text,
    xlab = axis_text,
    cex.lab = 2
  )
}
# The histograms below show the rating distribution for items at different
# popularity levels (popularity = the number of ratings an item has received).
# Lay the plots out on a 2 x 3 grid and draw one histogram per popularity
# bucket, from the most-rated items (>= 400 ratings) down to the least-rated
# ones (< 50 ratings). Each bucket is [lower, upper).
par(mfrow = c(2, 3))
invisible(Map(
  dwf,
  lower = c(400, 300, 200, 100, 50, 0),
  upper = c(Inf, 400, 300, 200, 100, 50)
))