This system recommends hotel chains to travelers.
# Using a toy dataset
# Toy ratings matrix: six users (rows) by six hotel chains (columns).
# NA marks a user/hotel pair that has no rating.
hotels <- matrix(
  c(
    4,  4,  4, 3, 5, 4,
    4,  3,  3, 5, 2, 3,
    3,  4,  2, 4, 3, 4,
    5,  4,  5, 5, 2, 5,
    4, NA,  3, 4, 3, 4,
    4,  5, NA, 2, 2, 3
  ),
  nrow = 6,
  byrow = TRUE,
  dimnames = list(
    paste0("user", 1:6),
    c("Marriott", "Hilton", "Best Western", "Hyatt", "Intercontinental", "Wyndham")
  )
)
# Print the matrix; the echoed console output follows.
hotels
## Marriott Hilton Best Western Hyatt Intercontinental Wyndham
## user1 4 4 4 3 5 4
## user2 4 3 3 5 2 3
## user3 3 4 2 4 3 4
## user4 5 4 5 5 2 5
## user5 4 NA 3 4 3 4
## user6 4 5 NA 2 2 3
# Training ratings: same layout as `hotels` (users in rows, hotel chains in
# columns), but with six of the known ratings replaced by NA so that they can
# be held out for evaluation.
training <- matrix(
  c(
     4, NA,  4,  3,  5,  4,
     4,  3, NA,  5,  2,  3,
     3,  4,  2,  4, NA,  4,
    NA,  4,  5,  5,  2,  5,
     4, NA,  3, NA,  3,  4,
     4,  5, NA,  2,  2, NA
  ),
  ncol = 6, byrow = TRUE
)
colnames(training) <- c("Marriott", "Hilton", "Best Western", "Hyatt", "Intercontinental", "Wyndham")
rownames(training) <- c("user1", "user2", "user3", "user4", "user5", "user6")
# Held-out ratings: the values that are present in `hotels` but NA in
# `training`, listed in column order (Marriott first, Wyndham last).
test <- c(5, 4, 3, 4, 3, 3)
# RMSE - training dataset
## [1] 1.007624
RMSE - test dataset
## [1] 0.7457362
training dataset
user bias
## user1 user2 user3 user4 user5 user6
## 0.3571429 -0.2428571 -0.2428571 0.5571429 -0.1428571 -0.3928571
item bias
## Marriott Hilton Best Western Hyatt
## 0.1571429 0.3571429 -0.1428571 0.1571429
## Intercontinental Wyndham
## -0.8428571 0.3571429
Compute the baseline prediction for every user-item combination.
# Baseline prediction for user `n` and item `m`: a global mean rating plus the
# user's bias plus the item's bias.
#
# Arguments:
#   n      index (or name) of the user in `u_bias`; may be a vector.
#   m      index (or name) of the item in `i_bias`; may be a vector.
#   mu     global mean rating; defaults to 3.5, the value previously
#          hard-coded here.
#   u_bias user bias vector; defaults to the global `user_bias`.
#   i_bias item bias vector; defaults to the global `item_bias`.
#
# Returns mu + u_bias[n] + i_bias[m].
#
# NOTE(review): the user/item biases printed in this document are consistent
# with a training mean of about 3.64 (102 / 28), not 3.5 - confirm which
# global mean is intended before relying on the default.
baselineP <- function(n, m, mu = 3.5, u_bias = user_bias, i_bias = item_bias) {
  # Keep the original left-to-right addition order so results are unchanged.
  mu + u_bias[n] + i_bias[m]
}
# Table of baseline predictions: one row per user, one column per hotel chain,
# where cell [i, j] holds baselineP(i, j).
# The table size follows the two name vectors instead of hard-coded 1:6 loops,
# and the cells are computed in one vectorised call rather than one at a time.
baseline_predictor <- local({
  users <- c("user1", "user2", "user3", "user4", "user5", "user6")
  chains <- c("Marriott", "Hilton", "Best Western", "Hyatt", "Intercontinental", "Wyndham")
  # outer() calls baselineP once with every (user index, item index) pair;
  # this relies on baselineP accepting vectors of indices.
  grid <- outer(seq_along(users), seq_along(chains), baselineP)
  predictions <- as.data.frame(grid)
  colnames(predictions) <- chains
  rownames(predictions) <- users
  predictions
})
# Print the table; the echoed console output follows.
baseline_predictor
## Marriott Hilton Best Western Hyatt Intercontinental Wyndham
## user1 4.014286 4.214286 3.714286 4.014286 3.014286 4.214286
## user2 3.414286 3.614286 3.114286 3.414286 2.414286 3.614286
## user3 3.414286 3.614286 3.114286 3.414286 2.414286 3.614286
## user4 4.214286 4.414286 3.914286 4.214286 3.214286 4.414286
## user5 3.514286 3.714286 3.214286 3.514286 2.514286 3.714286
## user6 3.264286 3.464286 2.964286 3.264286 2.264286 3.464286
RMSE of the baseline predictor - test dataset
# RMSE of the baseline predictor on the held-out ratings in `test`.
# The mean is taken over all six held-out ratings; the earlier version divided
# the sum of six squared errors by 5, which overstated the error.
# NOTE(review): any figure derived from this value (for example a percentage
# improvement over another predictor) has to be recomputed.
rmse_test_b <- local({
  # Baseline predictions for the six held-out ratings, in the order of `test`.
  # NOTE(review): these literals do not appear in the printed
  # baseline_predictor table - confirm where they come from.
  predicted <- c(4.369565, 4.369565, 2.869565, 3.536232, 3.536232, 3.569565)
  # Guard against silent recycling if `test` ever changes length.
  stopifnot(length(test) == length(predicted))
  sqrt(mean((test - predicted)^2))
})
rmse_test_b
## [1] 0.4792476
RMSE of the baseline predictor - training dataset (squared error for each observed rating, then the RMSE)
## Marriott Hilton Best Western Hyatt Intercontinental
## user1 0.0002040816 NA 0.08163265 1.0287755 3.94306122
## user2 0.3430612245 0.3773469 NA 2.5144898 0.17163265
## user3 0.1716326531 0.1487755 1.24163265 0.3430612 NA
## user4 NA 0.1716327 1.17877551 0.6173469 1.47448980
## user5 0.2359183673 NA 0.04591837 NA 0.23591837
## user6 0.5412755102 2.3584184 NA 1.5984184 0.06984694
## Wyndham
## user1 0.04591837
## user2 0.37734694
## user3 0.14877551
## user4 0.34306122
## user5 0.08163265
## user6 NA
## [1] 0.9299369
Percentage of improvement of test dataset - 29.6%
## [1] 29.60118
Percentage of improvement of training dataset - 7.7%
## [1] 7.709931