Thompson Sampling - Reinforced learning
Importing the dataset
dataset <- read.csv('G:\\RStudio\\udemy\\ml\\Machine Learning AZ\\Part 6 - Reinforcement Learning\\Section 33 - Thompson Sampling\\Thompson_Sampling\\Ads_CTR_Optimisation.csv')
head(dataset)
Implementing Thompson Sampling
There is package to use UCB. So we will create the algorithm from scatch
# initialize variables we will be using
N <- 10000
d <- 10
ads_selected <- integer(0)
total_reward <- 0
# new parameters
numbers_of_rewards_1 <- integer(d)
numbers_of_rewards_0 <- integer(d)
for (n in 1:N) {
ad <- 0
max_random <- 0
for ( i in 1 : d) {
random_beta <- rbeta(n=1,shape1 = numbers_of_rewards_1[i] +1,
shape2 = numbers_of_rewards_0[i]+1)
if (random_beta > max_random) {
max_random <- random_beta
ad <- i
}
}
ads_selected <- append(ads_selected, ad)
reward <- dataset[n,ad]
if (reward ==1) {
numbers_of_rewards_1[ad] <- numbers_of_rewards_1[ad] +1
} else {
numbers_of_rewards_0[ad] <- numbers_of_rewards_0[ad] +1
}
total_reward <- total_reward + reward
}
Visualising the resuls by using a histogram
hist(ads_selected,
col = "Blue",
main = "Histogram of Ad Selections",
xlab = "Ads",
ylab = "Number of times each ad was selected")

LS0tDQp0aXRsZTogIk1MUiBzZWN0aW9uIDI4IFRob21wc29uIFNhbXBsaW5nICINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCiMgVGhvbXBzb24gU2FtcGxpbmcgLSBSZWluZm9yY2VkIGxlYXJuaW5nDQoNCkltcG9ydGluZyB0aGUgZGF0YXNldA0KYGBge3J9DQpkYXRhc2V0IDwtICByZWFkLmNzdignRzpcXFJTdHVkaW9cXHVkZW15XFxtbFxcTWFjaGluZSBMZWFybmluZyBBWlxcUGFydCA2IC0gUmVpbmZvcmNlbWVudCBMZWFybmluZ1xcU2VjdGlvbiAzMyAtIFRob21wc29uIFNhbXBsaW5nXFxUaG9tcHNvbl9TYW1wbGluZ1xcQWRzX0NUUl9PcHRpbWlzYXRpb24uY3N2JykNCmhlYWQoZGF0YXNldCkNCmBgYA0KDQojIEltcGxlbWVudGluZyBUaG9tcHNvbiBTYW1wbGluZw0KVGhlcmUgaXMgcGFja2FnZSB0byB1c2UgVUNCLiBTbyB3ZSB3aWxsIGNyZWF0ZSB0aGUgYWxnb3JpdGhtIGZyb20gc2NhdGNoDQoNCmBgYHtyfQ0KIyBpbml0aWFsaXplIHZhcmlhYmxlcyB3ZSB3aWxsIGJlIHVzaW5nDQpOIDwtICAxMDAwMA0KZCA8LSAgMTANCmFkc19zZWxlY3RlZCA8LSAgaW50ZWdlcigwKQ0KdG90YWxfcmV3YXJkIDwtICAwDQojIG5ldyBwYXJhbWV0ZXJzDQpudW1iZXJzX29mX3Jld2FyZHNfMSA8LSBpbnRlZ2VyKGQpDQpudW1iZXJzX29mX3Jld2FyZHNfMCA8LSBpbnRlZ2VyKGQpDQoNCmZvciAobiBpbiAxOk4pIHsNCiAgDQogIGFkIDwtICAwDQogIG1heF9yYW5kb20gPC0gIDANCg0KICAgIGZvciAoIGkgaW4gMSA6IGQpIHsgDQogICAgcmFuZG9tX2JldGEgPC0gIHJiZXRhKG49MSxzaGFwZTEgPSBudW1iZXJzX29mX3Jld2FyZHNfMVtpXSArMSwgDQogICAgICAgICAgICAgICAgICAgICAgICAgIHNoYXBlMiA9IG51bWJlcnNfb2ZfcmV3YXJkc18wW2ldKzEpIA0KICAgIGlmIChyYW5kb21fYmV0YSA+IG1heF9yYW5kb20pIHsNCiAgICAgIG1heF9yYW5kb20gPC0gIHJhbmRvbV9iZXRhDQogICAgICBhZCA8LSAgaQ0KICAgIH0NCiAgfQ0KICBhZHNfc2VsZWN0ZWQgPC0gIGFwcGVuZChhZHNfc2VsZWN0ZWQsIGFkKQ0KICByZXdhcmQgPC0gIGRhdGFzZXRbbixhZF0NCiAgaWYgKHJld2FyZCA9PTEpIHsNCiAgICAgIG51bWJlcnNfb2ZfcmV3YXJkc18xW2FkXSA8LSAgbnVtYmVyc19vZl9yZXdhcmRzXzFbYWRdICsxDQogIH0gZWxzZSB7DQogICAgbnVtYmVyc19vZl9yZXdhcmRzXzBbYWRdIDwtICBudW1iZXJzX29mX3Jld2FyZHNfMFthZF0gKzENCiAgfQ0KDQogIHRvdGFsX3Jld2FyZCA8LSAgdG90YWxfcmV3YXJkICsgcmV3YXJkDQogIA0KfQ0KDQpgYGANCg0KVmlzdWFsaXNpbmcgdGhlIHJlc3VscyBieSB1c2luZyBhIGhpc3RvZ3JhbQ0KDQpgYGB7cn0NCmhpc3QoYWRzX3NlbGVjdGVkLCANCiAgICAgY29sID0gIkJsdWUiLA0KICAgICBtYWluID0gIkhpc3RvZ3JhbSBvZiBBZCBTZWxlY3Rpb25zIiwNCiAgICAgeGxhYiA9ICJBZHMiLA0KICAgICB5bGFiID0gIk51bWJlciBvZiB0aW1lcyBlYWNoIGFkIHdhcyBzZWxlY3RlZCIpDQpgYGANCg0K