# Thompson_Sampling_1_MultArmBandit.R

# Thompson Sampling

# Load the ad click log from the working directory
dataset <- read.csv(file = "Ads_CTR_Optimisation.csv")

# Print a compact summary of the loaded data frame (dimensions, column types)
str(object = dataset)

## 'data.frame':    10000 obs. of  10 variables:
##  $ Ad.1 : int  1 0 0 0 0 1 0 1 0 0 ...
##  $ Ad.2 : int  0 0 0 1 0 1 0 1 0 0 ...
##  $ Ad.3 : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ Ad.4 : int  0 0 0 0 0 0 1 0 0 0 ...
##  $ Ad.5 : int  1 0 0 0 0 0 0 1 0 0 ...
##  $ Ad.6 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Ad.7 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Ad.8 : int  0 0 0 1 0 0 0 0 0 0 ...
##  $ Ad.9 : int  1 1 0 0 0 0 0 0 0 0 ...
##  $ Ad.10: int  0 0 0 0 0 0 0 0 0 0 ...

# Implementing Thompson Sampling
#
# Every ad i carries a Beta(numbers_of_rewards_1[i] + 1,
# numbers_of_rewards_0[i] + 1) distribution. Each round draws one sample per
# ad, selects the ad with the largest sample, reads that ad's reward for the
# round from `dataset`, and updates the selected ad's counters.

# Take the problem size from the data rather than hard-coding it, so the loop
# never indexes past the rows/columns that were actually loaded.
N <- nrow(dataset)  # number of rounds (one per row)
d <- ncol(dataset)  # number of ads (one per column)

numbers_of_rewards_1 <- integer(d)  # per ad: selections that returned reward 1
numbers_of_rewards_0 <- integer(d)  # per ad: selections that returned any other reward
sum_of_reward <- 0

# Preallocated: the ad selected in each round (avoids regrowing the vector
# on every iteration).
ads_selected <- integer(N)

for (n in seq_len(N)) {
  # One random draw per ad; rbeta is vectorised over the shape parameters.
  random_beta <- rbeta(d,
                       shape1 = numbers_of_rewards_1 + 1,
                       shape2 = numbers_of_rewards_0 + 1)

  # Index of the largest draw (the first one on ties). Always a valid column
  # index, unlike a "0 until something beats 0" sentinel.
  ad <- which.max(random_beta)

  reward <- dataset[n, ad]
  if (reward == 1) {
    numbers_of_rewards_1[ad] <- numbers_of_rewards_1[ad] + 1
  } else {
    numbers_of_rewards_0[ad] <- numbers_of_rewards_0[ad] + 1
  }

  ads_selected[n] <- ad
  sum_of_reward <- sum_of_reward + reward
}

# Report the results of the simulation.
# Lines starting with `##` are output captured from one earlier run. No RNG
# seed is set in the visible script, so a rerun will generally print different
# numbers.
# The bare variable names below rely on top-level auto-printing.
print("Total Reward")

## [1] "Total Reward"

# Total reward accumulated over all rounds
sum_of_reward

## [1] 2610

print("Individual ad performance")

## [1] "Individual ad performance"

# Per ad (position = ad column index): number of rounds in which the ad was
# selected and returned reward 1
numbers_of_rewards_1

##  [1]   11   12    1   10 2500    0    0   65   11    0

# Visualising the results
# Use one unit-width bin per ad, centred on the integer ad index. The default
# breaks are chosen from the sample size and range, so they are not guaranteed
# to line up with the ad indices (neighbouring ads can share a bar, or bars
# can be separated by empty half-width bins).
hist(ads_selected,
     breaks = seq(0.5, d + 0.5, by = 1),
     col = 'blue',
     main = 'Histogram of ads selections',
     xlab = 'Ads',
     ylab = 'Number of times each ad was selected')

# Thompson_Sampling_1_MultArmBandit.R

# Kamalm

# Sat May 20 17:52:41 2017