# UCB_MultiArmbandit.R

#Upper Confidence Bound Algorithm 

# Read the ad results from the CSV file in the working directory.
dataset <- read.csv(file = "Ads_CTR_Optimisation.csv", header = TRUE)

# Print the column names, types and leading values as a quick sanity check.
str(object = dataset)

## 'data.frame':    10000 obs. of  10 variables:
##  $ Ad.1 : int  1 0 0 0 0 1 0 1 0 0 ...
##  $ Ad.2 : int  0 0 0 1 0 1 0 1 0 0 ...
##  $ Ad.3 : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ Ad.4 : int  0 0 0 0 0 0 1 0 0 0 ...
##  $ Ad.5 : int  1 0 0 0 0 0 0 1 0 0 ...
##  $ Ad.6 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Ad.7 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Ad.8 : int  0 0 0 1 0 0 0 0 0 0 ...
##  $ Ad.9 : int  1 1 0 0 0 0 0 0 0 0 ...
##  $ Ad.10: int  0 0 0 0 0 0 0 0 0 0 ...

# Implementing UCB (Upper Confidence Bound)
#
# `dataset` is read as one row per round and one column per ad; cell [n, i] is
# the reward ad i yields in round n. In every round the ad with the highest
# upper confidence bound is shown and only that ad's reward is observed.
#
# Results left in the workspace:
#   numbers_of_selection - how many times each ad was shown
#   sum_of_reward        - reward accumulated by each ad
#   ads_selected         - index of the ad shown in each round

# Take the problem size from the data so the script follows the file instead
# of silently assuming 10000 rounds and 10 ads.
N <- nrow(dataset)
d <- ncol(dataset)

numbers_of_selection <- integer(d)
sum_of_reward <- integer(d)
# Preallocated: growing the vector with append() copies it on every round.
ads_selected <- integer(N)

for (n in seq_len(N)) {
  # An ad that has never been shown gets an infinite bound, which forces every
  # ad to be tried once before the confidence term takes over.
  upper_bounds <- rep(Inf, d)
  tried <- numbers_of_selection > 0

  # Upper bound = observed mean reward + exploration bonus; the bonus shrinks
  # as an ad accumulates selections.
  average_reward <- sum_of_reward[tried] / numbers_of_selection[tried]
  delta <- sqrt(3 / 2 * log(n) / numbers_of_selection[tried])
  upper_bounds[tried] <- average_reward + delta

  # which.max() returns the first maximum, so ties go to the lowest ad index.
  ad <- which.max(upper_bounds)

  ads_selected[n] <- ad
  numbers_of_selection[ad] <- numbers_of_selection[ad] + 1
  sum_of_reward[ad] <- sum_of_reward[ad] + dataset[n, ad]
}

# Report the rewards collected by the run.
# Every value goes through print() so that it is also shown when the script is
# run with source(), which does not auto-print top-level expressions by default.
print("Total Reward")

## [1] "Total Reward"

print(sum(sum_of_reward))

## [1] 2178

print("Individual ad performance")

## [1] "Individual ad performance"

print(sum_of_reward)

##  [1]  120   47    7   38 1675    1   27  236   20    7

# Visualising the results
# Ad indices are integers, so the break points are placed at half-integers:
# that gives exactly one bar per ad, centred on its index, instead of whatever
# bin edges hist() would choose on its own.
hist(ads_selected,
     breaks = seq(0.5, d + 0.5, by = 1),
     col = 'blue',
     main = 'Histogram of ads selections',
     xlab = 'Ads',
     ylab = 'Number of times each ad was selected')

# UCB_MultiArmbandit.R
#
# Kamalm
#
# Sat May 20 09:48:43 2017