knitr::opts_chunk$set(echo = TRUE, size = "footnotesize",error = TRUE)
load(file=".RData")

Data and Model

Intermediate variables

Case 1:\(A_{jt}^{U},A_{jt}^{L}\) both exist

\[V(A_{jt}) \geq V(A_{jt}^{U})\] \[V(A_{jt}) \geq V(A_{jt}^{L})\] \[L \leq \epsilon_{jt} \leq U\] Case 2: \(A_{jt}^{U}\) does not exist

\[L \leq \epsilon_{jt} \leq +\infty\]

Case 3: \(A_{jt}^{L}\) does not exist

\[L_0 \leq \epsilon_{jt} \leq U\]

Case 4: Do not advertise, \(A_{jt}^{U}\) exists

\[-\infty \leq \epsilon_{jt} \leq U\] Case 5: Do not advertise, \(A_{jt}^{U}\) does not exist

\[-\infty \leq \epsilon_{jt} \leq +\infty\] - Choice probability

\[Prob(A_{jt}|\bar{y}) = Prob(L \leq \epsilon_{jt} \leq U)\]

Coding Process

Step 0: Preparation

arank <- read.csv("actual_rank.csv",head = TRUE, sep= ",")
nrank <- read.csv("nominal_rank.csv",head = TRUE, sep= ",")
cost <- read.csv("price.csv",head = TRUE, sep= ",")
arank <- arank[1:254,2:7]
nrank <- nrank[1:254,2:7]
cost <- cost[,2:6]
p=list(nrow(arank))  # each day we have one clicking probability matrix
for(i in 1:nrow(arank)){ # i represents day
  for(j in 1:6){  # j represents advertiser
    for(k in 1:5){  # k represents rank
      p[[i]][j,k] = exp(dot(c(k,advertiser[[j]][i]),para_click))/(1+exp(dot(c(k,advertiser[[j]][i]),para_click)))
    }
  }
}
#  p[[i]][j,k]: clicking choice probability of advertiser j on position k in day i

Step 1: Compute \(A_{jt}^{U}\),\(A_{jt}^{L}\)

AU <- matrix(data = 0,nrow = 254, ncol = 6) #origin value==0
AL <- matrix(data = 0,nrow = 254, ncol = 6)
AUr<- matrix(data = 0,nrow = 254, ncol = 6)
ALr<- matrix(data = 0,nrow = 254, ncol = 6)
i=144
# suppose day i=144
nrank[i,]  # nominal rank
arank[i,]  # actual rank
p[[i]]
          [,1]      [,2]      [,3]      [,4]       [,5]
[1,] 0.8400993 0.8001484 0.7848633 0.5529304 0.02375946
[2,] 0.9580755 0.8469773 0.7322925 0.7071344 0.23153331
[3,] 0.8205928 0.7013163 0.5742524 0.1583170 0.15064743
[4,] 0.9731791 0.9103690 0.6006388 0.3108546 0.29336418
[5,] 0.9000588 0.5915042 0.5086688 0.3074411 0.27022673
[6,] 0.6660993 0.6162511 0.5416386 0.4032623 0.35280491
ind1 <- which(nrank[i,]!= 0) # who purchases, could move down
ind2 <- which(nrank[i,]!= arank[i,]) # purchases, actual rank != nominal rank, who could move up
ind3 <- setdiff(c(1:6),ind1)  # who does not purchase, could only move up if available
ind1
ind2
ind3
n <- length(ind2)
    if (n > 1){
      temp <- sort(as.numeric(nrank[i,ind2]),index.return = TRUE) # sort the advertisers nominal rank
        index1 <- ind2[temp$ix]  # get advertiser index
        nr <- as.numeric(nrank[i,ind2[temp$ix]]) # get nominal rank
        ar <- as.numeric(arank[i,ind2[temp$ix]]) # get actual rank
        for (j in 2:n){ #start from the second advertiser, the first could not move up
            aU<-setdiff(c(1:nr[j-1]),nrank[i,ind1])  # get all available nominal upper positions for advertiser j
            aUr<-rep(c(0),length(aU)) # compute actual rank for each nominal rank
            for (k in 1:length(aU)){
                temp1 <- sort(c(nr[-j],aU[k])) 
                aUr[k] <- which(temp1==aU[k])
            }
          AUindex <- which.min((cost[i,aU]-cost[i,nr[j]])/(p[[i]][index1[j],aUr]-p[[i]][index1[j],ar[j]])) # minimum of possible upper bounds
          AU[i,index1[j]]<-aU[AUindex] # upper bound of choice:nominal rank
          AUr[i,index1[j]]<-aUr[AUindex] # actual rank 
          }
    }

AU[i,]
AUr[i,]
 #Find available lower position
    n <- length(ind1)
    temp <- sort(as.numeric(nrank[i,ind1]),index.return = TRUE)
    index1 <- ind1[temp$ix]
    nr <- as.numeric(nrank[i,ind1[temp$ix]])
    ar <- as.numeric(arank[i,ind1[temp$ix]])
    if(n>1){
      for (j in 1:(n-1)){
          aL<-setdiff(c(nr[j+1]:5),nr)
          aLr<-rep(c(0),length(aL))
          if (length(aL)>0){
          for (k in 1:length(aL)){
                  temp1 <- sort(c(nr[-j],aL[k]))
                  aLr[k] <- which(temp1==aL[k])
              }
              ALindex <- which.max((cost[i,nr[j]]-cost[i,aL])/(p[[i]][index1[j],ar[j]]-p[[i]][index1[j],aLr]))
              AL[i,index1[j]]<-aL[ALindex]
              ALr[i,index1[j]]<-aLr[ALindex]
          }
      }
    }
AL[i,]
ALr[i,]
 n <- length(ind3)
    temp <- sort(as.numeric(nrank[i,ind1]),index.return = TRUE)
    index1 <- ind1[temp$ix]
    nr <- as.numeric(nrank[i,ind1[temp$ix]])
    ar <- as.numeric(arank[i,ind1[temp$ix]])
    aU <- setdiff(c(1:5),nr)
    if (length(aU)>0){
      for (k in 1:length(aU)){
            temp1 <- sort(c(nr,aU[k]))
            aUr[k] <- which(temp1==aU[k])
        }
      for (j in 1:n){
            AUindex <- which.min(cost[i,aU]/p[[i]][ind3[j],aUr])
            AU[i,ind3[j]]<-aU[AUindex]
            AUr[i,ind3[j]]<-aUr[AUindex]
        }
    }
    
AU[i,]
AUr[i,]
for(i in 1:254){
  
  # all the above code
  
}

Check Case 1 to Case 5, create boundary of \(\epsilon_{jt}\)

j = 1
if(nrank[i,j]==0){  # do not purchase
                if (AU[i,j]!=0){ # do not purchase, AU exists
                  prob <- pnorm(cost[i,AU[i,j]]/p[j,AUr[i,j]]-Y[j])
                }
} else{#already purchase
                  if (AU[i,j]==0&AL[i,j]!=0){ # AU does not exist, AL exists
                    prob <- 1-pnorm((cost[i,AL[i,j]]-cost[i,nrank[i,j]])/
                                   (p[[i]][j,ALr[i,j]]-p[[i]][j,arank[i,j]])-Y[j]) 
                  }else if(AU[i,j]!=0&AL[i,j]!=0){ #AU exists, AL exists
                    prob <- pnorm((cost[i,AU[i,j]]-cost[i,nrank[i,j]])/
                                   (p[[i]][j,AUr[i,j]]-p[[i]][j,arank[i,j]])-Y[j])
                                  -pnorm((cost[i,AL[i,j]]-cost[i,nrank[i,j]])/
                                   (p[[i]][j,ALr[i,j]]-p[[i]][j,arank[i,j]])-Y[j])
                  }else if(AU[i,j]!=0&AL[i,j]==0){ #AU does not exist, AL exists
                    prob <- pnorm((cost[i,AU[i,j]]-cost[i,nrank[i,j]])/
                                   (p[[i]][j,AUr[i,j]]-p[[i]][j,arank[i,j]])-Y[j])
                              -pnorm(cost[i,nrank[i,j]]/p[[i]][j,arank[i,j]]-Y[j])
                      
                  }else{  #AU does not exist, AL does not exists
                  prob <- 1-pnorm(cost[i,nrank[i,j]]/p[[i]][j,arank[i,j]]-Y[j])
                }
}

Step 3: Define likelihood function over all j and i

LL <- function(Y)
{
   ll<-c(0)
   for(i in 1:254){
        for(j in 1:6){
            if(nrank[i,j]==0){
                if (AU[i,j]!=0){
                  prob <- pnorm(cost[i,AU[i,j]]/p[j,AUr[i,j]]-Y[j])
                }
            }else{      
                if (AU[i,j]==0&AL[i,j]!=0){
                  prob <- 1-pnorm((cost[i,AL[i,j]]-cost[i,nrank[i,j]])/
                                   (p[j,ALr[i,j]]-p[j,arank[i,j]])-Y[j])
                }else if(AU[i,j]!=0&AL[i,j]!=0){
                  prob <- pnorm((cost[i,AU[i,j]]-cost[i,nrank[i,j]])/
                                   (p[j,AUr[i,j]]-p[j,arank[i,j]])-Y[j])
                         -pnorm((cost[i,AL[i,j]]-cost[i,nrank[i,j]])/
                                   (p[j,ALr[i,j]]-p[j,arank[i,j]])-Y[j])
                }else if(AU[i,j]!=0&AL[i,j]==0){
                  prob <- pnorm((cost[i,AU[i,j]]-cost[i,nrank[i,j]])/
                                   (p[j,AUr[i,j]]-p[j,arank[i,j]])-Y[j])
                          -pnorm(cost[i,nrank[i,j]]/p[j,arank[i,j]]-Y[j])
                      
                }else{
                  prob <- 1-pnorm(cost[i,nrank[i,j]]/p[j,arank[i,j]]-Y[j])
                }
            }
          ll <- ll + log(prob)
        }
   }
     
return(-ll)
}

Step 4: MLE

#maximize log likelihood function
y0 <- rep(c(1),6)
res <- optim(y0,LL)
yhat <- res$par
---
title: 'Assignment 4: Estimate mean expected profit per click of advertiser'
output:
  html_notebook: default
  html_document: default
  pdf_document: default
---

```{r setup}
knitr::opts_chunk$set(echo = TRUE, size = "footnotesize",error = TRUE)
load(file=".RData")
```


## Data and Model
- Data: Nominal Rank, Advertiser attributes, BIN price

- Model elements

    - Decision Variable: Position $A_{jt}$
    
    - Known variables: Advertiser attributes $S_{jt}$, BIN price $C(A_{jt})$, clicking probability parameters
    
    - Unknown parameters: Mean expected profit per click $\bar{y_j}$
    
- Discrete choice problem

    - Given competitors' choices,$\bar{A_t}$, how to choose $A_{jt}$?
    
    - Choice probability: $Prob(A_{jt}|\bar{y})$
    
    - Likelihood $L(\bar{y})=\Pi_{j,t} Prob(A_{jt}|\bar{y})$
    
## Intermediate variables
- Consumer Clicking probability: $Pr(A_{jt},\bar{A_t},S_{jt})$
    
- Utility for $A_{jt}$, given $\bar{A_t}$: $$V(A_{jt},\bar{A_t},S_{jt}) = (\bar{y_j}+\epsilon_{jt})Pr(A_{jt},\bar{A_t},S_{jt})-C(A_{jt})$$
    
Case 1:$A_{jt}^{U},A_{jt}^{L}$ both exist

$$V(A_{jt}) \geq V(A_{jt}^{U})$$
$$V(A_{jt}) \geq V(A_{jt}^{L})$$
$$L \leq \epsilon_{jt} \leq U$$
Case 2: $A_{jt}^{U}$ does not exist 
    
$$L \leq \epsilon_{jt} \leq +\infty$$

Case 3: $A_{jt}^{L}$ does not exist
    
$$L_0 \leq \epsilon_{jt} \leq U$$

Case 4: Do not advertise, $A_{jt}^{U}$ exists

$$-\infty \leq \epsilon_{jt} \leq U$$
Case 5: Do not advertise, $A_{jt}^{U}$ does not exist

$$-\infty \leq \epsilon_{jt} \leq +\infty$$
- Choice probability 


$$Prob(A_{jt}|\bar{y}) = Prob(L \leq \epsilon_{jt} \leq U)$$

## Coding Process

- Given nominal rank, actual rank, compute $A_{jt}^{U}$,$A_{jt}^{L}$, exising or not for each j,t
- Check Case 1 to Case 5, create boundary of \epsilon_{jt}  from $A_{jt}^{U}$,$A_{jt}^{L}$,$A_{jt}$
- Calculate choice probability as function of $\bar{y}$, $Prob(A_{jt}|\bar{y})$
- Compute likelihood as function of $\bar{y}$,$L(\bar{y})$
- Maximize L over $\bar{y}$


## Step 0: Preparation
- Read Data,each row is for one day
```{r,eval=FALSE}
arank <- read.csv("actual_rank.csv",head = TRUE, sep= ",")
nrank <- read.csv("nominal_rank.csv",head = TRUE, sep= ",")
cost <- read.csv("price.csv",head = TRUE, sep= ",")
arank <- arank[1:254,2:7]
nrank <- nrank[1:254,2:7]
cost <- cost[,2:6]
```
- Create choice probability from the result of discrete choice model of clicking
```{r,eval=FALSE}
p=list(nrow(arank))  # each day we have one clicking probability matrix
for(i in 1:nrow(arank)){ # i represents day
  for(j in 1:6){  # j represents advertiser
    for(k in 1:5){  # k represents rank
      p[[i]][j,k] = exp(dot(c(k,advertiser[[j]][i]),para_click))/(1+exp(dot(c(k,advertiser[[j]][i]),para_click)))
    }
  }
}
#  p[[i]][j,k]: clicking choice probability of advertiser j on position k in day i
```




## Step 1: Compute $A_{jt}^{U}$,$A_{jt}^{L}$



- Create matrixes for available nominal upper and lower positions & actual upper and lower positions 
```{r}
AU <- matrix(data = 0,nrow = 254, ncol = 6) #origin value==0
AL <- matrix(data = 0,nrow = 254, ncol = 6)
AUr<- matrix(data = 0,nrow = 254, ncol = 6)
ALr<- matrix(data = 0,nrow = 254, ncol = 6)
```

- Only look at one day
```{r}
i=144
# suppose day i=144
nrank[i,]  # nominal rank
arank[i,]  # actual rank
p[[i]]
```

- Three conditions of advertisers
```{r}
ind1 <- which(nrank[i,]!= 0) # who purchases, could move down
ind2 <- which(nrank[i,]!= arank[i,]) # purchases, actual rank != nominal rank, who could move up
ind3 <- setdiff(c(1:6),ind1)  # who does not purchase, could only move up if available
ind1
ind2
ind3
```


- Condition 1: Already Purchase, but could move up compute AU,AUr
```{r}
n <- length(ind2)
    if (n > 1){
      temp <- sort(as.numeric(nrank[i,ind2]),index.return = TRUE) # sort the advertisers nominal rank
	    index1 <- ind2[temp$ix]  # get advertiser index
	    nr <- as.numeric(nrank[i,ind2[temp$ix]]) # get nominal rank
	    ar <- as.numeric(arank[i,ind2[temp$ix]]) # get actual rank
	    for (j in 2:n){ #start from the second advertiser, the first could not move up
		    aU<-setdiff(c(1:nr[j-1]),nrank[i,ind1])  # get all available nominal upper positions for advertiser j
		    aUr<-rep(c(0),length(aU)) # compute actual rank for each nominal rank
		    for (k in 1:length(aU)){
			    temp1 <- sort(c(nr[-j],aU[k])) 
			    aUr[k] <- which(temp1==aU[k])
		    }
		  AUindex <- which.min((cost[i,aU]-cost[i,nr[j]])/(p[[i]][index1[j],aUr]-p[[i]][index1[j],ar[j]])) # minimum of possible upper bounds
		  AU[i,index1[j]]<-aU[AUindex] # upper bound of choice:nominal rank
		  AUr[i,index1[j]]<-aUr[AUindex] # actual rank 
		  }
    }

AU[i,]
AUr[i,]
```


- Condition 2: Already purchase, could move down, compute AL,ALr

```{r}
 #Find available lower position
    n <- length(ind1)
    temp <- sort(as.numeric(nrank[i,ind1]),index.return = TRUE)
    index1 <- ind1[temp$ix]
    nr <- as.numeric(nrank[i,ind1[temp$ix]])
    ar <- as.numeric(arank[i,ind1[temp$ix]])
    if(n>1){
      for (j in 1:(n-1)){
	      aL<-setdiff(c(nr[j+1]:5),nr)
	      aLr<-rep(c(0),length(aL))
	      if (length(aL)>0){
      	  for (k in 1:length(aL)){
			      temp1 <- sort(c(nr[-j],aL[k]))
			      aLr[k] <- which(temp1==aL[k])
		      }
		      ALindex <- which.max((cost[i,nr[j]]-cost[i,aL])/(p[[i]][index1[j],ar[j]]-p[[i]][index1[j],aLr]))
		      AL[i,index1[j]]<-aL[ALindex]
		      ALr[i,index1[j]]<-aLr[ALindex]
	      }
      }
    }
AL[i,]
ALr[i,]
```


- Condition 3: Do not purchase, could move up if availabe, but could not move down,compute AU,AUr

```{r}
 n <- length(ind3)
    temp <- sort(as.numeric(nrank[i,ind1]),index.return = TRUE)
    index1 <- ind1[temp$ix]
    nr <- as.numeric(nrank[i,ind1[temp$ix]])
    ar <- as.numeric(arank[i,ind1[temp$ix]])
    aU <- setdiff(c(1:5),nr)
    if (length(aU)>0){
      for (k in 1:length(aU)){
		    temp1 <- sort(c(nr,aU[k]))
		    aUr[k] <- which(temp1==aU[k])
	    }
      for (j in 1:n){
		    AUindex <- which.min(cost[i,aU]/p[[i]][ind3[j],aUr])
		    AU[i,ind3[j]]<-aU[AUindex]
		    AUr[i,ind3[j]]<-aUr[AUindex]
	    }
    }
    
AU[i,]
AUr[i,]
```


- Use for loop to go through each day

```{r,eval=FALSE}
for(i in 1:254){
  
  # all the above code
  
}
```

## Check Case 1 to Case 5, create boundary of $\epsilon_{jt}$

- For advertiser j on day i, check conditions and calculate probabilities
```{r.eval=FALSE}
j = 1
if(nrank[i,j]==0){  # do not purchase
			    if (AU[i,j]!=0){ # do not purchase, AU exists
			      prob <- pnorm(cost[i,AU[i,j]]/p[j,AUr[i,j]]-Y[j])
			    }
} else{#already purchase
			      if (AU[i,j]==0&AL[i,j]!=0){ # AU does not exist, AL exists
			        prob <- 1-pnorm((cost[i,AL[i,j]]-cost[i,nrank[i,j]])/
                                   (p[[i]][j,ALr[i,j]]-p[[i]][j,arank[i,j]])-Y[j]) 
			      }else if(AU[i,j]!=0&AL[i,j]!=0){ #AU exists, AL exists
			        prob <- pnorm((cost[i,AU[i,j]]-cost[i,nrank[i,j]])/
                                   (p[[i]][j,AUr[i,j]]-p[[i]][j,arank[i,j]])-Y[j])
			     	              -pnorm((cost[i,AL[i,j]]-cost[i,nrank[i,j]])/
                                   (p[[i]][j,ALr[i,j]]-p[[i]][j,arank[i,j]])-Y[j])
			      }else if(AU[i,j]!=0&AL[i,j]==0){ #AU does not exist, AL exists
			        prob <- pnorm((cost[i,AU[i,j]]-cost[i,nrank[i,j]])/
                                   (p[[i]][j,AUr[i,j]]-p[[i]][j,arank[i,j]])-Y[j])
				              -pnorm(cost[i,nrank[i,j]]/p[[i]][j,arank[i,j]]-Y[j])
                      
			      }else{  #AU does not exist, AL does not exists
			      prob <- 1-pnorm(cost[i,nrank[i,j]]/p[[i]][j,arank[i,j]]-Y[j])
			    }
}
```

## Step 3: Define likelihood function over all j and i

```{r}
LL <- function(Y)
{
   ll<-c(0)
   for(i in 1:254){
	    for(j in 1:6){
		    if(nrank[i,j]==0){
			    if (AU[i,j]!=0){
			      prob <- pnorm(cost[i,AU[i,j]]/p[j,AUr[i,j]]-Y[j])
			    }
		    }else{		
			    if (AU[i,j]==0&AL[i,j]!=0){
			      prob <- 1-pnorm((cost[i,AL[i,j]]-cost[i,nrank[i,j]])/
                                   (p[j,ALr[i,j]]-p[j,arank[i,j]])-Y[j])
			    }else if(AU[i,j]!=0&AL[i,j]!=0){
			      prob <- pnorm((cost[i,AU[i,j]]-cost[i,nrank[i,j]])/
                                   (p[j,AUr[i,j]]-p[j,arank[i,j]])-Y[j])
			     	     -pnorm((cost[i,AL[i,j]]-cost[i,nrank[i,j]])/
                                   (p[j,ALr[i,j]]-p[j,arank[i,j]])-Y[j])
			    }else if(AU[i,j]!=0&AL[i,j]==0){
			      prob <- pnorm((cost[i,AU[i,j]]-cost[i,nrank[i,j]])/
                                   (p[j,AUr[i,j]]-p[j,arank[i,j]])-Y[j])
				          -pnorm(cost[i,nrank[i,j]]/p[j,arank[i,j]]-Y[j])
                      
			    }else{
			      prob <- 1-pnorm(cost[i,nrank[i,j]]/p[j,arank[i,j]]-Y[j])
			    }
		    }
	      ll <- ll + log(prob)
	    }
   }
	 
return(-ll)
}
```


## Step 4: MLE

```{r,eval=FALSE}
#maximize log likelihood function
y0 <- rep(c(1),6)
res <- optim(y0,LL)
yhat <- res$par
```





  