Importing packages & dataset

# Calling packages
library(readxl)
library(plotly)
library(ggplot2)
library(mlbench)
library(rpart)
library(rpart.plot)
library(ngram)
library(dplyr)
library(ggalt)
library(reshape2)
library(tidyr)
# Importing database
airfrance <- read_excel("/Users/emmanuel/Desktop/AirFrance_Internet_Marketing.xls", sheet="DoubleClick")

# Importing kayak database 
kayak <- read_excel("/Users/emmanuel/Desktop/AirFrance_Internet_Marketing.xls", sheet="Kayak")

Data Cleaning & Massaging

Exploring and cleaning data for AirFance adataset

# Checking structure of our dataframe
str(airfrance)
## tibble [4,510 × 23] (S3: tbl_df/tbl/data.frame)
##  $ Publisher ID            : chr [1:4510] "K2615" "K2615" "K2003" "K1175" ...
##  $ Publisher Name          : chr [1:4510] "Yahoo - US" "Yahoo - US" "MSN - Global" "Google - Global" ...
##  $ Keyword ID              : chr [1:4510] "43000000039657988" "43000000039651113" "43000000019452431" "43000000005663331" ...
##  $ Keyword                 : chr [1:4510] "fly to florence" "low international airfare" "air discount france ticket" "[airfrance]" ...
##  $ Match Type              : chr [1:4510] "Advanced" "Advanced" "Broad" "Exact" ...
##  $ Campaign                : chr [1:4510] "Western Europe Destinations" "Geo Targeted DC" "Air France Brand & French Destinations" "Air France Global Campaign" ...
##  $ Keyword Group           : chr [1:4510] "Florence" "Low International DC" "France" "Air France" ...
##  $ Category                : chr [1:4510] "uncategorized" "uncategorized" "uncategorized" "airfrance" ...
##  $ Bid Strategy            : chr [1:4510] NA NA "Position 2-5 Bid Strategy" "Position 1- 3" ...
##  $ Keyword Type            : chr [1:4510] "Unassigned" "Unassigned" "Unassigned" "Unassigned" ...
##  $ Status                  : chr [1:4510] "Live" "Paused" "Deactivated" "Unavailable" ...
##  $ Search Engine Bid       : num [1:4510] 6.25 6.25 0 7.5 0.25 0.125 6.25 0.325 7.5 5 ...
##  $ Clicks                  : num [1:4510] 1 1 1 59 8 42 3 47 13 19 ...
##  $ Click Charges           : num [1:4510] 2.312 0.625 0.388 2.312 2.2 ...
##  $ Avg. Cost per Click     : num [1:4510] 2.3125 0.625 0.3875 0.0392 0.275 ...
##  $ Impressions             : num [1:4510] 11 6 9 401 318 722 13 547 448 129 ...
##  $ Engine Click Thru %     : num [1:4510] 9.09 16.67 11.11 14.71 2.52 ...
##  $ Avg. Pos.               : num [1:4510] 1.27 1 1.11 2 2.98 ...
##  $ Trans. Conv. %          : num [1:4510] 900 100 100 3.39 12.5 ...
##  $ Total Cost/ Trans.      : num [1:4510] 0.257 0.625 0.388 1.156 2.2 ...
##  $ Amount                  : num [1:4510] 8778 1574 390 1665 935 ...
##  $ Total Cost              : num [1:4510] 2.312 0.625 0.388 2.312 2.2 ...
##  $ Total Volume of Bookings: num [1:4510] 9 1 1 2 1 2 1 2 1 1 ...
head(airfrance)
## # A tibble: 6 × 23
##   `Publisher ID` `Publisher Name`  `Keyword ID` Keyword  `Match Type` Campaign  
##   <chr>          <chr>             <chr>        <chr>    <chr>        <chr>     
## 1 K2615          Yahoo - US        43000000039… fly to … Advanced     Western E…
## 2 K2615          Yahoo - US        43000000039… low int… Advanced     Geo Targe…
## 3 K2003          MSN - Global      43000000019… air dis… Broad        Air Franc…
## 4 K1175          Google - Global   43000000005… [airfra… Exact        Air Franc…
## 5 K1123          Overture - Global 43000000005… air fra… Standard     Unassigned
## 6 K1123          Overture - Global 43000000005… airfran… Standard     Unassigned
## # … with 17 more variables: Keyword Group <chr>, Category <chr>,
## #   Bid Strategy <chr>, Keyword Type <chr>, Status <chr>,
## #   Search Engine Bid <dbl>, Clicks <dbl>, Click Charges <dbl>,
## #   Avg. Cost per Click <dbl>, Impressions <dbl>, Engine Click Thru % <dbl>,
## #   Avg. Pos. <dbl>, Trans. Conv. % <dbl>, Total Cost/ Trans. <dbl>,
## #   Amount <dbl>, Total Cost <dbl>, Total Volume of Bookings <dbl>
summary(airfrance)
##  Publisher ID       Publisher Name      Keyword ID          Keyword         
##  Length:4510        Length:4510        Length:4510        Length:4510       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##   Match Type          Campaign         Keyword Group        Category        
##  Length:4510        Length:4510        Length:4510        Length:4510       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  Bid Strategy       Keyword Type          Status          Search Engine Bid
##  Length:4510        Length:4510        Length:4510        Min.   : 0.000   
##  Class :character   Class :character   Class :character   1st Qu.: 3.384   
##  Mode  :character   Mode  :character   Mode  :character   Median : 6.250   
##                                                           Mean   : 5.435   
##                                                           3rd Qu.: 6.250   
##                                                           Max.   :27.500   
##      Clicks        Click Charges      Avg. Cost per Click  Impressions     
##  Min.   :    0.0   Min.   :    0.00   Min.   : 0.000      Min.   :      0  
##  1st Qu.:    1.0   1st Qu.:    2.31   1st Qu.: 0.825      1st Qu.:     28  
##  Median :    4.0   Median :    6.76   Median : 1.650      Median :    176  
##  Mean   :  113.7   Mean   :  167.48   Mean   : 1.890      Mean   :   9284  
##  3rd Qu.:   19.0   3rd Qu.:   28.49   3rd Qu.: 2.663      3rd Qu.:    844  
##  Max.   :34012.0   Max.   :46188.44   Max.   :10.000      Max.   :8342415  
##  Engine Click Thru %   Avg. Pos.      Trans. Conv. %     Total Cost/ Trans.
##  Min.   :  0.000     Min.   : 0.000   Min.   :  0.0000   Min.   :   0.00   
##  1st Qu.:  1.532     1st Qu.: 1.143   1st Qu.:  0.0000   1st Qu.:   0.00   
##  Median :  4.106     Median : 1.594   Median :  0.0000   Median :   0.00   
##  Mean   : 11.141     Mean   : 1.930   Mean   :  0.5693   Mean   :  27.61   
##  3rd Qu.: 10.917     3rd Qu.: 2.308   3rd Qu.:  0.0000   3rd Qu.:   0.00   
##  Max.   :200.000     Max.   :15.000   Max.   :900.0000   Max.   :9597.17   
##      Amount         Total Cost       Total Volume of Bookings
##  Min.   :     0   Min.   :    0.00   Min.   :  0.0000        
##  1st Qu.:     0   1st Qu.:    2.31   1st Qu.:  0.0000        
##  Median :     0   Median :    6.76   Median :  0.0000        
##  Mean   :  1034   Mean   :  167.48   Mean   :  0.8734        
##  3rd Qu.:     0   3rd Qu.:   28.49   3rd Qu.:  0.0000        
##  Max.   :567463   Max.   :46188.44   Max.   :439.0000

Missing Value Detection / Replacement / Drop

# Update NAs
airfrance[airfrance==""] <- NA
airfrance[airfrance=="N/A"] <- NA

# Check for NAs
sapply(airfrance, function(x) sum(is.na(x)))
##             Publisher ID           Publisher Name               Keyword ID 
##                        0                        0                        0 
##                  Keyword               Match Type                 Campaign 
##                        0                       48                        0 
##            Keyword Group                 Category             Bid Strategy 
##                        0                        0                     1224 
##             Keyword Type                   Status        Search Engine Bid 
##                        0                        0                        0 
##                   Clicks            Click Charges      Avg. Cost per Click 
##                        0                        0                        0 
##              Impressions      Engine Click Thru %                Avg. Pos. 
##                        0                        0                        0 
##           Trans. Conv. %       Total Cost/ Trans.                   Amount 
##                        0                        0                        0 
##               Total Cost Total Volume of Bookings 
##                        0                        0
# Dropping observations with NA
clean_airfrance <- na.omit(airfrance)

Cleaning dataset

# Publisher name to factor (US vs Global)
clean_airfrance$`Publisher Location` <- clean_airfrance$`Publisher Name`
clean_airfrance$`Publisher Location` <- gsub("..*US", 1, clean_airfrance$`Publisher Location`)

# Keyboard ID to num
clean_airfrance$`Keyword ID` <- as.numeric(clean_airfrance$`Keyword ID`)

# Match Type to factor
clean_airfrance$`Match Type` <- factor(clean_airfrance$`Match Type`, 
                                          ordered = T,
                                          levels = c("Broad", "Standard", 
                                                     "Exact", "Advanced"))

# Status to factor
clean_airfrance$`Status` <- factor(clean_airfrance$`Status`, 
                                       ordered = T,
                                       levels = c("Unavailable", "Deactivated", 
                                                  "Paused", "Sent", "Live"))

Grouping Keyword type into categories

# grouping Keyword type into categories: short-tail (1 space or less), mid-tail (2 to 3 spaces), long-tail (4+ spaces)
word_count <- c()
i = 1
while(i <= nrow(clean_airfrance)){
  word_count <- sapply(strsplit(clean_airfrance$`Keyword`[i], " "), length)
  if (word_count <= 2){
    clean_airfrance$`Keyword Type`[i] <- "short-tail"
  } else if (word_count == 3 | word_count == 4) {
    clean_airfrance$`Keyword Type`[i] <- "mid-tail"
  } else if (word_count >= 5){
    clean_airfrance$`Keyword Type`[i] <- "long-tail"
  } #--> closing if statement
  i <- i + 1
} #--> closing while loop

clean_airfrance$`Keyword Type` <- factor(clean_airfrance$`Keyword Type`, ordered = T, 
                                          levels = c("short-tail", "mid-tail", "long-tail"))

Creating KPI to analyze data

# Creating Net Revenue column 
clean_airfrance$net_revenue <- as.numeric(clean_airfrance$Amount - 
                                            clean_airfrance$`Total Cost`)
# Creating ROA column
clean_airfrance$ROA <- as.numeric(clean_airfrance$net_revenue / 
                                    clean_airfrance$`Total Cost`)

# Creating Conversion rate column 
clean_airfrance$conversion_rate <- as.numeric(clean_airfrance$`Trans. Conv. %`/ 
                                                clean_airfrance$`Avg. Cost per Click`)

# Removing values from variable Amount including zero values
dfchannel <- clean_airfrance[!clean_airfrance$Amount == 0, ]
dfchannel$Average_Revenue_Booking <- dfchannel$Amount / dfchannel$`Total Volume of Bookings`

Exploring data for Kayak dataset

summary(kayak)
##  Sponsored Links - Air France     ...2               ...3          
##  Length:5                     Length:5           Length:5          
##  Class :character             Class :character   Class :character  
##  Mode  :character             Mode  :character   Mode  :character  
##      ...4               ...5               ...6               ...7          
##  Length:5           Length:5           Length:5           Length:5          
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character
str(kayak)
## tibble [5 × 7] (S3: tbl_df/tbl/data.frame)
##  $ Sponsored Links - Air France: chr [1:5] NA "Search Engine" "Kayak" NA ...
##  $ ...2                        : chr [1:5] NA "Clicks" "2839" NA ...
##  $ ...3                        : chr [1:5] NA "Media Cost" "3567.1334999999999" NA ...
##  $ ...4                        : chr [1:5] NA "Total Bookings" "208" NA ...
##  $ ...5                        : chr [1:5] NA "Avg Ticket" "1123.5288461538421" NA ...
##  $ ...6                        : chr [1:5] NA "Total Revenue" "233693.99999999916" NA ...
##  $ ...7                        : chr [1:5] NA "Net Revenue" "230126.86649999916" NA ...
head(kayak)
## # A tibble: 5 × 7
##   `Sponsored Links - Air … ...2   ...3      ...4     ...5      ...6     ...7    
##   <chr>                    <chr>  <chr>     <chr>    <chr>     <chr>    <chr>   
## 1 <NA>                     <NA>   <NA>      <NA>     <NA>      <NA>     <NA>    
## 2 Search Engine            Clicks Media Co… Total B… Avg Tick… Total R… Net Rev…
## 3 Kayak                    2839   3567.133… 208      1123.528… 233693.… 230126.…
## 4 <NA>                     <NA>   <NA>      <NA>     <NA>      <NA>     <NA>    
## 5 Time Period is One Week… <NA>   <NA>      <NA>     <NA>      <NA>     <NA>

Cleaning Kayak dataset

# Updating kayak info NAs
kayak[kayak==""] <- NA
kayak[kayak=="N/A"] <- NA

# Check kayak info for NAs
sapply(kayak, function(x) sum(is.na(x)))
## Sponsored Links - Air France                         ...2 
##                            2                            3 
##                         ...3                         ...4 
##                            3                            3 
##                         ...5                         ...6 
##                            3                            3 
##                         ...7 
##                            3
# Dropping kayak info observations with NA
clean_kayak <- na.omit(kayak)

Data Wrangling

# Renaming each variable with correct name
colnames(clean_kayak) <- c("Search_Engine","Clicks","Media_Cost", 
                                     "Total_Bookings", "Avg_Ticket", 
                                     "Total_Revenue","Net_Revenue")

# Subseting to have only the rows with the values 
clean_kayak <- clean_kayak[ 2,]

# Clicks to numeric
clean_kayak$Clicks <- as.numeric(clean_kayak$Clicks)

# Media_Cost to numeric
clean_kayak$Media_Cost <- as.numeric(clean_kayak$Media_Cost)

# Total_Bookings to numeric
clean_kayak$Total_Bookings <- as.numeric(clean_kayak$Total_Bookings)

# Avg_Ticket to numeric
clean_kayak$Avg_Ticket <- as.numeric(clean_kayak$Avg_Ticket)

# Total_Revenue to numeric
clean_kayak$Total_Revenue <- as.numeric(clean_kayak$Total_Revenue)

# Net_Revenue to numeric
clean_kayak$Net_Revenue <- as.numeric(clean_kayak$Net_Revenue)

Data calculations for kayak

# Calculating the average cpc for Kayak 
avg_cpc_kayak  <- clean_kayak$Media_Cost/clean_kayak$Clicks
# Calculating the average ROA for Kayak 
avg_ROA_kayak  <- clean_kayak$Net_Revenue/clean_kayak$Media_Cost
# Calculating the conversion rate price ratio for Kayak 
avg_conversion_rate_kayak  <- ((clean_kayak$Total_Bookings/clean_kayak$Clicks)/avg_cpc_kayak)
# Calculating the average Revenue boooking for Kayak 
# transformed result into percentage 
avg_Revenue_booking_kayak <- clean_kayak$Net_Revenue/clean_kayak$Total_Bookings 

KPI

Checking whether our KPIs are the same for each measure of success

Net Revenue

# Highest in net_revenue
highest_net_revenue <- head(sort(clean_airfrance$net_revenue, 
                                 decreasing=TRUE), n=10)

# Sorting the Data by Net Revenue in the clean Data frame
Sorted_net_revenue <- clean_airfrance[order(- clean_airfrance$net_revenue),]

# Selecting the top 10 values 
Top_10_net_revenue <- head(Sorted_net_revenue,10) 

# Displaying the value in addition to the campaign and Publisher Name to make the results simpler to interpret

Top_10_net_revenue[ , c(2, 6, 25)]
## # A tibble: 10 × 3
##    `Publisher Name`  Campaign                               net_revenue
##    <chr>             <chr>                                        <dbl>
##  1 Google - Global   Air France Global Campaign                 231080.
##  2 Overture - Global Unassigned                                 222631.
##  3 Google - US       Air France Branded                         130159.
##  4 MSN - Global      Air France Brand & French Destinations      92592.
##  5 Google - US       Air France Branded                          74575.
##  6 Overture - US     Unassigned                                  41579.
##  7 Overture - US     Unassigned                                  40942.
##  8 Google - Global   Air France Global Campaign                  34390.
##  9 Google - Global   Air France Global Campaign                  34359.
## 10 Google - US       Air France Branded                          30149.

ROA

# Highest ROA
highest_roa <- head(sort(clean_airfrance$ROA, 
                         decreasing=TRUE), n=10)

# Sorting the Data by ROA in the clean Data frame
Sorted_roa <- clean_airfrance[order(- clean_airfrance$ROA),]

# Selecting the top 10 values 
Top_10_roa <- head(Sorted_roa,10) 

# Displaying the value in addition to the campaign and Publisher Name to make the results simpler to interpret
Top_10_roa[ , c(2, 6, 26)]
## # A tibble: 10 × 3
##    `Publisher Name`  Campaign                                 ROA
##    <chr>             <chr>                                  <dbl>
##  1 MSN - Global      Air France Brand & French Destinations 1006.
##  2 Google - Global   Air France Global Campaign              719.
##  3 Overture - Global Unassigned                              424 
##  4 Overture - Global Unassigned                              345.
##  5 Google - US       Geo Targeted San Francisco              322.
##  6 Overture - US     Unassigned                              321.
##  7 Overture - Global Unassigned                              289.
##  8 Overture - Global Unassigned                              278.
##  9 Overture - US     Unassigned                              242.
## 10 Overture - US     Unassigned                              188.

Conversion Rate

# Highest Conversion Rate
highest_conversion <- head(sort(clean_airfrance$conversion_rate, 
                                decreasing=TRUE), n = 10)
# Sorting the Data by Conversion Rate in the clean Data frame
Sorted_conversion_rate_to_cost <- clean_airfrance[order(- clean_airfrance$conversion_rate),]

# Selecting the top 10 values 
Top_10_conversion_rate_to_cost <- head(Sorted_conversion_rate_to_cost,10) 

# Displaying the value in addition to the campaign and Publisher Name to make the results simpler to interpret
Top_10_conversion_rate_to_cost [ , c(2, 6, 27)]
## # A tibble: 10 × 3
##    `Publisher Name`  Campaign                               conversion_rate
##    <chr>             <chr>                                            <dbl>
##  1 MSN - Global      Air France Brand & French Destinations           258. 
##  2 Google - Global   Air France Global Campaign                        86.5
##  3 Overture - Global Unassigned                                        45.5
##  4 Overture - Global Unassigned                                        38.1
##  5 Google - US       Geo Targeted New York                             37.2
##  6 Google - US       Geo Targeted Miami                                36.2
##  7 Overture - US     Unassigned                                        21.6
##  8 Overture - US     Unassigned                                        21.1
##  9 Google - US       Geo Targeted New York                             20.6
## 10 Google - US       Geo Targeted Philadelphia                         20.5

Publisher Analyzis

Pivot table for KPI overview complete

# Creating Pivot table for averages
dfchannel_pivot <- dfchannel %>% group_by(`Publisher Name`) %>% summarize(
  avg_cpc = mean(`Avg. Cost per Click`), # avg_cpc_kayak = 1.256475
  avg_ROA = mean(ROA), # avg_ROA_kayak = 64.513
  avg_conversion_rate = mean(conversion_rate), # avg_conversion_rate = 0.0583
  avg_Revenue_booking = mean(Average_Revenue_Booking ), # avg_ROA_kayak = 1106.379
)
dfchannel_pivot
## # A tibble: 6 × 5
##   `Publisher Name`  avg_cpc avg_ROA avg_conversion_rate avg_Revenue_booking
##   <chr>               <dbl>   <dbl>               <dbl>               <dbl>
## 1 Google - Global     2.09    29.5                3.23                1019.
## 2 Google - US         2.32    28.1                3.62                 901.
## 3 MSN - Global        1.29   114.                27.1                 1239.
## 4 Overture - Global   0.624   77.0                7.56                1171.
## 5 Overture - US       1.01    27.4                2.52                1153.
## 6 Yahoo - US          1.17     2.26               0.165               1978.
# adding kayak    
kayak.data <- c("Kayak", "1.26","64.513","5.83","1106.38")
dfchannelNew <- rbind(dfchannel_pivot, kayak.data)
dfchannelNew
## # A tibble: 7 × 5
##   `Publisher Name`  avg_cpc           avg_ROA          avg_conversion_… avg_Revenue_boo…
##   <chr>             <chr>             <chr>            <chr>            <chr>           
## 1 Google - Global   2.09171020076531  29.5253034369227 3.22905612533398 1018.81170894233
## 2 Google - US       2.31876814450431  28.1071198151467 3.61936506099911 901.26237473559 
## 3 MSN - Global      1.28844465125     114.326939182814 27.0928806872923 1238.89041557018
## 4 Overture - Global 0.623644691544118 76.9657542183916 7.56329457623991 1170.83645833333
## 5 Overture - US     1.00664621305556  27.4406851209231 2.52278748058719 1153.18428273968
## 6 Yahoo - US        1.17080115        2.26138754940264 0.1648872595061… 1977.95         
## 7 Kayak             1.26              64.513           5.83             1106.38
# creating object for unique channels
channelUnique <- unique(dfchannelNew$`Publisher Name`)

#vector for US based or not
dfchannelNew$US <- c()
US <- factor(dfchannelNew$`Publisher Name`,
                levels = c("Google - Global",
                           "Google - US",
                           "MSN - Global",
                           "Overture - Global",
                           "Overture - US",
                           "Yahoo - US",
                           "Kayak"),
                labels = c(0,1,0,0,1,1,0))

# dfchannelNew$US <- US
# dfchannelNew$US

dfchannelNewUS <- cbind(dfchannelNew, US)
# dfchannelNewUS <- rbind(dfchannelNew, dfchannelNew$US)
View(dfchannelNewUS)
class(dfchannelNewUS)
## [1] "data.frame"
# str(dfchannelNewUS)
dfchannelNewUS$avg_cpc <- as.numeric(dfchannelNewUS$avg_cpc)
dfchannelNewUS$avg_ROA <- as.numeric(dfchannelNewUS$avg_ROA)
dfchannelNewUS$avg_conversion_rate <- as.numeric(dfchannelNewUS$avg_conversion_rate)
dfchannelNewUS$avg_Revenue_booking <- as.numeric(dfchannelNewUS$avg_Revenue_booking)

Visualizing and insight creation

# Visualizing and insight creation
dev.new(width=8,height=10,noRStudioGD = TRUE)

# 1 average ROA
pROA <- ggplot() + geom_bar(aes(y= avg_ROA, x=`Publisher Name`, fill= channelUnique), data=dfchannelNewUS,
                            stat="identity")
pROA
# Overture-Global

# 2 average cost per click
pCPC <- ggplot() + geom_bar(aes(y= avg_cpc , x=`Publisher Name`, fill= channelUnique), data =dfchannelNewUS,
                            stat="identity")
pCPC 
# Google - US 
# Lowest overture - Global

# 3 Conversion rate price ratio
pConversion_rate<- ggplot() + geom_bar(aes(y= avg_conversion_rate , x=`Publisher Name`, fill= channelUnique), data =dfchannelNewUS,
                                                   stat="identity")
pConversion_rate
# Overture - Global

# 4 booking ratio
pRevBook <- ggplot() + geom_bar(aes(y= avg_Revenue_booking , x=`Publisher Name`, fill= channelUnique), data =dfchannelNewUS,
                                stat="identity")
pRevBook  
# Google - US 

# creating ranking for best channels
x <- c('Kayak', 'MSN-Global', 'MSN - US','Yahoo-US', 'Google-Global', 'Overture-Global', 'Google-US', 'Overture-US')
y <- c(4,1,0,5,1,9,4,2) #points for first 3 places given to respective channel from comparison per category

# bar chart for comparison for success
data <- data.frame(x,y)
data$x <- factor(data$x, levels = data[["x"]])

pRanking <- plot_ly(data, x= ~x, y= ~y, type = "bar", name = "Comparison Channels", color = I("blue"), alpha = 0.5) %>%
  layout(title = "Ranking Channels",
         xaxis = list(title = ""),
         yaxis = list(title = ""))
pRanking
# conclusion: Overture-Global is overall best channel in respective to 
# defined KPI's, winning in 3 categories (highest avg ROA, lowest cpc & Conversion_rate)

Data Modeling & analyzis

## Using logistic Regression USpub
UsPub <- factor(dfchannel$`Publisher Name`,
                          levels = c("Google - Global",
                                     "Google - US",
                                     "MSN - Global",
                                     "Overture - Global",
                                     "Overture - US",
                                     "Yahoo - US"),
                          labels = c(0,1,0,0,1,1))

dfchannel$UsPub <- UsPub
# dfchannel$UsPub

# logistic regression

# Build sample
index_dfchannel <- sample(1:nrow(dfchannel),
                          size = 0.8*nrow(dfchannel))

# Build training dataframe 
train_dfchannel <- dfchannel[index_dfchannel,]
# Build testing dataframe
test_dfchannel <- dfchannel[-index_dfchannel,]

# Test if there is a difference of result in SEM between US and non US
logitdfchannel <- glm(UsPub ~ ROA+conversion_rate+Average_Revenue_Booking,
                      data = train_dfchannel, family = "binomial")

summary(logitdfchannel)
## 
## Call:
## glm(formula = UsPub ~ ROA + conversion_rate + Average_Revenue_Booking, 
##     family = "binomial", data = train_dfchannel)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6554  -1.3641   0.8643   0.9303   1.6266  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)   
## (Intercept)              0.9202908  0.3132470   2.938   0.0033 **
## ROA                     -0.0063443  0.0055534  -1.142   0.2533   
## conversion_rate          0.0374749  0.0497361   0.753   0.4512   
## Average_Revenue_Booking -0.0002814  0.0002556  -1.101   0.2711   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 268.34  on 203  degrees of freedom
## Residual deviance: 262.58  on 200  degrees of freedom
## AIC: 270.58
## 
## Number of Fisher Scoring iterations: 4
##There is no significant different between the USA and Non-USA

Improving campaigns & keyword analyzis

Match Type Analysis per Publisher

# See frequencies and Observation categories for Publishers
table(clean_airfrance$`Publisher Name`)
## 
##   Google - Global       Google - US      MSN - Global Overture - Global 
##               264              2003                99               271 
##     Overture - US        Yahoo - US 
##               648                 1
table(clean_airfrance$`Match Type`)
## 
##    Broad Standard    Exact Advanced 
##     2346      547       20      373
match_types <- c("Broad", "Standard", "Exact", "Advanced") #--> Create vector with match types names
publishers <- c("Google - Global", "Google - US", "MSN - Global", 
                "Overture - Global", "Overture - US", "Yahoo - US ") #--> Create vector with match types names
BASED ON ROA
# Create empty vectors per Publisher
google_GL_ROA_avg <- c()
google_US_ROA_avg <- c()
msn_GL_ROA_avg <- c()
overture_GL_ROA_avg <- c()
overture_US_ROA_avg <- c()
yahoo_US_ROA_avg <- c()
i <- 1 #--> setting i to start at 1

# While loop to get average ROA per Publisher, per match type
while(i <= length(match_types)){
  google_GL_ROA_avg <- c(google_GL_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$`Publisher Name` == "Google - Global" & 
            clean_airfrance$`Match Type` == match_types[i])]))
  
  google_US_ROA_avg <- c(google_US_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$`Publisher Name` == "Google - US" & 
            clean_airfrance$`Match Type` == match_types[i])]))
  
  msn_GL_ROA_avg <- c(msn_GL_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$`Publisher Name` == "MSN - Global" & 
            clean_airfrance$`Match Type` == match_types[i])]))
  
  overture_GL_ROA_avg <- c(overture_GL_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$`Publisher Name` == "Overture - Global" & 
            clean_airfrance$`Match Type` == match_types[i])]))
  
  overture_US_ROA_avg <- c(overture_US_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$`Publisher Name` == "Overture - US" & 
            clean_airfrance$`Match Type` == match_types[i])]))
  
  yahoo_US_ROA_avg <- c(yahoo_US_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$`Publisher Name` == "Yahoo - US" & 
            clean_airfrance$`Match Type` == match_types[i])]))
  
  i <- i + 1
  
} #--> closing ROA while loop

# Combine average ROA per publisher in a matrix
ROA_per_publisher_per_match_type <- rbind(google_GL_ROA_avg, google_US_ROA_avg, msn_GL_ROA_avg, 
                           overture_GL_ROA_avg, overture_US_ROA_avg, 
                           yahoo_US_ROA_avg)

colnames(ROA_per_publisher_per_match_type) <- match_types #--> assign names to the rows

print(ROA_per_publisher_per_match_type)
##                          Broad Standard    Exact  Advanced
## google_GL_ROA_avg    1.7762658      NaN 383.1791       NaN
## google_US_ROA_avg    0.7009702      NaN  -1.0000       NaN
## msn_GL_ROA_avg      10.6491858      NaN      NaN       NaN
## overture_GL_ROA_avg        NaN 9.609336      NaN  8.093833
## overture_US_ROA_avg        NaN 1.906722      NaN -0.788479
## yahoo_US_ROA_avg           NaN      NaN      NaN  2.261388
BASED ON TOTAL REVENUE
# Create empty vectors per Publisher
google_GL_net_revenue_avg <- c()
google_US_net_revenue_avg <- c()
msn_GL_net_revenue_avg <- c()
overture_GL_net_revenue_avg <- c()
overture_US_net_revenue_avg <- c()
yahoo_US_net_revenue_avg <- c()
i <- 1 #--> setting i to start at 1

# While loop to get average net revenue per Publisher, per match type
while(i <= length(match_types)){
  google_GL_net_revenue_avg <- c(google_GL_net_revenue_avg, mean(clean_airfrance$net_revenue[
    which(clean_airfrance$`Publisher Name` == "Google - Global" & 
            clean_airfrance$`Match Type` == match_types[i])]))
  
  google_US_net_revenue_avg <- c(google_US_net_revenue_avg, mean(clean_airfrance$net_revenue[
    which(clean_airfrance$`Publisher Name` == "Google - US" & 
            clean_airfrance$`Match Type` == match_types[i])]))
  
  msn_GL_net_revenue_avg <- c(msn_GL_net_revenue_avg, mean(clean_airfrance$net_revenue[
    which(clean_airfrance$`Publisher Name` == "MSN - Global" & 
            clean_airfrance$`Match Type` == match_types[i])]))
  
  overture_GL_net_revenue_avg <- c(overture_GL_net_revenue_avg, mean(clean_airfrance$net_revenue[
    which(clean_airfrance$`Publisher Name` == "Overture - Global" & 
            clean_airfrance$`Match Type` == match_types[i])]))
  
  overture_US_net_revenue_avg <- c(overture_US_net_revenue_avg, mean(clean_airfrance$net_revenue[
    which(clean_airfrance$`Publisher Name` == "Overture - US" & 
            clean_airfrance$`Match Type` == match_types[i])]))
  
  yahoo_US_net_revenue_avg <- c(yahoo_US_net_revenue_avg, mean(clean_airfrance$net_revenue[
    which(clean_airfrance$`Publisher Name` == "Yahoo - US" & 
            clean_airfrance$`Match Type` == match_types[i])]))
  
  i <- i + 1
  
} #--> closing net revenue while loop

# Combine average ROA per publisher in a matrix
net_revenue_per_publisher_per_match_type <- rbind(google_GL_net_revenue_avg, google_US_net_revenue_avg, msn_GL_net_revenue_avg, 
                           overture_GL_net_revenue_avg, overture_US_net_revenue_avg, 
                           yahoo_US_net_revenue_avg)

colnames(net_revenue_per_publisher_per_match_type) <- match_types  #--> assign names to the rows

print(net_revenue_per_publisher_per_match_type)
##                                 Broad  Standard         Exact   Advanced
## google_GL_net_revenue_avg    664.6852       NaN 116371.343801        NaN
## google_US_net_revenue_avg    135.2095       NaN     -9.961806        NaN
## msn_GL_net_revenue_avg      1347.1100       NaN           NaN        NaN
## overture_GL_net_revenue_avg       NaN  46.36535           NaN 2082.75971
## overture_US_net_revenue_avg       NaN 300.91014           NaN   27.85167
## yahoo_US_net_revenue_avg          NaN       NaN           NaN 1371.47500
BASED ON CONVERSION
#C reate empty vectors per Publisher
google_GL_conversion_rate_avg <- c()
google_US_conversion_rate_avg <- c()
msn_GL_conversion_rate_avg <- c()
overture_GL_conversion_rate_avg <- c()
overture_US_conversion_rate_avg <- c()
yahoo_US_conversion_rate_avg <- c()
i <- 1 #--> setting i to start at 1

#While loop to get average conversion rate per Publisher, per match type
while(i <= length(match_types)){
  google_GL_conversion_rate_avg <- c(google_GL_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
    which(clean_airfrance$`Publisher Name` == "Google - Global" & 
            clean_airfrance$`Match Type` == match_types[i])]))
  
  google_US_conversion_rate_avg <- c(google_US_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
    which(clean_airfrance$`Publisher Name` == "Google - US" & 
            clean_airfrance$`Match Type` == match_types[i])]))
  
  msn_GL_conversion_rate_avg <- c(msn_GL_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
    which(clean_airfrance$`Publisher Name` == "MSN - Global" & 
            clean_airfrance$`Match Type` == match_types[i])]))
  
  overture_GL_conversion_rate_avg <- c(overture_GL_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
    which(clean_airfrance$`Publisher Name` == "Overture - Global" & 
            clean_airfrance$`Match Type` == match_types[i])]))
  
  overture_US_conversion_rate_avg <- c(overture_US_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
    which(clean_airfrance$`Publisher Name` == "Overture - US" & 
            clean_airfrance$`Match Type` == match_types[i])]))
  
  yahoo_US_conversion_rate_avg <- c(yahoo_US_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
    which(clean_airfrance$`Publisher Name` == "Yahoo - US" & 
            clean_airfrance$`Match Type` == match_types[i])]))
  
  i <- i + 1
  
} #--> closing conversion rate while loop

# Combine average ROA per publisher in a matrix
conversion_rate_per_publisher_per_match_type <- rbind(google_GL_conversion_rate_avg, google_US_conversion_rate_avg, msn_GL_conversion_rate_avg, 
                                   overture_GL_conversion_rate_avg, overture_US_conversion_rate_avg, 
                                   yahoo_US_conversion_rate_avg)

colnames(conversion_rate_per_publisher_per_match_type) <- match_types  #--> assign names to the rows

print(conversion_rate_per_publisher_per_match_type)
##                                     Broad  Standard    Exact   Advanced
## google_GL_conversion_rate_avg   0.2599018       NaN 45.06474        NaN
## google_US_conversion_rate_avg   0.2115095       NaN  0.00000        NaN
## msn_GL_conversion_rate_avg      2.7366546       NaN      NaN        NaN
## overture_GL_conversion_rate_avg       NaN 1.1580431      NaN 0.77508591
## overture_US_conversion_rate_avg       NaN 0.2574848      NaN 0.01942795
## yahoo_US_conversion_rate_avg          NaN       NaN      NaN 0.16488726

Match Type Analysis per Campaign

# See frequencies and Observation categories for Campaigns
table(clean_airfrance$Campaign)
## 
## Air France Brand & French Destinations                     Air France Branded 
##                                     99                                     18 
##             Air France Global Campaign                         Business Class 
##                                    264                                     15 
##                    French Destinations                          General Terms 
##                                     83                                      1 
##                    Geo Targeted Boston                   Geo Targeted Chicago 
##                                    115                                     45 
##                        Geo Targeted DC                   Geo Targeted Detroit 
##                                    118                                     83 
##                   Geo Targeted Houston               Geo Targeted Los Angeles 
##                                     88                                    104 
##                     Geo Targeted Miami                  Geo Targeted New York 
##                                     33                                    233 
##              Geo Targeted Philadelphia             Geo Targeted San Francisco 
##                                     69                                     83 
##                   Geo Targeted Seattle                   Google_Yearlong 2006 
##                                     33                                    480 
##                 Outside Western Europe                   Paris & France Terms 
##                                     14                                    102 
##                             Unassigned            Western Europe Destinations 
##                                    919                                    287
campaigns <- c("Air France Brand & French Destinations",
               "Air France Branded",
               "Air France Global Campaign",
               "Business Class",
               "French Destinations",
               "General Terms",
               "Geo Targeted Boston", 
               "Geo Targeted Chicago",
               "Geo Targeted DC",
               "Geo Targeted Detroit",
               "Geo Targeted Houston",
               "Geo Targeted Los Angeles",
               "Geo Targeted Miami",
               "Geo Targeted New York",
               "Geo Targeted Philadelphia",
               "Geo Targeted San Francisco",
               "Geo Targeted Seattle",
               "Google_Yearlong",
               "Outside Western Europe",
               "Paris & France Terms",
               "Unassigned",
               "Western Europe Destinations") #--> creating campaigns vector
BASED ON ROA
# Create empty vectors per Publisher
Air_France_Brand_French_Destinations_ROA_avg <- c()                   
Air_France_Branded_ROA_avg <- c()             
Air_France_Global_Campaign_ROA_avg <- c() 
Business_Class_ROA_avg <- c()                    
French_Destinations_ROA_avg <- c()                          
General_Terms_ROA_avg <- c() 
Geo_Targeted_Boston_ROA_avg <- c()                   
Geo_Targeted_Chicago_ROA_avg <- c()                        
Geo_Targeted_DC_ROA_avg <- c() 
Geo_Targeted_Detroit_ROA_avg <- c()                   
Geo_Targeted_Houston_ROA_avg <- c()               
Geo_Targeted_Los_Angeles_ROA_avg <- c() 
Geo_Targeted_Miami_ROA_avg <- c()                  
Geo_Targeted_New_York_ROA_avg <- c()              
Geo_Targeted_Philadelphia_ROA_avg <- c() 
Geo_Targeted_San_Francisco_ROA_avg <- c()                   
Geo_Targeted_Seattle_ROA_avg <- c()                   
Google_Yearlong_ROA_avg <- c()
Outside_Western_Europe_ROA_avg <- c()                   
Paris_France_Terms_ROA_avg <- c()                             
Unassigned_ROA_avg <- c() 
Western_Europe_Destinations_ROA_avg <- c() 
i <- 1

# While loop to get average ROA per Campaign, per match type
while(i <= length(match_types)){
  Air_France_Brand_French_Destinations_ROA_avg <- 
    c(Air_France_Brand_French_Destinations_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$Campaign == "Air France Brand & French Destinations" 
          & clean_airfrance$`Match Type` == match_types[i])]))              
  Air_France_Branded_ROA_avg <- 
    c( Air_France_Branded_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$Campaign == "Air France Branded" 
          & clean_airfrance$`Match Type` == match_types[i])]))
  Air_France_Global_Campaign_ROA_avg <- 
    c( Air_France_Global_Campaign_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$Campaign == "Air France Global Campaign" 
          & clean_airfrance$`Match Type` == match_types[i])]))
  Business_Class_ROA_avg <- 
    c( Business_Class_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$Campaign == "Business Class" 
          & clean_airfrance$`Match Type` == match_types[i])]))
  French_Destinations_ROA_avg <- 
    c( French_Destinations_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$Campaign == "French Destinations" 
          & clean_airfrance$`Match Type` == match_types[i])]))
  General_Terms_ROA_avg <- 
    c( General_Terms_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$Campaign == "General Terms" 
          & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Boston_ROA_avg <- 
    c( Geo_Targeted_Boston_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$Campaign == "Geo Targeted Boston" 
          & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Chicago_ROA_avg <- 
    c( Geo_Targeted_Chicago_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$Campaign == "Geo Targeted Chicago" 
          & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_DC_ROA_avg <- 
    c( Geo_Targeted_DC_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$Campaign == "Geo Targeted DC" 
          & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Detroit_ROA_avg <- 
    c( Geo_Targeted_Detroit_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$Campaign == "Geo Targeted Detroit" 
          & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Houston_ROA_avg <- 
    c( Geo_Targeted_Houston_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$Campaign == "Geo Targeted Houston" 
          & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Los_Angeles_ROA_avg <- 
    c( Geo_Targeted_Los_Angeles_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$Campaign == "Geo Targeted Los Angeles" 
          & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Miami_ROA_avg <- 
    c( Geo_Targeted_Miami_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$Campaign == "Geo Targeted Miami" 
          & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_New_York_ROA_avg <- 
    c( Geo_Targeted_New_York_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$Campaign == "Geo Targeted New York" 
          & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Philadelphia_ROA_avg <- 
    c( Geo_Targeted_Philadelphia_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$Campaign == "Geo Targeted Philadelphia" 
          & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_San_Francisco_ROA_avg <- 
    c( Geo_Targeted_San_Francisco_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$Campaign == "Geo Targeted San Francisco" 
          & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Seattle_ROA_avg <- 
    c( Geo_Targeted_Seattle_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$Campaign == "Geo Targeted Seattle" 
          & clean_airfrance$`Match Type` == match_types[i])]))
  Google_Yearlong_ROA_avg <- 
    c( Google_Yearlong_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$Campaign == "Google_Yearlong" 
          & clean_airfrance$`Match Type` == match_types[i])]))
  Outside_Western_Europe_ROA_avg <- 
    c( Outside_Western_Europe_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$Campaign == "Outside Western Europe" 
          & clean_airfrance$`Match Type` == match_types[i])]))
  Paris_France_Terms_ROA_avg <- 
    c( Paris_France_Terms_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$Campaign == "Paris & France Terms" 
          & clean_airfrance$`Match Type` == match_types[i])]))
  Unassigned_ROA_avg <- 
    c( Unassigned_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$Campaign == "Unassigned" 
          & clean_airfrance$`Match Type` == match_types[i])]))
  Western_Europe_Destinations_ROA_avg <- 
    c( Western_Europe_Destinations_ROA_avg, mean(clean_airfrance$ROA[
    which(clean_airfrance$Campaign == "Western Europe Destinations" 
          & clean_airfrance$`Match Type` == match_types[i])]))
  i <- i + 1
} #--> closing ROA while loop

# Combine average ROA per campaign in a matrix
campaign_ROA_per_match_type <- rbind(Air_France_Brand_French_Destinations_ROA_avg, 
                                     Air_France_Branded_ROA_avg, 
                                     Air_France_Global_Campaign_ROA_avg, 
                                     Business_Class_ROA_avg, 
                                     French_Destinations_ROA_avg, 
                                     General_Terms_ROA_avg, 
                                     Geo_Targeted_Boston_ROA_avg, 
                                     Geo_Targeted_Chicago_ROA_avg, 
                                     Geo_Targeted_DC_ROA_avg, 
                                     Geo_Targeted_Detroit_ROA_avg, 
                                     Geo_Targeted_Houston_ROA_avg, 
                                     Geo_Targeted_Los_Angeles_ROA_avg, 
                                     Geo_Targeted_Miami_ROA_avg, 
                                     Geo_Targeted_New_York_ROA_avg, 
                                     Geo_Targeted_Philadelphia_ROA_avg, 
                                     Geo_Targeted_San_Francisco_ROA_avg, 
                                     Geo_Targeted_Seattle_ROA_avg, 
                                     Google_Yearlong_ROA_avg, 
                                     Outside_Western_Europe_ROA_avg, 
                                     Paris_France_Terms_ROA_avg, 
                                     Unassigned_ROA_avg, 
                                     Western_Europe_Destinations_ROA_avg)

colnames(campaign_ROA_per_match_type) <- match_types  #--> assign names to the rows
print(campaign_ROA_per_match_type)
##                                                   Broad Standard    Exact
## Air_France_Brand_French_Destinations_ROA_avg 10.6491858      NaN      NaN
## Air_France_Branded_ROA_avg                   39.1142156      NaN      NaN
## Air_France_Global_Campaign_ROA_avg            1.7762658      NaN 383.1791
## Business_Class_ROA_avg                       -0.9942267      NaN      NaN
## French_Destinations_ROA_avg                  -0.5189469      NaN      NaN
## General_Terms_ROA_avg                               NaN      NaN      NaN
## Geo_Targeted_Boston_ROA_avg                   0.3984106      NaN      NaN
## Geo_Targeted_Chicago_ROA_avg                  1.2411431      NaN      NaN
## Geo_Targeted_DC_ROA_avg                       0.8604079      NaN      NaN
## Geo_Targeted_Detroit_ROA_avg                  0.6866557      NaN      NaN
## Geo_Targeted_Houston_ROA_avg                  1.1250347      NaN      NaN
## Geo_Targeted_Los_Angeles_ROA_avg             -0.3514515      NaN      NaN
## Geo_Targeted_Miami_ROA_avg                    4.1561772      NaN      NaN
## Geo_Targeted_New_York_ROA_avg                 1.9793208      NaN      NaN
## Geo_Targeted_Philadelphia_ROA_avg             0.2879647      NaN      NaN
## Geo_Targeted_San_Francisco_ROA_avg            3.5255419      NaN      NaN
## Geo_Targeted_Seattle_ROA_avg                  0.6761150      NaN      NaN
## Google_Yearlong_ROA_avg                             NaN      NaN      NaN
## Outside_Western_Europe_ROA_avg               -1.0000000      NaN      NaN
## Paris_France_Terms_ROA_avg                   -0.2930381      NaN      NaN
## Unassigned_ROA_avg                                  NaN 3.638754      NaN
## Western_Europe_Destinations_ROA_avg          -0.8289824      NaN  -1.0000
##                                              Advanced
## Air_France_Brand_French_Destinations_ROA_avg      NaN
## Air_France_Branded_ROA_avg                        NaN
## Air_France_Global_Campaign_ROA_avg                NaN
## Business_Class_ROA_avg                            NaN
## French_Destinations_ROA_avg                       NaN
## General_Terms_ROA_avg                        2.261388
## Geo_Targeted_Boston_ROA_avg                       NaN
## Geo_Targeted_Chicago_ROA_avg                      NaN
## Geo_Targeted_DC_ROA_avg                           NaN
## Geo_Targeted_Detroit_ROA_avg                      NaN
## Geo_Targeted_Houston_ROA_avg                      NaN
## Geo_Targeted_Los_Angeles_ROA_avg                  NaN
## Geo_Targeted_Miami_ROA_avg                        NaN
## Geo_Targeted_New_York_ROA_avg                     NaN
## Geo_Targeted_Philadelphia_ROA_avg                 NaN
## Geo_Targeted_San_Francisco_ROA_avg                NaN
## Geo_Targeted_Seattle_ROA_avg                      NaN
## Google_Yearlong_ROA_avg                           NaN
## Outside_Western_Europe_ROA_avg                    NaN
## Paris_France_Terms_ROA_avg                        NaN
## Unassigned_ROA_avg                           2.745344
## Western_Europe_Destinations_ROA_avg               NaN
BASED ON NET REVENUE
# Create empty vectors per Publisher
Air_France_Brand_French_Destinations_net_revenue_avg <- c()                   
Air_France_Branded_net_revenue_avg <- c()             
Air_France_Global_Campaign_net_revenue_avg <- c() 
Business_Class_net_revenue_avg <- c()                    
French_Destinations_net_revenue_avg <- c()                          
General_Terms_net_revenue_avg <- c() 
Geo_Targeted_Boston_net_revenue_avg <- c()                   
Geo_Targeted_Chicago_net_revenue_avg <- c()                        
Geo_Targeted_DC_net_revenue_avg <- c() 
Geo_Targeted_Detroit_net_revenue_avg <- c()                   
Geo_Targeted_Houston_net_revenue_avg <- c()               
Geo_Targeted_Los_Angeles_net_revenue_avg <- c() 
Geo_Targeted_Miami_net_revenue_avg <- c()                  
Geo_Targeted_New_York_net_revenue_avg <- c()              
Geo_Targeted_Philadelphia_net_revenue_avg <- c() 
Geo_Targeted_San_Francisco_net_revenue_avg <- c()                   
Geo_Targeted_Seattle_net_revenue_avg <- c()                   
Google_Yearlong_net_revenue_avg <- c()
Outside_Western_Europe_net_revenue_avg <- c()                   
Paris_France_Terms_net_revenue_avg <- c()                             
Unassigned_net_revenue_avg <- c() 
Western_Europe_Destinations_net_revenue_avg <- c() 
i <- 1

# While loop to get average Net Revenue per Campaign, per match type
while(i <= length(match_types)){
  Air_France_Brand_French_Destinations_net_revenue_avg <- 
    c(Air_France_Brand_French_Destinations_net_revenue_avg, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Air France Brand & French Destinations" 
            & clean_airfrance$`Match Type` == match_types[i])]))              
  Air_France_Branded_net_revenue_avg <- 
    c( Air_France_Branded_net_revenue_avg, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Air France Branded" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Air_France_Global_Campaign_net_revenue_avg <- 
    c( Air_France_Global_Campaign_net_revenue_avg, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Air France Global Campaign" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Business_Class_net_revenue_avg <- 
    c( Business_Class_net_revenue_avg, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Business Class" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  French_Destinations_net_revenue_avg <- 
    c( French_Destinations_net_revenue_avg, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "French Destinations" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  General_Terms_net_revenue_avg <- 
    c( General_Terms_net_revenue_avg, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "General Terms" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Boston_net_revenue_avg <- 
    c( Geo_Targeted_Boston_net_revenue_avg, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Geo Targeted Boston" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Chicago_net_revenue_avg <- 
    c( Geo_Targeted_Chicago_net_revenue_avg, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Geo Targeted Chicago" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_DC_net_revenue_avg <- 
    c( Geo_Targeted_DC_net_revenue_avg, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Geo Targeted DC" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Detroit_net_revenue_avg <- 
    c( Geo_Targeted_Detroit_net_revenue_avg, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Geo Targeted Detroit" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Houston_net_revenue_avg <- 
    c( Geo_Targeted_Houston_net_revenue_avg, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Geo Targeted Houston" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Los_Angeles_net_revenue_avg <- 
    c( Geo_Targeted_Los_Angeles_net_revenue_avg, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Geo Targeted Los Angeles" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Miami_net_revenue_avg <- 
    c( Geo_Targeted_Miami_net_revenue_avg, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Geo Targeted Miami" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_New_York_net_revenue_avg <- 
    c( Geo_Targeted_New_York_net_revenue_avg, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Geo Targeted New York" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Philadelphia_net_revenue_avg <- 
    c( Geo_Targeted_Philadelphia_net_revenue_avg, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Geo Targeted Philadelphia" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_San_Francisco_net_revenue_avg <- 
    c( Geo_Targeted_San_Francisco_net_revenue_avg, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Geo Targeted San Francisco" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Seattle_net_revenue_avg <- 
    c( Geo_Targeted_Seattle_net_revenue_avg, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Geo Targeted Seattle" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Google_Yearlong_net_revenue_avg <- 
    c( Google_Yearlong_net_revenue_avg, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Google_Yearlong" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Outside_Western_Europe_net_revenue_avg <- 
    c( Outside_Western_Europe_net_revenue_avg, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Outside Western Europe" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Paris_France_Terms_net_revenue_avg <- 
    c( Paris_France_Terms_net_revenue_avg, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Paris & France Terms" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Unassigned_net_revenue_avg <- 
    c( Unassigned_net_revenue_avg, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Unassigned" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Western_Europe_Destinations_net_revenue_avg <- 
    c( Western_Europe_Destinations_net_revenue_avg, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Western Europe Destinations" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  i <- i + 1
} #--> closing net_revenue while loop

# Combine average net_revenue per campaign in a matrix
campaign_net_revenue_per_match_type <- rbind(Air_France_Brand_French_Destinations_net_revenue_avg, 
                                     Air_France_Branded_net_revenue_avg, 
                                     Air_France_Global_Campaign_net_revenue_avg, 
                                     Business_Class_net_revenue_avg, 
                                     French_Destinations_net_revenue_avg, 
                                     General_Terms_net_revenue_avg, 
                                     Geo_Targeted_Boston_net_revenue_avg, 
                                     Geo_Targeted_Chicago_net_revenue_avg, 
                                     Geo_Targeted_DC_net_revenue_avg, 
                                     Geo_Targeted_Detroit_net_revenue_avg, 
                                     Geo_Targeted_Houston_net_revenue_avg, 
                                     Geo_Targeted_Los_Angeles_net_revenue_avg, 
                                     Geo_Targeted_Miami_net_revenue_avg, 
                                     Geo_Targeted_New_York_net_revenue_avg, 
                                     Geo_Targeted_Philadelphia_net_revenue_avg, 
                                     Geo_Targeted_San_Francisco_net_revenue_avg, 
                                     Geo_Targeted_Seattle_net_revenue_avg, 
                                     Google_Yearlong_net_revenue_avg, 
                                     Outside_Western_Europe_net_revenue_avg, 
                                     Paris_France_Terms_net_revenue_avg, 
                                     Unassigned_net_revenue_avg, 
                                     Western_Europe_Destinations_net_revenue_avg)

colnames(campaign_net_revenue_per_match_type) <- match_types  #--> assign names to the rows
print(campaign_net_revenue_per_match_type)
##                                                             Broad Standard
## Air_France_Brand_French_Destinations_net_revenue_avg  1347.109975      NaN
## Air_France_Branded_net_revenue_avg                   15688.183341      NaN
## Air_France_Global_Campaign_net_revenue_avg             664.685163      NaN
## Business_Class_net_revenue_avg                        -142.939166      NaN
## French_Destinations_net_revenue_avg                    -35.111295      NaN
## General_Terms_net_revenue_avg                                 NaN      NaN
## Geo_Targeted_Boston_net_revenue_avg                     12.549130      NaN
## Geo_Targeted_Chicago_net_revenue_avg                    43.625278      NaN
## Geo_Targeted_DC_net_revenue_avg                         21.157521      NaN
## Geo_Targeted_Detroit_net_revenue_avg                     4.905873      NaN
## Geo_Targeted_Houston_net_revenue_avg                    61.265909      NaN
## Geo_Targeted_Los_Angeles_net_revenue_avg                11.696154      NaN
## Geo_Targeted_Miami_net_revenue_avg                       6.855303      NaN
## Geo_Targeted_New_York_net_revenue_avg                  111.676180      NaN
## Geo_Targeted_Philadelphia_net_revenue_avg                1.081159      NaN
## Geo_Targeted_San_Francisco_net_revenue_avg              30.198645      NaN
## Geo_Targeted_Seattle_net_revenue_avg                    49.894697      NaN
## Google_Yearlong_net_revenue_avg                               NaN      NaN
## Outside_Western_Europe_net_revenue_avg                 -42.702679      NaN
## Paris_France_Terms_net_revenue_avg                     163.102208      NaN
## Unassigned_net_revenue_avg                                    NaN 243.6725
## Western_Europe_Destinations_net_revenue_avg            -24.506273      NaN
##                                                              Exact  Advanced
## Air_France_Brand_French_Destinations_net_revenue_avg           NaN       NaN
## Air_France_Branded_net_revenue_avg                             NaN       NaN
## Air_France_Global_Campaign_net_revenue_avg           116371.343801       NaN
## Business_Class_net_revenue_avg                                 NaN       NaN
## French_Destinations_net_revenue_avg                            NaN       NaN
## General_Terms_net_revenue_avg                                  NaN 1371.4750
## Geo_Targeted_Boston_net_revenue_avg                            NaN       NaN
## Geo_Targeted_Chicago_net_revenue_avg                           NaN       NaN
## Geo_Targeted_DC_net_revenue_avg                                NaN       NaN
## Geo_Targeted_Detroit_net_revenue_avg                           NaN       NaN
## Geo_Targeted_Houston_net_revenue_avg                           NaN       NaN
## Geo_Targeted_Los_Angeles_net_revenue_avg                       NaN       NaN
## Geo_Targeted_Miami_net_revenue_avg                             NaN       NaN
## Geo_Targeted_New_York_net_revenue_avg                          NaN       NaN
## Geo_Targeted_Philadelphia_net_revenue_avg                      NaN       NaN
## Geo_Targeted_San_Francisco_net_revenue_avg                     NaN       NaN
## Geo_Targeted_Seattle_net_revenue_avg                           NaN       NaN
## Google_Yearlong_net_revenue_avg                                NaN       NaN
## Outside_Western_Europe_net_revenue_avg                         NaN       NaN
## Paris_France_Terms_net_revenue_avg                             NaN       NaN
## Unassigned_net_revenue_avg                                     NaN  845.3957
## Western_Europe_Destinations_net_revenue_avg              -9.961806       NaN
BASED ON CONVERSION RATE
# Create empty vectors per Publisher
Air_France_Brand_French_Destinations_conversion_rate_avg <- c()                   
Air_France_Branded_conversion_rate_avg <- c()             
Air_France_Global_Campaign_conversion_rate_avg <- c() 
Business_Class_conversion_rate_avg <- c()                    
French_Destinations_conversion_rate_avg <- c()                          
General_Terms_conversion_rate_avg <- c() 
Geo_Targeted_Boston_conversion_rate_avg <- c()                   
Geo_Targeted_Chicago_conversion_rate_avg <- c()                        
Geo_Targeted_DC_conversion_rate_avg <- c() 
Geo_Targeted_Detroit_conversion_rate_avg <- c()                   
Geo_Targeted_Houston_conversion_rate_avg <- c()               
Geo_Targeted_Los_Angeles_conversion_rate_avg <- c() 
Geo_Targeted_Miami_conversion_rate_avg <- c()                  
Geo_Targeted_New_York_conversion_rate_avg <- c()              
Geo_Targeted_Philadelphia_conversion_rate_avg <- c() 
Geo_Targeted_San_Francisco_conversion_rate_avg <- c()                   
Geo_Targeted_Seattle_conversion_rate_avg <- c()                   
Google_Yearlong_conversion_rate_avg <- c()
Outside_Western_Europe_conversion_rate_avg <- c()                   
Paris_France_Terms_conversion_rate_avg <- c()                             
Unassigned_conversion_rate_avg <- c() 
Western_Europe_Destinations_conversion_rate_avg <- c() 
i <- 1

#While loop to get average Conversion rate per Campaign, per match type
while(i <= length(match_types)){
  Air_France_Brand_French_Destinations_conversion_rate_avg <- 
    c(Air_France_Brand_French_Destinations_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Air France Brand & French Destinations" 
            & clean_airfrance$`Match Type` == match_types[i])]))              
  Air_France_Branded_conversion_rate_avg <- 
    c( Air_France_Branded_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Air France Branded" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Air_France_Global_Campaign_conversion_rate_avg <- 
    c( Air_France_Global_Campaign_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Air France Global Campaign" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Business_Class_conversion_rate_avg <- 
    c( Business_Class_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Business Class" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  French_Destinations_conversion_rate_avg <- 
    c( French_Destinations_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "French Destinations" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  General_Terms_conversion_rate_avg <- 
    c( General_Terms_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "General Terms" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Boston_conversion_rate_avg <- 
    c( Geo_Targeted_Boston_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Geo Targeted Boston" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Chicago_conversion_rate_avg <- 
    c( Geo_Targeted_Chicago_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Geo Targeted Chicago" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_DC_conversion_rate_avg <- 
    c( Geo_Targeted_DC_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Geo Targeted DC" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Detroit_conversion_rate_avg <- 
    c( Geo_Targeted_Detroit_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Geo Targeted Detroit" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Houston_conversion_rate_avg <- 
    c( Geo_Targeted_Houston_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Geo Targeted Houston" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Los_Angeles_conversion_rate_avg <- 
    c( Geo_Targeted_Los_Angeles_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Geo Targeted Los Angeles" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Miami_conversion_rate_avg <- 
    c( Geo_Targeted_Miami_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Geo Targeted Miami" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_New_York_conversion_rate_avg <- 
    c( Geo_Targeted_New_York_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Geo Targeted New York" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Philadelphia_conversion_rate_avg <- 
    c( Geo_Targeted_Philadelphia_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Geo Targeted Philadelphia" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_San_Francisco_conversion_rate_avg <- 
    c( Geo_Targeted_San_Francisco_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Geo Targeted San Francisco" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Geo_Targeted_Seattle_conversion_rate_avg <- 
    c( Geo_Targeted_Seattle_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Geo Targeted Seattle" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Google_Yearlong_conversion_rate_avg <- 
    c( Google_Yearlong_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Google_Yearlong" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Outside_Western_Europe_conversion_rate_avg <- 
    c( Outside_Western_Europe_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Outside Western Europe" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Paris_France_Terms_conversion_rate_avg <- 
    c( Paris_France_Terms_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Paris & France Terms" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Unassigned_conversion_rate_avg <- 
    c( Unassigned_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Unassigned" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  Western_Europe_Destinations_conversion_rate_avg <- 
    c( Western_Europe_Destinations_conversion_rate_avg, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Western Europe Destinations" 
            & clean_airfrance$`Match Type` == match_types[i])]))
  i <- i + 1
} #--> closing conversion_rate while loop

# Combine average conversion_rate per campaign in a matrix
campaign_conversion_rate_per_match_type <- rbind(Air_France_Brand_French_Destinations_conversion_rate_avg, 
                                             Air_France_Branded_conversion_rate_avg, 
                                             Air_France_Global_Campaign_conversion_rate_avg, 
                                             Business_Class_conversion_rate_avg, 
                                             French_Destinations_conversion_rate_avg, 
                                             General_Terms_conversion_rate_avg, 
                                             Geo_Targeted_Boston_conversion_rate_avg, 
                                             Geo_Targeted_Chicago_conversion_rate_avg, 
                                             Geo_Targeted_DC_conversion_rate_avg, 
                                             Geo_Targeted_Detroit_conversion_rate_avg, 
                                             Geo_Targeted_Houston_conversion_rate_avg, 
                                             Geo_Targeted_Los_Angeles_conversion_rate_avg, 
                                             Geo_Targeted_Miami_conversion_rate_avg, 
                                             Geo_Targeted_New_York_conversion_rate_avg, 
                                             Geo_Targeted_Philadelphia_conversion_rate_avg, 
                                             Geo_Targeted_San_Francisco_conversion_rate_avg, 
                                             Geo_Targeted_Seattle_conversion_rate_avg, 
                                             Google_Yearlong_conversion_rate_avg, 
                                             Outside_Western_Europe_conversion_rate_avg, 
                                             Paris_France_Terms_conversion_rate_avg, 
                                             Unassigned_conversion_rate_avg, 
                                             Western_Europe_Destinations_conversion_rate_avg)

colnames(campaign_conversion_rate_per_match_type) <- match_types  #--> assign names to the rows
print(campaign_conversion_rate_per_match_type)
##                                                                Broad Standard
## Air_France_Brand_French_Destinations_conversion_rate_avg 2.736654615      NaN
## Air_France_Branded_conversion_rate_avg                   3.872158467      NaN
## Air_France_Global_Campaign_conversion_rate_avg           0.259901795      NaN
## Business_Class_conversion_rate_avg                       0.003995395      NaN
## French_Destinations_conversion_rate_avg                  0.103450864      NaN
## General_Terms_conversion_rate_avg                                NaN      NaN
## Geo_Targeted_Boston_conversion_rate_avg                  0.200282747      NaN
## Geo_Targeted_Chicago_conversion_rate_avg                 0.243353789      NaN
## Geo_Targeted_DC_conversion_rate_avg                      0.253264008      NaN
## Geo_Targeted_Detroit_conversion_rate_avg                 0.182548375      NaN
## Geo_Targeted_Houston_conversion_rate_avg                 0.220164005      NaN
## Geo_Targeted_Los_Angeles_conversion_rate_avg             0.093520597      NaN
## Geo_Targeted_Miami_conversion_rate_avg                   1.096942273      NaN
## Geo_Targeted_New_York_conversion_rate_avg                0.483338312      NaN
## Geo_Targeted_Philadelphia_conversion_rate_avg            0.296526928      NaN
## Geo_Targeted_San_Francisco_conversion_rate_avg           0.295945476      NaN
## Geo_Targeted_Seattle_conversion_rate_avg                 0.216440637      NaN
## Google_Yearlong_conversion_rate_avg                              NaN      NaN
## Outside_Western_Europe_conversion_rate_avg               0.000000000      NaN
## Paris_France_Terms_conversion_rate_avg                   0.088097742      NaN
## Unassigned_conversion_rate_avg                                   NaN 0.459987
## Western_Europe_Destinations_conversion_rate_avg          0.023786387      NaN
##                                                             Exact  Advanced
## Air_France_Brand_French_Destinations_conversion_rate_avg      NaN       NaN
## Air_France_Branded_conversion_rate_avg                        NaN       NaN
## Air_France_Global_Campaign_conversion_rate_avg           45.06474       NaN
## Business_Class_conversion_rate_avg                            NaN       NaN
## French_Destinations_conversion_rate_avg                       NaN       NaN
## General_Terms_conversion_rate_avg                             NaN 0.1648873
## Geo_Targeted_Boston_conversion_rate_avg                       NaN       NaN
## Geo_Targeted_Chicago_conversion_rate_avg                      NaN       NaN
## Geo_Targeted_DC_conversion_rate_avg                           NaN       NaN
## Geo_Targeted_Detroit_conversion_rate_avg                      NaN       NaN
## Geo_Targeted_Houston_conversion_rate_avg                      NaN       NaN
## Geo_Targeted_Los_Angeles_conversion_rate_avg                  NaN       NaN
## Geo_Targeted_Miami_conversion_rate_avg                        NaN       NaN
## Geo_Targeted_New_York_conversion_rate_avg                     NaN       NaN
## Geo_Targeted_Philadelphia_conversion_rate_avg                 NaN       NaN
## Geo_Targeted_San_Francisco_conversion_rate_avg                NaN       NaN
## Geo_Targeted_Seattle_conversion_rate_avg                      NaN       NaN
## Google_Yearlong_conversion_rate_avg                           NaN       NaN
## Outside_Western_Europe_conversion_rate_avg                    NaN       NaN
## Paris_France_Terms_conversion_rate_avg                        NaN       NaN
## Unassigned_conversion_rate_avg                                NaN 0.3200661
## Western_Europe_Destinations_conversion_rate_avg           0.00000       NaN

Keyword Type Analysis per Campaign

# See frequencies and Observation categories for keyword type
table(clean_airfrance$`Keyword Type`)
## 
## short-tail   mid-tail  long-tail 
##       1547       1685         54
keyword_types <- c("short-tail", "mid-tail", "long-tail") #--> Create vector with keyword types names
BASED ON ROA
# Create empty vectors per Campaign
Air_France_Brand_French_Destinations_ROA_avg_kt <- c()                   
Air_France_Branded_ROA_avg_kt <- c()             
Air_France_Global_Campaign_ROA_avg_kt <- c() 
Business_Class_ROA_avg_kt <- c()                    
French_Destinations_ROA_avg_kt <- c()                          
General_Terms_ROA_avg_kt <- c() 
Geo_Targeted_Boston_ROA_avg_kt <- c()                   
Geo_Targeted_Chicago_ROA_avg_kt <- c()                        
Geo_Targeted_DC_ROA_avg_kt <- c() 
Geo_Targeted_Detroit_ROA_avg_kt <- c()                   
Geo_Targeted_Houston_ROA_avg_kt <- c()               
Geo_Targeted_Los_Angeles_ROA_avg_kt <- c() 
Geo_Targeted_Miami_ROA_avg_kt <- c()                  
Geo_Targeted_New_York_ROA_avg_kt <- c()              
Geo_Targeted_Philadelphia_ROA_avg_kt <- c() 
Geo_Targeted_San_Francisco_ROA_avg_kt <- c()                   
Geo_Targeted_Seattle_ROA_avg_kt <- c()                   
Google_Yearlong_ROA_avg_kt <- c()
Outside_Western_Europe_ROA_avg_kt <- c()                   
Paris_France_Terms_ROA_avg_kt <- c()                             
Unassigned_ROA_avg_kt <- c() 
Western_Europe_Destinations_ROA_avg_kt <- c() 
i <- 1

# While loop to get average ROA per Campaign, per keyword type
while(i <= length(keyword_types)){
  Air_France_Brand_French_Destinations_ROA_avg_kt <- 
    c(Air_France_Brand_French_Destinations_ROA_avg_kt, mean(clean_airfrance$ROA[
      which(clean_airfrance$Campaign == "Air France Brand & French Destinations" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))              
  Air_France_Branded_ROA_avg_kt <- 
    c( Air_France_Branded_ROA_avg_kt, mean(clean_airfrance$ROA[
      which(clean_airfrance$Campaign == "Air France Branded" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Air_France_Global_Campaign_ROA_avg_kt <- 
    c( Air_France_Global_Campaign_ROA_avg_kt, mean(clean_airfrance$ROA[
      which(clean_airfrance$Campaign == "Air France Global Campaign" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Business_Class_ROA_avg_kt <- 
    c( Business_Class_ROA_avg_kt, mean(clean_airfrance$ROA[
      which(clean_airfrance$Campaign == "Business Class" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  French_Destinations_ROA_avg_kt <- 
    c( French_Destinations_ROA_avg_kt, mean(clean_airfrance$ROA[
      which(clean_airfrance$Campaign == "French Destinations" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  General_Terms_ROA_avg_kt <- 
    c( General_Terms_ROA_avg_kt, mean(clean_airfrance$ROA[
      which(clean_airfrance$Campaign == "General Terms" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Boston_ROA_avg_kt <- 
    c( Geo_Targeted_Boston_ROA_avg_kt, mean(clean_airfrance$ROA[
      which(clean_airfrance$Campaign == "Geo Targeted Boston" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Chicago_ROA_avg_kt <- 
    c( Geo_Targeted_Chicago_ROA_avg_kt, mean(clean_airfrance$ROA[
      which(clean_airfrance$Campaign == "Geo Targeted Chicago" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_DC_ROA_avg_kt <- 
    c( Geo_Targeted_DC_ROA_avg_kt, mean(clean_airfrance$ROA[
      which(clean_airfrance$Campaign == "Geo Targeted DC" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Detroit_ROA_avg_kt <- 
    c( Geo_Targeted_Detroit_ROA_avg_kt, mean(clean_airfrance$ROA[
      which(clean_airfrance$Campaign == "Geo Targeted Detroit" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Houston_ROA_avg_kt <- 
    c( Geo_Targeted_Houston_ROA_avg_kt, mean(clean_airfrance$ROA[
      which(clean_airfrance$Campaign == "Geo Targeted Houston" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Los_Angeles_ROA_avg_kt <- 
    c( Geo_Targeted_Los_Angeles_ROA_avg_kt, mean(clean_airfrance$ROA[
      which(clean_airfrance$Campaign == "Geo Targeted Los Angeles" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Miami_ROA_avg_kt <- 
    c( Geo_Targeted_Miami_ROA_avg_kt, mean(clean_airfrance$ROA[
      which(clean_airfrance$Campaign == "Geo Targeted Miami" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_New_York_ROA_avg_kt <- 
    c( Geo_Targeted_New_York_ROA_avg_kt, mean(clean_airfrance$ROA[
      which(clean_airfrance$Campaign == "Geo Targeted New York" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Philadelphia_ROA_avg_kt <- 
    c( Geo_Targeted_Philadelphia_ROA_avg_kt, mean(clean_airfrance$ROA[
      which(clean_airfrance$Campaign == "Geo Targeted Philadelphia" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_San_Francisco_ROA_avg_kt <- 
    c( Geo_Targeted_San_Francisco_ROA_avg_kt, mean(clean_airfrance$ROA[
      which(clean_airfrance$Campaign == "Geo Targeted San Francisco" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Seattle_ROA_avg_kt <- 
    c( Geo_Targeted_Seattle_ROA_avg_kt, mean(clean_airfrance$ROA[
      which(clean_airfrance$Campaign == "Geo Targeted Seattle" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Google_Yearlong_ROA_avg_kt <- 
    c( Google_Yearlong_ROA_avg_kt, mean(clean_airfrance$ROA[
      which(clean_airfrance$Campaign == "Google_Yearlong" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Outside_Western_Europe_ROA_avg_kt <- 
    c( Outside_Western_Europe_ROA_avg_kt, mean(clean_airfrance$ROA[
      which(clean_airfrance$Campaign == "Outside Western Europe" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Paris_France_Terms_ROA_avg_kt <- 
    c( Paris_France_Terms_ROA_avg_kt, mean(clean_airfrance$ROA[
      which(clean_airfrance$Campaign == "Paris & France Terms" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Unassigned_ROA_avg_kt <- 
    c( Unassigned_ROA_avg_kt, mean(clean_airfrance$ROA[
      which(clean_airfrance$Campaign == "Unassigned" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Western_Europe_Destinations_ROA_avg_kt <- 
    c( Western_Europe_Destinations_ROA_avg_kt, mean(clean_airfrance$ROA[
      which(clean_airfrance$Campaign == "Western Europe Destinations" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  i <- i + 1
} #--> closing ROA while loop

# Combine average ROA per campaign in a matrix
campaign_ROA_per_keyword_type <- rbind(Air_France_Brand_French_Destinations_ROA_avg_kt, 
                                     Air_France_Branded_ROA_avg_kt, 
                                     Air_France_Global_Campaign_ROA_avg_kt, 
                                     Business_Class_ROA_avg_kt, 
                                     French_Destinations_ROA_avg_kt, 
                                     General_Terms_ROA_avg_kt, 
                                     Geo_Targeted_Boston_ROA_avg_kt, 
                                     Geo_Targeted_Chicago_ROA_avg_kt, 
                                     Geo_Targeted_DC_ROA_avg_kt, 
                                     Geo_Targeted_Detroit_ROA_avg_kt, 
                                     Geo_Targeted_Houston_ROA_avg_kt, 
                                     Geo_Targeted_Los_Angeles_ROA_avg_kt, 
                                     Geo_Targeted_Miami_ROA_avg_kt, 
                                     Geo_Targeted_New_York_ROA_avg_kt, 
                                     Geo_Targeted_Philadelphia_ROA_avg_kt, 
                                     Geo_Targeted_San_Francisco_ROA_avg_kt, 
                                     Geo_Targeted_Seattle_ROA_avg_kt, 
                                     Google_Yearlong_ROA_avg_kt, 
                                     Outside_Western_Europe_ROA_avg_kt, 
                                     Paris_France_Terms_ROA_avg_kt, 
                                     Unassigned_ROA_avg_kt, 
                                     Western_Europe_Destinations_ROA_avg_kt)

colnames(campaign_ROA_per_keyword_type) <- keyword_types  #--> assign names to the rows
print(campaign_ROA_per_keyword_type)
##                                                  short-tail   mid-tail
## Air_France_Brand_French_Destinations_ROA_avg_kt  1.07267989 20.9537658
## Air_France_Branded_ROA_avg_kt                   61.85295833 20.9232214
## Air_France_Global_Campaign_ROA_avg_kt            7.33928741  1.3065352
## Business_Class_ROA_avg_kt                       -0.98556663 -1.0000000
## French_Destinations_ROA_avg_kt                  -0.70870765 -0.3267687
## General_Terms_ROA_avg_kt                         2.26138755        NaN
## Geo_Targeted_Boston_ROA_avg_kt                  -0.69259404  1.7675144
## Geo_Targeted_Chicago_ROA_avg_kt                  3.58415634 -1.0000000
## Geo_Targeted_DC_ROA_avg_kt                       1.12693007  0.4432428
## Geo_Targeted_Detroit_ROA_avg_kt                  1.59245230 -1.0000000
## Geo_Targeted_Houston_ROA_avg_kt                 -0.83850686  2.9178336
## Geo_Targeted_Los_Angeles_ROA_avg_kt             -0.48139109 -0.2056009
## Geo_Targeted_Miami_ROA_avg_kt                   -1.00000000 11.1538462
## Geo_Targeted_New_York_ROA_avg_kt                 0.04244048  3.5681679
## Geo_Targeted_Philadelphia_ROA_avg_kt             0.97487923 -1.0000000
## Geo_Targeted_San_Francisco_ROA_avg_kt            0.18756985  7.2914591
## Geo_Targeted_Seattle_ROA_avg_kt                  3.60931623 -1.0000000
## Google_Yearlong_ROA_avg_kt                              NaN        NaN
## Outside_Western_Europe_ROA_avg_kt               -1.00000000 -1.0000000
## Paris_France_Terms_ROA_avg_kt                   -0.19977638 -0.2757660
## Unassigned_ROA_avg_kt                            4.89693141  2.7801236
## Western_Europe_Destinations_ROA_avg_kt          -0.86491238 -0.8018555
##                                                  long-tail
## Air_France_Brand_French_Destinations_ROA_avg_kt -1.0000000
## Air_France_Branded_ROA_avg_kt                          NaN
## Air_France_Global_Campaign_ROA_avg_kt                  NaN
## Business_Class_ROA_avg_kt                              NaN
## French_Destinations_ROA_avg_kt                  -1.0000000
## General_Terms_ROA_avg_kt                               NaN
## Geo_Targeted_Boston_ROA_avg_kt                         NaN
## Geo_Targeted_Chicago_ROA_avg_kt                        NaN
## Geo_Targeted_DC_ROA_avg_kt                             NaN
## Geo_Targeted_Detroit_ROA_avg_kt                        NaN
## Geo_Targeted_Houston_ROA_avg_kt                        NaN
## Geo_Targeted_Los_Angeles_ROA_avg_kt                    NaN
## Geo_Targeted_Miami_ROA_avg_kt                          NaN
## Geo_Targeted_New_York_ROA_avg_kt                       NaN
## Geo_Targeted_Philadelphia_ROA_avg_kt                   NaN
## Geo_Targeted_San_Francisco_ROA_avg_kt                  NaN
## Geo_Targeted_Seattle_ROA_avg_kt                        NaN
## Google_Yearlong_ROA_avg_kt                             NaN
## Outside_Western_Europe_ROA_avg_kt                      NaN
## Paris_France_Terms_ROA_avg_kt                   -1.0000000
## Unassigned_ROA_avg_kt                            0.5479197
## Western_Europe_Destinations_ROA_avg_kt          -1.0000000
BASED ON NET REVENUE
#Create empty vectors per Campaign
Air_France_Brand_French_Destinations_net_revenue_avg_kt <- c()                   
Air_France_Branded_net_revenue_avg_kt <- c()             
Air_France_Global_Campaign_net_revenue_avg_kt <- c() 
Business_Class_net_revenue_avg_kt <- c()                    
French_Destinations_net_revenue_avg_kt <- c()                          
General_Terms_net_revenue_avg_kt <- c() 
Geo_Targeted_Boston_net_revenue_avg_kt <- c()                   
Geo_Targeted_Chicago_net_revenue_avg_kt <- c()                        
Geo_Targeted_DC_net_revenue_avg_kt <- c() 
Geo_Targeted_Detroit_net_revenue_avg_kt <- c()                   
Geo_Targeted_Houston_net_revenue_avg_kt <- c()               
Geo_Targeted_Los_Angeles_net_revenue_avg_kt <- c() 
Geo_Targeted_Miami_net_revenue_avg_kt <- c()                  
Geo_Targeted_New_York_net_revenue_avg_kt <- c()              
Geo_Targeted_Philadelphia_net_revenue_avg_kt <- c() 
Geo_Targeted_San_Francisco_net_revenue_avg_kt <- c()                   
Geo_Targeted_Seattle_net_revenue_avg_kt <- c()                   
Google_Yearlong_net_revenue_avg_kt <- c()
Outside_Western_Europe_net_revenue_avg_kt <- c()                   
Paris_France_Terms_net_revenue_avg_kt <- c()                             
Unassigned_net_revenue_avg_kt <- c() 
Western_Europe_Destinations_net_revenue_avg_kt <- c() 
i <- 1

# While loop to get average Net Revenue per Campaign, per keyword type
while(i <= length(keyword_types)){
  Air_France_Brand_French_Destinations_net_revenue_avg_kt <- 
    c(Air_France_Brand_French_Destinations_net_revenue_avg_kt, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Air France Brand & French Destinations" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))              
  Air_France_Branded_net_revenue_avg_kt <- 
    c( Air_France_Branded_net_revenue_avg_kt, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Air France Branded" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Air_France_Global_Campaign_net_revenue_avg_kt <- 
    c( Air_France_Global_Campaign_net_revenue_avg_kt, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Air France Global Campaign" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Business_Class_net_revenue_avg_kt <- 
    c( Business_Class_net_revenue_avg_kt, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Business Class" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  French_Destinations_net_revenue_avg_kt <- 
    c( French_Destinations_net_revenue_avg_kt, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "French Destinations" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  General_Terms_net_revenue_avg_kt <- 
    c( General_Terms_net_revenue_avg_kt, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "General Terms" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Boston_net_revenue_avg_kt <- 
    c( Geo_Targeted_Boston_net_revenue_avg_kt, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Geo Targeted Boston" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Chicago_net_revenue_avg_kt <- 
    c( Geo_Targeted_Chicago_net_revenue_avg_kt, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Geo Targeted Chicago" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_DC_net_revenue_avg_kt <- 
    c( Geo_Targeted_DC_net_revenue_avg_kt, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Geo Targeted DC" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Detroit_net_revenue_avg_kt <- 
    c( Geo_Targeted_Detroit_net_revenue_avg_kt, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Geo Targeted Detroit" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Houston_net_revenue_avg_kt <- 
    c( Geo_Targeted_Houston_net_revenue_avg_kt, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Geo Targeted Houston" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Los_Angeles_net_revenue_avg_kt <- 
    c( Geo_Targeted_Los_Angeles_net_revenue_avg_kt, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Geo Targeted Los Angeles" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Miami_net_revenue_avg_kt <- 
    c( Geo_Targeted_Miami_net_revenue_avg_kt, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Geo Targeted Miami" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_New_York_net_revenue_avg_kt <- 
    c( Geo_Targeted_New_York_net_revenue_avg_kt, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Geo Targeted New York" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Philadelphia_net_revenue_avg_kt <- 
    c( Geo_Targeted_Philadelphia_net_revenue_avg_kt, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Geo Targeted Philadelphia" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_San_Francisco_net_revenue_avg_kt <- 
    c( Geo_Targeted_San_Francisco_net_revenue_avg_kt, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Geo Targeted San Francisco" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Seattle_net_revenue_avg_kt <- 
    c( Geo_Targeted_Seattle_net_revenue_avg_kt, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Geo Targeted Seattle" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Google_Yearlong_net_revenue_avg_kt <- 
    c( Google_Yearlong_net_revenue_avg_kt, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Google_Yearlong" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Outside_Western_Europe_net_revenue_avg_kt <- 
    c( Outside_Western_Europe_net_revenue_avg_kt, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Outside Western Europe" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Paris_France_Terms_net_revenue_avg_kt <- 
    c( Paris_France_Terms_net_revenue_avg_kt, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Paris & France Terms" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Unassigned_net_revenue_avg_kt <- 
    c( Unassigned_net_revenue_avg_kt, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Unassigned" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Western_Europe_Destinations_net_revenue_avg_kt <- 
    c( Western_Europe_Destinations_net_revenue_avg_kt, mean(clean_airfrance$net_revenue[
      which(clean_airfrance$Campaign == "Western Europe Destinations" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  i <- i + 1
} #--> closing Net Revenue while loop

# Combine average Net Revenue per campaign in a matrix
campaign_net_revenue_per_keyword_type <- rbind(Air_France_Brand_French_Destinations_net_revenue_avg_kt, 
                                       Air_France_Branded_net_revenue_avg_kt, 
                                       Air_France_Global_Campaign_net_revenue_avg_kt, 
                                       Business_Class_net_revenue_avg_kt, 
                                       French_Destinations_net_revenue_avg_kt, 
                                       General_Terms_net_revenue_avg_kt, 
                                       Geo_Targeted_Boston_net_revenue_avg_kt, 
                                       Geo_Targeted_Chicago_net_revenue_avg_kt, 
                                       Geo_Targeted_DC_net_revenue_avg_kt, 
                                       Geo_Targeted_Detroit_net_revenue_avg_kt, 
                                       Geo_Targeted_Houston_net_revenue_avg_kt, 
                                       Geo_Targeted_Los_Angeles_net_revenue_avg_kt, 
                                       Geo_Targeted_Miami_net_revenue_avg_kt, 
                                       Geo_Targeted_New_York_net_revenue_avg_kt, 
                                       Geo_Targeted_Philadelphia_net_revenue_avg_kt, 
                                       Geo_Targeted_San_Francisco_net_revenue_avg_kt, 
                                       Geo_Targeted_Seattle_net_revenue_avg_kt, 
                                       Google_Yearlong_net_revenue_avg_kt, 
                                       Outside_Western_Europe_net_revenue_avg_kt, 
                                       Paris_France_Terms_net_revenue_avg_kt, 
                                       Unassigned_net_revenue_avg_kt, 
                                       Western_Europe_Destinations_net_revenue_avg_kt)

colnames(campaign_net_revenue_per_keyword_type) <- keyword_types  #--> assign names to the rows
print(campaign_net_revenue_per_keyword_type)
##                                                           short-tail
## Air_France_Brand_French_Destinations_net_revenue_avg_kt  2580.653126
## Air_France_Branded_net_revenue_avg_kt                   10632.051566
## Air_France_Global_Campaign_net_revenue_avg_kt            2391.140733
## Business_Class_net_revenue_avg_kt                        -285.422916
## French_Destinations_net_revenue_avg_kt                    -18.919375
## General_Terms_net_revenue_avg_kt                         1371.475004
## Geo_Targeted_Boston_net_revenue_avg_kt                      9.969531
## Geo_Targeted_Chicago_net_revenue_avg_kt                   100.529546
## Geo_Targeted_DC_net_revenue_avg_kt                         31.298438
## Geo_Targeted_Detroit_net_revenue_avg_kt                    11.331944
## Geo_Targeted_Houston_net_revenue_avg_kt                    -6.956845
## Geo_Targeted_Los_Angeles_net_revenue_avg_kt                20.887955
## Geo_Targeted_Miami_net_revenue_avg_kt                     -10.428947
## Geo_Targeted_New_York_net_revenue_avg_kt                  158.816905
## Geo_Targeted_Philadelphia_net_revenue_avg_kt                4.625556
## Geo_Targeted_San_Francisco_net_revenue_avg_kt              29.167614
## Geo_Targeted_Seattle_net_revenue_avg_kt                   162.263542
## Google_Yearlong_net_revenue_avg_kt                               NaN
## Outside_Western_Europe_net_revenue_avg_kt                 -75.581250
## Paris_France_Terms_net_revenue_avg_kt                     -38.865070
## Unassigned_net_revenue_avg_kt                            1359.161537
## Western_Europe_Destinations_net_revenue_avg_kt            -38.515286
##                                                             mid-tail
## Air_France_Brand_French_Destinations_net_revenue_avg_kt   197.823438
## Air_France_Branded_net_revenue_avg_kt                   19733.088761
## Air_France_Global_Campaign_net_revenue_avg_kt             473.440278
## Business_Class_net_revenue_avg_kt                         -47.950000
## French_Destinations_net_revenue_avg_kt                    -51.308035
## General_Terms_net_revenue_avg_kt                                 NaN
## Geo_Targeted_Boston_net_revenue_avg_kt                     15.786275
## Geo_Targeted_Chicago_net_revenue_avg_kt                   -10.804891
## Geo_Targeted_DC_net_revenue_avg_kt                          5.284783
## Geo_Targeted_Detroit_net_revenue_avg_kt                    -7.059914
## Geo_Targeted_Houston_net_revenue_avg_kt                   123.556250
## Geo_Targeted_Los_Angeles_net_revenue_avg_kt                 1.378827
## Geo_Targeted_Miami_net_revenue_avg_kt                      30.312500
## Geo_Targeted_New_York_net_revenue_avg_kt                   73.006055
## Geo_Targeted_Philadelphia_net_revenue_avg_kt               -5.564583
## Geo_Targeted_San_Francisco_net_revenue_avg_kt              31.361859
## Geo_Targeted_Seattle_net_revenue_avg_kt                   -14.316071
## Google_Yearlong_net_revenue_avg_kt                               NaN
## Outside_Western_Europe_net_revenue_avg_kt                 -18.043750
## Paris_France_Terms_net_revenue_avg_kt                     290.631050
## Unassigned_net_revenue_avg_kt                             152.512743
## Western_Europe_Destinations_net_revenue_avg_kt             -3.147584
##                                                           long-tail
## Air_France_Brand_French_Destinations_net_revenue_avg_kt  -0.9958333
## Air_France_Branded_net_revenue_avg_kt                           NaN
## Air_France_Global_Campaign_net_revenue_avg_kt                   NaN
## Business_Class_net_revenue_avg_kt                               NaN
## French_Destinations_net_revenue_avg_kt                   -2.5250000
## General_Terms_net_revenue_avg_kt                                NaN
## Geo_Targeted_Boston_net_revenue_avg_kt                          NaN
## Geo_Targeted_Chicago_net_revenue_avg_kt                         NaN
## Geo_Targeted_DC_net_revenue_avg_kt                              NaN
## Geo_Targeted_Detroit_net_revenue_avg_kt                         NaN
## Geo_Targeted_Houston_net_revenue_avg_kt                         NaN
## Geo_Targeted_Los_Angeles_net_revenue_avg_kt                     NaN
## Geo_Targeted_Miami_net_revenue_avg_kt                           NaN
## Geo_Targeted_New_York_net_revenue_avg_kt                        NaN
## Geo_Targeted_Philadelphia_net_revenue_avg_kt                    NaN
## Geo_Targeted_San_Francisco_net_revenue_avg_kt                   NaN
## Geo_Targeted_Seattle_net_revenue_avg_kt                         NaN
## Google_Yearlong_net_revenue_avg_kt                              NaN
## Outside_Western_Europe_net_revenue_avg_kt                       NaN
## Paris_France_Terms_net_revenue_avg_kt                   -10.2145833
## Unassigned_net_revenue_avg_kt                            32.8737806
## Western_Europe_Destinations_net_revenue_avg_kt           -1.7000000
BASED ON CONVERSION RATE
#Create empty vectors per Campaign
Air_France_Brand_French_Destinations_conversion_rate_avg_kt <- c()                   
Air_France_Branded_conversion_rate_avg_kt <- c()             
Air_France_Global_Campaign_conversion_rate_avg_kt <- c() 
Business_Class_conversion_rate_avg_kt <- c()                    
French_Destinations_conversion_rate_avg_kt <- c()                          
General_Terms_conversion_rate_avg_kt <- c() 
Geo_Targeted_Boston_conversion_rate_avg_kt <- c()                   
Geo_Targeted_Chicago_conversion_rate_avg_kt <- c()                        
Geo_Targeted_DC_conversion_rate_avg_kt <- c() 
Geo_Targeted_Detroit_conversion_rate_avg_kt <- c()                   
Geo_Targeted_Houston_conversion_rate_avg_kt <- c()               
Geo_Targeted_Los_Angeles_conversion_rate_avg_kt <- c() 
Geo_Targeted_Miami_conversion_rate_avg_kt <- c()                  
Geo_Targeted_New_York_conversion_rate_avg_kt <- c()              
Geo_Targeted_Philadelphia_conversion_rate_avg_kt <- c() 
Geo_Targeted_San_Francisco_conversion_rate_avg_kt <- c()                   
Geo_Targeted_Seattle_conversion_rate_avg_kt <- c()                   
Google_Yearlong_conversion_rate_avg_kt <- c()
Outside_Western_Europe_conversion_rate_avg_kt <- c()                   
Paris_France_Terms_conversion_rate_avg_kt <- c()                             
Unassigned_conversion_rate_avg_kt <- c() 
Western_Europe_Destinations_conversion_rate_avg_kt <- c() 
i <- 1

#While loop to get average Conversion Rate per Campaign, per keyword type
while(i <= length(keyword_types)){
  Air_France_Brand_French_Destinations_conversion_rate_avg_kt <- 
    c(Air_France_Brand_French_Destinations_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Air France Brand & French Destinations" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))              
  Air_France_Branded_conversion_rate_avg_kt <- 
    c( Air_France_Branded_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Air France Branded" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Air_France_Global_Campaign_conversion_rate_avg_kt <- 
    c( Air_France_Global_Campaign_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Air France Global Campaign" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Business_Class_conversion_rate_avg_kt <- 
    c( Business_Class_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Business Class" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  French_Destinations_conversion_rate_avg_kt <- 
    c( French_Destinations_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "French Destinations" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  General_Terms_conversion_rate_avg_kt <- 
    c( General_Terms_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "General Terms" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Boston_conversion_rate_avg_kt <- 
    c( Geo_Targeted_Boston_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Geo Targeted Boston" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Chicago_conversion_rate_avg_kt <- 
    c( Geo_Targeted_Chicago_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Geo Targeted Chicago" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_DC_conversion_rate_avg_kt <- 
    c( Geo_Targeted_DC_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Geo Targeted DC" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Detroit_conversion_rate_avg_kt <- 
    c( Geo_Targeted_Detroit_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Geo Targeted Detroit" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Houston_conversion_rate_avg_kt <- 
    c( Geo_Targeted_Houston_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Geo Targeted Houston" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Los_Angeles_conversion_rate_avg_kt <- 
    c( Geo_Targeted_Los_Angeles_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Geo Targeted Los Angeles" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Miami_conversion_rate_avg_kt <- 
    c( Geo_Targeted_Miami_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Geo Targeted Miami" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_New_York_conversion_rate_avg_kt <- 
    c( Geo_Targeted_New_York_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Geo Targeted New York" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Philadelphia_conversion_rate_avg_kt <- 
    c( Geo_Targeted_Philadelphia_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Geo Targeted Philadelphia" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_San_Francisco_conversion_rate_avg_kt <- 
    c( Geo_Targeted_San_Francisco_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Geo Targeted San Francisco" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Geo_Targeted_Seattle_conversion_rate_avg_kt <- 
    c( Geo_Targeted_Seattle_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Geo Targeted Seattle" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Google_Yearlong_conversion_rate_avg_kt <- 
    c( Google_Yearlong_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Google_Yearlong" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Outside_Western_Europe_conversion_rate_avg_kt <- 
    c( Outside_Western_Europe_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Outside Western Europe" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Paris_France_Terms_conversion_rate_avg_kt <- 
    c( Paris_France_Terms_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Paris & France Terms" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Unassigned_conversion_rate_avg_kt <- 
    c( Unassigned_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Unassigned" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  Western_Europe_Destinations_conversion_rate_avg_kt <- 
    c( Western_Europe_Destinations_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
      which(clean_airfrance$Campaign == "Western Europe Destinations" 
            & clean_airfrance$`Keyword Type` == keyword_types[i])]))
  i <- i + 1
} #--> closing Conversion Rate while loop

# Combine average Conversion Rate per campaign in a matrix
campaign_conversion_rate_per_keyword_type <- rbind(Air_France_Brand_French_Destinations_conversion_rate_avg_kt, 
                                               Air_France_Branded_conversion_rate_avg_kt, 
                                               Air_France_Global_Campaign_conversion_rate_avg_kt, 
                                               Business_Class_conversion_rate_avg_kt, 
                                               French_Destinations_conversion_rate_avg_kt, 
                                               General_Terms_conversion_rate_avg_kt, 
                                               Geo_Targeted_Boston_conversion_rate_avg_kt, 
                                               Geo_Targeted_Chicago_conversion_rate_avg_kt, 
                                               Geo_Targeted_DC_conversion_rate_avg_kt, 
                                               Geo_Targeted_Detroit_conversion_rate_avg_kt, 
                                               Geo_Targeted_Houston_conversion_rate_avg_kt, 
                                               Geo_Targeted_Los_Angeles_conversion_rate_avg_kt, 
                                               Geo_Targeted_Miami_conversion_rate_avg_kt, 
                                               Geo_Targeted_New_York_conversion_rate_avg_kt, 
                                               Geo_Targeted_Philadelphia_conversion_rate_avg_kt, 
                                               Geo_Targeted_San_Francisco_conversion_rate_avg_kt, 
                                               Geo_Targeted_Seattle_conversion_rate_avg_kt, 
                                               Google_Yearlong_conversion_rate_avg_kt, 
                                               Outside_Western_Europe_conversion_rate_avg_kt, 
                                               Paris_France_Terms_conversion_rate_avg_kt, 
                                               Unassigned_conversion_rate_avg_kt, 
                                               Western_Europe_Destinations_conversion_rate_avg_kt)

colnames(campaign_conversion_rate_per_keyword_type) <- keyword_types  #--> assign names to the rows
print(campaign_conversion_rate_per_keyword_type)
##                                                              short-tail
## Air_France_Brand_French_Destinations_conversion_rate_avg_kt 0.196799564
## Air_France_Branded_conversion_rate_avg_kt                   5.548345712
## Air_France_Global_Campaign_conversion_rate_avg_kt           0.878938573
## Business_Class_conversion_rate_avg_kt                       0.009988488
## French_Destinations_conversion_rate_avg_kt                  0.098012360
## General_Terms_conversion_rate_avg_kt                        0.164887260
## Geo_Targeted_Boston_conversion_rate_avg_kt                  0.023049972
## Geo_Targeted_Chicago_conversion_rate_avg_kt                 0.497769114
## Geo_Targeted_DC_conversion_rate_avg_kt                      0.227066304
## Geo_Targeted_Detroit_conversion_rate_avg_kt                 0.280583614
## Geo_Targeted_Houston_conversion_rate_avg_kt                 0.016142050
## Geo_Targeted_Los_Angeles_conversion_rate_avg_kt             0.030506406
## Geo_Targeted_Miami_conversion_rate_avg_kt                   0.000000000
## Geo_Targeted_New_York_conversion_rate_avg_kt                0.101167707
## Geo_Targeted_Philadelphia_conversion_rate_avg_kt            0.454674623
## Geo_Targeted_San_Francisco_conversion_rate_avg_kt           0.122245960
## Geo_Targeted_Seattle_conversion_rate_avg_kt                 0.595211751
## Google_Yearlong_conversion_rate_avg_kt                              NaN
## Outside_Western_Europe_conversion_rate_avg_kt               0.000000000
## Paris_France_Terms_conversion_rate_avg_kt                   0.109442243
## Unassigned_conversion_rate_avg_kt                           0.559661492
## Western_Europe_Destinations_conversion_rate_avg_kt          0.013458307
##                                                               mid-tail
## Air_France_Brand_French_Destinations_conversion_rate_avg_kt 5.44755058
## Air_France_Branded_conversion_rate_avg_kt                   2.53120867
## Air_France_Global_Campaign_conversion_rate_avg_kt           0.24803231
## Business_Class_conversion_rate_avg_kt                       0.00000000
## French_Destinations_conversion_rate_avg_kt                  0.11109351
## General_Terms_conversion_rate_avg_kt                               NaN
## Geo_Targeted_Boston_conversion_rate_avg_kt                  0.42269250
## Geo_Targeted_Chicago_conversion_rate_avg_kt                 0.00000000
## Geo_Targeted_DC_conversion_rate_avg_kt                      0.29426911
## Geo_Targeted_Detroit_conversion_rate_avg_kt                 0.00000000
## Geo_Targeted_Houston_conversion_rate_avg_kt                 0.40644492
## Geo_Targeted_Los_Angeles_conversion_rate_avg_kt             0.16425081
## Geo_Targeted_Miami_conversion_rate_avg_kt                   2.58564964
## Geo_Targeted_New_York_conversion_rate_avg_kt                0.79683764
## Geo_Targeted_Philadelphia_conversion_rate_avg_kt            0.00000000
## Geo_Targeted_San_Francisco_conversion_rate_avg_kt           0.49191416
## Geo_Targeted_Seattle_conversion_rate_avg_kt                 0.00000000
## Google_Yearlong_conversion_rate_avg_kt                             NaN
## Outside_Western_Europe_conversion_rate_avg_kt               0.00000000
## Paris_France_Terms_conversion_rate_avg_kt                   0.08491828
## Unassigned_conversion_rate_avg_kt                           0.35369788
## Western_Europe_Destinations_conversion_rate_avg_kt          0.03499545
##                                                             long-tail
## Air_France_Brand_French_Destinations_conversion_rate_avg_kt 0.0000000
## Air_France_Branded_conversion_rate_avg_kt                         NaN
## Air_France_Global_Campaign_conversion_rate_avg_kt                 NaN
## Business_Class_conversion_rate_avg_kt                             NaN
## French_Destinations_conversion_rate_avg_kt                  0.0000000
## General_Terms_conversion_rate_avg_kt                              NaN
## Geo_Targeted_Boston_conversion_rate_avg_kt                        NaN
## Geo_Targeted_Chicago_conversion_rate_avg_kt                       NaN
## Geo_Targeted_DC_conversion_rate_avg_kt                            NaN
## Geo_Targeted_Detroit_conversion_rate_avg_kt                       NaN
## Geo_Targeted_Houston_conversion_rate_avg_kt                       NaN
## Geo_Targeted_Los_Angeles_conversion_rate_avg_kt                   NaN
## Geo_Targeted_Miami_conversion_rate_avg_kt                         NaN
## Geo_Targeted_New_York_conversion_rate_avg_kt                      NaN
## Geo_Targeted_Philadelphia_conversion_rate_avg_kt                  NaN
## Geo_Targeted_San_Francisco_conversion_rate_avg_kt                 NaN
## Geo_Targeted_Seattle_conversion_rate_avg_kt                       NaN
## Google_Yearlong_conversion_rate_avg_kt                            NaN
## Outside_Western_Europe_conversion_rate_avg_kt                     NaN
## Paris_France_Terms_conversion_rate_avg_kt                   0.0000000
## Unassigned_conversion_rate_avg_kt                           0.1655156
## Western_Europe_Destinations_conversion_rate_avg_kt          0.0000000

Keyword Type Analysis per Publisher

BASED ON CONVERSION RATE
# Create empty vectors per Publisher
google_GL_conversion_rate_avg_kt <- c()
google_US_conversion_rate_avg_kt <- c()
msn_GL_conversion_rate_avg_kt <- c()
overture_GL_conversion_rate_avg_kt <- c()
overture_US_conversion_rate_avg_kt <- c()
yahoo_US_conversion_rate_avg_kt <- c()
i <- 1 #--> setting i to start at 1

#While loop to get average conversion rate per Publisher, per match type
while(i <= length(keyword_types)){
  google_GL_conversion_rate_avg_kt <- c(google_GL_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
    which(clean_airfrance$`Publisher Name` == "Google - Global" & 
            clean_airfrance$`Keyword Type` == keyword_types[i])]))
  
  google_US_conversion_rate_avg_kt <- c(google_US_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
    which(clean_airfrance$`Publisher Name` == "Google - US" & 
            clean_airfrance$`Keyword Type` == keyword_types[i])]))
  
  msn_GL_conversion_rate_avg_kt <- c(msn_GL_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
    which(clean_airfrance$`Publisher Name` == "MSN - Global" & 
            clean_airfrance$`Keyword Type` == keyword_types[i])]))
  
  overture_GL_conversion_rate_avg_kt <- c(overture_GL_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
    which(clean_airfrance$`Publisher Name` == "Overture - Global" & 
            clean_airfrance$`Keyword Type` == keyword_types[i])]))
  
  overture_US_conversion_rate_avg_kt <- c(overture_US_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
    which(clean_airfrance$`Publisher Name` == "Overture - US" & 
            clean_airfrance$`Keyword Type` == keyword_types[i])]))
  
  yahoo_US_conversion_rate_avg_kt <- c(yahoo_US_conversion_rate_avg_kt, mean(clean_airfrance$conversion_rate[
    which(clean_airfrance$`Publisher Name` == "Yahoo - US" & 
            clean_airfrance$`Keyword Type` == keyword_types[i])]))
  
  i <- i + 1
  
} #--> closing conversion_rate while loop

# Combine average conversion_rate per publisher in a matrix
conversion_rate_per_publisher_per_keyword_type <- rbind(google_GL_conversion_rate_avg_kt, google_US_conversion_rate_avg_kt, msn_GL_conversion_rate_avg_kt, 
                                            overture_GL_conversion_rate_avg_kt, overture_US_conversion_rate_avg_kt, 
                                            yahoo_US_conversion_rate_avg_kt)

colnames(conversion_rate_per_publisher_per_keyword_type) <- keyword_types #--> assign names to the rows

print(conversion_rate_per_publisher_per_keyword_type)
##                                    short-tail   mid-tail  long-tail
## google_GL_conversion_rate_avg_kt    0.8789386 0.24803231        NaN
## google_US_conversion_rate_avg_kt    0.1453395 0.28982862 0.00000000
## msn_GL_conversion_rate_avg_kt       0.1967996 5.44755058 0.00000000
## overture_GL_conversion_rate_avg_kt  0.8990469 0.99786061 0.47889854
## overture_US_conversion_rate_avg_kt  0.4436022 0.06405093 0.02001641
## yahoo_US_conversion_rate_avg_kt     0.1648873        NaN        NaN

Updating DataSets for Keyword Analysis

# Publishers <> Match Type
ROA_per_publisher_per_match_type <- as.data.frame(ROA_per_publisher_per_match_type)
net_revenue_per_publisher_per_match_type <- as.data.frame(net_revenue_per_publisher_per_match_type)
conversion_rate_per_publisher_per_match_type <- as.data.frame(conversion_rate_per_publisher_per_match_type)

ROA_per_publisher_per_match_type$Publishers <- publishers
net_revenue_per_publisher_per_match_type$Publishers <- publishers
conversion_rate_per_publisher_per_match_type$Publishers <- publishers


# Publishers <> Keyword Type
ROA_per_publisher_per_keyword_type <- as.data.frame(ROA_per_publisher_per_keyword_type)
## Error in as.data.frame(ROA_per_publisher_per_keyword_type): object 'ROA_per_publisher_per_keyword_type' not found
net_revenue_per_publisher_per_keyword_type <- as.data.frame(net_revenue_per_publisher_per_keyword_type)
## Error in as.data.frame(net_revenue_per_publisher_per_keyword_type): object 'net_revenue_per_publisher_per_keyword_type' not found
conversion_rate_per_publisher_per_keyword_type <- as.data.frame(conversion_rate_per_publisher_per_keyword_type)

ROA_per_publisher_per_keyword_type$Publishers <- publishers
## Error in ROA_per_publisher_per_keyword_type$Publishers <- publishers: object 'ROA_per_publisher_per_keyword_type' not found
net_revenue_per_publisher_per_keyword_type$Publishers <- publishers
## Error in net_revenue_per_publisher_per_keyword_type$Publishers <- publishers: object 'net_revenue_per_publisher_per_keyword_type' not found
conversion_rate_per_publisher_per_keyword_type$Publishers <- publishers


# Campaigns <> Match Type
campaign_ROA_per_match_type <- as.data.frame(campaign_ROA_per_match_type)
campaign_net_revenue_per_match_type <- as.data.frame(campaign_net_revenue_per_match_type)
campaign_conversion_rate_per_match_type <- as.data.frame(campaign_conversion_rate_per_match_type)

campaign_ROA_per_match_type$Campaigns <- campaigns
campaign_net_revenue_per_match_type$Campaigns <- campaigns
campaign_conversion_rate_per_match_type$Campaigns <- campaigns


# Campaigns <> Keyword Type
campaign_ROA_per_keyword_type <- as.data.frame(campaign_ROA_per_keyword_type)
campaign_net_revenue_per_keyword_type <- as.data.frame(campaign_net_revenue_per_keyword_type)
campaign_conversion_rate_per_keyword_type <- as.data.frame(campaign_conversion_rate_per_keyword_type)

campaign_ROA_per_keyword_type$Campaigns <- campaigns
campaign_net_revenue_per_keyword_type$Campaigns <- campaigns
campaign_conversion_rate_per_keyword_type$Campaigns <- campaigns

Graphs for Keyword and campaign analysis

# PUBLISHERS

## ROA per match type
ROA_per_publisher_per_match_type %>%
  gather("Type", "Value",-Publishers) %>%
  ggplot(aes(Publishers, Value, fill = Type)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme_bw()+
  ggtitle("ROA per Publisher based on Match Type")
## Warning: Removed 14 rows containing missing values (geom_bar).

### Remove Outlier
ROA_per_publisher_per_match_type_outlier_free <- ROA_per_publisher_per_match_type
ROA_per_publisher_per_match_type_outlier_free[1, 3] <- 0
ROA_per_publisher_per_match_type_outlier_free %>%
  gather("Type", "Value",-Publishers) %>%
  ggplot(aes(Publishers, Value, fill = Type)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme_bw()+
  ggtitle("ROA per Publisher based on Match Type w/o Google Global Exact")
## Warning: Removed 14 rows containing missing values (geom_bar).

## Net Revenue per match type
net_revenue_per_publisher_per_match_type %>%
  gather("Type", "Value",-Publishers) %>%
  ggplot(aes(Publishers, Value, fill = Type)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme_bw()+
  ggtitle("Net revenue per Publisher based on Match Type")
## Warning: Removed 14 rows containing missing values (geom_bar).

### Remove Outlier
net_revenue_per_publisher_per_match_type_outlier_free <- net_revenue_per_publisher_per_match_type
net_revenue_per_publisher_per_match_type_outlier_free[1, 3] <- 0
net_revenue_per_publisher_per_match_type_outlier_free %>%
  gather("Type", "Value",-Publishers) %>%
  ggplot(aes(Publishers, Value, fill = Type)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme_bw()+
  ggtitle("Net Revenue per Publisher based on Match Type w/o Google Global Exact")
## Warning: Removed 14 rows containing missing values (geom_bar).

## Conversion rate per match type
conversion_rate_per_publisher_per_match_type %>%
  gather("Type", "Value",-Publishers) %>%
  ggplot(aes(Publishers, Value, fill = Type)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme_bw()+
  ggtitle("Conversion rate per Publisher based on Match Type")
## Warning: Removed 14 rows containing missing values (geom_bar).

### Remove Outlier
conversion_rate_per_publisher_per_match_type_outlier_free <- conversion_rate_per_publisher_per_match_type
conversion_rate_per_publisher_per_match_type_outlier_free[1, 3] <- 0
conversion_rate_per_publisher_per_match_type_outlier_free %>%
  gather("Type", "Value",-Publishers) %>%
  ggplot(aes(Publishers, Value, fill = Type)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme_bw()+
  ggtitle("Conversion rate per Publisher based on Match Type w/o Google Global Exact")
## Warning: Removed 14 rows containing missing values (geom_bar).

## ROA per keyword type
ROA_per_publisher_per_keyword_type %>%
  gather("Type", "Value",-Publishers) %>%
  ggplot(aes(Publishers, Value, fill = Type)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme_bw()+
  ggtitle("ROA per Publisher based on keyword Type")
## Error in gather(., "Type", "Value", -Publishers): object 'ROA_per_publisher_per_keyword_type' not found
### Net Revenue per keyword type
net_revenue_per_publisher_per_keyword_type %>%
  gather("Type", "Value",-Publishers) %>%
  ggplot(aes(Publishers, Value, fill = Type)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme_bw()+
  ggtitle("Net revenue per Publisher based on Match Type")
## Error in gather(., "Type", "Value", -Publishers): object 'net_revenue_per_publisher_per_keyword_type' not found
## Conversion rate per keyword type
conversion_rate_per_publisher_per_keyword_type %>%
  gather("Type", "Value",-Publishers) %>%
  ggplot(aes(Publishers, Value, fill = Type)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme_bw()+
  ggtitle("Conversion rate per Publisher based on keyword Type")
## Warning: Removed 3 rows containing missing values (geom_bar).

# CAMPAIGNS

## ROA per match type
campaign_ROA_per_match_type %>%
  gather("Type", "Value",-Campaigns) %>%
  ggplot(aes(Campaigns, Value, fill = Type)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme_bw()+
  theme(axis.text.x = element_text(angle = 90))+
  ggtitle("ROA per Campaign based on Match Type")
## Warning: Removed 64 rows containing missing values (geom_bar).

#### Remove Outlier
campaign_ROA_per_match_type_outlier_free <- campaign_ROA_per_match_type
campaign_ROA_per_match_type_outlier_free[3, 3] <- 0
campaign_ROA_per_match_type_outlier_free %>%
  gather("Type", "Value",-Campaigns) %>%
  ggplot(aes(Campaigns, Value, fill = Type)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme_bw()+
  theme(axis.text.x = element_text(angle = 90))+
  ggtitle("ROA per Campaign based on Match Type w/o Air France Global exact match")
## Warning: Removed 64 rows containing missing values (geom_bar).

## Net Revenue per match type
campaign_net_revenue_per_match_type %>%
  gather("Type", "Value",-Campaigns) %>%
  ggplot(aes(Campaigns, Value, fill = Type)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme_bw()+
  theme(axis.text.x = element_text(angle = 90))+
  ggtitle("Net Revenue per Campaign based on Match Type")
## Warning: Removed 64 rows containing missing values (geom_bar).

### Remove Outlier
campaign_net_revenue_per_match_type_outlier_free <- campaign_net_revenue_per_match_type
campaign_net_revenue_per_match_type_outlier_free[2:3, c(1,3)] <- 0
campaign_net_revenue_per_match_type_outlier_free %>%
  gather("Type", "Value",-Campaigns) %>%
  ggplot(aes(Campaigns, Value, fill = Type)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme_bw()+
  theme(axis.text.x = element_text(angle = 90))+
  ggtitle("Net Revenue per Campaign based on Match Type w/o  
          Air France Global exact match & Air France branded broad match")
## Warning: Removed 63 rows containing missing values (geom_bar).

## Conversion rate per match type
campaign_conversion_rate_per_match_type %>%
  gather("Type", "Value",-Campaigns) %>%
  ggplot(aes(Campaigns, Value, fill = Type)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme_bw()+
  theme(axis.text.x = element_text(angle = 90))+
  ggtitle("Conversion rate per Campaign based on Match Type")
## Warning: Removed 64 rows containing missing values (geom_bar).

### Remove Outlier
campaign_conversion_rate_per_match_type_outlier_free <- campaign_conversion_rate_per_match_type
campaign_conversion_rate_per_match_type_outlier_free[3, 3] <- 0
campaign_conversion_rate_per_match_type_outlier_free %>%
  gather("Type", "Value",-Campaigns) %>%
  ggplot(aes(Campaigns, Value, fill = Type)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme_bw()+
  theme(axis.text.x = element_text(angle = 90))+
  ggtitle("Conversion rate per Campaign based on Match Type w/o Air France Global exact match")
## Warning: Removed 64 rows containing missing values (geom_bar).

## ROA per keyword type
campaign_ROA_per_keyword_type %>%
  gather("Type", "Value",-Campaigns) %>%
  ggplot(aes(Campaigns, Value, fill = Type)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme_bw()+
  theme(axis.text.x = element_text(angle = 90))+
  ggtitle("ROA per Campaign based on keyword Type")
## Warning: Removed 20 rows containing missing values (geom_bar).

## Conversion rate per keyword type
campaign_conversion_rate_per_keyword_type %>%
  gather("Type", "Value",-Campaigns) %>%
  ggplot(aes(Campaigns, Value, fill = Type)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme_bw()+
  theme(axis.text.x = element_text(angle = 90))+
  ggtitle("Conversion rate per Campaign based on keyword Type")
## Warning: Removed 20 rows containing missing values (geom_bar).

## Net Revenue per keyword type
campaign_net_revenue_per_keyword_type %>%
  gather("Type", "Value",-Campaigns) %>%
  ggplot(aes(Campaigns, Value, fill = Type)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme_bw()+
  theme(axis.text.x = element_text(angle = 90))+
  ggtitle("Net Revenue per Campaign based on keyword Type")
## Warning: Removed 20 rows containing missing values (geom_bar).

Kayak analyzis

Analyzing if they should invest in Kayak as well Q4

dfchannel_clean <- dfchannelNewUS[ 1:3, ]
dfchannel_clean2 <- dfchannelNewUS[ 6:7, ]
dfchannel_cleanfinal <- rbind(dfchannel_clean, dfchannel_clean2)
View(dfchannel_cleanfinal)
# changing from character to numeric and rounding to 2 decimals 
dfchannel_cleanfinal$avg_cpc <- round(as.numeric(dfchannel_cleanfinal$avg_cpc), digits = 2)

dfchannel_cleanfinal$avg_ROA <- round(as.numeric(dfchannel_cleanfinal$avg_ROA),digits =2)

dfchannel_cleanfinal$avg_conversion_rate <- round(as.numeric(dfchannel_cleanfinal$avg_conversion_rate), digits = 2)

dfchannel_cleanfinal$avg_Revenue_booking <- round(as.numeric(dfchannel_cleanfinal$avg_Revenue_booking), digits = 2)

View(dfchannel_cleanfinal)

Creating plots for two most important variables

# scatter plot for channel ROA 
pChannelROA <- plot_ly(dfchannel_cleanfinal, x= ~ `Publisher Name`,  y= ~avg_ROA, 
                       mode = "markers", type = "scatter", name = "Comparison Channels", 
                       color = I("red"), alpha = 0.8)%>%
  layout(title = "Average Return on Advertisement per Channel",
         xaxis = list(title = "Channel"),
         yaxis = list(title = "Avg ROA"))
pChannelROA
# scatter plot for channel revenue booking
pChannelrevb <- plot_ly(dfchannel_cleanfinal, x= ~ `Publisher Name`,  
                        y= ~avg_Revenue_booking, mode = "markers", type = "scatter", 
                        name = "Comparison Channels", color = I("blue"), alpha = 0.8)%>%
  layout(title = "Booking Revenue per Channel",
         xaxis = list(title = "Channel"),
         yaxis = list(title = "Avg Rev Booking"))
print(pChannelrevb)