Synopsis

This analysis was prepared to measure the performance of the paid ads over time for an ecommerce company.

Data

The data set describes performance of paid ads we show on search engines over time. (This is a limited and sanitized data set, which is similar to the actual data we work with.) The data is available here (https://drive.google.com/open?id=0ByO76VsqFyByVERGaElYMF9nVkk&authuser=0) as a CSV file.

Code Book

The schema of the data is as follows:

Adgroup name: It includes: the destination city, the landing page type (fX), the landing page, the semantic group of the keywords and the performance bucket.

Keywords: The keyword we bid on

Keyword match type: Broad, exact or phrase (as defined by Google)

Account: Account of the adgroup

Impressions: Number of impressions that our ads had

Position:Position of the ad (1 means to be at the top of Google results)

Cost: The cost of buying these clicks

Conversions: Number of users that bought in our website

Cost: Cost of all the clicks

Value conversion: Value of each conversion

Clicks: Number of clicks

Start_date: The date that our ads were shown to the user

Data Processing

# Reading the data from the CSV file  {Note: for the reproducibilty sake, i suppose to use download file code but I didnt use it because the data was large }

raw_data <- read.csv("data_for_DS_test.csv")
##head(raw_data)

# Splitting the adgroup name field to different variables for further analysis and use them as dimensions.
library(splitstackshape)
## Loading required package: data.table
splitteddata<-cbind(raw_data$name,(concat.split.multiple(raw_data, "name", "|")))
## This function is deprecated. Use `cSplit` instead.
#renaming the fields
library(plyr)
invisible(rename(splitteddata, c("V1"="name","name_1"="destination_city", "name_2"="landing_page_Type","name_3"="landing_page", "name_4"="semantic_keywords", "name_5"="performance_bucket")))

# put semantic keywords values in the field performance_bucket for the values {todo} and {roi1}
splitteddata$performance_bucket[splitteddata$semantic_keywords=="{todo}"] <- "{todo}"
splitteddata$performance_bucket[splitteddata$semantic_keywords=="{roi1}"] <- "{roi1}"
splitteddata$performance_bucket[splitteddata$semantic_keywords=="{pos1}"] <- "{pos1}"

# put Na values in semantic keywords values
splitteddata$semantic_keywords[splitteddata$semantic_keywords=="{todo}"] <- NA
splitteddata$semantic_keywords[splitteddata$semantic_keywords=="{roi1}"] <- NA
splitteddata$semantic_keywords[splitteddata$semantic_keywords=="{pos1}"] <- NA


# getting the wrong values 2108 rows
y<- splitteddata[!is.na(splitteddata$name_6),]

head(splitteddata,n=3)
##                                                    name
## 1: new york:59|f5|statue of liberty:2612|generic|{todo}
## 2:      new york:59|f5|central park:2614|generic|{todo}
## 3: new york:59|f5|chrysler building:3397|generic|{todo}
##             keyword_text keyword_match_type    account impressions
## 1: statue de la liberté              Exact FRA_France         549
## 2:          central park             Phrase FRA_France          29
## 3:     chrysler building             Phrase FRA_France           4
##    position conversions cost value_conversion clicks start_date
## 1:      3.6           0    0                0      0 2014-08-13
## 2:      4.5           0    0                0      0 2014-08-13
## 3:      1.0           0    0                0      0 2014-08-13
##    destination_city landing_page_Type           landing_page
## 1:      new york:59                f5 statue of liberty:2612
## 2:      new york:59                f5      central park:2614
## 3:      new york:59                f5 chrysler building:3397
##    semantic_keywords performance_bucket name_6
## 1:           generic             {todo}     NA
## 2:           generic             {todo}     NA
## 3:           generic             {todo}     NA

Data Analysis

Descriptive Analytics

Adgroups that have conversions at least 1

## getting the total conversions by adgroup over all time
converted_adgroups<-aggregate(splitteddata$conversions, by=list(group=splitteddata$name), FUN=sum)
colnames(converted_adgroups) <- c("group","value")

##adgroups that have conversions
converted_adgroups<-converted_adgroups[converted_adgroups$value!=0,]

converted_adgroups <- converted_adgroups[order(-converted_adgroups$value),] 
head(converted_adgroups)
##                                                      group value
## 922                 dresden:20|f4|opera tickets:137|{pos1}   284
## 475  belfast:442|f6|titanic experience:35245|ticket|{roi1}   168
## 927                  dresden:20|f4|stadtrundfahrt:4|{roi1}   115
## 3259      paris:16|f5|palace of versailles tour:317|{roi1}   113
## 543           berlin:17|f5|flughafen tempelhof:3106|{pos1}    92
## 3320         paris:16|f5|palais garnier:3218|visits|{todo}    91

adgroups that have clicks but without conversions

clicked_adgroups<-aggregate(list(splitteddata$clicks,splitteddata$conversions), by=list(group=splitteddata$name), FUN=sum)
colnames(clicked_adgroups) <- c("group","clicks","conversions")
clicked_adgroups<-clicked_adgroups[clicked_adgroups$conversions==0,]
clicked_adgroups <- clicked_adgroups[order(-clicked_adgroups$clicks),] 
head(clicked_adgroups)
##                                                        group clicks
## 1564 kuala lumpur:171|f5|petronas towers:4009|tickets|{roi1}   3062
## 4576            vienna:7|f5|prater:2970|opening hours|{roi1}   1106
## 4546       vienna:7|f5|hundertwasserhaus:3945|generic|{roi1}    861
## 4477                        vienna:7|f3|things to see|{roi1}    772
## 898           cusco:359|f5|macchu picchu:1570|generic|{todo}    759
## 1741      london:57|f5|buckingham palace:2706|tickets|{roi1}    695
##      conversions
## 1564           0
## 4576           0
## 4546           0
## 4477           0
## 898            0
## 1741           0

adgroups that have impressions but not clicks

impressions_adgroups<-aggregate(list(splitteddata$impressions,splitteddata$clicks), by=list(group=splitteddata$name), FUN=sum)
colnames(impressions_adgroups) <- c("group","impressions","clicks")
impressions_adgroups<-impressions_adgroups[impressions_adgroups$clicks==0,]
impressions_adgroups <- impressions_adgroups[order(-impressions_adgroups$impressions),] 
head(impressions_adgroups)
##                                                    group impressions
## 2780   new york:59|f5|yankee stadium:3473|generic|{todo}        1120
## 1962   london:57|f5|st. james's park:3198|generic|{todo}        1080
## 1783    london:57|f5|grosvenor house:4346|generic|{roi1}        1018
## 2280 moscow:181|f5|lenin's mausoleum:4177|generic|{roi1}         982
## 86     amsterdam:36|f5|keizersgracht:2986|generic|{todo}         957
## 1784    london:57|f5|grosvenor house:4346|generic|{todo}         852
##      clicks
## 2780      0
## 1962      0
## 1783      0
## 2280      0
## 86        0
## 1784      0

adgroups that have no impressions at all

Zero_impressions_adgroups<-impressions_adgroups[impressions_adgroups$impressions==0,]
head(Zero_impressions_adgroups)
##                                                     group impressions
## 213  barcelona:45|f5|barcelona aquarium:4326|other|{todo}           0
## 542     berlin:17|f5|east side gallery:3090|ticket|{todo}           0
## 760      boston:260|f5|uss constitution:4164|other|{todo}           0
## 1278                  hamburg:23|f5|hafencity:3147|{todo}           0
## 1658                       llandudno:462|f4|tour:1|{todo}           0
## 1862      london:57|f5|mayfair:4373|special offers|{todo}           0
##      clicks
## 213       0
## 542       0
## 760       0
## 1278      0
## 1658      0
## 1862      0

Results and Evaluation

What are the 10 adgroups that have the highest priority for human inspection?

The adgroups that have the highest number of clicks and without conversions

head(clicked_adgroups,n=10)
##                                                        group clicks
## 1564 kuala lumpur:171|f5|petronas towers:4009|tickets|{roi1}   3062
## 4576            vienna:7|f5|prater:2970|opening hours|{roi1}   1106
## 4546       vienna:7|f5|hundertwasserhaus:3945|generic|{roi1}    861
## 4477                        vienna:7|f3|things to see|{roi1}    772
## 898           cusco:359|f5|macchu picchu:1570|generic|{todo}    759
## 1741      london:57|f5|buckingham palace:2706|tickets|{roi1}    695
## 3058           paris:16|f5|grevin museum:4406|generic|{todo}    630
## 497             berlin:17|f5|berlin wall:2702|generic|{roi1}    614
## 1675                        london:57|f5|big ben:2709|{roi1}    571
## 4578                   vienna:7|f5|prater:2970|prices|{roi1}    554
##      conversions
## 1564           0
## 4576           0
## 4546           0
## 4477           0
## 898            0
## 1741           0
## 3058           0
## 497            0
## 1675           0
## 4578           0

what are the main attributes/dimensions to look at?

clicks - Conversions - cost -impressions - keywords -destination city -time - account

What questions would you want to investigate next? how much time would you expect to spend on each question?

1- How to increase the click/conversion through rate? ** checking the landing page relevancy and applying the A/B tesitng

2- What is the current wasted spend and how to decrease it over time ?

    ** predicting the next wasted spend if we continue with same beaviour 
    
    ** A/B testing for the land pages / positions.
    

3- checking the outliers (zero impressions(keywords relevancy- destination city) /zero clicks(position/ destination city ) )

4- analysis of adgroups performance over time (time series analysis)