searchkeys <- read.csv("~/Downloads/Unsaved report (2).csv", stringsAsFactors=FALSE)
df <- read.csv("~/Downloads/AdWords Analysis from Aug 1 - Aug 14, 2016 - Sheet4.csv")
names(searchkeys)[1] <- "keyword"
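
A quick sanity check of the two imports; beyond the renamed keyword column, nothing else about the file layouts is assumed here.

# Dimensions of both data sets and a peek at the first few search keywords
dim(searchkeys)
dim(df)
head(searchkeys$keyword)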
## Summarize cost by country
library(dplyr)
cost_by_country <- df %>% 
  group_by(Country.Territory) %>% 
  summarise(Cost = sum(Cost))

## Summarize clicks by country
clicks_by_country <- df %>% 
  group_by(Country.Territory) %>% 
  summarise(Clicks = sum(Clicks))

## Summarize cost by day
cost_by_day <- df %>% 
  group_by(Day.of.week) %>% 
  summarise(Cost = sum(Cost))

## Summarize clicks by day
clicks_by_day <- df %>% 
  group_by(Day.of.week) %>% 
  summarise(Clicks = sum(Clicks))
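
The four summaries above can also be computed in one pass per grouping; the code below is just an equivalent, more compact sketch of the same calls.

# Cost and clicks together, by country and by day of week
by_country <- df %>%
  group_by(Country.Territory) %>%
  summarise(Cost = sum(Cost), Clicks = sum(Clicks))

by_day <- df %>%
  group_by(Day.of.week) %>%
  summarise(Cost = sum(Cost), Clicks = sum(Clicks))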
write.csv(cost_by_country, file = "~/Downloads/CostByCountry.csv", row.names = F)
write.csv(clicks_by_country, file = "~/Downloads/ClicksByCountry.csv", row.names = F)
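The day-of-week summaries can be written out the same way; the file names below are placeholders that simply follow the same pattern.

write.csv(cost_by_day, file = "~/Downloads/CostByDay.csv", row.names = F)
write.csv(clicks_by_day, file = "~/Downloads/ClicksByDay.csv", row.names = F)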

Text analytics of the keywords used in the search queries that led to a click on the Foundational Research Institute website

#install.packages("tm")
library(tm)
## Loading required package: NLP
## 
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
## 
##     annotate
#install.packages("SnowballC")
library(SnowballC)
# Create corpus
 
corpus = Corpus(VectorSource(searchkeys$keyword))

# Look at corpus
corpus
## <<VCorpus>>
## Metadata:  corpus specific: 0, document level (indexed): 0
## Content:  documents: 435
corpus[[1]]$content
## [1] "moral disagreement"
# Convert to lower-case

corpus = tm_map(corpus, tolower)

corpus = tm_map(corpus, PlainTextDocument)  # restore proper text documents after tolower, so later tm_map calls work
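
With newer versions of tm, wrapping base functions in content_transformer() achieves the same lower-casing while keeping the documents as PlainTextDocuments, so the extra conversion step above (which also resets the document names to character(0), as visible in the inspect() output further down) is not needed. A sketch of that variant:

# Same lower-casing, but via content_transformer(), preserving document metadata
corpus = tm_map(corpus, content_transformer(tolower))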
# Remove punctuation

corpus = tm_map(corpus, removePunctuation)

corpus[[1]]$content
## [1] "moral disagreement"
# Look at stop words 
stopwords("english")[1:10]
##  [1] "i"         "me"        "my"        "myself"    "we"       
##  [6] "our"       "ours"      "ourselves" "you"       "your"
# Remove stopwords

corpus = tm_map(corpus, removeWords, c(stopwords("english")))

corpus[[1]]$content
## [1] "moral disagreement"
# Stem words 

corpus = tm_map(corpus, stemDocument)
corpus[[1]]$content
## [1] "moral disagr"
# Create matrix

frequencies = DocumentTermMatrix(corpus)

frequencies
## <<DocumentTermMatrix (documents: 435, terms: 220)>>
## Non-/sparse entries: 975/94725
## Sparsity           : 99%
## Maximal term length: 14
## Weighting          : term frequency (tf)
# Look at matrix 

inspect(frequencies[10:15,50:55])
## <<DocumentTermMatrix (documents: 6, terms: 6)>>
## Non-/sparse entries: 0/36
## Sparsity           : 100%
## Maximal term length: 7
## Weighting          : term frequency (tf)
## 
##               Terms
## Docs           crisi crucial cruel cruelti cultur danger
##   character(0)     0       0     0       0      0      0
##   character(0)     0       0     0       0      0      0
##   character(0)     0       0     0       0      0      0
##   character(0)     0       0     0       0      0      0
##   character(0)     0       0     0       0      0      0
##   character(0)     0       0     0       0      0      0
# Check for sparsity: list the terms that appear at least 5 times

findFreqTerms(frequencies, lowfreq=5)
##  [1] "abus"        "altruism"    "anim"        "articl"      "artifici"   
##  [6] "chariti"     "chicken"     "circus"      "cooper"      "cruel"      
## [11] "cruelti"     "essay"       "ethic"       "factori"     "farm"       
## [16] "futur"       "happi"       "help"        "intellig"    "intern"     
## [21] "life"        "moral"       "natur"       "philosophi"  "protect"    
## [26] "rescu"       "research"    "risk"        "save"        "scienc"     
## [31] "societi"     "stop"        "suffer"      "technolog"   "test"       
## [36] "utilitarian" "valu"        "vegan"       "welfar"      "wild"       
## [41] "wildlif"
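findFreqTerms() returns only the term names; the column sums of the matrix give the actual counts, e.g. for the ten most frequent stems:

# Term frequencies across all queries, most frequent first
sort(colSums(as.matrix(frequencies)), decreasing = TRUE)[1:10]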
# Remove sparse terms, i.e. terms that occur in less than 1% of the search queries

sparse = removeSparseTerms(frequencies, .99)
sparse
## <<DocumentTermMatrix (documents: 435, terms: 41)>>
## Non-/sparse entries: 687/17148
## Sparsity           : 96%
## Maximal term length: 11
## Weighting          : term frequency (tf)
# Convert to a data frame

keywordsSparse = as.data.frame(as.matrix(sparse))

# Make all variable names R-friendly

colnames(keywordsSparse) = make.names(colnames(keywordsSparse))
rownames(keywordsSparse) <- 1:nrow(keywordsSparse)

keywordsFreq <- apply(keywordsSparse, 2, sum)
keywordsFreq <- data.frame(keywords = names(keywordsFreq),n = keywordsFreq)

Plot the keyword frequencies

keywordsFreq <- transform(keywordsFreq, keywords = reorder(keywords, n))
ggplot(data=keywordsFreq, aes(y=n, x=keywords)) + 
  geom_bar(stat = "identity", color="black", fill="green") + 
  coord_flip() + 
  scale_y_continuous(breaks=seq(0,100, 25)) +
  geom_text(aes(keywords, n, label = n), size = 3, show.legend = FALSE, hjust = -0.35) +
  ggtitle("Most frequently used search keywords of Google search queries\nwhich led to ad clicks")

Set the sparsity threshold to 95%

sparse = removeSparseTerms(frequencies, 0.95)
sparse
## <<DocumentTermMatrix (documents: 435, terms: 7)>>
## Non-/sparse entries: 358/2687
## Sparsity           : 88%
## Maximal term length: 8
## Weighting          : term frequency (tf)
# Convert to a data frame

keywordsSparse = as.data.frame(as.matrix(sparse))

# Make all variable names R-friendly

colnames(keywordsSparse) = make.names(colnames(keywordsSparse))
rownames(keywordsSparse) <- 1:nrow(keywordsSparse)

keywordsFreq <- apply(keywordsSparse, 2, sum)
keywordsFreq <- data.frame(keywords = names(keywordsFreq),n = keywordsFreq)

Plot the keyword frequencies

keywordsFreq <- transform(keywordsFreq, keywords = reorder(keywords, n))
ggplot(data=keywordsFreq, aes(y=n, x=keywords)) + 
  geom_bar(stat = "identity", color="black", fill="green") + 
  coord_flip() + 
  scale_y_continuous(breaks=seq(0,100, 25)) +
  geom_text(aes(keywords, n, label = n), size = 3, show.legend = FALSE, hjust = -0.35) +
  ggtitle("Most frequently used search keywords of Google search queries\nwhich led to ad clicks")

Create a fancy word cloud

library(wordcloud)
## Loading required package: RColorBrewer
library(RColorBrewer)
m <- TermDocumentMatrix(corpus, control = list(minWordLength = 1))
m <- as.matrix(m)
# calculate the frequency of words
v <- sort(rowSums(m), decreasing=TRUE)
myNames <- names(v)
a <- which(myNames=="anim")
myNames[a] <- "animal"
i <- which(myNames=="intellig")
myNames[i] <- "intelligence"
d <- data.frame(word=myNames, freq=v)

pal2 <- brewer.pal(8, "Dark2")
wordcloud(d$word, d$freq, scale = c(8, .2), min.freq = 1,
          max.words = Inf, random.order = FALSE, rot.per = .15, colors = pal2)
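
The layout of the word cloud involves some randomness (rot.per rotates a random 15% of the words), so fixing the seed right before the call keeps the picture stable across runs; the seed value itself is arbitrary.

# Reproducible version of the same word cloud
set.seed(1234)
wordcloud(d$word, d$freq, scale = c(8, .2), min.freq = 1,
          max.words = Inf, random.order = FALSE, rot.per = .15, colors = pal2)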