About this Notebook
The Google search data in this notebook comes from a Google account archive
The steps outlined here to collect and analyze the data may change at any time
Below are the steps to claim your Google account data
Data Collection: Claiming your Google Search Data
1) Sign in to your Google account, then go to: https://myaccount.google.com/privacy
2) Find the link to download your data archive, or go to: https://takeout.google.com/settings/takeout

3) Select all Google products to create a complete archive of your data

4) After selecting the products, choose the file type and max archive size to make sure that all your account data is archived

Data Analysis: Visualizing Google Searches
To get an overall idea of the search volume, we can plot searches by year
# Yearly overview: one bar per year, bar height = number of searches.
p <- ggplot(data = search_data, mapping = aes(x = year))
p + geom_bar()

After determining the years with the largest search volume, we can plot monthly searches
# Restrict the data to the years strictly between 2014 and 2018.
monthly <- search_data[
  search_data$year > 2014 & search_data$year < 2018,
]

# One panel per year with a bar for every month; month labels are rotated
# so they do not overlap.
ggplot(data = monthly, mapping = aes(x = month, group = year)) +
  geom_bar() +
  facet_grid(. ~ year, scales = "free") +
  theme(axis.text.x = element_text(angle = 90))

Another interesting metric is searches by hour
# Bar chart of search counts for each value of the `hour` column.
p <- ggplot(data = search_data, mapping = aes(x = hour))
p + geom_bar()

We can also plot the search data by day of the week to determine which days are the most active
# Bar chart of search counts for each day of the week.
p <- ggplot(data = search_data, mapping = aes(x = day))
p + geom_bar()

We can take it a step further and group search time with day of the week.
# One panel per weekday; inside each panel, bars count searches per hour.
ggplot(data = search_data, mapping = aes(x = hour, group = day)) +
  geom_bar() +
  facet_grid(. ~ day, scales = "free")

We can group the search data by year and day of the week, to visualize the overall trend
# Tally searches for every (year, weekday) pair. summarize() only peels off
# the last grouping variable, so ungroup() is called to leave `wkday` as a
# plain, ungrouped table.
wkday <- search_data %>%
  group_by(year, day) %>%
  summarize(count = n()) %>%
  ungroup()

# Stacked bars: one bar per weekday, one segment per year. `year` is numeric,
# so it is mapped as a factor to get one distinct colour per year instead of
# a continuous gradient; the legend title is kept as "year".
p <- ggplot(wkday, aes(x = day, y = count, fill = factor(year)))
p + geom_col() + labs(x = "", y = "Search Volume", fill = "year")

Reporting: A Wordcloud from Google Search Data
First we need to extract the text and clean it using regular expressions
# Normalise the raw search strings in a single pipeline:
#   1. lower-case everything;
#   2. replace bytes that are not valid ASCII with a space;
#   3. blank out http/https links, tokens starting with # or @, newlines and
#      double quotes;
#   4. blank out matches of the ".com" pattern and every remaining
#      non-alphanumeric character;
#   5. strip leading and trailing whitespace.
search <- search_data$search %>%
  tolower() %>%
  iconv(from = "ASCII", to = "UTF-8", sub = " ") %>%
  gsub(
    pattern = '(http|https)\\S+\\s*|(#|@)\\S+\\s*|\\n|\\"',
    replacement = " ",
    x = .
  ) %>%
  gsub(
    pattern = "(.*.)\\.com(.*.)\\S+\\s|[^[:alnum:]]",
    replacement = " ",
    x = .
  ) %>%
  trimws()
After cleaning the text we can create a Text Corpus (a large and structured set of texts) and remove some words
# Wrap the cleaned search strings in a tm corpus (one document per search).
search_corpus <- Corpus(VectorSource(search))

# removePunctuation() and removeNumbers() are tm transformations already, so
# they are passed to tm_map() directly without a content_transformer() wrapper.
search_corpus <- tm_map(search_corpus, removePunctuation)
search_corpus <- tm_map(search_corpus, removeNumbers)

# Words to drop: the standard English stop words plus a few terms specific to
# this search history. The vector gets its own name so that it does not mask
# the tm::stopwords() function it is built from.
custom_stopwords <- c(
  stopwords("english"),
  "chrome", "chicago", "jlroo", "google"
)
search_corpus <- tm_map(search_corpus, removeWords, custom_stopwords)
Now from the corpus we need to create a Term Document Matrix in order to create word associations and a wordcloud
# Tabulate how often each term occurs in each search, then convert the
# result to a regular matrix for the steps that follow.
search_tdm <- search_corpus %>% TermDocumentMatrix()
search_matrix <- search_tdm %>% as.matrix()
Set a threshold for the min/max frequency of words to create the wordcloud
# Build the word/frequency table the word cloud is drawn from: the row sums
# of the term-document matrix give each term's total count across all
# searches, sorted with the most frequent terms first.
v <- sort(rowSums(search_matrix), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v, stringsAsFactors = FALSE)

# Plot only words seen at least 50 times, capped at 200 words; `scale` gives
# the largest and smallest text sizes used.
wordcloud(d$word, d$freq, min.freq = 50, scale = c(3, 0.5), max.words = 200)

LS0tCnRpdGxlOiAiQW5hbHl6aW5nIEdvb2dsZSBTZWFyY2ggSGlzdG9yeSIKYXV0aG9yOiAiSm9zZSBMdWlzIFJvZHJpZ3VleiIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQKICBodG1sX2RvY3VtZW50OiBkZWZhdWx0CmRhdGU6ICJKYW51YXJ5IDMwLCAyMDE4IgpzdWJ0aXRsZTogIkNNRSBHcm91cCBGb3VuZGF0aW9uIEJ1c2luZXNzIEFuYWx5dGljcyBMYWIiCi0tLQoKPGJyPgoKLS0tLS0tLS0tLS0tLS0KCiMjIEFib3V0IHRoaXMgTm90ZWJvb2sKCi0tLS0tLS0tLS0tLS0tCgo8YnI+CgoqIFRoZSBnb29nbGUgc2VhcmNoIGRhdGEgb24gdGhpcyBub3RlYm9vayBjb21lcyBmcm9tIGEgZ29vZ2xlIGFjY291bnQgYXJjaGl2ZQoKKiBUaGUgc3RlcHMgb3V0bGluZWQgaGVyZSB0byBjb2xsZWN0IGFuZCBhbmFseXplIHRoZSBkYXRhIG1heSBjaGFuZ2UgYXQgYW55IHRpbWUKCiogQmVsb3cgYXJlIHRoZSBzdGVwcyB0byBjbGFpbSB5b3VyIGdvb2dsZSBhY2NvdW50IGRhdGEgCgoKPGJyPgoKLS0tLS0tLS0tLS0tLS0KCiMjIEFuYWx5dGljcyBUb29sa2l0OiBSZXF1aXJlIFBhY2thZ2VzCgotLS0tLS0tLS0tLS0tLQoKPGJyPgoKKipJbnN0YWxsIHJlcXVpcmVkIHBhY2thZ2VzKioKCiogUGFja2FnZTogdGlkeXZlcnNlLCBsdWJyaWRhdGUsIHJ2ZXN0LCB0bSwgd29yZGNsb3VkCgpgYGB7cn0KCmluc3RhbGwucGFja2FnZXMoYygicnZlc3QiLCAKICAgICAgICAgICAgICAgICAgICJsdWJyaWRhdGUiLCAKICAgICAgICAgICAgICAgICAgICJ3b3JkY2xvdWQiLCAKICAgICAgICAgICAgICAgICAgICJ0bSIpLCBkZXBlbmRlbmNpZXMgPSBUUlVFKQoKYGBgCgoKPGJyPgoKKipMb2FkIHJlcXVpcmVkIHBhY2thZ2VzKioKYGBge3J9CgpsaWJyYXJ5KHRtKQpsaWJyYXJ5KHJ2ZXN0KQpsaWJyYXJ5KGx1YnJpZGF0ZSkKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkod29yZGNsb3VkKQoKYGBgCgo8YnI+CgotLS0tLS0tLS0tLS0tLQoKIyMgRGF0YSBDb2xsZWN0aW9uOiBDbGFpbWluZyB5b3VyIEdvb2dsZSBTZWFyY2ggRGF0YQoKLS0tLS0tLS0tLS0tLS0KCjxicj4KCiMjIyMgMSkgU2lnbiBpbnRvIHlvdXIgZ29vZ2xlIGFjY291bnQsIHRoZW4gR28gdG86CiogaHR0cHM6Ly9teWFjY291bnQuZ29vZ2xlLmNvbS9wcml2YWN5CgojIyMjIDIpIEZpbmQgdGhlIGxpbmsgdG8gZG93bmxvYWQgeW91ciBkYXRhIGFyY2hpdmUgb3IgR28gdG86IAoqIGh0dHBzOi8vdGFrZW91dC5nb29nbGUuY29tL3NldHRpbmdzL3Rha2VvdXQKCmBgYHtyLCBlY2hvPUZBTFNFfQoKa25pdHI6OmluY2x1ZGVfZ3JhcGhpY3MoJ2ltZ3MvaW1nMDEucG5nJykKCmBgYAoKPGJyPgoKIyMjIyAzKSBTZWxlY3QgYWxsIEdvb2dsZSBwcm9kdWN0cyB0byBjcmVhdGUgYSBjb21wbGV0ZSBhcmNoaXZlIG9mIHlvdXIgZGF0YQoKYGBge3IsIGVjaG89RkFMU0V9Cgprbml0cjo6aW5jbHVkZV9ncmFwaGljcygnaW1ncy9pbWcwMi5wbmcnKQoKYGBgCgo8YnI+CgojIyMjIDQpIEFm
dGVyIHNlbGVjdGluZyB0aGUgcHJvZHVjdHMsIGNob29zZSB0aGUgZmlsZSB0eXBlIGFuZCBtYXggYXJjaGl2ZSBzaXplIHRvIG1ha2Ugc3VyZSB0aGF0IGFsbCB5b3VyIGFjY291bnQgZGF0YSBpcyBhcmNoaXZlCgpgYGB7ciwgZWNobz1GQUxTRX0KCmtuaXRyOjppbmNsdWRlX2dyYXBoaWNzKCdpbWdzL2ltZzAzLnBuZycpCgpgYGAKCjxicj4KCi0tLS0tLS0tLS0tLS0tCgojIyBEYXRhIFByZXBhcmF0aW9uOiBFeHRyYWN0aW5nIEdvb2dsZSBTZWFyY2ggSW5mb3JtYXRpb24KCi0tLS0tLS0tLS0tLS0tCgo8YnI+CgojIyMjIExvY2F0ZSB0aGUgR29vZ2xlIGFyY2hpdmUsIHRoZW4gZmluZCB0aGUgc2VhcmNoIGRhdGEuIEZvciB0aGlzIGNhc2UsIGl0IGlzIGFuIGh0bWwgZmlsZSBsb2NhdGVkIGluICJNeSBBY3Rpdml0eSIgZm9sZGVyIGluc2lkZSB0aGUgIlNlYXJjaCIgZm9sZGVyIHRoZSBmaWxlIGlzIG5hbWVkICJNeUFjdGl2aXR5Lmh0bWwiCgoqIFRha2VvdXQgLT4gTXkgQWN0aXZpdHkgLT4gU2VhcmNoIC0+IE15QWN0aXZpdHkuaHRtbAoKIyMjIyBVc2luZyB0aGUgcnZlc3QgcGFja2FnZSB3ZSBjYW4gcmVhZCB0aGUgaHRtbCBkb2N1bWVudCB0aGF0IGNvbnRhaW5zIHRoZSByZWxhdGVkIGdvb2dsZSBzZWFyY2ggZGF0YQoKYGBge3J9Cgpkb2MgPC0gIlRha2VvdXQvTXkgQWN0aXZpdHkvU2VhcmNoL015QWN0aXZpdHkuaHRtbCIKc2VhcmNoX2FyY2hpdmUgPC0gcmVhZF9odG1sKGRvYykKCmBgYAoKPGJyPgoKLS0tLS0tLS0tLS0tLS0KCiMjIyBMYXZlcmFnaW5nIHJlZ3VsYXIgZXhwcmVzc2lvbiAocmVnZXgpIHdlIGNhbiBleHRyYWN0IHJlbGF2YW50IGluZm9ybWF0aW9uIGZyb20gdGhlIEhUTUwgZG9jdW1lbnQ6Cgo8YnI+CgojIyMjIEV4dHJhY3QgU2VhcmNoIFRpbWUKCmBgYHtyfQoKZGF0ZV9zZWFyY2ggPC0gc2VhcmNoX2FyY2hpdmUgJT4lIAogIGh0bWxfbm9kZXMoeHBhdGggPSAnLy9kaXZbQGNsYXNzPSJtZGwtZ3JpZCJdL2Rpdi9kaXYnKSAlPiUgCiAgc3RyX2V4dHJhY3QocGF0dGVybiA9ICIoPzw9PGJyPikoLiopKD88PVBNfEFNKSIpICU+JQogIG1keV9obXMoKQoKYGBgCgo8YnI+CgojIyMjIEV4dHJhY3QgU2VhcmNoIFRleHQKCmBgYHtyfQoKdGV4dF9zZWFyY2ggPC0gc2VhcmNoX2FyY2hpdmUgJT4lIAogIGh0bWxfbm9kZXMoeHBhdGggPSAnLy9kaXZbQGNsYXNzPSJtZGwtZ3JpZCJdL2Rpdi9kaXYnKSAlPiUKICBzdHJfZXh0cmFjdChwYXR0ZXJuID0gJyg/PD08YSkoLiopKD89PC9hPiknKSAlPiUgCiAgc3RyX2V4dHJhY3QocGF0dGVybiA9ICcoPzw9XCI+KSguKiknKQoKYGBgCgo8YnI+CgojIyMjIEV4dHJhY3QgU2VhcmNoIFR5cGUKCmBgYHtyfQoKdHlwZV9zZWFyY2ggPC0gc2VhcmNoX2FyY2hpdmUgJT4lIAogIGh0bWxfbm9kZXMoeHBhdGggPSAnLy9kaXZbQGNsYXNzPSJtZGwtZ3JpZCJdL2Rpdi9kaXYnKSAlPiUgCiAgc3RyX2V4dHJhY3QocGF0dGVybiA9ICIoPzw9bWRsLXR5cG9ncmFwaHktLWJvZHktMVwi
PikoLiopKD89PGEpIikgJT4lIAogIHN0cl9leHRyYWN0KHBhdHRlcm4gPSAiKFxcdyspKD89XFxzKSIpCgpgYGAKCjxicj4KCiMjIyMgQ3JlYXRlIGEgZGF0YSBmcmFtZSB1c2luZyB0aGUgZGF0YSBleHRyYWN0ZWQgZnJvbSB0aGUgaHRtbCBmaWxlCgpgYGB7cn0KCnNlYXJjaF9kYXRhIDwtIHRpYmJsZSh0aW1lc3RhbXAgPSBkYXRlX3NlYXJjaCwKICAgICAgICAgICAgICAgICAgICAgIGRhdGUgPSBhc19kYXRlKGRhdGVfc2VhcmNoKSwKICAgICAgICAgICAgICAgICAgICAgIHllYXIgPSB5ZWFyKGRhdGVfc2VhcmNoKSwKICAgICAgICAgICAgICAgICAgICAgIG1vbnRoID0gbW9udGgoZGF0ZV9zZWFyY2gsIGxhYmVsID0gVFJVRSksCiAgICAgICAgICAgICAgICAgICAgICBkYXkgPSB3ZWVrZGF5cyhkYXRlX3NlYXJjaCksCiAgICAgICAgICAgICAgICAgICAgICBob3VyID0gaG91cihkYXRlX3NlYXJjaCksCiAgICAgICAgICAgICAgICAgICAgICB0eXBlID0gdHlwZV9zZWFyY2gsCiAgICAgICAgICAgICAgICAgICAgICBzZWFyY2ggPSB0ZXh0X3NlYXJjaCkKCnNlYXJjaF9kYXRhJGRheSA8LSBmYWN0b3Ioc2VhcmNoX2RhdGEkZGF5LCAKICAgICAgICAgICAgICAgICAgICAgICAgICBsZXZlbHMgPSBjKCJTdW5kYXkiLCAiTW9uZGF5IiwgIlR1ZXNkYXkiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIldlZG5lc2RheSIsIlRodXJzZGF5IiwgIkZyaWRheSIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiU2F0dXJkYXkiKSkKCnNlYXJjaF9kYXRhIDwtIG5hLm9taXQoc2VhcmNoX2RhdGEpCgpoZWFkKHNlYXJjaF9kYXRhKQoKYGBgCgo8YnI+CgotLS0tLS0tLS0tLS0tLQoKIyMgRGF0YSBBbmFseXNpczogVmlzdWFsaXppbmcgR29vZ2xlIFNlYXJjaGVzCgotLS0tLS0tLS0tLS0tLQoKPGJyPgoKIyMjIyBUbyBnZXQgYW4gb3ZlcmFsbCBpZGVhIG9mIHRoZSBzZWFyY2ggdm9sdW1lLCB3ZSBjYW4gcGxvdCBzZWFyY2hlcyBieSB5ZWFyIAoKYGBge3J9CgpwIDwtIGdncGxvdChzZWFyY2hfZGF0YSwgYWVzKHllYXIpKQpwICsgZ2VvbV9iYXIoKQoKYGBgCgo8YnI+CgojIyMjIEFmdGVyIGRldGVybWluZSB0aGUgeWVhcnMgd2l0aCB0aGUgbGFyZ2VzdCBzZWFyY2ggdm9sdW1lIHdlIGNhbiBwbG90IG1vbnRobHkgc2VhcmNoZXMKCmBgYHtyfQoKbW9udGhseSA8LSBzZWFyY2hfZGF0YVsoc2VhcmNoX2RhdGEkeWVhciA+IDIwMTQgJiBzZWFyY2hfZGF0YSR5ZWFyPCAyMDE4KSwgXQoKZ2dwbG90KG1vbnRobHkpICsgZ2VvbV9iYXIoYWVzKHggPSBtb250aCwgZ3JvdXAgPSB5ZWFyKSkgKwogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlPTkwKSkgKwogIGZhY2V0X2dyaWQoLn55ZWFyLCBzY2FsZXM9ImZyZWUiKQoKYGBgCgo8YnI+CgojIyMjIEFub3RoZXIgaW50ZXJlc3RpbmcgbWV0cmljdCBpcyBzZWFyY2hlcyBieSBIb3VyCgpgYGB7cn0KCnAgPC0gZ2dwbG90KHNlYXJjaF9k
YXRhLCBhZXMoaG91cikpCnAgKyBnZW9tX2JhcigpCgpgYGAKCjxicj4KCiMjIyMgV2UgY2FuIGFsc28gcGxvdCB0aGUgc2VhcmNoIGRhdGEgYnkgZGF5IG9mIHRoZSB3ZWVrIHRvIGRldGVybWluZSBkYXkgYXJlIHRoZSBtb3N0IGFjdGl2ZQoKYGBge3J9CgpwIDwtIGdncGxvdChzZWFyY2hfZGF0YSwgYWVzKGRheSkpCnAgKyBnZW9tX2JhcigpCgpgYGAKCjxicj4KCiMjIyMgV2UgY2FuIHRha2UgaXQgYW4gc3RlcCBmdXJ0aGVyIGFuZCBncm91cCBzZWFyY2ggdGltZSB3aXRoIGRheSBvZiB0aGUgd2Vlay4gCgpgYGB7cn0KCmdncGxvdChzZWFyY2hfZGF0YSkgKyAKICBnZW9tX2JhcihhZXMoeCA9IGhvdXIsIGdyb3VwID0gZGF5KSApICsKICBmYWNldF9ncmlkKC5+ZGF5LCBzY2FsZXMgPSAiZnJlZSIpCgpgYGAKCjxicj4KCiMjIyMgV2UgY2FuIGdyb3VwIHRoZSBzZWFyY2ggZGF0YSBieSB5ZWFyIGFuZCBkYXkgb2YgdGhlIHdlZWssIHRvIHZpc3VhbGl6ZSB0aGUgb3ZlcmFsbCB0cmVuZCAKCmBgYHtyfQoKd2tkYXkgPC0gZ3JvdXBfYnkoc2VhcmNoX2RhdGEsIHllYXIsIGRheSkgJT4lIHN1bW1hcml6ZShjb3VudCA9IG4oKSkKcCA8LSBnZ3Bsb3Qod2tkYXksIGFlcyhkYXksIGNvdW50LCBmaWxsID0geWVhcikpIApwICsgZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIpICsgbGFicyh4ID0gIiIsIHkgPSAiU2VhcmNoIFZvbHVtZSIpCgpgYGAKCjxicj4KCi0tLS0tLS0tLS0tLS0tCgojIyBSZXBvcnRpbmc6IEEgV29yZGNsb3VkIGZyb20gR29vZ2xlIFNlYXJjaCBEYXRhCgotLS0tLS0tLS0tLS0tLQoKPGJyPgoKIyMjIyBGaXJzdCB3ZSBuZWVkIHRvIGV4dHJhY3QgdGhlIHRleHQgYW5kIGNsZWFuIGl0IHVzaW5nIHJlZ3VsYXIgZXhwcmVzc2lvbnMKCmBgYHtyfQoKc2VhcmNoIDwtIHRvbG93ZXIoc2VhcmNoX2RhdGEkc2VhcmNoKQpzZWFyY2ggPC0gaWNvbnYoc2VhcmNoLCAiQVNDSUkiLCAiVVRGLTgiLCAiICIpCnNlYXJjaCA8LSBnc3ViKCcoaHR0cHxodHRwcylcXFMrXFxzKnwoI3xAKVxcUytcXHMqfFxcbnxcXCInLCAiICIsIHNlYXJjaCkKc2VhcmNoIDwtIGdzdWIoIiguKi4pXFwuY29tKC4qLilcXFMrXFxzfFteWzphbG51bTpdXSIsICIgIiwgc2VhcmNoKQpzZWFyY2ggPC0gdHJpbXdzKHNlYXJjaCkKCmBgYAoKPGJyPgoKIyMjIyBBZnRlciBjbGVhbmluZyB0aGUgdGV4dCB3ZSBjYW4gY3JlYXRlIGEgVGV4dCBDb3JwdXMgKGEgbGFyZ2UgYW5kIHN0cnVjdHVyZWQgc2V0IG9mIHRleHRzKSBhbmQgcmVtb3ZlIHNvbWUgd29yZHMgCgpgYGB7cn0KCnNlYXJjaF9jb3JwdXMgPC0gIENvcnB1cyhWZWN0b3JTb3VyY2Uoc2VhcmNoKSkKc2VhcmNoX2NvcnB1cyA8LSB0bV9tYXAoc2VhcmNoX2NvcnB1cywgY29udGVudF90cmFuc2Zvcm1lcihyZW1vdmVQdW5jdHVhdGlvbikpCnNlYXJjaF9jb3JwdXMgPC0gdG1fbWFwKHNlYXJjaF9jb3JwdXMsIGNvbnRlbnRfdHJhbnNmb3JtZXIocmVtb3ZlTnVtYmVycykpCnN0b3B3b3JkcyA8LSBjKHN0
b3B3b3JkcygiZW5nbGlzaCIpLCAiY2hyb21lIiwgImNoaWNhZ28iLCAiamxyb28iLCAiZ29vZ2xlIikKc2VhcmNoX2NvcnB1cyA8LSB0bV9tYXAoc2VhcmNoX2NvcnB1cywgcmVtb3ZlV29yZHMsIHN0b3B3b3JkcykKCmBgYAoKPGJyPgoKIyMjIyBOb3cgZnJvbSB0aGUgY29ycHVzIHdlIG5lZWQgdG8gY3JlYXRlIGEgVGVybSBEb2N1bWVudCBNYXRyaXggaW4gb3JkZXIgdG8gY3JlYXRlIHdvcmQgYXNzb2NpYXRpb25zIGFuZCBhIHdvcmRjbG91ZAoKYGBge3J9CgpzZWFyY2hfdGRtIDwtIFRlcm1Eb2N1bWVudE1hdHJpeChzZWFyY2hfY29ycHVzKQpzZWFyY2hfbWF0cml4IDwtIGFzLm1hdHJpeChzZWFyY2hfdGRtKQoKYGBgCgoKPGJyPgoKIyMjIyBVc2luZyB0aGUgVGVybSBEb2N1bWVudCBtYXRyaXggd2UgY2FuIGNyZWF0ZSBhIGRhdGEgZnJhbWUgd2l0aCB3b3JkcyBhbmQgcmVsYXRlZCBmcmVxdWVuY2llcyAKCmBgYHtyfQoKdiA8LSBzb3J0KHJvd1N1bXMoc2VhcmNoX21hdHJpeCksIGRlY3JlYXNpbmcgPSBUUlVFKQp0d19uYW1lcyA8LSBuYW1lcyh2KQpkIDwtIGRhdGEuZnJhbWUod29yZCA9IHR3X25hbWVzLCBmcmVxID0gdikKCmBgYAoKPGJyPgoKIyMjIyBTZXQgYSB0aHJlc2hvbGQgZm9yIHRoZSBtaW4vbWF4IGZyZXF1ZW5jeSBvZiB3b3JkcyB0byBjcmVhdGUgdGhlIHdvcmRjbG91ZAoKYGBge3J9Cgp3b3JkY2xvdWQoZCR3b3JkLCBkJGZyZXEsIG1pbi5mcmVxID0gNTAsIHNjYWxlID0gYygzICwgMC41KSwgbWF4LndvcmRzID0gMjAwKQoKYGBgCg==