Data analysis using Google Trends
Google searches are one of the most important datasets ever collected. Google is not only a tool for searching and getting answers but also a great means of understanding people around the world. It is a digital gold mine that can reveal a great deal of otherwise unknown information.
Analysing Bitcoin price movement against Google searches for the term “Bitcoin”
# Load the daily Bitcoin price history from the project repository.
bitcoinPrice <- read.csv(
  "https://raw.githubusercontent.com/chirag-vithlani/Capstone/master/data/bitcoin/BitCoin_price_updated.csv"
)
# Parse the month/day/year strings in `Date` into Date values.
bitcoinPrice$newdate <- as.Date(bitcoinPrice$Date, format = "%m/%d/%Y")
# The price only started moving after 2016, so keep rows dated after
# 2016-01-01. The comparison is strict, so the first retained day is
# 2016-01-02. which() drops rows whose date failed to parse, as subset() does.
newbitcoinPrice <- bitcoinPrice[
  which(bitcoinPrice$newdate > as.Date("2016-01-01")),
]
# Google Trends reports a relative index, so express every price as a
# percentage of the highest price in the retained window.
newbitcoinPrice$pricePercent <- with(
  newbitcoinPrice,
  (Bitcoin.Price * 100) / max(Bitcoin.Price)
)
head(newbitcoinPrice, n = 2)
## Date Bitcoin.Price newdate pricePercent
## 1828 1/2/2016 433.36 2016-01-02 2.257822
## 1829 1/3/2016 433.36 2016-01-03 2.257822
# Load the weekly Google Trends export for the term "Bitcoin".
bitcoinTrends <- read.csv(
  "https://raw.githubusercontent.com/chirag-vithlani/Capstone/master/data/bitcoin/BitCoin_Trends_updated.csv"
)
# Parse the month/day/year strings in `Week` into Date values so the rows
# can later be matched against the price data on `newdate`.
bitcoinTrends$newdate <- as.Date(bitcoinTrends$Week, format = "%m/%d/%Y")
head(bitcoinTrends, n = 2)
## Week Bitcoin...United.States. newdate
## 1 1/3/2016 3 2016-01-03
## 2 1/10/2016 2 2016-01-10
# Inner-join the weekly search index to the daily prices on the parsed date.
# The key is spelled out so the join cannot silently change if either CSV
# later gains another column name that the two tables share.
bitCoinPriceAndTrends <- merge(bitcoinTrends, newbitcoinPrice, by = "newdate")
head(bitCoinPriceAndTrends, 3)
## newdate Week Bitcoin...United.States. Date Bitcoin.Price
## 1 2016-01-03 1/3/2016 3 1/3/2016 433.36
## 2 2016-01-10 1/10/2016 2 1/10/2016 447.46
## 3 2016-01-17 1/17/2016 2 1/17/2016 381.85
## pricePercent
## 1 2.257822
## 2 2.331283
## 3 1.989453
Plotting Bitcoin search Trends v/s price
# Margins (left, right, bottom, top) and padding passed to layout().
m <- list(
  l = 50,
  r = 50,
  b = 100,
  t = 100,
  pad = 4
)

# Overlay the normalised price and the Google search index on one time axis.
# `line$shape` only accepts plotly's interpolation keywords ("linear",
# "spline", "hv", ...); the trace label belongs in `name`, not in `shape`.
plot_ly(x = ~bitCoinPriceAndTrends$newdate) %>%
  add_lines(
    y = ~bitCoinPriceAndTrends$pricePercent,
    name = "Actual Price Percentage",
    line = list(shape = "linear")
  ) %>%
  add_lines(
    y = ~bitCoinPriceAndTrends$Bitcoin...United.States.,
    name = "Google Bitcoin Search Trends",
    line = list(shape = "linear")
  ) %>%
  layout(
    autosize = FALSE, width = 1022, height = 500, margin = m,
    font = list(family = "\"Droid Sans\", sans-serif"),
    title = "Google Bitcoin Search Trends Vs Bitcoin Price Percentage Change",
    xaxis = list(title = "Timeline"),
    yaxis = list(title = "Bitcoin Percentage")
  )
As we can see, the Bitcoin search-interest curve and the Bitcoin price curve follow a similar shape.
We frequently type “how to ….” queries into Google, so here is an analysis of which “how to” queries we search for the most.
To get the “how to” queries, we use the gtrendsR R package. We query the term “how to ..” for each year from 2012 to 2017 and show the most searched terms using a word cloud.
# Writing function to display wordcloud
# Fetch the related-queries table for the search term "How to", restricted
# to geo "US", over the requested time span.
#
# timeline: time-span string handed to gtrends() as its `time` argument,
#   e.g. "2017-01-01 2017-12-31".
# Returns the `related_queries` element of the gtrends() result.
getYearTrends <- function(timeline) {
  trendsResult <- gtrends("How to", geo = "US", time = timeline)
  trendsResult$related_queries
}
# Draw a word cloud of the "How to" related queries for one time span.
#
# timeline: time-span string forwarded to getYearTrends(),
#   e.g. "2017-01-01 2017-12-31".
# Returns the widget created by wordcloud2(); stops with an error when
# getYearTrends() yields no rows.
gTrendswordcloud <- function(timeline) {
  queries <- getYearTrends(timeline)
  if (is.null(queries) || nrow(queries) == 0) {
    stop("No related queries returned for timeline: ", timeline, call. = FALSE)
  }

  # `subject` carries the score as text. Strip the percent sign, map the two
  # non-numeric markers to numbers, then drop thousands separators.
  score <- gsub("%", "", queries$subject)
  score[score %in% "<1"] <- "0"
  score[score %in% "Breakout"] <- "9999"
  queries$subjectNew <- as.numeric(gsub(",", "", score))

  rising <- queries[queries$related_queries %in% "rising", , drop = FALSE]
  top <- queries[queries$related_queries %in% "top", , drop = FALSE]

  # Rescale the rising scores so that the largest one becomes 100. The guard
  # skips rescaling when there is no usable maximum (no rising rows, only NA
  # scores, or a maximum of zero).
  if (any(!is.na(rising$subjectNew))) {
    risingPeak <- max(rising$subjectNew, na.rm = TRUE)
    if (risingPeak > 0) {
      rising$subjectNew <- rising$subjectNew * 100 / risingPeak
    }
  }

  cloud <- rbind(rising, top)[, c("value", "subjectNew")]
  colnames(cloud)[2] <- "freq"
  # Truncate to whole numbers, then keep only terms scoring above 1.
  cloud$freq <- as.numeric(as.integer(cloud$freq))
  cloud <- cloud[which(cloud$freq > 1), , drop = FALSE]
  cloud <- cloud[order(-cloud$freq), , drop = FALSE]
  wordcloud2(data = cloud)
}
#gTrendswordcloud("2017-01-01 2017-12-31")
| 2012 | 2013 | 2014 | 2015 | 2016 | 2017 |
|---|---|---|---|---|---|
|
|
|
|
|
|
|
Make GIF using ezgif.com
Result shows most searched “How to..” queries on Google. The clouds give greater prominence to words that appear more frequently.
# Related "How to" queries, one table per calendar year.
tr2012 <- getYearTrends("2012-01-01 2012-12-31")
tr2013 <- getYearTrends("2013-01-01 2013-12-31")
tr2014 <- getYearTrends("2014-01-01 2014-12-31")
tr2015 <- getYearTrends("2015-01-01 2015-12-31")
tr2016 <- getYearTrends("2016-01-01 2016-12-31")
tr2017 <- getYearTrends("2017-01-01 2017-12-31")
# Stack the six yearly tables into one.
all <- rbind(tr2012, tr2013, tr2014, tr2015, tr2016, tr2017)
#plot_ly(x = df$Var1,y = df$Freq,name = "SF Zoo",type = "bar")
# Count how often each query text occurs in the stacked table, keep the
# queries that occur more than four times, and print them by descending count.
df <- as.data.frame(table(all$value))
df <- df[which(df$Freq > 4), ]
df[order(-df$Freq), ]
## Var1 Freq
## 8 how to boil eggs 6
## 70 how to tie a tie 6
## 25 how to delete instagram account 5
## 27 how to draw 5
## 32 how to get away with murder 5
## 46 how to lose weight 5
## 78 how to write a check 5
# Shared worker for the per-year "unique queries" tables below.
#
# target: related-queries data frame of the year of interest.
# others: related-queries data frame holding the rows of every other year.
# label:  column heading shown in the rendered table.
# Returns the styled HTML table built by kable() and kable_styling().
uniqueQueriesTable <- function(target, others, label) {
  # Anti-join on `value` in base R. Unlike setDT(), which converts its
  # argument to a data.table by reference, this leaves the yearly data
  # frames untouched.
  onlyHere <- unique(target$value[!(target$value %in% others$value)])
  result <- data.frame(onlyHere, stringsAsFactors = FALSE)
  colnames(result) <- label
  kable(result, "html", row.names = FALSE) %>%
    kable_styling(bootstrap_options = "striped", full_width = FALSE)
}

# Queries that appear in 2012 but in none of the years 2013-2017.
get2012uniqueQueries <- function() {
  uniqueQueriesTable(
    tr2012,
    rbind(tr2013, tr2014, tr2015, tr2016, tr2017),
    "Year 2012 Unique queries"
  )
}

# Queries that appear in 2016 but in none of the years 2012-2015 or 2017.
get2016uniqueQueries <- function() {
  uniqueQueriesTable(
    tr2016,
    rbind(tr2012, tr2013, tr2014, tr2015, tr2017),
    "Year 2016 Unique queries"
  )
}

# Queries that appear in 2017 but in none of the years 2012-2016.
get2017uniqueQueries <- function() {
  uniqueQueriesTable(
    tr2017,
    rbind(tr2012, tr2013, tr2014, tr2015, tr2016),
    "Year 2017 Unique queries"
  )
}
|
|
|
Here are some interesting queries from the last 12 months.
# Load the hand-picked list of interesting "how to" queries.
howToData <- read.csv(
  "https://raw.githubusercontent.com/chirag-vithlani/Capstone/master/data/How_to_Interesting.csv"
)
# Keep the first and fourth columns and show the latter under the heading
# "Country".
howToDataSubSet <- howToData[, c(1, 4)]
names(howToDataSubSet)[2] <- "Country"
# Render as a striped HTML table that does not span the full page width.
kable_styling(
  kable(howToDataSubSet, "html"),
  bootstrap_options = "striped",
  full_width = FALSE
)
| Topic | Country |
|---|---|
| how to make paper flowers | Bhutan |
| how to take pictures of northern lights | Iceland |
| how to become good teacher | India |
| how to get twins | Kenya |
| how to hack facebook | Myanmar |
| how to make carrot oil | Nigeria |
| how to handle wife | Pakistan |
| how to identify AIDS | Sri Lanka |
| how to delete telegram account | Uzbekistan |
| how to measure infiltration rate | Zimbabwe |
# Assemble the columns needed for the map: location code, numeric value
# used for colouring, and the query text shown on hover.
LAND_ISO <- howToData$Country
value <- howToData$val
topic <- howToData$Topic
data <- data.frame(LAND_ISO, value, topic)
# World map with one coloured region per row.
g <- list(scope = 'world')
plot_geo(data) %>%
  add_trace(
    z = ~value, locations = ~LAND_ISO,
    colors = c(Pass="yellow", High="red", Low= "cyan", Sigma= "magenta", Mean='limegreen', Fail="blue", Median="violet"),
    # Take the hover text from the plotted data frame and restrict the
    # tooltip to it, so the raw `z` value no longer appears on mouse hover.
    text = ~as.character(topic),
    hoverinfo = "text"
  ) %>%
  layout(geo = g) %>%
  hide_colorbar()
Todo: hovering the mouse over a country shows a value that is wrong. I am working on it.
Of the unique “How to” queries above, I found “How to handle wife” funny and serious at the same time. It hints at gender inequality: wherever this query is popular, I would expect gender inequality to be high. So here we find the top five countries for this query.
# Search interest for "how to handle wife" over the past 12 months
# (no `geo` argument is supplied to gtrends()).
howToHandleWifeSearch <- gtrends("how to handle wife", time = "today 12-m")
# Take the first five rows of the per-country table and keep its first two
# columns, showing the second one as "Percentage of Hits".
howToHandleWifeSearchHead <- head(
  howToHandleWifeSearch$interest_by_country,
  n = 5
)
howToHandleWifeSearchHead <- howToHandleWifeSearchHead[, c(1, 2)]
names(howToHandleWifeSearchHead)[2] <- "Percentage of Hits"
# Render as a striped HTML table that does not span the full page width.
kable_styling(
  kable(howToHandleWifeSearchHead, "html"),
  bootstrap_options = "striped",
  full_width = FALSE
)
| location | Percentage of Hits |
|---|---|
| Pakistan | 100 |
| Sri Lanka | 69 |
| United Arab Emirates | 54 |
| India | 42 |
| Bangladesh | 34 |
The northern lights (aurora) are a natural light display in the Earth’s sky, seen predominantly in high-latitude regions such as Iceland. That is why people in Iceland search for “how to take pictures of northern lights”. This was the most amazing thing I learned while working on this project.
Source : Wikipedia