##============================================##
## google analytics for K&I products and tools##
##============================================##
##install.packages("RGA") ## Google Analytics API interface
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(data.table)
##
## Attaching package: 'data.table'
##
## The following objects are masked from 'package:dplyr':
##
## between, last
library(lubridate)
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:data.table':
##
## hour, mday, month, quarter, wday, week, yday, year
library(tidyr)
require(knitr)
## Loading required package: knitr
library(RGA)
## Please use predefined Credentials only for the testing requests. To obtain your own Credentials see help(authorize).
library(RGoogleAnalytics)
## Loading required package: httr
library(gridExtra)
library(ggplot2)
library(viridis)
## authorise access
## The OAuth client credentials are read from the environment rather than
## being embedded in the script, so they are not leaked with the source.
## Set RGA_CLIENT_ID and RGA_CLIENT_SECRET (e.g. in ~/.Renviron) before running.
## NOTE(review): the client secret that was previously committed here should
## be treated as compromised and rotated in the Google API console.
ga_client_id <- Sys.getenv("RGA_CLIENT_ID")
ga_client_secret <- Sys.getenv("RGA_CLIENT_SECRET")
if (!nzchar(ga_client_id) || !nzchar(ga_client_secret)) {
  stop(
    "Set the RGA_CLIENT_ID and RGA_CLIENT_SECRET environment variables ",
    "before running this script.",
    call. = FALSE
  )
}
ga_token <- authorize(client.id = ga_client_id, client.secret = ga_client_secret)
## Access token will be stored in the '.ga-token.rds' file.
## extract profile ids
lookup <- list_profiles()
## Auto-refreshing stale OAuth token.
## Keep the profiles of interest. Rows and columns are picked by position, so
## this depends on the exact order in which list_profiles() returns them.
## NOTE(review): columns 5 and 1 are presumably the profile name and id (later
## code keys on `id` and groups by `name`) - confirm against names(lookup).
lookup1 <- as.data.table(lookup[c(3,23,42,45,46,63, 112,122,137,133,149,150),c(5,1)])
## IHAL: daily traffic from the first recorded date up to yesterday
ihal.id <- "35140940" ## profile ID
ihal_first <- firstdate(ihal.id) ## start at the first recording date in analytics
## Metrics are requested in the same order as for every other product
## (users, sessions, pageviews, avgTimeonPage). The per-product tables are
## later stacked and renamed by column position, so a different order here
## would swap the pageviews and avgTimeonPage values for IHAL.
ga_ihal <- get_ga(
  ihal.id,
  start.date = ihal_first,
  end.date = "yesterday",
  metrics = "ga:users,ga:sessions,ga:pageviews,ga:avgTimeonPage",
  dimensions = "ga:date"
)
## Daily pageviews and average time on page, each with a smoothed trend line
ihalpv <- ggplot(ga_ihal, aes(x = date, y = pageviews)) +
  geom_line() +
  geom_smooth() +
  ggtitle("IHAL pageviews")
ihaldur <- ggplot(ga_ihal, aes(x = date, y = avg.timeon.page)) +
  geom_line() +
  geom_smooth() +
  ggtitle("IHAL time")
grid.arrange(ihalpv, ihaldur, ncol = 2)

## PHOF: daily traffic from the first recorded date up to yesterday
phof.id <- "65073818"
phof_first <- firstdate(phof.id)
ga_phof <- get_ga(
  phof.id,
  start.date = phof_first,
  end.date = "yesterday",
  metrics = "ga:users,ga:sessions,ga:pageviews,ga:avgTimeonPage",
  dimensions = "ga:date"
)
## Daily pageviews and average time on page, each with a smoothed trend line
phofpv <- ggplot(ga_phof, aes(x = date, y = pageviews)) +
  geom_line() +
  geom_smooth() +
  ggtitle("PHOF pageviews")
phofdur <- ggplot(ga_phof, aes(x = date, y = avg.timeon.page)) +
  geom_line() +
  geom_smooth() +
  ggtitle("phof time")
grid.arrange(phofpv, phofdur, ncol = 2)

## Fingertips: daily traffic from the first recorded date up to yesterday
fingertips.id <- "78054916"
fingertips_first <- firstdate(fingertips.id)
ga_fingertips <- get_ga(
  fingertips.id,
  start.date = fingertips_first,
  end.date = "yesterday",
  metrics = "ga:users,ga:sessions,ga:pageviews, ga:avgTimeonPage",
  dimensions = "ga:date"
)
ftipspv <- ggplot(ga_fingertips, aes(x = date, y = pageviews)) +
  geom_line() +
  geom_smooth() +
  ggtitle("Fingertips pageviews")
## The time-on-page panel must use the Fingertips data; it previously plotted
## ga_phof, so the chart showed PHOF figures under a Fingertips title.
ftipsdur <- ggplot(ga_fingertips, aes(x = date, y = avg.timeon.page)) +
  geom_line() +
  geom_smooth() +
  ggtitle("Fingertips average session duration")
grid.arrange(ftipspv, ftipsdur, ncol = 2)

##yhpho
##yhpho.id<- list_profiles(accountId = "8735278")$id
##yhpho_first <-firstdate(yhpho.id)
##ga_yhpho <- get_ga(yhpho.id, start.date = yhpho_first, end.date = "yesterday", metrics = "ga:users,ga:sessions,ga:pageviews", fetch.by = "day")
##qplot(data = ga_yhpho, date, pageviews, geom = "line") + geom_line(aes(date, sessions, colour = 'red'))+geom_smooth()
## Local Health: daily traffic from the first recorded date up to yesterday
## NOTE(review): this profile id is identical to ihal.id ("35140940"), so this
## section re-downloads the IHAL data - confirm the Local Health profile id.
local_health.id <- "35140940"
local.first <- firstdate(local_health.id)
ga_local <- get_ga(
  local_health.id,
  start.date = local.first,
  end.date = "yesterday",
  metrics = "ga:users,ga:sessions,ga:pageviews, ga:avgTimeonPage",
  dimensions = "ga:date"
)
## Daily pageviews and average time on page, each with a smoothed trend line
localpv <- ggplot(ga_local, aes(x = date, y = pageviews)) +
  geom_line() +
  geom_smooth() +
  ggtitle("Local health pageviews")
localdur <- ggplot(ga_local, aes(x = date, y = avg.timeon.page)) +
  geom_line() +
  geom_smooth() +
  ggtitle("Local health average session duration")
grid.arrange(localpv, localdur, ncol = 2)

## SHAPE: daily traffic from the first recorded date up to yesterday
shape.id <- "26308055"
shape_first <- firstdate(shape.id)
ga_shape <- get_ga(
  shape.id,
  start.date = shape_first,
  end.date = "yesterday",
  metrics = "ga:users,ga:sessions,ga:pageviews, ga:avgTimeonPage",
  dimensions = "ga:date"
)
## Daily pageviews and average time on page, each with a smoothed trend line
shapepv <- ggplot(ga_shape, aes(x = date, y = pageviews)) +
  geom_line() +
  geom_smooth() +
  ggtitle("SHAPE pageviews")
shapedur <- ggplot(ga_shape, aes(x = date, y = avg.timeon.page)) +
  geom_line() +
  geom_smooth() +
  ggtitle("SHAPE average session duration")
grid.arrange(shapepv, shapedur, ncol = 2)

## NOO: daily traffic from the first recorded date up to yesterday
noo.id <- "68505331"
noo_first <- firstdate(noo.id)
ga_noo <- get_ga(
  noo.id,
  start.date = noo_first,
  end.date = "yesterday",
  metrics = "ga:users,ga:sessions,ga:pageviews, ga:avgTimeonPage",
  dimensions = "ga:date"
)
## Daily pageviews and average time on page, each with a smoothed trend line
noopv <- ggplot(ga_noo, aes(x = date, y = pageviews)) +
  geom_line() +
  geom_smooth() +
  ggtitle("noo pageviews")
noodur <- ggplot(ga_noo, aes(x = date, y = avg.timeon.page)) +
  geom_line() +
  geom_smooth() +
  ggtitle("noo average session duration")
grid.arrange(noopv, noodur, ncol = 2)

## ChiMat: daily traffic from the first recorded date up to yesterday
chimat.id <- "12177926"
chimat_first <- firstdate(chimat.id)
ga_chimat <- get_ga(
  chimat.id,
  start.date = chimat_first,
  end.date = "yesterday",
  metrics = "ga:users,ga:sessions,ga:pageviews, ga:avgTimeonPage",
  dimensions = "ga:date"
)
## Daily pageviews and average time on page, each with a smoothed trend line
chimatpv <- ggplot(ga_chimat, aes(x = date, y = pageviews)) +
  geom_line() +
  geom_smooth() +
  ggtitle("chimat pageviews")
chimatdur <- ggplot(ga_chimat, aes(x = date, y = avg.timeon.page)) +
  geom_line() +
  geom_smooth() +
  ggtitle("chimat average session duration")
grid.arrange(chimatpv, chimatdur, ncol = 2)

## Tobacco: daily traffic from the first recorded date up to yesterday
tobacco.id <- "35885149"
tobacco_first <- firstdate(tobacco.id)
ga_tobacco <- get_ga(
  tobacco.id,
  start.date = tobacco_first,
  end.date = "yesterday",
  metrics = "ga:users,ga:sessions,ga:pageviews, ga:avgTimeonPage",
  dimensions = "ga:date"
)
## Daily pageviews and average time on page, each with a smoothed trend line
tobaccopv <- ggplot(ga_tobacco, aes(x = date, y = pageviews)) +
  geom_line() +
  geom_smooth() +
  ggtitle("tobacco pageviews")
tobaccodur <- ggplot(ga_tobacco, aes(x = date, y = avg.timeon.page)) +
  geom_line() +
  geom_smooth() +
  ggtitle("tobacco average session duration")
grid.arrange(tobaccopv, tobaccodur, ncol = 2)

## Healthier Lives: daily traffic from the first recorded date up to yesterday
healthier.lives.id <- "93017633"
healthier.lives_first <- firstdate(healthier.lives.id)
ga_healthier.lives <- get_ga(
  healthier.lives.id,
  start.date = healthier.lives_first,
  end.date = "yesterday",
  metrics = "ga:users,ga:sessions,ga:pageviews, ga:avgTimeonPage",
  dimensions = "ga:date"
)
## Daily pageviews and average time on page, each with a smoothed trend line
healthier.livespv <- ggplot(ga_healthier.lives, aes(x = date, y = pageviews)) +
  geom_line() +
  geom_smooth() +
  ggtitle("healthier.lives pageviews")
healthier.livesdur <- ggplot(ga_healthier.lives, aes(x = date, y = avg.timeon.page)) +
  geom_line() +
  geom_smooth() +
  ggtitle("healthier.lives average session duration")
grid.arrange(healthier.livespv, healthier.livesdur, ncol = 2)

## Mental Health: daily traffic from the first recorded date up to yesterday
mental.health.id <- "102894568"
mental.health_first <- firstdate(mental.health.id)
ga_mental.health <- get_ga(
  mental.health.id,
  start.date = mental.health_first,
  end.date = "yesterday",
  metrics = "ga:users,ga:sessions,ga:pageviews, ga:avgTimeonPage",
  dimensions = "ga:date"
)
## Daily pageviews and average time on page, each with a smoothed trend line
mental.healthpv <- ggplot(ga_mental.health, aes(x = date, y = pageviews)) +
  geom_line() +
  geom_smooth() +
  ggtitle("mental.health pageviews")
mental.healthdur <- ggplot(ga_mental.health, aes(x = date, y = avg.timeon.page)) +
  geom_line() +
  geom_smooth() +
  ggtitle("mental.health average session duration")
grid.arrange(mental.healthpv, mental.healthdur, ncol = 2)

## combine all datasets
library(data.table)
## Tag every product table with its profile id. The new column is named `id`
## in each table so that all tables share the same set of column names.
ga_local <- cbind(id = local_health.id, ga_local)
ga_mental.health <- cbind(id = mental.health.id, ga_mental.health)
ga_healthier.lives <- cbind(id = healthier.lives.id, ga_healthier.lives)
ga_tobacco <- cbind(id = tobacco.id, ga_tobacco)
ga_chimat <- cbind(id = chimat.id, ga_chimat)
ga_noo <- cbind(id = noo.id, ga_noo)
ga_fingertips <- cbind(id = fingertips.id, ga_fingertips)
ga_shape <- cbind(id = shape.id, ga_shape)
ga_phof <- cbind(id = phof.id, ga_phof)
ga_ihal <- cbind(id = ihal.id, ga_ihal)
comb <- list(
  ga_phof, ga_healthier.lives, ga_ihal, ga_mental.health, ga_tobacco,
  ga_chimat, ga_noo, ga_shape, ga_local, ga_fingertips
)
## Stack by column name rather than by position, so a table whose metric
## columns arrive in a different order is still aligned with the others.
## The result takes its column order from the first table (ga_phof).
ga_comb <- rbindlist(comb, use.names = TRUE)
## ga_phof's columns are id, date, users, sessions, pageviews, time on page,
## so the positional rename below labels each column correctly.
setnames(ga_comb, c("id", "date", "users", "sessions", "pageviews", "avgTimeonPage"))
## Sanity checks on the combined table; the `##` lines below each call are the
## console output captured when the script was last run.
## Row and column count:
dim(ga_comb)
## [1] 15616 6
## Column types:
str(ga_comb)
## Classes 'data.table' and 'data.frame': 15616 obs. of 6 variables:
## $ id : Factor w/ 9 levels "65073818","93017633",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ date : POSIXct, format: "2012-11-21" "2012-11-22" ...
## $ users : num 571 553 481 59 75 496 362 263 240 189 ...
## $ sessions : num 630 626 542 72 92 565 420 304 278 212 ...
## $ pageviews : num 1625 1624 1338 188 165 ...
## $ avgTimeonPage: num 131 123 129 126 161 ...
## - attr(*, ".internal.selfref")=<externalptr>
## NOTE(review): ten tables are combined above but `id` has only 9 levels,
## because ihal.id and local_health.id are both "35140940" - confirm the
## Local Health profile id.
## First rows (these belong to profile 65073818, i.e. phof.id):
head(ga_comb)
## id date users sessions pageviews avgTimeonPage
## 1: 65073818 2012-11-21 571 630 1625 131.4352
## 2: 65073818 2012-11-22 553 626 1624 122.6934
## 3: 65073818 2012-11-23 481 542 1338 129.1432
## 4: 65073818 2012-11-24 59 72 188 125.5345
## 5: 65073818 2012-11-25 75 92 165 161.0548
## 6: 65073818 2012-11-26 496 565 1487 137.1941
## further analysis
## Derive per-day dwell time and calendar fields in a single pass.
## dwell_time = pageviews x average time on page, divided by 86400
## (seconds per day; assumes avgTimeonPage is in seconds - TODO confirm).
ga_comb <- ga_comb %>%
  mutate(
    dwell_time = pageviews * avgTimeonPage / 86400,
    year = year(date),
    month = month(date, label = TRUE),
    period = paste(month, year, sep = "-")
  )
## Convert id to numeric before the keyed join with the profile lookup
ga_comb$id <- as.numeric(as.character(ga_comb$id))
setkey(ga_comb, id)
setkey(lookup1, id)
## X[Y] keeps every row of lookup1, so a profile with no downloaded data
## appears as a single row of NAs.
## NOTE(review): these NA rows are presumably the source of the
## "Removed 3 rows containing missing values" warnings in the plots.
ga_comb <- ga_comb[lookup1]
## Yearly totals per product. ungroup() drops the leftover `year` grouping
## that summarise() keeps, so later operations on ga_summary are not
## silently applied per group.
ga_summary <- ga_comb %>%
  group_by(year, name) %>%
  summarise(
    dwelltime = mean(dwell_time),
    users = sum(users),
    sessions = sum(sessions),
    pageviews = sum(pageviews)
  ) %>%
  ungroup()
## Yearly mean dwell time per product. Colour is mapped to the discrete
## product name, so the viridis *colour* scale is applied; the fill scale
## used previously had no effect because nothing in the plot maps `fill`.
ggplot(ga_summary, aes(x = year, y = dwelltime, colour = name, group = name)) +
  geom_point() +
  geom_line() +
  scale_colour_viridis(discrete = TRUE) +
  theme_bw()
## Warning: Removed 3 rows containing missing values (geom_point).

## Yearly total pageviews per product, same colour treatment as above
ggplot(ga_summary, aes(x = year, y = pageviews, colour = name, group = name)) +
  geom_point() +
  geom_line() +
  scale_colour_viridis(discrete = TRUE) +
  theme_bw()
## Warning: Removed 3 rows containing missing values (geom_point).

## Yearly total pageviews, one panel per product; group = 1 joins the yearly
## points within each panel into a single line
ggplot(ga_summary, aes(x = year, y = pageviews, group = 1)) +
  geom_point() +
  geom_line() +
  facet_wrap(~name)
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_path).
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?

## Daily series from 2013-01-01 onwards, one panel per product, each drawn as
## a line with a smoothed trend. The date filter is computed once and shared.
ga_since_2013 <- filter(ga_comb, date >= "2013-01-01")

## Users per day
ggplot(ga_since_2013, aes(x = date, y = users)) +
  geom_line() +
  geom_smooth() +
  facet_wrap(~name)

## Average time on page per day
ggplot(ga_since_2013, aes(x = date, y = avgTimeonPage)) +
  geom_line() +
  geom_smooth() +
  facet_wrap(~name)

## Pageviews per day
ggplot(ga_since_2013, aes(x = date, y = pageviews)) +
  geom_line() +
  geom_smooth() +
  facet_wrap(~name)

## Daily dwell time over the full date range, one panel per product, drawn as
## a line with a smoothed trend
ggplot(ga_comb, aes(x = date, y = dwell_time)) +
  geom_line() +
  geom_smooth() +
  facet_wrap(~name)
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_path).
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
