##============================================##
## google analytics for K&I products and tools##
##============================================##


##install.packages("RGA") ##Google Analytics API interface

library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(data.table)
## 
## Attaching package: 'data.table'
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, last
library(lubridate)
## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:data.table':
## 
##     hour, mday, month, quarter, wday, week, yday, year
library(tidyr)
require(knitr)
## Loading required package: knitr
library(RGA)
## Please use predefined Credentials only for the testing requests. To obtain your own Credentials see help(authorize).
library(RGoogleAnalytics)
## Loading required package: httr
library(gridExtra)
library(ggplot2)
library(viridis)

## authorise access
## The OAuth client id/secret are read from environment variables (e.g. set in
## ~/.Renviron) rather than being written into the script.
## NOTE(review): a client secret was previously embedded in this file; treat it
## as compromised and revoke / re-issue it in the Google API console.
ga_client_id <- Sys.getenv("RGA_CLIENT_ID")
ga_client_secret <- Sys.getenv("RGA_CLIENT_SECRET")
if (!nzchar(ga_client_id) || !nzchar(ga_client_secret)) {
  stop(
    "Set the RGA_CLIENT_ID and RGA_CLIENT_SECRET environment variables ",
    "before running this script.",
    call. = FALSE
  )
}
ga_token <- authorize(client.id = ga_client_id, client.secret = ga_client_secret)
## Access token will be stored in the '.ga-token.rds' file.
## extract profile ids

lookup <- list_profiles()
## Auto-refreshing stale OAuth token.
## Keep two columns (5 and 1) for twelve hand-picked profiles; the result is
## joined to the combined data on `id` and supplies `name` further down.
## NOTE(review): rows are selected by position in the list_profiles() result,
## which presumably shifts whenever profiles are added or removed - confirm, or
## select by profile id instead.
lookup1 <- as.data.table(lookup[c(3,23,42,45,46,63, 112,122,137,133,149,150),c(5,1)])



##ihal

ihal.id <- "35140940" ## extract profile ID
ihal_first <- firstdate(ihal.id) ## set start date to first recording date in analytics

## Daily metrics from the first recorded date up to yesterday.
## The metrics are requested in the same order as for every other site
## (users, sessions, pageviews, avgTimeonPage): the combined table further down
## may be assembled by column position, and a different order here would swap
## pageviews and avgTimeonPage for this site.
ga_ihal <- get_ga(
  ihal.id,
  start.date = ihal_first,
  end.date = "yesterday",
  metrics = "ga:users,ga:sessions,ga:pageviews,ga:avgTimeonPage",
  dimensions = "ga:date"
)

## Line chart with a smoothed trend for each of the two measures.
ihalpv <- ggplot(ga_ihal, aes(date, pageviews)) +
  geom_line() +
  geom_smooth() +
  ggtitle("IHAL pageviews")
ihaldur <- ggplot(ga_ihal, aes(date, avg.timeon.page)) +
  geom_line() +
  geom_smooth() +
  ggtitle("IHAL time")

## Show both charts side by side.
grid.arrange(ihalpv, ihaldur, ncol = 2)

##phof

phof.id <- "65073818" ## profile ID
phof_first <- firstdate(phof.id) ## start date passed to get_ga() below

## Daily metrics from phof_first up to yesterday.
ga_phof <- get_ga(
  phof.id,
  start.date = phof_first,
  end.date = "yesterday",
  metrics = "ga:users,ga:sessions,ga:pageviews,ga:avgTimeonPage",
  dimensions = "ga:date"
)

## Line chart with a smoothed trend for each of the two measures.
phofpv <- ggplot(ga_phof, aes(date, pageviews)) +
  geom_line() +
  geom_smooth() +
  ggtitle("PHOF pageviews")
phofdur <- ggplot(ga_phof, aes(date, avg.timeon.page)) +
  geom_line() +
  geom_smooth() +
  ggtitle("phof time")

## Show both charts side by side.
grid.arrange(phofpv, phofdur, ncol = 2)

##fingertips

fingertips.id <- "78054916" ## profile ID
fingertips_first <- firstdate(fingertips.id) ## start date passed to get_ga() below

## Daily metrics from fingertips_first up to yesterday.
ga_fingertips <- get_ga(
  fingertips.id,
  start.date = fingertips_first,
  end.date = "yesterday",
  metrics = "ga:users,ga:sessions,ga:pageviews, ga:avgTimeonPage",
  dimensions = "ga:date"
)

## Line chart with a smoothed trend for each of the two measures.
ftipspv <- ggplot(ga_fingertips, aes(date, pageviews)) +
  geom_line() +
  geom_smooth() +
  ggtitle("Fingertips pageviews")
## Plots the Fingertips data; this chart used to be drawn from ga_phof.
ftipsdur <- ggplot(ga_fingertips, aes(date, avg.timeon.page)) +
  geom_line() +
  geom_smooth() +
  ggtitle("Fingertips average session duration")

## Show both charts side by side.
grid.arrange(ftipspv, ftipsdur, ncol = 2)

##yhpho

##yhpho.id<-  list_profiles(accountId = "8735278")$id
##yhpho_first <-firstdate(yhpho.id)

##ga_yhpho <- get_ga(yhpho.id, start.date = yhpho_first, end.date = "yesterday", metrics = "ga:users,ga:sessions,ga:pageviews", fetch.by = "day")
##qplot(data = ga_yhpho, date, pageviews, geom = "line") + geom_line(aes(date, sessions, colour = 'red'))+geom_smooth()


##local_health

## NOTE(review): this is the same profile ID as ihal.id ("35140940"), so this
## section presumably downloads the IHAL data a second time - confirm the
## correct Local Health profile ID.
local_health.id <- "35140940"
local.first <- firstdate(local_health.id) ## start date passed to get_ga() below

## Daily metrics from local.first up to yesterday.
ga_local <- get_ga(
  local_health.id,
  start.date = local.first,
  end.date = "yesterday",
  metrics = "ga:users,ga:sessions,ga:pageviews, ga:avgTimeonPage",
  dimensions = "ga:date"
)

## Line chart with a smoothed trend for each of the two measures.
localpv <- ggplot(ga_local, aes(date, pageviews)) +
  geom_line() +
  geom_smooth() +
  ggtitle("Local health pageviews")
localdur <- ggplot(ga_local, aes(date, avg.timeon.page)) +
  geom_line() +
  geom_smooth() +
  ggtitle("Local health average session duration")

## Show both charts side by side.
grid.arrange(localpv, localdur, ncol = 2)

##shape

shape.id <- "26308055" ## profile ID
shape_first <- firstdate(shape.id) ## start date passed to get_ga() below

## Daily metrics from shape_first up to yesterday.
ga_shape <- get_ga(
  shape.id,
  start.date = shape_first,
  end.date = "yesterday",
  metrics = "ga:users,ga:sessions,ga:pageviews, ga:avgTimeonPage",
  dimensions = "ga:date"
)

## Line chart with a smoothed trend for each of the two measures.
shapepv <- ggplot(ga_shape, aes(date, pageviews)) +
  geom_line() +
  geom_smooth() +
  ggtitle("SHAPE pageviews")
shapedur <- ggplot(ga_shape, aes(date, avg.timeon.page)) +
  geom_line() +
  geom_smooth() +
  ggtitle("SHAPE average session duration")

## Show both charts side by side.
grid.arrange(shapepv, shapedur, ncol = 2)

##noo

noo.id <- "68505331" ## profile ID
noo_first <- firstdate(noo.id) ## start date passed to get_ga() below

## Daily metrics from noo_first up to yesterday.
ga_noo <- get_ga(
  noo.id,
  start.date = noo_first,
  end.date = "yesterday",
  metrics = "ga:users,ga:sessions,ga:pageviews, ga:avgTimeonPage",
  dimensions = "ga:date"
)

## Line chart with a smoothed trend for each of the two measures.
noopv <- ggplot(ga_noo, aes(date, pageviews)) +
  geom_line() +
  geom_smooth() +
  ggtitle("noo pageviews")
noodur <- ggplot(ga_noo, aes(date, avg.timeon.page)) +
  geom_line() +
  geom_smooth() +
  ggtitle("noo average session duration")

## Show both charts side by side.
grid.arrange(noopv, noodur, ncol = 2)

##chimat

chimat.id <- "12177926" ## profile ID
chimat_first <- firstdate(chimat.id) ## start date passed to get_ga() below

## Daily metrics from chimat_first up to yesterday.
ga_chimat <- get_ga(
  chimat.id,
  start.date = chimat_first,
  end.date = "yesterday",
  metrics = "ga:users,ga:sessions,ga:pageviews, ga:avgTimeonPage",
  dimensions = "ga:date"
)

## Line chart with a smoothed trend for each of the two measures.
chimatpv <- ggplot(ga_chimat, aes(date, pageviews)) +
  geom_line() +
  geom_smooth() +
  ggtitle("chimat pageviews")
chimatdur <- ggplot(ga_chimat, aes(date, avg.timeon.page)) +
  geom_line() +
  geom_smooth() +
  ggtitle("chimat average session duration")

## Show both charts side by side.
grid.arrange(chimatpv, chimatdur, ncol = 2)

##tobacco

tobacco.id <- "35885149" ## profile ID
tobacco_first <- firstdate(tobacco.id) ## start date passed to get_ga() below

## Daily metrics from tobacco_first up to yesterday.
ga_tobacco <- get_ga(
  tobacco.id,
  start.date = tobacco_first,
  end.date = "yesterday",
  metrics = "ga:users,ga:sessions,ga:pageviews, ga:avgTimeonPage",
  dimensions = "ga:date"
)

## Line chart with a smoothed trend for each of the two measures.
tobaccopv <- ggplot(ga_tobacco, aes(date, pageviews)) +
  geom_line() +
  geom_smooth() +
  ggtitle("tobacco pageviews")
tobaccodur <- ggplot(ga_tobacco, aes(date, avg.timeon.page)) +
  geom_line() +
  geom_smooth() +
  ggtitle("tobacco average session duration")

## Show both charts side by side.
grid.arrange(tobaccopv, tobaccodur, ncol = 2)

##healthier lives

healthier.lives.id <- "93017633" ## profile ID
healthier.lives_first <- firstdate(healthier.lives.id) ## start date passed to get_ga() below

## Daily metrics from healthier.lives_first up to yesterday.
ga_healthier.lives <- get_ga(
  healthier.lives.id,
  start.date = healthier.lives_first,
  end.date = "yesterday",
  metrics = "ga:users,ga:sessions,ga:pageviews, ga:avgTimeonPage",
  dimensions = "ga:date"
)

## Line chart with a smoothed trend for each of the two measures.
healthier.livespv <- ggplot(ga_healthier.lives, aes(date, pageviews)) +
  geom_line() +
  geom_smooth() +
  ggtitle("healthier.lives pageviews")
healthier.livesdur <- ggplot(ga_healthier.lives, aes(date, avg.timeon.page)) +
  geom_line() +
  geom_smooth() +
  ggtitle("healthier.lives average session duration")

## Show both charts side by side.
grid.arrange(healthier.livespv, healthier.livesdur, ncol = 2)

##Mental health

mental.health.id <- "102894568" ## profile ID
mental.health_first <- firstdate(mental.health.id) ## start date passed to get_ga() below

## Daily metrics from mental.health_first up to yesterday.
ga_mental.health <- get_ga(
  mental.health.id,
  start.date = mental.health_first,
  end.date = "yesterday",
  metrics = "ga:users,ga:sessions,ga:pageviews, ga:avgTimeonPage",
  dimensions = "ga:date"
)

## Line chart with a smoothed trend for each of the two measures.
mental.healthpv <- ggplot(ga_mental.health, aes(date, pageviews)) +
  geom_line() +
  geom_smooth() +
  ggtitle("mental.health pageviews")
mental.healthdur <- ggplot(ga_mental.health, aes(date, avg.timeon.page)) +
  geom_line() +
  geom_smooth() +
  ggtitle("mental.health average session duration")

## Show both charts side by side.
grid.arrange(mental.healthpv, mental.healthdur, ncol = 2)

## combine all datasets
library(data.table)

## Prepend each site's profile id as a column called `id`, so every table has
## the same column names.
ga_local <- cbind(id = local_health.id, ga_local)
ga_mental.health <- cbind(id = mental.health.id, ga_mental.health)
ga_healthier.lives <- cbind(id = healthier.lives.id, ga_healthier.lives)
ga_tobacco <- cbind(id = tobacco.id, ga_tobacco)
ga_chimat <- cbind(id = chimat.id, ga_chimat)
ga_noo <- cbind(id = noo.id, ga_noo)
ga_fingertips <- cbind(id = fingertips.id, ga_fingertips)
ga_shape <- cbind(id = shape.id, ga_shape)
ga_phof <- cbind(id = phof.id, ga_phof)
ga_ihal <- cbind(id = ihal.id, ga_ihal)

comb <- list(
  ga_phof, ga_healthier.lives, ga_ihal, ga_mental.health, ga_tobacco,
  ga_chimat, ga_noo, ga_shape, ga_local, ga_fingertips
)

## Stack the tables matching columns by NAME rather than by position, so a site
## whose metrics were requested in a different order cannot end up with
## pageviews and time-on-page swapped.
## NOTE(review): assumes get_ga() names the metric columns `users`, `sessions`,
## `pageviews` and `avg.timeon.page` (the last two are used by the plots above)
## - confirm.
ga_comb <- rbindlist(comb, use.names = TRUE)
setnames(ga_comb, "avg.timeon.page", "avgTimeonPage")
setcolorder(ga_comb, c("id", "date", "users", "sessions", "pageviews", "avgTimeonPage"))

## Quick inspection; the output below was recorded on an earlier run.
dim(ga_comb)
## [1] 15616     6
str(ga_comb)
## Classes 'data.table' and 'data.frame':   15616 obs. of  6 variables:
##  $ id           : Factor w/ 9 levels "65073818","93017633",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ date         : POSIXct, format: "2012-11-21" "2012-11-22" ...
##  $ users        : num  571 553 481 59 75 496 362 263 240 189 ...
##  $ sessions     : num  630 626 542 72 92 565 420 304 278 212 ...
##  $ pageviews    : num  1625 1624 1338 188 165 ...
##  $ avgTimeonPage: num  131 123 129 126 161 ...
##  - attr(*, ".internal.selfref")=<externalptr>
head(ga_comb)
##          id       date users sessions pageviews avgTimeonPage
## 1: 65073818 2012-11-21   571      630      1625      131.4352
## 2: 65073818 2012-11-22   553      626      1624      122.6934
## 3: 65073818 2012-11-23   481      542      1338      129.1432
## 4: 65073818 2012-11-24    59       72       188      125.5345
## 5: 65073818 2012-11-25    75       92       165      161.0548
## 6: 65073818 2012-11-26   496      565      1487      137.1941
## further analysis
## dwell_time: calculate dwell time per day as pageviews * avgTimeonPage / 86400
## (presumably avgTimeonPage is in seconds, giving dwell time in days - confirm).
## year / month / period are calendar labels derived from `date`.
ga_comb <- ga_comb %>%
  mutate(
    dwell_time = pageviews * avgTimeonPage / 86400,
    year = year(date),
    month = month(date, label = TRUE),
    period = paste(month, year, sep = "-")
  )

## dplyr verbs can leave a data.table without valid internal bookkeeping;
## setDT() restores it before the by-reference operations below.
setDT(ga_comb)

## Use the same (numeric) type for the join key on both sides.
ga_comb[, id := as.numeric(as.character(id))]
lookup1[, id := as.numeric(as.character(id))]

setkey(ga_comb, id)
setkey(lookup1, id)

## Attach the profile name to every data row. nomatch = 0L drops lookup entries
## that have no data; without it each of them becomes an all-NA row, which is
## what produced the "Removed 3 rows containing missing values" warnings in the
## plots that follow.
ga_comb <- ga_comb[lookup1, nomatch = 0L]

## Yearly summary per site: mean daily dwell time and yearly totals of users,
## sessions and pageviews. ungroup() so later operations on ga_summary are not
## silently performed per year.
ga_summary <- ga_comb %>%
  group_by(year, name) %>%
  summarise(
    dwelltime = mean(dwell_time),
    users = sum(users),
    sessions = sum(sessions),
    pageviews = sum(pageviews)
  ) %>%
  ungroup()

## One coloured line per site. The sites are mapped to `colour` (not `fill`) and
## `name` is discrete, so the viridis palette has to be applied through the
## discrete colour scale to have any effect.
ggplot(ga_summary, aes(year, dwelltime, colour = name, group = name)) +
  geom_point() +
  geom_line() +
  scale_color_viridis(discrete = TRUE) +
  theme_bw()
## Warning: Removed 3 rows containing missing values (geom_point).

ggplot(ga_summary, aes(year, pageviews, colour = name, group = name)) +
  geom_point() +
  geom_line() +
  scale_color_viridis(discrete = TRUE) +
  theme_bw()
## Warning: Removed 3 rows containing missing values (geom_point).

## Same yearly pageviews, one panel per site.
ggplot(ga_summary, aes(year, pageviews, group = 1)) +
  geom_point() +
  geom_line() +
  facet_wrap(~name)
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_path).
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?

## Daily series per site (one panel each), line plus smoothed trend.
## The first three charts only use data from 2013-01-01 onwards.
recent_ga <- filter(ga_comb, date >= '2013-01-01')

ggplot(recent_ga, aes(date, users)) +
  geom_line() +
  geom_smooth() +
  facet_wrap(~name)

ggplot(recent_ga, aes(date, avgTimeonPage)) +
  geom_line() +
  geom_smooth() +
  facet_wrap(~name)

ggplot(recent_ga, aes(date, pageviews)) +
  geom_line() +
  geom_smooth() +
  facet_wrap(~name)

## Dwell time uses the full date range.
ggplot(ga_comb, aes(date, dwell_time)) +
  geom_line() +
  geom_smooth() +
  facet_wrap(~name)
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_path).
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?