# Print numbers with two significant digits.
options(digits = 2)

# Chunk defaults: "#>" output prefix, source and output collapsed together,
# PNG figures at 150 dpi, warnings and messages suppressed.
knitr::opts_chunk$set(
  comment = "#>",
  collapse = TRUE,
  dev = "png",
  dpi = 150,
  warning = FALSE,
  message = FALSE
)

# Switch the session to an English locale; the locale name differs per OS type.
switch(.Platform$OS.type,
  unix = {
    Sys.setenv(LANG = "en_US.UTF-8")
    Sys.setlocale("LC_ALL", "en_US.UTF-8")
  },
  windows = Sys.setlocale("LC_ALL", "English")
)
## [1] "LC_CTYPE=en_US.UTF-8;LC_NUMERIC=C;LC_TIME=en_US.UTF-8;LC_COLLATE=en_US.UTF-8;LC_MONETARY=en_US.UTF-8;LC_MESSAGES=ru_RU.UTF-8;LC_PAPER=ru_RU.UTF-8;LC_NAME=C;LC_ADDRESS=C;LC_TELEPHONE=C;LC_MEASUREMENT=ru_RU.UTF-8;LC_IDENTIFICATION=C"
Prerequisites
Load R packages
library("RGA")
library("ggplot2")
library("scales")
library("reshape2")
library("dplyr", warn.conflicts = FALSE)
library("DT")
Authorisation
# Authorise this session before any data is requested.
# NOTE(review): authorize() presumably comes from the RGA package loaded above
# and is called here with its default arguments - confirm.
authorize()
Get Profile ID
# List the profiles returned by list_profiles()
profiles <- list_profiles()
# Site whose profile is analysed in the rest of the document
site_url <- "balancer.ru"
# Look up the profile ID for that site. which() discards rows where the
# comparison is NA (profiles with no website URL); plain logical indexing
# would turn those into NA rows and therefore NA ids.
id <- profiles[which(profiles$website.url == site_url), "id"]
# Fail here rather than in a later API call when the site is not found
if (length(id) == 0L) {
  stop("No profile found for site URL: ", site_url, call. = FALSE)
}
Heatmap plot of user activity
Setup dates range
# Range start: take the date returned by firstdate(id), snap it to the start
# of its week with cut(), then move one week forward (the next Monday).
start.date <- as.Date(cut(firstdate(id), "weeks")) + 7
# Range end: the day before the current week starts (the last Sunday).
end.date <- as.Date(cut(Sys.Date(), "weeks")) - 1
Get data
# Request page views for the date range, split by day of week and hour
ga_data <- get_ga(
  id,
  start.date = start.date,
  end.date = end.date,
  metrics = "ga:pageviews",
  dimensions = "ga:dayOfWeek,ga:hour"
)
Prepare data
# Renumber day 0 as day 7 so the codes run 1..7 and match the Mon..Sun labels
ga_data$day.of.week[ga_data$day.of.week == 0] <- 7
# Turn the day number into an ordered factor with short English day names
ga_data$day.of.week <- factor(
  ga_data$day.of.week,
  levels = 1:7,
  labels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"),
  ordered = TRUE
)
Draw plot
# Tile heatmap: one tile per (day of week, hour), coloured by page views
ggplot(ga_data, aes(x = day.of.week, y = hour, fill = pageviews)) +
  geom_tile(colour = "white") +
  # One axis break per hour value present in the data
  scale_y_continuous(breaks = unique(ga_data$hour)) +
  scale_fill_gradient(name = "Page views", low = "steelblue", high = "red") +
  theme(axis.ticks = element_blank()) +
  labs(
    title = "Heatmap daily user activity",
    x = "Day of week",
    y = "Hour"
  )

Time-Series Calendar Heatmap
Setup dates range
# Fixed reporting window, given as "YYYY-MM-DD" strings
start.date <- "2012-01-01"  # first day of the window
end.date <- "2014-12-31"    # last day of the window
Get data
# Request sessions per date for the window, with the sessions filter applied
ga_data <- get_ga(
  id,
  start.date = start.date,
  end.date = end.date,
  metrics = "ga:sessions",
  dimensions = "ga:date",
  filters = "ga:sessions > 0"
)
Prepare data
# Derive calendar fields from `date`, then compute the week's position inside
# its month (mweek) so each month can be drawn as a small calendar grid.
ga_data <- ga_data %>%
  mutate(
    year = as.numeric(format(date, "%Y")),
    # Month as an ordered factor with short English names
    month = ordered(
      as.numeric(format(date, "%m")),
      levels = 1:12,
      labels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
    ),
    # Week of the year (%W)
    yweek = as.numeric(format(date, "%W")),
    mday = as.numeric(format(date, "%d")),
    # Day of the week (%u, 1..7) as an ordered factor Mon..Sun
    wday = ordered(
      as.numeric(format(date, "%u")),
      levels = 1:7,
      labels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    ),
    yday = as.numeric(format(date, "%j"))
  ) %>%
  group_by(year, month) %>%
  # Week index within the month, starting at 1 for the first week present
  mutate(mweek = yweek - min(yweek) + 1) %>%
  ungroup()
Draw plot
# Calendar heatmap: one panel per year/month, weeks across, weekdays down
ggplot(ga_data, aes(x = mweek, y = wday)) +
  geom_tile(aes(fill = sessions), colour = "white") +
  # Print the day of the month on top of each tile
  geom_text(aes(label = mday)) +
  facet_grid(year ~ month) +
  # No breaks on the week axis
  scale_x_continuous(breaks = NULL) +
  # Reverse the weekday levels so the first level is drawn at the top
  scale_y_discrete(limits = rev(levels(ga_data$wday))) +
  scale_fill_gradient(name = "Sessions", low = "steelblue", high = "red") +
  labs(title = "Time-Series Calendar Heatmap", x = "", y = "")

Year by year comparison
Setup dates range
# Fixed reporting window, given as "YYYY-MM-DD" strings
start.date <- "2012-01-01"  # first day of the window
end.date <- "2014-12-31"    # last day of the window
Get data
# Request sessions per date and year, with the sessions filter applied
ga_data <- get_ga(
  id,
  start.date = start.date,
  end.date = end.date,
  metrics = "ga:sessions",
  dimensions = "ga:date,ga:year",
  filters = "ga:sessions > 0"
)
Draw plot
# Sessions over time, one stacked panel per year, each with its own x range
ggplot(ga_data, aes(x = as.Date(date), y = sessions)) +
  geom_line() +
  geom_point(size = 1.5) +
  facet_wrap(~ year, ncol = 1, scales = "free_x") +
  # Monthly breaks labelled with the abbreviated month name
  scale_x_date(
    labels = date_format("%b"),
    breaks = date_breaks("month")
  ) +
  labs(title = "Time-Series years comparison", x = "")

Year by year comparison
Setup dates range
# Fixed reporting window, given as "YYYY-MM-DD" strings
start.date <- "2012-01-01"  # first day of the window
end.date <- "2014-12-31"    # last day of the window
Get data
# Request sessions per year-month, with the sessions filter applied
ga_data <- get_ga(
  id,
  start.date = start.date,
  end.date = end.date,
  metrics = "ga:sessions",
  dimensions = "ga:yearMonth",
  filters = "ga:sessions > 0"
)
Prepare data
# Build a Date for each row by appending day "01" to the year.month value
ga_data$date <- as.Date(
  paste0(ga_data$year.month, "01"),
  format = "%Y%m%d"
)
Draw plot
# Monthly sessions as a single line, with dashed vertical markers at the start
# of each year. The x aesthetic refers to the `date` column by name: using
# `ga_data$date` inside aes() bypasses the layer data and gives wrong results
# as soon as the plot is faceted or the data is subset or reordered.
ggplot(data = ga_data, aes(x = date, y = sessions)) +
  geom_line() +
  geom_point(size = 1.5) +
  # Monthly breaks labelled "Mon YYYY", rotated so they do not overlap
  scale_x_date(labels = date_format("%b %Y"), breaks = date_breaks("month")) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  # cut(..., breaks = "year") yields one level per year covered by the data;
  # each level is the first day of that year, used as a marker position
  geom_vline(
    xintercept = as.numeric(as.Date(levels(cut(ga_data$date, breaks = "year")))),
    alpha = 0.5, linetype = "dashed"
  ) +
  labs(title = "Time-Series years comparison", x = "")

Time series
Setup dates range
# Relative reporting window, passed to the API as keyword strings
start.date <- "7daysAgo"   # window start
end.date <- "yesterday"    # window end
Get data
# Request users, sessions and page views per date-hour
ga_data <- get_ga(
  id,
  start.date = start.date,
  end.date = end.date,
  metrics = "ga:users,ga:sessions,ga:pageviews",
  dimensions = "ga:dateHour"
)
Prepare data
# Reshape to long form: one row per (date.hour, metric), metric name stored
# in the `metrics` column
ga_data <- melt(
  ga_data,
  id.vars = "date.hour",
  variable.name = "metrics"
)
Draw plot
# One coloured line per metric over the hourly timeline
ggplot(ga_data, aes(x = date.hour, y = value, colour = metrics)) +
  geom_line() +
  # Daily breaks labelled "DD Mon"
  scale_x_datetime(
    labels = date_format("%d %b"),
    breaks = date_breaks("day")
  ) +
  # Start the y axis at zero; the upper limit is taken from the data
  ylim(0, NA) +
  scale_color_discrete(name = "Metrics") +
  labs(title = "Time series", x = "", y = "")

Time series
Setup dates range
# Relative reporting window, passed to the API as keyword strings
start.date <- "31daysAgo"  # window start
end.date <- "yesterday"    # window end
Get data
# Request users, sessions and page views per date
ga_data <- get_ga(
  id,
  start.date = start.date,
  end.date = end.date,
  metrics = "ga:users,ga:sessions,ga:pageviews",
  dimensions = "ga:date"
)
Prepare data
# Reshape to long form: one row per (date, metric), metric name stored in the
# `metrics` column
ga_data <- melt(
  ga_data,
  id.vars = "date",
  variable.name = "metrics"
)
Draw plot
# One coloured line per metric over the daily timeline
# NOTE(review): scale_x_datetime() expects a date-time x variable; confirm
# that `date` is POSIXct here rather than Date.
ggplot(ga_data, aes(x = date, y = value, colour = metrics)) +
  geom_line() +
  # Weekly breaks labelled "DD Mon"
  scale_x_datetime(
    labels = date_format("%d %b"),
    breaks = date_breaks("week")
  ) +
  # Start the y axis at zero; the upper limit is taken from the data
  ylim(0, NA) +
  scale_color_discrete(name = "Metrics") +
  labs(title = "Time series", x = "", y = "")

Traffic sources
# Top 20 source/medium pairs by sessions for the fixed 2014 window
ga_data <- get_ga(
  id,
  start.date = "2014-03-01",
  end.date = "2014-12-31",
  metrics = "ga:sessions,ga:pageviews,ga:avgSessionDuration,ga:bounceRate",
  dimensions = "ga:source,ga:medium",
  sort = "-ga:sessions,ga:medium",
  max.results = 20
)
# Render as an interactive table. ga:bounceRate is reported on a 0-100 scale,
# while formatPercentage() multiplies the value by 100 before appending "%",
# so the column is rescaled to a 0-1 fraction first. The rescaling is applied
# only to the displayed copy; ga_data itself is left unchanged.
ga_data %>%
  mutate(bounce.rate = bounce.rate / 100) %>%
  datatable() %>%
  formatPercentage("bounce.rate")
Prepare data
# Sessions, page views, average session duration and bounce rate per medium
# for the fixed 2014 window, with the "(not set)" medium filtered out
ga_data <- get_ga(
  id,
  start.date = "2014-03-01",
  end.date = "2014-12-31",
  metrics = "ga:sessions,ga:pageviews,ga:avgSessionDuration,ga:bounceRate",
  dimensions = "ga:medium",
  sort = "-ga:sessions,ga:medium",
  filters = "ga:medium!@(not set)"
)
# Render as an interactive table. ga:bounceRate is reported on a 0-100 scale,
# while formatPercentage() multiplies the value by 100 before appending "%",
# so the column is rescaled to a 0-1 fraction first. The rescaling is applied
# only to the displayed copy; ga_data itself is left unchanged.
ga_data %>%
  mutate(bounce.rate = bounce.rate / 100) %>%
  datatable() %>%
  # Whole seconds for the duration, two decimals for the percentage
  formatRound("avg.session.duration", 0) %>%
  formatPercentage("bounce.rate", 2)