#How to pull Google Analytics reports, add a calculated metric, and save results back to an Excel file
#install and load packages
if(!require("googleAnalyticsR")){
install.packages("googleAnalyticsR")
library(googleAnalyticsR)
}
## Loading required package: googleAnalyticsR
if(!require("xlsx")){
install.packages("xlsx")
library(xlsx)
}
## Loading required package: xlsx
## Loading required package: rJava
## Loading required package: xlsxjars
if(!require("quantmod")){
install.packages("quantmod")
library(quantmod)
}
## Loading required package: quantmod
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
if (!require("gridExtra")) {
install.packages("gridExtra", dependencies = TRUE)
library(gridExtra)
}
## Loading required package: gridExtra
if (!require("grid")) {
install.packages("grid", dependencies = TRUE)
library(grid)
}
## Loading required package: grid
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:xts':
##
## first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#authorize Google Analytics
ga_auth()
## Token cache file: .httr-oauth
## 2017-06-05 10:14:59> Authenticated
#get account info so you can see what accounts you have access to
account_list<-ga_account_list()
#uncomment "account_list" below to review the accounts you have access to
#account_list
#assign the viewID you want to connect to the view_id variable
view_id<-113994018
#for a list of the API query names
#https://ga-dev-tools.appspot.com/query-explorer/
#access the default channels report
#i want to compare Q1 from 2016 and Q1 from 2017
channelsReport<- google_analytics_4(view_id,
date_range = c("2016-01-01", "2016-03-30", "2017-01-01", "2017-03-30"),
metrics = c("sessions", "newUsers", "bounceRate", "pageviewsPerSession", "avgSessionDuration",
"goalConversionRateAll", "goalCompletionsAll"),
dimensions = "channelGrouping")
## 2017-06-05 10:15:01> Downloaded [5] rows from a total of [5].
#inspect dataset using glimpse function from dplyr package
glimpse(channelsReport)
## Observations: 5
## Variables: 15
## $ channelGrouping <chr> "Direct", "Display", "Organic Search"...
## $ sessions.d1 <dbl> 67, 2, 15, 37, 5
## $ newUsers.d1 <dbl> 44, 2, 2, 7, 4
## $ bounceRate.d1 <dbl> 65.67164, 100.00000, 46.66667, 32.432...
## $ pageviewsPerSession.d1 <dbl> 2.238806, 1.000000, 1.866667, 7.16216...
## $ avgSessionDuration.d1 <dbl> 82.55224, 0.00000, 62.06667, 561.0810...
## $ goalConversionRateAll.d1 <dbl> 47.76119, 0.00000, 33.33333, 75.67568...
## $ goalCompletionsAll.d1 <dbl> 32, 0, 5, 28, 2
## $ sessions.d2 <dbl> 32, 0, 3, 1, 1
## $ newUsers.d2 <dbl> 26, 0, 1, 1, 0
## $ bounceRate.d2 <dbl> 71.875, 0.000, 0.000, 100.000, 100.000
## $ pageviewsPerSession.d2 <dbl> 3.687500, 0.000000, 5.666667, 1.00000...
## $ avgSessionDuration.d2 <dbl> 196.5312, 0.0000, 119.6667, 0.0000, 0...
## $ goalConversionRateAll.d2 <dbl> 31.25000, 0.00000, 66.66667, 0.00000,...
## $ goalCompletionsAll.d2 <dbl> 10, 0, 2, 0, 0
#calculate percentage difference between number of sessions from Q1 last year and Q1 this year
#using quantmod package and round to 2 decimals
Q1overQ1change<-round(Delt(channelsReport$sessions.d1, channelsReport$sessions.d2),2)
#check results
Q1overQ1change
## Delt.0.arithmetic
## [1,] -0.52
## [2,] -1.00
## [3,] -0.80
## [4,] -0.97
## [5,] -0.80
#add Q1overQ1change as a column back to the channelsReport dataframe
channelsReport$Q1oQ1PctgChange<-Q1overQ1change
#check results of new column
#the number of columns have doubled since I have
#one set of columns for the 1st date range
#and a second set for the 2nd date range
glimpse(channelsReport)
## Observations: 5
## Variables: 16
## $ channelGrouping <chr> "Direct", "Display", "Organic Search"...
## $ sessions.d1 <dbl> 67, 2, 15, 37, 5
## $ newUsers.d1 <dbl> 44, 2, 2, 7, 4
## $ bounceRate.d1 <dbl> 65.67164, 100.00000, 46.66667, 32.432...
## $ pageviewsPerSession.d1 <dbl> 2.238806, 1.000000, 1.866667, 7.16216...
## $ avgSessionDuration.d1 <dbl> 82.55224, 0.00000, 62.06667, 561.0810...
## $ goalConversionRateAll.d1 <dbl> 47.76119, 0.00000, 33.33333, 75.67568...
## $ goalCompletionsAll.d1 <dbl> 32, 0, 5, 28, 2
## $ sessions.d2 <dbl> 32, 0, 3, 1, 1
## $ newUsers.d2 <dbl> 26, 0, 1, 1, 0
## $ bounceRate.d2 <dbl> 71.875, 0.000, 0.000, 100.000, 100.000
## $ pageviewsPerSession.d2 <dbl> 3.687500, 0.000000, 5.666667, 1.00000...
## $ avgSessionDuration.d2 <dbl> 196.5312, 0.0000, 119.6667, 0.0000, 0...
## $ goalConversionRateAll.d2 <dbl> 31.25000, 0.00000, 66.66667, 0.00000,...
## $ goalCompletionsAll.d2 <dbl> 10, 0, 2, 0, 0
## $ Q1oQ1PctgChange <dbl> -0.52, -1.00, -0.80, -0.97, -0.80
#display results of new column as a grid using grid package
grid.table(channelsReport[c(1,16)])

#save data to an excel file
write.xlsx(channelsReport, file="channelsReportfile.xlsx", sheetName = "Channels",
col.names = TRUE, row.names = FALSE, append = FALSE)
#There. All done!