#A step-by-step guide to connect Google Analytics to R using the
#googleAnalyticsR package
#browseURL("https://cran.r-project.org/web/packages/googleAnalyticsR/googleAnalyticsR.pdf")
#install and load packages
# Install googleAnalyticsR on first use, then attach it.
# requireNamespace() only checks whether the package is available; the
# unconditional library() call does the attaching and stops with an error
# if the installation did not succeed (require() would only warn).
if (!requireNamespace("googleAnalyticsR", quietly = TRUE)) {
  install.packages("googleAnalyticsR")
}
library(googleAnalyticsR)
## Loading required package: googleAnalyticsR
## Warning in as.POSIXlt.POSIXct(x, tz): unknown timezone 'zone/tz/2018c.1.0/
## zoneinfo/Europe/Stockholm'
## 2018-03-23 20:56:27> Default Google Project for googleAnalyticsR is now set. This is shared with all googleAnalyticsR users.
## If making a lot of API calls, please:
## 1) create your own Google Project at https://console.developers.google.com
## 2) Activate the Google Analytics Reporting API
## 3) set options(googleAuthR.client_id) and options(googleAuthR.client_secret)
## 4) Reload the package.
# Install quantmod on first use, then attach it.
# requireNamespace() only checks whether the package is available; the
# unconditional library() call does the attaching and stops with an error
# if the installation did not succeed (require() would only warn).
if (!requireNamespace("quantmod", quietly = TRUE)) {
  install.packages("quantmod")
}
library(quantmod)
## Loading required package: quantmod
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
library(ggplot2)
library(tidyverse)
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## first(): dplyr, xts
## lag(): dplyr, stats
## last(): dplyr, xts
# Authorize this R session against Google Analytics.
ga_auth()
## Token cache file: .httr-oauth
## 2018-03-23 20:56:29> Authenticated
# Fetch the list of accounts you have access to.
account_list <- ga_account_list()
# Uncomment "account_list" below to inspect your accounts in the console.
# account_list
# Store the view you want to query in view_id;
# the value comes from the viewID column of the account list.
view_id <- 123847950 # my own website blog
#to find the API names of the metrics and dimensions you want to query,
#you can use the Query Explorer (hit command + click): https://ga-dev-tools.appspot.com/query-explorer/
#which also lets you practice your queries
#or the Dimensions & Metrics Explorer:
#https://developers.google.com/analytics/devguides/reporting/core/dimsmets#cats=user
#https://developers.google.com/analytics/devguides/reporting/core/dimsmets
######################Simple Channels Query###################################
# Pull channel data for the last 30 days, giving the range as Date objects.
channels <- google_analytics_4(
  viewId = view_id,
  date_range = c(Sys.Date() - 30, Sys.Date()),
  dimensions = "channelGrouping",
  metrics = c("sessions", "pageviews", "entrances", "bounces")
)
## 2018-03-23 20:56:30> Downloaded [2] rows from a total of [2].
# The date range can also be given with the API's relative date keywords.
channels <- google_analytics_4(
  viewId = view_id,
  date_range = c("30daysAgo", "yesterday"),
  dimensions = "channelGrouping",
  metrics = c("sessions", "pageviews", "entrances", "bounces")
)
## 2018-03-23 20:56:30> Downloaded [2] rows from a total of [2].
# Inspect the structure of the result.
str(channels)
## 'data.frame': 2 obs. of 5 variables:
## $ channelGrouping: chr "Direct" "Organic Search"
## $ sessions : num 3 2
## $ pageviews : num 38 4
## $ entrances : num 3 2
## $ bounces : num 0 0
## - attr(*, "totals")=List of 1
## ..$ :List of 4
## .. ..$ sessions : chr "5"
## .. ..$ pageviews: chr "42"
## .. ..$ entrances: chr "5"
## .. ..$ bounces : chr "0"
## - attr(*, "minimums")=List of 1
## ..$ :List of 4
## .. ..$ sessions : chr "2"
## .. ..$ pageviews: chr "4"
## .. ..$ entrances: chr "2"
## .. ..$ bounces : chr "0"
## - attr(*, "maximums")=List of 1
## ..$ :List of 4
## .. ..$ sessions : chr "3"
## .. ..$ pageviews: chr "38"
## .. ..$ entrances: chr "3"
## .. ..$ bounces : chr "0"
## - attr(*, "isDataGolden")= logi TRUE
## - attr(*, "rowCount")= int 2
#################Multiple dimensions and working with dates####################################
# Pull the same metrics as before, this time over a fixed custom date range
# and split by two dimensions (channel grouping and date).
channels2 <- google_analytics_4(
  viewId = view_id,
  date_range = c("2017-01-01", "2017-12-31"),
  dimensions = c("channelGrouping", "date"),
  metrics = c("sessions", "pageviews", "entrances", "bounces")
)
## 2018-03-23 20:56:31> Downloaded [115] rows from a total of [115].
# Print the new data.
channels2
## channelGrouping date sessions pageviews entrances bounces
## 1 Direct 2017-01-15 5 73 5 1
## 2 Direct 2017-01-17 1 1 1 1
## 3 Direct 2017-01-30 1 3 1 0
## 4 Direct 2017-02-05 1 5 1 0
## 5 Direct 2017-02-09 1 2 1 0
## 6 Direct 2017-03-12 18 18 18 18
## 7 Direct 2017-03-19 3 5 3 2
## 8 Direct 2017-03-24 1 10 1 0
## 9 Direct 2017-03-28 1 1 1 1
## 10 Direct 2017-04-05 1 5 1 0
## 11 Direct 2017-04-17 1 1 1 1
## 12 Direct 2017-04-29 3 23 3 2
## 13 Direct 2017-05-06 9 143 9 1
## 14 Direct 2017-05-07 4 23 4 0
## 15 Direct 2017-05-08 1 14 1 0
## 16 Direct 2017-05-10 7 46 7 1
## 17 Direct 2017-05-11 1 1 1 1
## 18 Direct 2017-05-12 1 1 1 1
## 19 Direct 2017-05-13 3 26 3 2
## 20 Direct 2017-05-14 2 10 2 1
## 21 Direct 2017-05-16 6 16 6 4
## 22 Direct 2017-05-17 2 2 2 2
## 23 Direct 2017-05-18 7 36 7 4
## 24 Direct 2017-05-19 2 5 2 1
## 25 Direct 2017-05-20 6 49 6 2
## 26 Direct 2017-05-21 1 1 1 1
## 27 Direct 2017-05-22 1 2 1 0
## 28 Direct 2017-05-23 5 10 5 4
## 29 Direct 2017-05-24 1 1 1 1
## 30 Direct 2017-05-25 2 2 2 2
## 31 Direct 2017-05-30 2 2 2 2
## 32 Direct 2017-06-02 16 16 16 16
## 33 Direct 2017-06-04 3 19 3 1
## 34 Direct 2017-06-05 3 15 3 1
## 35 Direct 2017-06-07 2 3 2 1
## 36 Direct 2017-06-09 1 6 1 0
## 37 Direct 2017-06-12 2 3 2 1
## 38 Direct 2017-06-16 2 2 2 2
## 39 Direct 2017-06-20 2 5 2 1
## 40 Direct 2017-06-29 1 6 1 0
## 41 Direct 2017-07-07 2 4 2 1
## 42 Direct 2017-07-17 13 13 13 13
## 43 Direct 2017-08-17 10 10 10 10
## 44 Direct 2017-08-24 2 2 2 2
## 45 Direct 2017-09-11 1 1 1 1
## 46 Direct 2017-10-01 4 38 4 1
## 47 Direct 2017-10-05 1 2 1 0
## 48 Direct 2017-10-18 2 4 2 0
## 49 Direct 2017-10-24 2 10 2 0
## 50 Direct 2017-10-31 1 2 1 0
## 51 Direct 2017-11-05 5 5 5 5
## 52 Direct 2017-11-11 1 2 1 0
## 53 Direct 2017-11-30 1 2 1 0
## 54 Direct 2017-12-03 2 20 2 0
## 55 Direct 2017-12-11 1 2 1 0
## 56 Direct 2017-12-12 1 2 1 0
## 57 Direct 2017-12-24 1 2 1 0
## 58 Organic Search 2017-01-07 2 15 2 0
## 59 Organic Search 2017-02-07 1 2 1 0
## 60 Organic Search 2017-04-26 1 1 1 1
## 61 Organic Search 2017-04-27 1 3 1 0
## 62 Organic Search 2017-05-06 2 5 2 0
## 63 Organic Search 2017-05-07 1 8 1 0
## 64 Organic Search 2017-05-08 1 1 1 1
## 65 Organic Search 2017-05-10 4 16 4 2
## 66 Organic Search 2017-05-13 1 1 1 1
## 67 Organic Search 2017-05-18 1 3 1 0
## 68 Organic Search 2017-05-20 1 1 1 1
## 69 Organic Search 2017-05-23 1 3 1 0
## 70 Organic Search 2017-06-13 1 1 1 1
## 71 Organic Search 2017-07-04 1 14 1 0
## 72 Organic Search 2017-08-18 1 1 1 1
## 73 Organic Search 2017-08-27 1 1 1 1
## 74 Organic Search 2017-09-01 1 4 1 0
## 75 Organic Search 2017-09-03 1 1 1 1
## 76 Organic Search 2017-09-09 1 1 1 1
## 77 Organic Search 2017-09-28 1 1 1 1
## 78 Organic Search 2017-10-02 1 2 1 0
## 79 Organic Search 2017-10-12 1 2 1 0
## 80 Organic Search 2017-10-17 1 2 1 0
## 81 Organic Search 2017-10-24 1 2 1 0
## 82 Organic Search 2017-10-25 1 2 1 0
## 83 Organic Search 2017-11-06 1 2 1 0
## 84 Organic Search 2017-11-07 2 4 2 0
## 85 Organic Search 2017-11-15 1 1 1 1
## 86 Organic Search 2017-11-22 1 2 1 0
## 87 Organic Search 2017-11-24 1 4 1 0
## 88 Organic Search 2017-11-29 1 2 1 0
## 89 Organic Search 2017-12-04 1 2 1 0
## 90 Organic Search 2017-12-08 1 2 1 0
## 91 Organic Search 2017-12-11 1 2 1 0
## 92 Organic Search 2017-12-12 1 2 1 0
## 93 Organic Search 2017-12-17 1 2 1 0
## 94 Referral 2017-01-04 1 1 1 1
## 95 Referral 2017-04-23 1 1 1 1
## 96 Referral 2017-05-04 1 1 1 1
## 97 Referral 2017-05-05 1 1 1 1
## 98 Referral 2017-05-08 1 1 1 1
## 99 Referral 2017-05-22 1 1 1 1
## 100 Referral 2017-06-12 1 6 1 0
## 101 Referral 2017-06-20 1 3 1 0
## 102 Referral 2017-06-27 1 1 1 1
## 103 Referral 2017-07-01 2 2 2 2
## 104 Referral 2017-08-01 1 1 1 1
## 105 Referral 2017-10-02 1 2 1 0
## 106 Referral 2017-10-21 1 1 1 1
## 107 Referral 2017-10-22 4 4 4 4
## 108 Referral 2017-12-21 1 2 1 0
## 109 Referral 2017-12-30 1 1 1 1
## 110 Referral 2017-12-31 1 1 1 1
## 111 Social 2017-01-06 1 1 1 1
## 112 Social 2017-04-18 1 1 1 1
## 113 Social 2017-05-13 1 1 1 1
## 114 Social 2017-05-25 1 1 1 1
## 115 Social 2017-06-11 1 1 1 1
# Check the class of the date column.
class(channels2[["date"]])
## [1] "Date"
# Count how many rows of data each channel has.
tally(group_by(channels2, channelGrouping))
## # A tibble: 4 x 2
## channelGrouping n
## <chr> <int>
## 1 Direct 57
## 2 Organic Search 36
## 3 Referral 17
## 4 Social 5
###########IGNORE#####################
# #summarize by date
# channels3<-channels2 %>%
# group_by(channelGrouping, date=floor_date(date, "month")) %>%
# summarize(sum = sum(sessions), n = n())
#
#
# #check results
# head(channels3)
#
# #drop last column
# channels3 <- channels3[,1:3]
#
# #rename 3rd column
# #rename(channels3, c("channelGrouping"= "channelGrouping", "date" = "date", "sum" = "totalSessions"))
#
# #find tops and lows of session counts
# findPeaks(channels3$sum, thresh = 0)
# findValleys(channels3$sum, thresh = 0)
######################Device Category Filter###################################
# Query sessions and users per device category for 2017.
device_cat <- google_analytics_4(
  viewId = view_id,
  date_range = c("2017-01-01", "2017-12-31"),
  dimensions = "deviceCategory",
  metrics = c("sessions", "users")
)
## 2018-03-23 20:56:32> Downloaded [2] rows from a total of [2].
# Print the result.
device_cat
## deviceCategory sessions users
## 1 desktop 232 154
## 2 mobile 18 8
# Add each device category's share of all users, in percent.
device_cat <- mutate(device_cat, user_pct = users / sum(users) * 100)
# Bar chart of the number of users per device category.
ggplot(device_cat, aes(deviceCategory, users)) +
  geom_bar(stat = "identity", show.legend = FALSE) +
  ggtitle("Users by Device") +
  theme_light()

# Same bar chart, ordered by descending user count and labelled with each
# device category's share of users (the user_pct column of device_cat).
#
# Constant text settings (font family, face, colour, size) are passed as
# plain geom_text() arguments rather than inside aes(), which is meant for
# mappings from data columns. The earlier version also put `position`
# inside aes() -- it is not an aesthetic, so ggplot2 warned "Ignoring
# unknown aesthetics: position" -- and mapped size = 14 there while also
# setting size = 3.5 outside aes(); only the 3.5 setting is kept.
ggplot(
  data = device_cat,
  aes(x = reorder(deviceCategory, -users), y = users)
) +
  geom_col(fill = "steelblue") +
  geom_text(
    aes(label = paste0(round(user_pct, 2), "%")),
    family = "Arial",
    fontface = "bold",
    vjust = 1.6, # nudges the label down from the top of the bar
    color = "white",
    size = 3.5
  ) +
  ggtitle("Distribution of users by device type") +
  xlab(" ") +
  ylab("total users") +
  theme_minimal()

# Query sessions per date and device category for June-December 2017.
device_bydate <- google_analytics_4(
  viewId = view_id,
  date_range = c("2017-06-01", "2017-12-31"),
  dimensions = c("date", "deviceCategory"),
  metrics = "sessions"
)
## 2018-03-23 20:56:34> Downloaded [58] rows from a total of [58].
# Print the result.
device_bydate
## date deviceCategory sessions
## 1 2017-06-02 desktop 16
## 2 2017-06-04 desktop 3
## 3 2017-06-05 desktop 3
## 4 2017-06-07 desktop 2
## 5 2017-06-09 desktop 1
## 6 2017-06-11 mobile 1
## 7 2017-06-12 desktop 3
## 8 2017-06-13 desktop 1
## 9 2017-06-16 desktop 2
## 10 2017-06-20 desktop 3
## 11 2017-06-27 desktop 1
## 12 2017-06-29 desktop 1
## 13 2017-07-01 desktop 2
## 14 2017-07-04 desktop 1
## 15 2017-07-07 desktop 2
## 16 2017-07-17 desktop 13
## 17 2017-08-01 desktop 1
## 18 2017-08-17 desktop 10
## 19 2017-08-18 desktop 1
## 20 2017-08-24 desktop 2
## 21 2017-08-27 desktop 1
## 22 2017-09-01 desktop 1
## 23 2017-09-03 desktop 1
## 24 2017-09-09 desktop 1
## 25 2017-09-11 desktop 1
## 26 2017-09-28 desktop 1
## 27 2017-10-01 desktop 4
## 28 2017-10-02 desktop 1
## 29 2017-10-02 mobile 1
## 30 2017-10-05 mobile 1
## 31 2017-10-12 desktop 1
## 32 2017-10-17 desktop 1
## 33 2017-10-18 desktop 2
## 34 2017-10-21 desktop 1
## 35 2017-10-22 desktop 4
## 36 2017-10-24 desktop 3
## 37 2017-10-25 desktop 1
## 38 2017-10-31 desktop 1
## 39 2017-11-05 desktop 5
## 40 2017-11-06 desktop 1
## 41 2017-11-07 desktop 2
## 42 2017-11-11 desktop 1
## 43 2017-11-15 desktop 1
## 44 2017-11-22 desktop 1
## 45 2017-11-24 desktop 1
## 46 2017-11-29 desktop 1
## 47 2017-11-30 desktop 1
## 48 2017-12-03 desktop 2
## 49 2017-12-04 desktop 1
## 50 2017-12-08 desktop 1
## 51 2017-12-11 desktop 1
## 52 2017-12-11 mobile 1
## 53 2017-12-12 desktop 2
## 54 2017-12-17 desktop 1
## 55 2017-12-21 desktop 1
## 56 2017-12-24 desktop 1
## 57 2017-12-30 desktop 1
## 58 2017-12-31 desktop 1
# Line chart of sessions over time, one coloured line per device category.
ggplot(
  data = device_bydate,
  mapping = aes(x = date, y = sessions, color = deviceCategory)
) +
  theme_light() +
  geom_line()

######################Where do our users come from? Query#############################
# Query users per country for the last 30 days,
# ordered by the value of the users metric, descending.
country_info <- google_analytics_4(
  viewId = view_id,
  date_range = c("30daysAgo", "yesterday"),
  dimensions = "country",
  metrics = "users",
  order = order_type(
    "users",
    sort_order = "DESCENDING",
    orderType = "VALUE"
  )
)
## 2018-03-23 20:56:35> Downloaded [4] rows from a total of [4].
# Print the result.
country_info
## country users
## 1 Denmark 1
## 2 Sweden 1
## 3 United Kingdom 1
## 4 United States 1
##########Lets visualize our data
# Pie chart with percentage labels, drawn with base R graphics.
slices <- country_info$users
# Each country's share of users, rounded to a whole percent.
pct <- round(slices / sum(slices) * 100)
# Build labels of the form "<country> <pct>%".
lbls <- paste0(paste(country_info$country, pct), "%")
pie(
  slices,
  labels = lbls,
  col = rainbow(length(lbls)),
  main = "Users by Country"
)

# The same data as a bar chart, which stays readable when there are
# too many countries for a pie chart.
ggplot(country_info, aes(country, users, fill = country)) +
  geom_bar(stat = "identity", show.legend = FALSE) +
  ggtitle("Users by Country") +
  theme_classic()

######################Detailed Device Information Query#############################
# Query browser and device details for 2017.
# max = 5 limits the output to the first 5 rows.
browser_info <- google_analytics_4(
  viewId = view_id,
  date_range = c("2017-01-01", "2017-12-31"),
  dimensions = c(
    "browser", "browserVersion",
    "browserSize", "screenResolution", "mobileDeviceinfo"
  ),
  metrics = c("sessions", "users", "goalCompletionsAll"),
  max = 5
)
## 2018-03-23 20:56:35> Downloaded [5] rows from a total of [8].
# Print the result.
browser_info
## browser browserVersion browserSize screenResolution mobileDeviceinfo
## 1 Firefox 38.0 (not set) 800x600 (not set)
## 2 Safari 10.0 320x460 320x568 Apple iPhone
## 3 Safari 10.0 380x560 375x667 Apple iPhone
## 4 Safari 10.0 410x630 414x736 Apple iPhone
## 5 Safari 600.1.4 380x530 375x667 Apple iPhone
## sessions users goalCompletionsAll
## 1 1 1 0
## 2 8 3 0
## 3 2 2 0
## 4 1 1 0
## 5 1 1 0
# Bar chart of total sessions per browser size, largest first, with the
# session count printed in white near the top of each bar.
# The plot title previously read "browsers sizes"; the typo is fixed here.
ggplot(
  data = browser_info,
  aes(x = reorder(browserSize, -sessions), y = sessions)
) +
  geom_col() +
  geom_text(
    aes(label = sessions),
    vjust = 1.6,
    color = "white",
    size = 3.5
  ) +
  ggtitle("Which browser sizes are our visitors using?") +
  xlab(" ") +
  ylab("total sessions") +
  theme_classic()

# Stacked bar chart: one bar per browser, stacked by screen resolution.
# No y aesthetic is given, so geom_bar() uses its default count stat and
# bar heights are the number of rows in browser_info, not session totals.
ggplot(browser_info, aes(x = browser, fill = screenResolution)) +
  geom_bar()

###############################Creating Dimension Filters#############################################
# Build a dimension filter on deviceCategory using the regular expression
# ".*mobile.*", so that only mobile devices are returned.
dim_filter_object <- dim_filter(
  "deviceCategory",
  operator = "REGEXP",
  expressions = ".*mobile.*"
)
# Wrap the filter object in a filter clause.
dim_filter_clause <- filter_clause_ga4(
  list(dim_filter_object),
  operator = "AND"
)
# Pull the data with the dimension filter applied.
google_analytics_4(
  viewId = view_id,
  date_range = c("2017-01-01", "2017-12-31"),
  dimensions = "deviceCategory",
  metrics = c("sessions", "goalCompletionsAll"),
  dim_filters = dim_filter_clause
)
## 2018-03-23 20:56:36> Downloaded [1] rows from a total of [1].
## deviceCategory sessions goalCompletionsAll
## 1 mobile 18 0
###############################Creating Metric Filters#############################################
# Build a metric filter that keeps only rows where goalCompletionsAll
# is greater than 0.
met_filter_object <- met_filter(
  "goalCompletionsAll",
  operator = "GREATER_THAN",
  0
)
# Wrap the filter object in a filter clause.
met_filter_clause <- filter_clause_ga4(
  list(met_filter_object),
  operator = "AND"
)
# Pull the data with the metric filter applied.
google_analytics_4(
  viewId = view_id,
  date_range = c("2017-01-01", "2017-12-31"),
  dimensions = "deviceCategory",
  metrics = c("sessions", "goalCompletionsAll"),
  met_filters = met_filter_clause
)
## 2018-03-23 20:56:37> Downloaded [0] rows from a total of [].
## NULL
###############################Creating Multiple Filters (1 dimension + 1 metric) #############################################
# Dimension filter: country must match the regular expression ".*Sweden.*".
dim_filter_object <- dim_filter(
  "country",
  operator = "REGEXP",
  expressions = ".*Sweden.*"
)
# Wrap the dimension filter in a filter clause.
dim_filter_clause <- filter_clause_ga4(
  list(dim_filter_object),
  operator = "AND"
)
# Metric filter: sessions must be greater than 1.
met_filter_object <- met_filter(
  "sessions",
  operator = "GREATER_THAN",
  1
)
# Wrap the metric filter in a filter clause.
met_filter_clause <- filter_clause_ga4(
  list(met_filter_object),
  operator = "AND"
)
# Run the query with both filters applied.
filtered_country_info <- google_analytics_4(
  viewId = view_id,
  date_range = c("30daysAgo", "yesterday"),
  dimensions = "country",
  metrics = c("sessions", "users"),
  dim_filters = dim_filter_clause,
  met_filters = met_filter_clause
)
## 2018-03-23 20:56:38> Downloaded [1] rows from a total of [1].
# Print the result.
filtered_country_info
## country sessions users
## 1 Sweden 2 1
############################Segmentation###########################################
# Fetch the list of available segments.
my_segments <- ga_segment_list()
# Keep just the segment items.
segs <- my_segments$items
#### Returning Users ####
# Pick the v3 segment ID to use.
segment_for_call <- "gaid::-3" # returning users
# Wrap the v3 segment ID in a v4 segment object.
seg_obj <- segment_ga4("ReturningUsers", segment_id = segment_for_call)
# Query sessions and bounces by source and medium for that segment.
returning_users_traffic <- google_analytics_4(
  viewId = view_id,
  date_range = c("2017-10-31", "2017-12-31"),
  metrics = c('sessions', 'bounces'),
  dimensions = c('source', 'medium', 'segment'),
  segments = seg_obj
)
## 2018-03-23 20:56:39> Downloaded [3] rows from a total of [3].
# Open the result in the data viewer.
View(returning_users_traffic)
## Warning: running command ''/usr/bin/otool' -L '/Library/Frameworks/
## R.framework/Resources/modules/R_de.so'' had status 1
#### Organic Traffic ####
# Pick the v3 segment ID to use.
segment_for_call <- "gaid::-5" # organic traffic
# Wrap the v3 segment ID in a v4 segment object.
seg_obj_organic <- segment_ga4(
  "Organic Traffic",
  segment_id = segment_for_call
)
# Query sessions, users and bounces by source and medium for that segment.
organic_users_traffic <- google_analytics_4(
  viewId = view_id,
  date_range = c("2017-10-31", "2017-12-31"),
  metrics = c("sessions", "users", "bounces"),
  dimensions = c("source", "medium", "segment"),
  segments = seg_obj_organic
)
## 2018-03-23 20:56:39> Downloaded [1] rows from a total of [1].
# Open the result in the data viewer.
View(organic_users_traffic)
## Warning: running command ''/usr/bin/otool' -L '/Library/Frameworks/
## R.framework/Resources/modules/R_de.so'' had status 1
###############################Summary#############################################
#with the metrics and dimensions explorer, you now have the power to access your
#analytics data however you like. There is a
#great set of examples at http://www.dartistics.com/
#Happy analyzing!