#A step-by-step guide to connect Google Analytics to R using the 
#googleAnalyticsR package
#browseURL("https://cran.r-project.org/web/packages/googleAnalyticsR/googleAnalyticsR.pdf")

#install (only when missing) and load packages
#requireNamespace() checks whether the package is installed without attaching
#it; library() is then the single place the package is attached, and it stops
#with an error if loading fails (require() would only return FALSE)
if (!requireNamespace("googleAnalyticsR", quietly = TRUE)) {
  install.packages("googleAnalyticsR")
}
library(googleAnalyticsR)
## Loading required package: googleAnalyticsR
## Warning in as.POSIXlt.POSIXct(x, tz): unknown timezone 'zone/tz/2018c.1.0/
## zoneinfo/Europe/Stockholm'
## 2018-03-23 20:56:27> Default Google Project for googleAnalyticsR is now set.  This is shared with all googleAnalyticsR users. 
##  If making a lot of API calls, please: 
##  1) create your own Google Project at https://console.developers.google.com 
##  2) Activate the Google Analytics Reporting API 
##  3) set options(googleAuthR.client_id) and options(googleAuthR.client_secret) 
##  4) Reload the package.
#install quantmod only when it is missing, then attach it
#library() (unlike require()) stops with an error if the package cannot load
if (!requireNamespace("quantmod", quietly = TRUE)) {
  install.packages("quantmod")
}
library(quantmod)
## Loading required package: quantmod
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
library(ggplot2)
library(tidyverse)
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## first():  dplyr, xts
## lag():    dplyr, stats
## last():   dplyr, xts
#authenticate this R session against Google Analytics
ga_auth()
## Token cache file: .httr-oauth
## 2018-03-23 20:56:29> Authenticated
#fetch the list of accounts this login has access to
account_list <- ga_account_list()

#to inspect your accounts in the console, uncomment the line below
#account_list

#store the id of the view to query in view_id
#(take the value from the viewID column of account_list)
view_id <- 123847950 #my own website blog

#in order to get the API names you will need to access the data
#you can use the Query Explorer (hit command + click): https://ga-dev-tools.appspot.com/query-explorer/
#which will allow you to practice your queries
#or the Dimensions & Metrics Explorer:
#https://developers.google.com/analytics/devguides/reporting/core/dimsmets#cats=user
#https://developers.google.com/analytics/devguides/reporting/core/dimsmets


######################Simple Channels Query###################################

#pull the last 30 days of channel data, with the dates computed from today
channels <- google_analytics_4(
  viewId = view_id,
  dimensions = "channelGrouping",
  metrics = c("sessions", "pageviews", "entrances", "bounces"),
  date_range = c(Sys.Date() - 30, Sys.Date())
)
## 2018-03-23 20:56:30> Downloaded [2] rows from a total of [2].
#the date range can also be given with the API's relative date keywords
channels <- google_analytics_4(
  viewId = view_id,
  dimensions = "channelGrouping",
  metrics = c("sessions", "pageviews", "entrances", "bounces"),
  date_range = c("30daysAgo", "yesterday")
)
## 2018-03-23 20:56:30> Downloaded [2] rows from a total of [2].
#inspect the structure of the returned object
str(channels)
## 'data.frame':    2 obs. of  5 variables:
##  $ channelGrouping: chr  "Direct" "Organic Search"
##  $ sessions       : num  3 2
##  $ pageviews      : num  38 4
##  $ entrances      : num  3 2
##  $ bounces        : num  0 0
##  - attr(*, "totals")=List of 1
##   ..$ :List of 4
##   .. ..$ sessions : chr "5"
##   .. ..$ pageviews: chr "42"
##   .. ..$ entrances: chr "5"
##   .. ..$ bounces  : chr "0"
##  - attr(*, "minimums")=List of 1
##   ..$ :List of 4
##   .. ..$ sessions : chr "2"
##   .. ..$ pageviews: chr "4"
##   .. ..$ entrances: chr "2"
##   .. ..$ bounces  : chr "0"
##  - attr(*, "maximums")=List of 1
##   ..$ :List of 4
##   .. ..$ sessions : chr "3"
##   .. ..$ pageviews: chr "38"
##   .. ..$ entrances: chr "3"
##   .. ..$ bounces  : chr "0"
##  - attr(*, "isDataGolden")= logi TRUE
##  - attr(*, "rowCount")= int 2
#################Multiple dimensions and working with dates####################################
#same metrics as before, but over a fixed calendar range and split by two
#dimensions (channel and date)
channels2 <- google_analytics_4(
  viewId = view_id,
  dimensions = c("channelGrouping", "date"),
  metrics = c("sessions", "pageviews", "entrances", "bounces"),
  date_range = c("2017-01-01", "2017-12-31")
)
## 2018-03-23 20:56:31> Downloaded [115] rows from a total of [115].
#print the new data
channels2
##     channelGrouping       date sessions pageviews entrances bounces
## 1            Direct 2017-01-15        5        73         5       1
## 2            Direct 2017-01-17        1         1         1       1
## 3            Direct 2017-01-30        1         3         1       0
## 4            Direct 2017-02-05        1         5         1       0
## 5            Direct 2017-02-09        1         2         1       0
## 6            Direct 2017-03-12       18        18        18      18
## 7            Direct 2017-03-19        3         5         3       2
## 8            Direct 2017-03-24        1        10         1       0
## 9            Direct 2017-03-28        1         1         1       1
## 10           Direct 2017-04-05        1         5         1       0
## 11           Direct 2017-04-17        1         1         1       1
## 12           Direct 2017-04-29        3        23         3       2
## 13           Direct 2017-05-06        9       143         9       1
## 14           Direct 2017-05-07        4        23         4       0
## 15           Direct 2017-05-08        1        14         1       0
## 16           Direct 2017-05-10        7        46         7       1
## 17           Direct 2017-05-11        1         1         1       1
## 18           Direct 2017-05-12        1         1         1       1
## 19           Direct 2017-05-13        3        26         3       2
## 20           Direct 2017-05-14        2        10         2       1
## 21           Direct 2017-05-16        6        16         6       4
## 22           Direct 2017-05-17        2         2         2       2
## 23           Direct 2017-05-18        7        36         7       4
## 24           Direct 2017-05-19        2         5         2       1
## 25           Direct 2017-05-20        6        49         6       2
## 26           Direct 2017-05-21        1         1         1       1
## 27           Direct 2017-05-22        1         2         1       0
## 28           Direct 2017-05-23        5        10         5       4
## 29           Direct 2017-05-24        1         1         1       1
## 30           Direct 2017-05-25        2         2         2       2
## 31           Direct 2017-05-30        2         2         2       2
## 32           Direct 2017-06-02       16        16        16      16
## 33           Direct 2017-06-04        3        19         3       1
## 34           Direct 2017-06-05        3        15         3       1
## 35           Direct 2017-06-07        2         3         2       1
## 36           Direct 2017-06-09        1         6         1       0
## 37           Direct 2017-06-12        2         3         2       1
## 38           Direct 2017-06-16        2         2         2       2
## 39           Direct 2017-06-20        2         5         2       1
## 40           Direct 2017-06-29        1         6         1       0
## 41           Direct 2017-07-07        2         4         2       1
## 42           Direct 2017-07-17       13        13        13      13
## 43           Direct 2017-08-17       10        10        10      10
## 44           Direct 2017-08-24        2         2         2       2
## 45           Direct 2017-09-11        1         1         1       1
## 46           Direct 2017-10-01        4        38         4       1
## 47           Direct 2017-10-05        1         2         1       0
## 48           Direct 2017-10-18        2         4         2       0
## 49           Direct 2017-10-24        2        10         2       0
## 50           Direct 2017-10-31        1         2         1       0
## 51           Direct 2017-11-05        5         5         5       5
## 52           Direct 2017-11-11        1         2         1       0
## 53           Direct 2017-11-30        1         2         1       0
## 54           Direct 2017-12-03        2        20         2       0
## 55           Direct 2017-12-11        1         2         1       0
## 56           Direct 2017-12-12        1         2         1       0
## 57           Direct 2017-12-24        1         2         1       0
## 58   Organic Search 2017-01-07        2        15         2       0
## 59   Organic Search 2017-02-07        1         2         1       0
## 60   Organic Search 2017-04-26        1         1         1       1
## 61   Organic Search 2017-04-27        1         3         1       0
## 62   Organic Search 2017-05-06        2         5         2       0
## 63   Organic Search 2017-05-07        1         8         1       0
## 64   Organic Search 2017-05-08        1         1         1       1
## 65   Organic Search 2017-05-10        4        16         4       2
## 66   Organic Search 2017-05-13        1         1         1       1
## 67   Organic Search 2017-05-18        1         3         1       0
## 68   Organic Search 2017-05-20        1         1         1       1
## 69   Organic Search 2017-05-23        1         3         1       0
## 70   Organic Search 2017-06-13        1         1         1       1
## 71   Organic Search 2017-07-04        1        14         1       0
## 72   Organic Search 2017-08-18        1         1         1       1
## 73   Organic Search 2017-08-27        1         1         1       1
## 74   Organic Search 2017-09-01        1         4         1       0
## 75   Organic Search 2017-09-03        1         1         1       1
## 76   Organic Search 2017-09-09        1         1         1       1
## 77   Organic Search 2017-09-28        1         1         1       1
## 78   Organic Search 2017-10-02        1         2         1       0
## 79   Organic Search 2017-10-12        1         2         1       0
## 80   Organic Search 2017-10-17        1         2         1       0
## 81   Organic Search 2017-10-24        1         2         1       0
## 82   Organic Search 2017-10-25        1         2         1       0
## 83   Organic Search 2017-11-06        1         2         1       0
## 84   Organic Search 2017-11-07        2         4         2       0
## 85   Organic Search 2017-11-15        1         1         1       1
## 86   Organic Search 2017-11-22        1         2         1       0
## 87   Organic Search 2017-11-24        1         4         1       0
## 88   Organic Search 2017-11-29        1         2         1       0
## 89   Organic Search 2017-12-04        1         2         1       0
## 90   Organic Search 2017-12-08        1         2         1       0
## 91   Organic Search 2017-12-11        1         2         1       0
## 92   Organic Search 2017-12-12        1         2         1       0
## 93   Organic Search 2017-12-17        1         2         1       0
## 94         Referral 2017-01-04        1         1         1       1
## 95         Referral 2017-04-23        1         1         1       1
## 96         Referral 2017-05-04        1         1         1       1
## 97         Referral 2017-05-05        1         1         1       1
## 98         Referral 2017-05-08        1         1         1       1
## 99         Referral 2017-05-22        1         1         1       1
## 100        Referral 2017-06-12        1         6         1       0
## 101        Referral 2017-06-20        1         3         1       0
## 102        Referral 2017-06-27        1         1         1       1
## 103        Referral 2017-07-01        2         2         2       2
## 104        Referral 2017-08-01        1         1         1       1
## 105        Referral 2017-10-02        1         2         1       0
## 106        Referral 2017-10-21        1         1         1       1
## 107        Referral 2017-10-22        4         4         4       4
## 108        Referral 2017-12-21        1         2         1       0
## 109        Referral 2017-12-30        1         1         1       1
## 110        Referral 2017-12-31        1         1         1       1
## 111          Social 2017-01-06        1         1         1       1
## 112          Social 2017-04-18        1         1         1       1
## 113          Social 2017-05-13        1         1         1       1
## 114          Social 2017-05-25        1         1         1       1
## 115          Social 2017-06-11        1         1         1       1
#check the class of the date column
class(channels2[["date"]])
## [1] "Date"
#count the rows of data returned for each channel
tally(group_by(channels2, channelGrouping))
## # A tibble: 4 x 2
##   channelGrouping     n
##             <chr> <int>
## 1          Direct    57
## 2  Organic Search    36
## 3        Referral    17
## 4          Social     5
###########IGNORE#####################
# #summarize by date
# channels3<-channels2 %>%
#   group_by(channelGrouping, date=floor_date(date, "month")) %>%
#  summarize(sum = sum(sessions), n = n())
# 
# 
# #check results
# head(channels3)
# 
# #drop last column
# channels3 <- channels3[,1:3]
# 
# #rename 3rd column
# #rename(channels3, c("channelGrouping"= "channelGrouping", "date" = "date", "sum" = "totalSessions"))
# 
# #find tops and lows of session counts
# findPeaks(channels3$sum, thresh = 0)
# findValleys(channels3$sum, thresh = 0)



######################Device Category Filter###################################

#sessions and users for 2017, broken down by visitor device category
device_cat <- google_analytics_4(
  viewId = view_id,
  dimensions = "deviceCategory",
  metrics = c("sessions", "users"),
  date_range = c("2017-01-01", "2017-12-31")
)
## 2018-03-23 20:56:32> Downloaded [2] rows from a total of [2].
#print the result
device_cat
##   deviceCategory sessions users
## 1        desktop      232   154
## 2         mobile       18     8
#add each device category's share of all users, as a percentage
device_cat <- mutate(device_cat, user_pct = users / sum(users) * 100)

#bar chart of the user count per device category
ggplot(device_cat, mapping = aes(x = deviceCategory, y = users)) +
  geom_bar(show.legend = FALSE, stat = "identity") +
  ggtitle("Users by Device") +
  theme_light()

#same bar chart, but each bar is labelled with its share of users (percent)
#aes() is only for values mapped from the data, so the fixed text settings
#(font family, face, colour, size) are passed as plain geom_text() arguments.
#"position" is not an aesthetic and only triggered the warning
#"Ignoring unknown aesthetics: position", so it was removed; the aes(size = 14)
#was superseded by the fixed size = 3.5 and was removed as well.
ggplot(data = device_cat, aes(x = reorder(deviceCategory, -users), y = users)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  ggtitle("Distribution of users by device type") +
  xlab(" ") +
  ylab("total users") +
  geom_text(aes(label = paste0(round(user_pct, 2), "%")),
            family = "Arial", fontface = "bold",
            vjust = 1.6, color = "white", size = 3.5) +
  theme_minimal()

#sessions per day and device category for the second half of 2017
device_bydate <- google_analytics_4(
  viewId = view_id,
  dimensions = c("date", "deviceCategory"),
  metrics = "sessions",
  date_range = c("2017-06-01", "2017-12-31")
)
## 2018-03-23 20:56:34> Downloaded [58] rows from a total of [58].
#print the result
device_bydate
##          date deviceCategory sessions
## 1  2017-06-02        desktop       16
## 2  2017-06-04        desktop        3
## 3  2017-06-05        desktop        3
## 4  2017-06-07        desktop        2
## 5  2017-06-09        desktop        1
## 6  2017-06-11         mobile        1
## 7  2017-06-12        desktop        3
## 8  2017-06-13        desktop        1
## 9  2017-06-16        desktop        2
## 10 2017-06-20        desktop        3
## 11 2017-06-27        desktop        1
## 12 2017-06-29        desktop        1
## 13 2017-07-01        desktop        2
## 14 2017-07-04        desktop        1
## 15 2017-07-07        desktop        2
## 16 2017-07-17        desktop       13
## 17 2017-08-01        desktop        1
## 18 2017-08-17        desktop       10
## 19 2017-08-18        desktop        1
## 20 2017-08-24        desktop        2
## 21 2017-08-27        desktop        1
## 22 2017-09-01        desktop        1
## 23 2017-09-03        desktop        1
## 24 2017-09-09        desktop        1
## 25 2017-09-11        desktop        1
## 26 2017-09-28        desktop        1
## 27 2017-10-01        desktop        4
## 28 2017-10-02        desktop        1
## 29 2017-10-02         mobile        1
## 30 2017-10-05         mobile        1
## 31 2017-10-12        desktop        1
## 32 2017-10-17        desktop        1
## 33 2017-10-18        desktop        2
## 34 2017-10-21        desktop        1
## 35 2017-10-22        desktop        4
## 36 2017-10-24        desktop        3
## 37 2017-10-25        desktop        1
## 38 2017-10-31        desktop        1
## 39 2017-11-05        desktop        5
## 40 2017-11-06        desktop        1
## 41 2017-11-07        desktop        2
## 42 2017-11-11        desktop        1
## 43 2017-11-15        desktop        1
## 44 2017-11-22        desktop        1
## 45 2017-11-24        desktop        1
## 46 2017-11-29        desktop        1
## 47 2017-11-30        desktop        1
## 48 2017-12-03        desktop        2
## 49 2017-12-04        desktop        1
## 50 2017-12-08        desktop        1
## 51 2017-12-11        desktop        1
## 52 2017-12-11         mobile        1
## 53 2017-12-12        desktop        2
## 54 2017-12-17        desktop        1
## 55 2017-12-21        desktop        1
## 56 2017-12-24        desktop        1
## 57 2017-12-30        desktop        1
## 58 2017-12-31        desktop        1
#line chart of daily sessions, one coloured line per device category
ggplot(data = device_bydate,
       mapping = aes(x = date, y = sessions, color = deviceCategory)) +
  theme_light() +
  geom_line()

######################Where do our users come from? Query#############################
#let's see where our users come from, sorted by user count (descending)
country_info <- google_analytics_4(
  viewId = view_id,
  dimensions = "country",
  metrics = "users",
  date_range = c("30daysAgo", "yesterday"),
  order = order_type("users", sort_order = "DESCENDING", orderType = "VALUE")
)
## 2018-03-23 20:56:35> Downloaded [4] rows from a total of [4].
#print the result
country_info
##          country users
## 1        Denmark     1
## 2         Sweden     1
## 3 United Kingdom     1
## 4  United States     1
##########Lets visualize our data

# Pie chart of users by country (base R graphics), labelled with percentages
slices <- country_info$users
pct <- round(slices / sum(slices) * 100)
lbls <- paste(country_info$country, pct) # "<country> <percent>"
lbls <- paste0(lbls, "%") # append the % sign to every label
pie(slices,
    labels = lbls,
    col = rainbow(length(lbls)),
    main = "Users by Country")

#the same data as a bar chart, because the pie chart has too many slices
#bars are filled per country; the legend is hidden
ggplot(country_info, mapping = aes(x = country, y = users, fill = country)) +
  geom_bar(show.legend = FALSE, stat = "identity") +
  ggtitle("Users by Country") +
  theme_classic()

######################Detailed Device Information Query#############################

#device and browser details for 2017
#max = 5 limits how many rows are returned
browser_info <- google_analytics_4(
  viewId = view_id,
  dimensions = c("browser", "browserVersion", "browserSize",
                 "screenResolution", "mobileDeviceinfo"),
  metrics = c("sessions", "users", "goalCompletionsAll"),
  date_range = c("2017-01-01", "2017-12-31"),
  max = 5
)
## 2018-03-23 20:56:35> Downloaded [5] rows from a total of [8].
#print the result
browser_info
##   browser browserVersion browserSize screenResolution mobileDeviceinfo
## 1 Firefox           38.0   (not set)          800x600        (not set)
## 2  Safari           10.0     320x460          320x568     Apple iPhone
## 3  Safari           10.0     380x560          375x667     Apple iPhone
## 4  Safari           10.0     410x630          414x736     Apple iPhone
## 5  Safari        600.1.4     380x530          375x667     Apple iPhone
##   sessions users goalCompletionsAll
## 1        1     1                  0
## 2        8     3                  0
## 3        2     2                  0
## 4        1     1                  0
## 5        1     1                  0
#bar chart of sessions per browser size, largest first,
#with the session count printed in white on each bar
ggplot(browser_info,
       mapping = aes(x = reorder(browserSize, -sessions), y = sessions)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = sessions), color = "white", size = 3.5, vjust = 1.6) +
  ggtitle("Which browsers sizes are our visitors using?") +
  xlab(" ") +
  ylab("total sessions") +
  theme_classic()

#stacked bar chart: row count per browser, stacked by screen resolution
ggplot(browser_info, mapping = aes(x = browser, fill = screenResolution)) +
  geom_bar()

###############################Creating Dimension Filters#############################################
#Build a dimension filter object
#the regular expression keeps only device categories containing "mobile"
dim_filter_object <- dim_filter(
  "deviceCategory",
  operator = "REGEXP",
  expressions = ".*mobile.*"
)

#Wrap the filter object in a filter clause
dim_filter_clause <- filter_clause_ga4(
  list(dim_filter_object),
  operator = "AND"
)

#Pull the data with the dimension filter applied
google_analytics_4(
  viewId = view_id,
  dimensions = "deviceCategory",
  metrics = c("sessions", "goalCompletionsAll"),
  date_range = c("2017-01-01", "2017-12-31"),
  dim_filters = dim_filter_clause
)
## 2018-03-23 20:56:36> Downloaded [1] rows from a total of [1].
##   deviceCategory sessions goalCompletionsAll
## 1         mobile       18                  0
###############################Creating Metric Filters#############################################
#Build a metric filter object
#keep only rows where the number of completed goals is greater than 0
met_filter_object <- met_filter(
  "goalCompletionsAll",
  operator = "GREATER_THAN",
  0
)

#Wrap the filter object in a filter clause
met_filter_clause <- filter_clause_ga4(
  list(met_filter_object),
  operator = "AND"
)

#Pull the data with the metric filter applied
google_analytics_4(
  viewId = view_id,
  dimensions = "deviceCategory",
  metrics = c("sessions", "goalCompletionsAll"),
  date_range = c("2017-01-01", "2017-12-31"),
  met_filters = met_filter_clause
)
## 2018-03-23 20:56:37> Downloaded [0] rows from a total of [].
## NULL
###############################Creating Multiple Filters (1 dimension + 1 metric) #############################################
#Dimension filter: countries whose name matches "Sweden"
dim_filter_object <- dim_filter(
  "country",
  operator = "REGEXP",
  expressions = ".*Sweden.*"
)

#Wrap the dimension filter in a filter clause
dim_filter_clause <- filter_clause_ga4(
  list(dim_filter_object),
  operator = "AND"
)

#Metric filter: more than 1 session
met_filter_object <- met_filter(
  "sessions",
  operator = "GREATER_THAN",
  1
)

#Wrap the metric filter in a filter clause
met_filter_clause <- filter_clause_ga4(
  list(met_filter_object),
  operator = "AND"
)

#run the query with both filters applied
filtered_country_info <- google_analytics_4(
  viewId = view_id,
  dimensions = "country",
  metrics = c("sessions", "users"),
  date_range = c("30daysAgo", "yesterday"),
  dim_filters = dim_filter_clause,
  met_filters = met_filter_clause
)
## 2018-03-23 20:56:38> Downloaded [1] rows from a total of [1].
#print the result
filtered_country_info
##   country sessions users
## 1  Sweden        2     1
############################Segmentation###########################################

## fetch the list of available segments
my_segments <- ga_segment_list()

## keep only the segment items
segs <- my_segments$items

####Returning Users ####
## id of the v3 segment to use
segment_for_call <- "gaid::-3" #returning users

## wrap the v3 segment id in a v4 segment object
seg_obj <- segment_ga4("ReturningUsers", segment_id = segment_for_call)

## run the segmented query
returning_users_traffic <- google_analytics_4(
  viewId = view_id,
  date_range = c("2017-10-31", "2017-12-31"),
  metrics = c("sessions", "bounces"),
  dimensions = c("source", "medium", "segment"),
  segments = seg_obj
)
## 2018-03-23 20:56:39> Downloaded [3] rows from a total of [3].
#open the results in the data viewer
View(returning_users_traffic)
## Warning: running command ''/usr/bin/otool' -L '/Library/Frameworks/
## R.framework/Resources/modules/R_de.so'' had status 1
####Organic Traffic ####
## id of the v3 segment to use
segment_for_call <- "gaid::-5" #organic traffic

## wrap the v3 segment id in a v4 segment object
seg_obj_organic <- segment_ga4("Organic Traffic", segment_id = segment_for_call)

## run the segmented query
organic_users_traffic <- google_analytics_4(
  viewId = view_id,
  date_range = c("2017-10-31", "2017-12-31"),
  metrics = c("sessions", "users", "bounces"),
  dimensions = c("source", "medium", "segment"),
  segments = seg_obj_organic
)
## 2018-03-23 20:56:39> Downloaded [1] rows from a total of [1].
#open the results in the data viewer
View(organic_users_traffic)
## Warning: running command ''/usr/bin/otool' -L '/Library/Frameworks/
## R.framework/Resources/modules/R_de.so'' had status 1
###############################Summary#############################################
#with the metrics and dimensions explorer, you now have the power to access your
#analytics data however you like. There is a
#great set of examples at http://www.dartistics.com/
#Happy analyzing!