This script can be used to build a data set from Google Analytics data.

It will do each query day by day for each variable and bind them together.

This is for use with ecommerce data, the final data will always have transactionId as a unique primary key.

library(RGoogleAnalytics)
library(tidyverse)
library(lubridate)

load("./token_file")
ValidateToken(token)

start.date <- as.character(date(now()) - 7)
end.date <- as.character(date(now()))
#####
#Base Table
query.init <- Init(start.date = start.date,
                   end.date = end.date,
                   dimensions = c("ga:date","ga:transactionId"),
                   metrics = "ga:transactionRevenue",
                   max.results = 10000000,
                   table.id = table.id)

query <- QueryBuilder(query.init)
data <- GetReportData(query, token, split_daywise = TRUE)

ga.df <- data %>%
    mutate(transactionId = as.integer(transactionId),
           date = ymd(date)) %>% 
    group_by(transactionId) %>% 
    summarise(ga_revenue = sum(transactionRevenue),
              date = min(date))

#write.csv(data, file = "GA_Transaction_IDs.csv")
#fullGATable <- read.csv("GA_Transaction_IDs.csv") %>% select(-1) 
#####

##### 
#Source
query.init <- Init(start.date = start.date,
                   end.date = end.date,
                   dimensions = c("ga:source",
                       "ga:transactionId"),
                   metrics = c("ga:transactionRevenue"),
                   max.results = 10000000,
                   table.id = table.id)
query <- QueryBuilder(query.init)
data <- GetReportData(query, token, split_daywise = TRUE)
source <- data %>%
    mutate(transactionId = as.integer(transactionId))%>% 
    select(source, transactionId, transactionRevenue) %>%
    group_by(transactionId, source) %>% 
    summarise(ga_revenue = sum(transactionRevenue)) %>%
    ungroup() %>%
    select(source, transactionId) %>%
    filter(!duplicated(transactionId))
#####
#####
# Medium
query.init <- Init(start.date = start.date,
                   end.date = end.date,
                   dimensions = c("ga:medium",
                                  "ga:transactionId"),
                   metrics = c("ga:transactionRevenue"),
                   max.results = 10000000,
                   table.id = table.id)

query <- QueryBuilder(query.init)
data <- GetReportData(query, token, split_daywise = TRUE)
medium <- data %>% 
    mutate(transactionId = as.integer(transactionId)) %>%
    select(medium, transactionId, transactionRevenue) %>%
    group_by(transactionId, medium) %>% 
    summarise(ga_revenue = sum(transactionRevenue)) %>%
    ungroup() %>%
    select(medium, transactionId)%>%
    filter(!duplicated(transactionId)) 
#####
#####
# Lat/Long
query.init <- Init(start.date = start.date,
                   end.date = end.date,
                   dimensions = c("ga:latitude",
                                  "ga:longitude",
                                  "ga:transactionId"),
                   metrics = c("ga:transactionRevenue"),
                   max.results = 10000000,
                   table.id = table.id)

query <- QueryBuilder(query.init)
data <- GetReportData(query, token, split_daywise = TRUE)
latlon <- data %>%
    mutate(transactionId = as.integer(transactionId))%>% 
    select(latitude, longitude, transactionId, transactionRevenue) %>%
    group_by(latitude, longitude, transactionId) %>%
    summarise(ga_revenue = sum(transactionRevenue)) %>%
    ungroup() %>%
    select(latitude, longitude, transactionId) %>%
    group_by(transactionId) %>%
    summarise(latitude = max(latitude),
              longitude = max(longitude))%>%
    filter(!duplicated(transactionId))
    
#####
#####
# ga:sessionDurationBucket
query.init <- Init(start.date = start.date,
                   end.date = end.date,
                   dimensions = c("ga:sessionDurationBucket",
                                  "ga:transactionId"),
                   metrics = c("ga:transactionRevenue"),
                   max.results = 10000000,
                   table.id = table.id)

query <- QueryBuilder(query.init)
data <- GetReportData(query, token, split_daywise = TRUE)
sessionDuration <- data %>% 
    select(sessionDurationBucket, transactionId) %>%
    mutate(transactionId = as.integer(transactionId),
           sessionDurationBucket = as.integer(sessionDurationBucket)) %>%
    group_by(transactionId) %>%
    summarise(sessionDuration = sum(sessionDurationBucket))%>%
    filter(!duplicated(transactionId))

#####
# Join to Big Table

GA_Full <- read.csv("GA_Transaction_IDs_Full.csv")%>% select(-1) %>%
     mutate(date = date(date))

gaBig.df <- ga.df %>%
    left_join(source) %>%
    mutate(source = as.factor(source)) %>%
    left_join(medium) %>%
    mutate(medium = as.factor(medium)) %>%
    left_join(latlon) %>%
    mutate(latitude = as.numeric(latitude),
           longitude = as.numeric(longitude)) %>%
    left_join(sessionDuration)

uploadDf <- gaBig.df %>%
    rbind(GA_Full) %>%
    filter(!duplicated(transactionId))

write.csv(uploadDf, file = "GA_Transaction_IDs_Full.csv")