This script can be used to build a data set from Google Analytics data.
It will do each query day by day for each variable and bind them together.
This is for use with ecommerce data, the final data will always have transactionId as a unique primary key.
library(RGoogleAnalytics)
library(tidyverse)
library(lubridate)
load("./token_file")
ValidateToken(token)
start.date <- as.character(date(now()) - 7)
end.date <- as.character(date(now()))
#####
#Base Table
query.init <- Init(start.date = start.date,
end.date = end.date,
dimensions = c("ga:date","ga:transactionId"),
metrics = "ga:transactionRevenue",
max.results = 10000000,
table.id = table.id)
query <- QueryBuilder(query.init)
data <- GetReportData(query, token, split_daywise = TRUE)
ga.df <- data %>%
mutate(transactionId = as.integer(transactionId),
date = ymd(date)) %>%
group_by(transactionId) %>%
summarise(ga_revenue = sum(transactionRevenue),
date = min(date))
#write.csv(data, file = "GA_Transaction_IDs.csv")
#fullGATable <- read.csv("GA_Transaction_IDs.csv") %>% select(-1)
#####
#####
#Source
query.init <- Init(start.date = start.date,
end.date = end.date,
dimensions = c("ga:source",
"ga:transactionId"),
metrics = c("ga:transactionRevenue"),
max.results = 10000000,
table.id = table.id)
query <- QueryBuilder(query.init)
data <- GetReportData(query, token, split_daywise = TRUE)
source <- data %>%
mutate(transactionId = as.integer(transactionId))%>%
select(source, transactionId, transactionRevenue) %>%
group_by(transactionId, source) %>%
summarise(ga_revenue = sum(transactionRevenue)) %>%
ungroup() %>%
select(source, transactionId) %>%
filter(!duplicated(transactionId))
#####
#####
# Medium
query.init <- Init(start.date = start.date,
end.date = end.date,
dimensions = c("ga:medium",
"ga:transactionId"),
metrics = c("ga:transactionRevenue"),
max.results = 10000000,
table.id = table.id)
query <- QueryBuilder(query.init)
data <- GetReportData(query, token, split_daywise = TRUE)
medium <- data %>%
mutate(transactionId = as.integer(transactionId)) %>%
select(medium, transactionId, transactionRevenue) %>%
group_by(transactionId, medium) %>%
summarise(ga_revenue = sum(transactionRevenue)) %>%
ungroup() %>%
select(medium, transactionId)%>%
filter(!duplicated(transactionId))
#####
#####
# Lat/Long
query.init <- Init(start.date = start.date,
end.date = end.date,
dimensions = c("ga:latitude",
"ga:longitude",
"ga:transactionId"),
metrics = c("ga:transactionRevenue"),
max.results = 10000000,
table.id = table.id)
query <- QueryBuilder(query.init)
data <- GetReportData(query, token, split_daywise = TRUE)
latlon <- data %>%
mutate(transactionId = as.integer(transactionId))%>%
select(latitude, longitude, transactionId, transactionRevenue) %>%
group_by(latitude, longitude, transactionId) %>%
summarise(ga_revenue = sum(transactionRevenue)) %>%
ungroup() %>%
select(latitude, longitude, transactionId) %>%
group_by(transactionId) %>%
summarise(latitude = max(latitude),
longitude = max(longitude))%>%
filter(!duplicated(transactionId))
#####
#####
# ga:sessionDurationBucket
query.init <- Init(start.date = start.date,
end.date = end.date,
dimensions = c("ga:sessionDurationBucket",
"ga:transactionId"),
metrics = c("ga:transactionRevenue"),
max.results = 10000000,
table.id = table.id)
query <- QueryBuilder(query.init)
data <- GetReportData(query, token, split_daywise = TRUE)
sessionDuration <- data %>%
select(sessionDurationBucket, transactionId) %>%
mutate(transactionId = as.integer(transactionId),
sessionDurationBucket = as.integer(sessionDurationBucket)) %>%
group_by(transactionId) %>%
summarise(sessionDuration = sum(sessionDurationBucket))%>%
filter(!duplicated(transactionId))
#####
# Join to Big Table
GA_Full <- read.csv("GA_Transaction_IDs_Full.csv")%>% select(-1) %>%
mutate(date = date(date))
gaBig.df <- ga.df %>%
left_join(source) %>%
mutate(source = as.factor(source)) %>%
left_join(medium) %>%
mutate(medium = as.factor(medium)) %>%
left_join(latlon) %>%
mutate(latitude = as.numeric(latitude),
longitude = as.numeric(longitude)) %>%
left_join(sessionDuration)
uploadDf <- gaBig.df %>%
rbind(GA_Full) %>%
filter(!duplicated(transactionId))
write.csv(uploadDf, file = "GA_Transaction_IDs_Full.csv")