#####################Correlation Analysis using Google Analytics Data###########################
#Connect Google Analytics to R using the
#googleAnalyticsR package
#browseURL("https://cran.r-project.org/web/packages/googleAnalyticsR/googleAnalyticsR.pdf")
#install and load packages
if(!require("googleAnalyticsR")){
install.packages("googleAnalyticsR")
library(googleAnalyticsR)
}
## Loading required package: googleAnalyticsR
## Warning in as.POSIXlt.POSIXct(x, tz): unknown timezone 'zone/tz/2017c.1.0/
## zoneinfo/Europe/Stockholm'
## 2018-02-04 09:08:20> Default Google Project for googleAnalyticsR is now set. This is shared with all googleAnalyticsR users.
## If making a lot of API calls, please:
## 1) create your own Google Project at https://console.developers.google.com
## 2) Activate the Google Analytics Reporting API
## 3) set options(googleAuthR.client_id) and options(googleAuthR.client_secret)
## 4) Reload the package.
library(ggplot2)
library(tidyverse)
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
#authorize Google Analytics
ga_auth()
## Token cache file: .httr-oauth
## 2018-02-04 09:08:22> Authenticated
#get account info so you can see what accounts you have access to
account_list<-ga_account_list()
#uncomment "account_list" below to take a look at your accounts in the console window
#account_list
#assign the view you want to connect to the view_id variable
#it is the viewID column
view_id<-123847950 #my own website blog
#in order to get the API names you will need to access the data
#you can use the Query Explorer (hit command + click): https://ga-dev-tools.appspot.com/query-explorer/
#which will allow you to practice your queries
#or the Dimensions & Metrics Explorer:
#https://developers.google.com/analytics/devguides/reporting/core/dimsmets#cats=user
#https://developers.google.com/analytics/devguides/reporting/core/dimsmets
################################Correlation###########################################
#pull the data
correlation_ex<- google_analytics_4(view_id,
date_range = c("2017-01-01","2017-12-31"),
metrics = c("sessions", "pageviews", "entrances", "bounces"),
dimensions = c("channelGrouping", "date", "deviceCategory"))
## 2018-02-04 09:08:24> Downloaded [118] rows from a total of [118].
#lets view our new data
correlation_ex
## channelGrouping date deviceCategory sessions pageviews entrances
## 1 Direct 2017-01-15 desktop 4 70 4
## 2 Direct 2017-01-15 mobile 1 3 1
## 3 Direct 2017-01-17 desktop 1 1 1
## 4 Direct 2017-01-30 desktop 1 3 1
## 5 Direct 2017-02-05 desktop 1 5 1
## 6 Direct 2017-02-09 desktop 1 2 1
## 7 Direct 2017-03-12 desktop 18 18 18
## 8 Direct 2017-03-19 desktop 3 5 3
## 9 Direct 2017-03-24 desktop 1 10 1
## 10 Direct 2017-03-28 desktop 1 1 1
## 11 Direct 2017-04-05 desktop 1 5 1
## 12 Direct 2017-04-17 desktop 1 1 1
## 13 Direct 2017-04-29 desktop 3 23 3
## 14 Direct 2017-05-06 desktop 8 139 8
## 15 Direct 2017-05-06 mobile 1 4 1
## 16 Direct 2017-05-07 desktop 4 23 4
## 17 Direct 2017-05-08 desktop 1 14 1
## 18 Direct 2017-05-10 desktop 7 46 7
## 19 Direct 2017-05-11 desktop 1 1 1
## 20 Direct 2017-05-12 desktop 1 1 1
## 21 Direct 2017-05-13 desktop 3 26 3
## 22 Direct 2017-05-14 desktop 2 10 2
## 23 Direct 2017-05-16 desktop 6 16 6
## 24 Direct 2017-05-17 desktop 2 2 2
## 25 Direct 2017-05-18 desktop 7 36 7
## 26 Direct 2017-05-19 desktop 2 5 2
## 27 Direct 2017-05-20 desktop 6 49 6
## 28 Direct 2017-05-21 desktop 1 1 1
## 29 Direct 2017-05-22 desktop 1 2 1
## 30 Direct 2017-05-23 desktop 5 10 5
## 31 Direct 2017-05-24 desktop 1 1 1
## 32 Direct 2017-05-25 desktop 2 2 2
## 33 Direct 2017-05-30 desktop 2 2 2
## 34 Direct 2017-06-02 desktop 16 16 16
## 35 Direct 2017-06-04 desktop 3 19 3
## 36 Direct 2017-06-05 desktop 3 15 3
## 37 Direct 2017-06-07 desktop 2 3 2
## 38 Direct 2017-06-09 desktop 1 6 1
## 39 Direct 2017-06-12 desktop 2 3 2
## 40 Direct 2017-06-16 desktop 2 2 2
## 41 Direct 2017-06-20 desktop 2 5 2
## 42 Direct 2017-06-29 desktop 1 6 1
## 43 Direct 2017-07-07 desktop 2 4 2
## 44 Direct 2017-07-17 desktop 13 13 13
## 45 Direct 2017-08-17 desktop 10 10 10
## 46 Direct 2017-08-24 desktop 2 2 2
## 47 Direct 2017-09-11 desktop 1 1 1
## 48 Direct 2017-10-01 desktop 4 38 4
## 49 Direct 2017-10-05 mobile 1 2 1
## 50 Direct 2017-10-18 desktop 2 4 2
## 51 Direct 2017-10-24 desktop 2 10 2
## 52 Direct 2017-10-31 desktop 1 2 1
## 53 Direct 2017-11-05 desktop 5 5 5
## 54 Direct 2017-11-11 desktop 1 2 1
## 55 Direct 2017-11-30 desktop 1 2 1
## 56 Direct 2017-12-03 desktop 2 20 2
## 57 Direct 2017-12-11 mobile 1 2 1
## 58 Direct 2017-12-12 desktop 1 2 1
## 59 Direct 2017-12-24 desktop 1 2 1
## 60 Organic Search 2017-01-07 mobile 2 15 2
## 61 Organic Search 2017-02-07 desktop 1 2 1
## 62 Organic Search 2017-04-26 mobile 1 1 1
## 63 Organic Search 2017-04-27 mobile 1 3 1
## 64 Organic Search 2017-05-06 desktop 2 5 2
## 65 Organic Search 2017-05-07 mobile 1 8 1
## 66 Organic Search 2017-05-08 mobile 1 1 1
## 67 Organic Search 2017-05-10 desktop 1 1 1
## 68 Organic Search 2017-05-10 mobile 3 15 3
## 69 Organic Search 2017-05-13 desktop 1 1 1
## 70 Organic Search 2017-05-18 desktop 1 3 1
## 71 Organic Search 2017-05-20 desktop 1 1 1
## 72 Organic Search 2017-05-23 desktop 1 3 1
## 73 Organic Search 2017-06-13 desktop 1 1 1
## 74 Organic Search 2017-07-04 desktop 1 14 1
## 75 Organic Search 2017-08-18 desktop 1 1 1
## 76 Organic Search 2017-08-27 desktop 1 1 1
## 77 Organic Search 2017-09-01 desktop 1 4 1
## 78 Organic Search 2017-09-03 desktop 1 1 1
## 79 Organic Search 2017-09-09 desktop 1 1 1
## 80 Organic Search 2017-09-28 desktop 1 1 1
## 81 Organic Search 2017-10-02 desktop 1 2 1
## 82 Organic Search 2017-10-12 desktop 1 2 1
## 83 Organic Search 2017-10-17 desktop 1 2 1
## 84 Organic Search 2017-10-24 desktop 1 2 1
## 85 Organic Search 2017-10-25 desktop 1 2 1
## 86 Organic Search 2017-11-06 desktop 1 2 1
## 87 Organic Search 2017-11-07 desktop 2 4 2
## 88 Organic Search 2017-11-15 desktop 1 1 1
## 89 Organic Search 2017-11-22 desktop 1 2 1
## 90 Organic Search 2017-11-24 desktop 1 4 1
## 91 Organic Search 2017-11-29 desktop 1 2 1
## 92 Organic Search 2017-12-04 desktop 1 2 1
## 93 Organic Search 2017-12-08 desktop 1 2 1
## 94 Organic Search 2017-12-11 desktop 1 2 1
## 95 Organic Search 2017-12-12 desktop 1 2 1
## 96 Organic Search 2017-12-17 desktop 1 2 1
## 97 Referral 2017-01-04 desktop 1 1 1
## 98 Referral 2017-04-23 desktop 1 1 1
## 99 Referral 2017-05-04 desktop 1 1 1
## 100 Referral 2017-05-05 desktop 1 1 1
## 101 Referral 2017-05-08 desktop 1 1 1
## 102 Referral 2017-05-22 desktop 1 1 1
## 103 Referral 2017-06-12 desktop 1 6 1
## 104 Referral 2017-06-20 desktop 1 3 1
## 105 Referral 2017-06-27 desktop 1 1 1
## 106 Referral 2017-07-01 desktop 2 2 2
## 107 Referral 2017-08-01 desktop 1 1 1
## 108 Referral 2017-10-02 mobile 1 2 1
## 109 Referral 2017-10-21 desktop 1 1 1
## 110 Referral 2017-10-22 desktop 4 4 4
## 111 Referral 2017-12-21 desktop 1 2 1
## 112 Referral 2017-12-30 desktop 1 1 1
## 113 Referral 2017-12-31 desktop 1 1 1
## 114 Social 2017-01-06 mobile 1 1 1
## 115 Social 2017-04-18 desktop 1 1 1
## 116 Social 2017-05-13 mobile 1 1 1
## 117 Social 2017-05-25 mobile 1 1 1
## 118 Social 2017-06-11 mobile 1 1 1
## bounces
## 1 1
## 2 0
## 3 1
## 4 0
## 5 0
## 6 0
## 7 18
## 8 2
## 9 0
## 10 1
## 11 0
## 12 1
## 13 2
## 14 1
## 15 0
## 16 0
## 17 0
## 18 1
## 19 1
## 20 1
## 21 2
## 22 1
## 23 4
## 24 2
## 25 4
## 26 1
## 27 2
## 28 1
## 29 0
## 30 4
## 31 1
## 32 2
## 33 2
## 34 16
## 35 1
## 36 1
## 37 1
## 38 0
## 39 1
## 40 2
## 41 1
## 42 0
## 43 1
## 44 13
## 45 10
## 46 2
## 47 1
## 48 1
## 49 0
## 50 0
## 51 0
## 52 0
## 53 5
## 54 0
## 55 0
## 56 0
## 57 0
## 58 0
## 59 0
## 60 0
## 61 0
## 62 1
## 63 0
## 64 0
## 65 0
## 66 1
## 67 1
## 68 1
## 69 1
## 70 0
## 71 1
## 72 0
## 73 1
## 74 0
## 75 1
## 76 1
## 77 0
## 78 1
## 79 1
## 80 1
## 81 0
## 82 0
## 83 0
## 84 0
## 85 0
## 86 0
## 87 0
## 88 1
## 89 0
## 90 0
## 91 0
## 92 0
## 93 0
## 94 0
## 95 0
## 96 0
## 97 1
## 98 1
## 99 1
## 100 1
## 101 1
## 102 1
## 103 0
## 104 0
## 105 1
## 106 2
## 107 1
## 108 0
## 109 1
## 110 4
## 111 0
## 112 1
## 113 1
## 114 1
## 115 1
## 116 1
## 117 1
## 118 1
#check date column
class(correlation_ex$date)
## [1] "Date"
#how many rows of data per channel
correlation_ex %>%
group_by(channelGrouping) %>%
tally()
## # A tibble: 4 x 2
## channelGrouping n
## <chr> <int>
## 1 Direct 59
## 2 Organic Search 37
## 3 Referral 17
## 4 Social 5
#subset numeric data
corr_metrics_data<-correlation_ex[,c("sessions", "pageviews", "entrances", "bounces")]
#view corr matrix
cor(corr_metrics_data)
## sessions pageviews entrances bounces
## sessions 1.0000000 0.4572692 1.0000000 0.9144177
## pageviews 0.4572692 1.0000000 0.4572692 0.1323279
## entrances 1.0000000 0.4572692 1.0000000 0.9144177
## bounces 0.9144177 0.1323279 0.9144177 1.0000000
#The table is mirrored in the diagonal and provides the correlation coefficient (aka, “rr”) between each pair of values that intersect in the cell. 1 means a perfect correlation, 0 means no correlation and -1 means a perfect negative correlation.
#view correlation plot
pairs(corr_metrics_data)

#A correlation of less than 1 may be a quick diagnostic that something is wrong with the tracking.
#Are web channels correlated?
## Get only desktop rows, and the date, channelGrouping and sessions columns
pivoted <- correlation_ex %>%
filter(deviceCategory == "desktop") %>%
select(date, channelGrouping, sessions) %>%
spread(channelGrouping, sessions)
#view results
View(pivoted)
## Warning: running command ''/usr/bin/otool' -L '/Library/Frameworks/
## R.framework/Resources/modules/R_de.so'' had status 1
## Get rid of any NA's and replace with 0
pivoted[is.na(pivoted)] <- 0
#view results
View(pivoted)
## Warning: running command ''/usr/bin/otool' -L '/Library/Frameworks/
## R.framework/Resources/modules/R_de.so'' had status 1
#remove unnumeric values
pivoted_corr_data<-pivoted[,-1]
#view results
View(pivoted_corr_data)
## Warning: running command ''/usr/bin/otool' -L '/Library/Frameworks/
## R.framework/Resources/modules/R_de.so'' had status 1
pivoted_corr_table<-round(cor(pivoted_corr_data),2)
pairs(pivoted_corr_table)

#####################Is there a relationship between device and marketing channel? Chi-Square Example####################
#lets check device categories of our visitors
deviceCat<- google_analytics_4(view_id,
date_range = c("2017-01-01","2017-11-24"),
metrics = "users",
dimensions = c("deviceCategory", "channelGrouping"))
## 2018-02-04 09:08:29> Downloaded [8] rows from a total of [8].
#lets view our data
deviceCat
## deviceCategory channelGrouping users
## 1 desktop Direct 109
## 2 desktop Organic Search 21
## 3 desktop Referral 15
## 4 desktop Social 1
## 5 mobile Direct 2
## 6 mobile Organic Search 4
## 7 mobile Referral 1
## 8 mobile Social 2
#create column for channelGrouping using spread function
device.tab<-spread(deviceCat, channelGrouping, users)
#chi square test
chisq.test(device.tab[,-1])
## Warning in chisq.test(device.tab[, -1]): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: device.tab[, -1]
## X-squared = 28.327, df = 3, p-value = 3.101e-06
#the actual distribution
device.tab
## deviceCategory Direct Organic Search Referral Social
## 1 desktop 109 21 15 1
## 2 mobile 2 4 1 2
#the expected values for each of the following groups
chisq.test(device.tab[,-1], correct=FALSE)$expected
## Warning in chisq.test(device.tab[, -1], correct = FALSE): Chi-squared
## approximation may be incorrect
## Direct Organic Search Referral Social
## 1 104.554839 23.548387 15.0709677 2.8258065
## 2 6.445161 1.451613 0.9290323 0.1741935
# Goodness of Fit Test:
# 1. Make a table of counts.
# 2. Create a vector of the expected proportions.
# 3. Check the expected counts assumption.
# 4. Run the chi square test.
# 5. Interpret the chi square statistic and p-value.
# H0 : The population distribution of the variable is the
# same as the proposed distribution--There is no relationship
#between device and channel.
# (i.e. there is nothing going on)
# HA : The distributions are different--There is a relationship
#between device and 1st point of contact.
p_value<-3.101e-06
p_value <= .05 #reject null hypothesis if true
## [1] TRUE
p_value > .05 #fail to reject null hypothesis if true
## [1] FALSE
#There were less users coming in on direct/mobile than expected.
#We reject our null hypothesis. A chi square test of independence
# found there is a relationship between
#device and 1st point of contact. (chi square= 28.327, df=3,
# p=3.101e-06). The assumptions the test were met.