#####################Correlation Analysis using Google Analytics Data###########################

#Connect Google Analytics to R using the 
#googleAnalyticsR package
#browseURL("https://cran.r-project.org/web/packages/googleAnalyticsR/googleAnalyticsR.pdf")

#install and load packages
if(!require("googleAnalyticsR")){
  install.packages("googleAnalyticsR")
  library(googleAnalyticsR)
}
## Loading required package: googleAnalyticsR
## Warning in as.POSIXlt.POSIXct(x, tz): unknown timezone 'zone/tz/2017c.1.0/
## zoneinfo/Europe/Stockholm'
## 2018-02-04 09:08:20> Default Google Project for googleAnalyticsR is now set.  This is shared with all googleAnalyticsR users. 
##  If making a lot of API calls, please: 
##  1) create your own Google Project at https://console.developers.google.com 
##  2) Activate the Google Analytics Reporting API 
##  3) set options(googleAuthR.client_id) and options(googleAuthR.client_secret) 
##  4) Reload the package.
library(ggplot2)
library(tidyverse)
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
#authorize Google Analytics
ga_auth()
## Token cache file: .httr-oauth
## 2018-02-04 09:08:22> Authenticated
#get account info so you can see what accounts you have access to
account_list<-ga_account_list()

#uncomment "account_list" below to take a look at your accounts in the console window
#account_list

#assign the view you want to connect to the view_id variable
#it is the viewID column
view_id<-123847950 #my own website blog

#in order to get the API names you will need to access the data
#you can use the Query Explorer (hit command + click): https://ga-dev-tools.appspot.com/query-explorer/
#which will allow you to practice your queries
#or the Dimensions & Metrics Explorer:
#https://developers.google.com/analytics/devguides/reporting/core/dimsmets#cats=user
#https://developers.google.com/analytics/devguides/reporting/core/dimsmets


################################Correlation###########################################
#pull the data
correlation_ex<- google_analytics_4(view_id, 
                              date_range = c("2017-01-01","2017-12-31"),
                              metrics = c("sessions", "pageviews", "entrances", "bounces"),
                              dimensions = c("channelGrouping", "date", "deviceCategory"))
## 2018-02-04 09:08:24> Downloaded [118] rows from a total of [118].
#lets view our new data
correlation_ex
##     channelGrouping       date deviceCategory sessions pageviews entrances
## 1            Direct 2017-01-15        desktop        4        70         4
## 2            Direct 2017-01-15         mobile        1         3         1
## 3            Direct 2017-01-17        desktop        1         1         1
## 4            Direct 2017-01-30        desktop        1         3         1
## 5            Direct 2017-02-05        desktop        1         5         1
## 6            Direct 2017-02-09        desktop        1         2         1
## 7            Direct 2017-03-12        desktop       18        18        18
## 8            Direct 2017-03-19        desktop        3         5         3
## 9            Direct 2017-03-24        desktop        1        10         1
## 10           Direct 2017-03-28        desktop        1         1         1
## 11           Direct 2017-04-05        desktop        1         5         1
## 12           Direct 2017-04-17        desktop        1         1         1
## 13           Direct 2017-04-29        desktop        3        23         3
## 14           Direct 2017-05-06        desktop        8       139         8
## 15           Direct 2017-05-06         mobile        1         4         1
## 16           Direct 2017-05-07        desktop        4        23         4
## 17           Direct 2017-05-08        desktop        1        14         1
## 18           Direct 2017-05-10        desktop        7        46         7
## 19           Direct 2017-05-11        desktop        1         1         1
## 20           Direct 2017-05-12        desktop        1         1         1
## 21           Direct 2017-05-13        desktop        3        26         3
## 22           Direct 2017-05-14        desktop        2        10         2
## 23           Direct 2017-05-16        desktop        6        16         6
## 24           Direct 2017-05-17        desktop        2         2         2
## 25           Direct 2017-05-18        desktop        7        36         7
## 26           Direct 2017-05-19        desktop        2         5         2
## 27           Direct 2017-05-20        desktop        6        49         6
## 28           Direct 2017-05-21        desktop        1         1         1
## 29           Direct 2017-05-22        desktop        1         2         1
## 30           Direct 2017-05-23        desktop        5        10         5
## 31           Direct 2017-05-24        desktop        1         1         1
## 32           Direct 2017-05-25        desktop        2         2         2
## 33           Direct 2017-05-30        desktop        2         2         2
## 34           Direct 2017-06-02        desktop       16        16        16
## 35           Direct 2017-06-04        desktop        3        19         3
## 36           Direct 2017-06-05        desktop        3        15         3
## 37           Direct 2017-06-07        desktop        2         3         2
## 38           Direct 2017-06-09        desktop        1         6         1
## 39           Direct 2017-06-12        desktop        2         3         2
## 40           Direct 2017-06-16        desktop        2         2         2
## 41           Direct 2017-06-20        desktop        2         5         2
## 42           Direct 2017-06-29        desktop        1         6         1
## 43           Direct 2017-07-07        desktop        2         4         2
## 44           Direct 2017-07-17        desktop       13        13        13
## 45           Direct 2017-08-17        desktop       10        10        10
## 46           Direct 2017-08-24        desktop        2         2         2
## 47           Direct 2017-09-11        desktop        1         1         1
## 48           Direct 2017-10-01        desktop        4        38         4
## 49           Direct 2017-10-05         mobile        1         2         1
## 50           Direct 2017-10-18        desktop        2         4         2
## 51           Direct 2017-10-24        desktop        2        10         2
## 52           Direct 2017-10-31        desktop        1         2         1
## 53           Direct 2017-11-05        desktop        5         5         5
## 54           Direct 2017-11-11        desktop        1         2         1
## 55           Direct 2017-11-30        desktop        1         2         1
## 56           Direct 2017-12-03        desktop        2        20         2
## 57           Direct 2017-12-11         mobile        1         2         1
## 58           Direct 2017-12-12        desktop        1         2         1
## 59           Direct 2017-12-24        desktop        1         2         1
## 60   Organic Search 2017-01-07         mobile        2        15         2
## 61   Organic Search 2017-02-07        desktop        1         2         1
## 62   Organic Search 2017-04-26         mobile        1         1         1
## 63   Organic Search 2017-04-27         mobile        1         3         1
## 64   Organic Search 2017-05-06        desktop        2         5         2
## 65   Organic Search 2017-05-07         mobile        1         8         1
## 66   Organic Search 2017-05-08         mobile        1         1         1
## 67   Organic Search 2017-05-10        desktop        1         1         1
## 68   Organic Search 2017-05-10         mobile        3        15         3
## 69   Organic Search 2017-05-13        desktop        1         1         1
## 70   Organic Search 2017-05-18        desktop        1         3         1
## 71   Organic Search 2017-05-20        desktop        1         1         1
## 72   Organic Search 2017-05-23        desktop        1         3         1
## 73   Organic Search 2017-06-13        desktop        1         1         1
## 74   Organic Search 2017-07-04        desktop        1        14         1
## 75   Organic Search 2017-08-18        desktop        1         1         1
## 76   Organic Search 2017-08-27        desktop        1         1         1
## 77   Organic Search 2017-09-01        desktop        1         4         1
## 78   Organic Search 2017-09-03        desktop        1         1         1
## 79   Organic Search 2017-09-09        desktop        1         1         1
## 80   Organic Search 2017-09-28        desktop        1         1         1
## 81   Organic Search 2017-10-02        desktop        1         2         1
## 82   Organic Search 2017-10-12        desktop        1         2         1
## 83   Organic Search 2017-10-17        desktop        1         2         1
## 84   Organic Search 2017-10-24        desktop        1         2         1
## 85   Organic Search 2017-10-25        desktop        1         2         1
## 86   Organic Search 2017-11-06        desktop        1         2         1
## 87   Organic Search 2017-11-07        desktop        2         4         2
## 88   Organic Search 2017-11-15        desktop        1         1         1
## 89   Organic Search 2017-11-22        desktop        1         2         1
## 90   Organic Search 2017-11-24        desktop        1         4         1
## 91   Organic Search 2017-11-29        desktop        1         2         1
## 92   Organic Search 2017-12-04        desktop        1         2         1
## 93   Organic Search 2017-12-08        desktop        1         2         1
## 94   Organic Search 2017-12-11        desktop        1         2         1
## 95   Organic Search 2017-12-12        desktop        1         2         1
## 96   Organic Search 2017-12-17        desktop        1         2         1
## 97         Referral 2017-01-04        desktop        1         1         1
## 98         Referral 2017-04-23        desktop        1         1         1
## 99         Referral 2017-05-04        desktop        1         1         1
## 100        Referral 2017-05-05        desktop        1         1         1
## 101        Referral 2017-05-08        desktop        1         1         1
## 102        Referral 2017-05-22        desktop        1         1         1
## 103        Referral 2017-06-12        desktop        1         6         1
## 104        Referral 2017-06-20        desktop        1         3         1
## 105        Referral 2017-06-27        desktop        1         1         1
## 106        Referral 2017-07-01        desktop        2         2         2
## 107        Referral 2017-08-01        desktop        1         1         1
## 108        Referral 2017-10-02         mobile        1         2         1
## 109        Referral 2017-10-21        desktop        1         1         1
## 110        Referral 2017-10-22        desktop        4         4         4
## 111        Referral 2017-12-21        desktop        1         2         1
## 112        Referral 2017-12-30        desktop        1         1         1
## 113        Referral 2017-12-31        desktop        1         1         1
## 114          Social 2017-01-06         mobile        1         1         1
## 115          Social 2017-04-18        desktop        1         1         1
## 116          Social 2017-05-13         mobile        1         1         1
## 117          Social 2017-05-25         mobile        1         1         1
## 118          Social 2017-06-11         mobile        1         1         1
##     bounces
## 1         1
## 2         0
## 3         1
## 4         0
## 5         0
## 6         0
## 7        18
## 8         2
## 9         0
## 10        1
## 11        0
## 12        1
## 13        2
## 14        1
## 15        0
## 16        0
## 17        0
## 18        1
## 19        1
## 20        1
## 21        2
## 22        1
## 23        4
## 24        2
## 25        4
## 26        1
## 27        2
## 28        1
## 29        0
## 30        4
## 31        1
## 32        2
## 33        2
## 34       16
## 35        1
## 36        1
## 37        1
## 38        0
## 39        1
## 40        2
## 41        1
## 42        0
## 43        1
## 44       13
## 45       10
## 46        2
## 47        1
## 48        1
## 49        0
## 50        0
## 51        0
## 52        0
## 53        5
## 54        0
## 55        0
## 56        0
## 57        0
## 58        0
## 59        0
## 60        0
## 61        0
## 62        1
## 63        0
## 64        0
## 65        0
## 66        1
## 67        1
## 68        1
## 69        1
## 70        0
## 71        1
## 72        0
## 73        1
## 74        0
## 75        1
## 76        1
## 77        0
## 78        1
## 79        1
## 80        1
## 81        0
## 82        0
## 83        0
## 84        0
## 85        0
## 86        0
## 87        0
## 88        1
## 89        0
## 90        0
## 91        0
## 92        0
## 93        0
## 94        0
## 95        0
## 96        0
## 97        1
## 98        1
## 99        1
## 100       1
## 101       1
## 102       1
## 103       0
## 104       0
## 105       1
## 106       2
## 107       1
## 108       0
## 109       1
## 110       4
## 111       0
## 112       1
## 113       1
## 114       1
## 115       1
## 116       1
## 117       1
## 118       1
#check date column
class(correlation_ex$date)
## [1] "Date"
#how many rows of data per channel
correlation_ex %>%
  group_by(channelGrouping) %>%
  tally()
## # A tibble: 4 x 2
##   channelGrouping     n
##             <chr> <int>
## 1          Direct    59
## 2  Organic Search    37
## 3        Referral    17
## 4          Social     5
#subset numeric data
corr_metrics_data<-correlation_ex[,c("sessions", "pageviews", "entrances", "bounces")]

#view corr matrix
cor(corr_metrics_data)
##            sessions pageviews entrances   bounces
## sessions  1.0000000 0.4572692 1.0000000 0.9144177
## pageviews 0.4572692 1.0000000 0.4572692 0.1323279
## entrances 1.0000000 0.4572692 1.0000000 0.9144177
## bounces   0.9144177 0.1323279 0.9144177 1.0000000
#The table is mirrored in the diagonal and provides the correlation coefficient (aka, “rr”) between each pair of values that intersect in the cell. 1 means a perfect correlation, 0 means no correlation and -1 means a perfect negative correlation.

#view correlation plot
pairs(corr_metrics_data)

#A correlation of less than 1 may be a quick diagnostic that something is wrong with the tracking.

#Are web channels correlated?


## Get only desktop rows, and the date, channelGrouping and sessions columns
pivoted <- correlation_ex %>% 
  filter(deviceCategory == "desktop") %>% 
  select(date, channelGrouping, sessions) %>%
  spread(channelGrouping, sessions)


#view results
View(pivoted)
## Warning: running command ''/usr/bin/otool' -L '/Library/Frameworks/
## R.framework/Resources/modules/R_de.so'' had status 1
## Get rid of any NA's and replace with 0
pivoted[is.na(pivoted)] <- 0

#view results
View(pivoted)
## Warning: running command ''/usr/bin/otool' -L '/Library/Frameworks/
## R.framework/Resources/modules/R_de.so'' had status 1
#remove unnumeric values
pivoted_corr_data<-pivoted[,-1]

#view results
View(pivoted_corr_data)
## Warning: running command ''/usr/bin/otool' -L '/Library/Frameworks/
## R.framework/Resources/modules/R_de.so'' had status 1
pivoted_corr_table<-round(cor(pivoted_corr_data),2)

pairs(pivoted_corr_table)

#####################Is there a relationship between device and marketing channel? Chi-Square Example####################

#lets check device categories of our visitors
deviceCat<- google_analytics_4(view_id, 
                              date_range = c("2017-01-01","2017-11-24"),
                              metrics = "users",
                              dimensions = c("deviceCategory", "channelGrouping"))
## 2018-02-04 09:08:29> Downloaded [8] rows from a total of [8].
#lets view our data
deviceCat
##   deviceCategory channelGrouping users
## 1        desktop          Direct   109
## 2        desktop  Organic Search    21
## 3        desktop        Referral    15
## 4        desktop          Social     1
## 5         mobile          Direct     2
## 6         mobile  Organic Search     4
## 7         mobile        Referral     1
## 8         mobile          Social     2
#create column for channelGrouping using spread function
device.tab<-spread(deviceCat, channelGrouping, users)

#chi square test
chisq.test(device.tab[,-1])
## Warning in chisq.test(device.tab[, -1]): Chi-squared approximation may be
## incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  device.tab[, -1]
## X-squared = 28.327, df = 3, p-value = 3.101e-06
#the actual distribution
device.tab
##   deviceCategory Direct Organic Search Referral Social
## 1        desktop    109             21       15      1
## 2         mobile      2              4        1      2
#the expected values for each of the following groups
chisq.test(device.tab[,-1], correct=FALSE)$expected
## Warning in chisq.test(device.tab[, -1], correct = FALSE): Chi-squared
## approximation may be incorrect
##       Direct Organic Search   Referral    Social
## 1 104.554839      23.548387 15.0709677 2.8258065
## 2   6.445161       1.451613  0.9290323 0.1741935
# Goodness of Fit Test:
# 1. Make a table of counts.
# 2. Create a vector of the expected proportions.
# 3. Check the expected counts assumption.
# 4. Run the chi square test.
# 5. Interpret the chi square statistic and p-value.

# H0 : The population distribution of the variable is the
# same as the proposed distribution--There is no relationship
#between device and channel.
# (i.e. there is nothing going on)
# HA : The distributions are different--There is a relationship
#between device and 1st point of contact.

p_value<-3.101e-06
p_value <= .05 #reject null hypothesis if true
## [1] TRUE
p_value > .05 #fail to reject null hypothesis if true
## [1] FALSE
#There were less users coming in on direct/mobile than expected.
#We reject our null hypothesis.  A chi square test of independence
# found there is a relationship between
#device and 1st point of contact. (chi square= 28.327, df=3,
# p=3.101e-06). The assumptions the test were met.