Causal Impact Analysis

Load data

# data <- read.csv("ecom_9.csv")
# getwd()
# setwd("U:/dpong/SDD Cannibalization Analysis/results")
# data <- read.csv("ecom_9a.csv")
# Manually change to Document Directory in Knit
# data <- read.csv("ecom_9.csv")

# setwd("U:/dpong/SDD Cannibalization Analysis/Casual-Impact-R/")
# Manually change to Document Directory in Knit
raw_data <- read.csv("ecom_9.csv")

Defining coalesce.na()

# coalesce.na(): replace the NA entries of x with the corresponding values from the
# replacement argument(s), recycling vector replacements by position
coalesce.na <- function(x, ...) {
  x.len <- length(x)
  ly <- list(...)
  for (y in ly) {
    y.len <- length(y)
    if (y.len == 1) {
      x[is.na(x)] <- y
    } else {
      if (x.len %% y.len != 0)
        warning('object length is not a multiple of first object length')
      pos <- which(is.na(x))
      x[pos] <- y[(pos - 1) %% y.len + 1]
    }
  }
  x
}
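# Quick illustration (made-up vectors, not run here): a scalar replacement fills every NA,
# while a vector replacement is recycled by position
# coalesce.na(c(1, NA, 3, NA), 0)         # -> 1 0 3 0
# coalesce.na(c(NA, NA, NA, NA), c(9, 8)) # -> 9 8 9 8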
# Remove rows where REGION_NAME is an empty string
raw_data <- raw_data %>% 
            filter(REGION_NAME != '')

Define a Custom Function to Calculate the Min

# define a custom min that returns Inf silently (instead of warning) when the input is empty
custom_min <- function(x) { if (length(x) > 0) min(x) else Inf }
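A quick note on why the wrapper exists: which() returns integer(0) for a region whose SDD_Orders column is entirely NA, and base min() would emit a warning in that case. The calls below are illustrative only and are not part of the knitted run:

# custom_min(integer(0))  # -> Inf, silently (base min() would warn here)
# custom_min(c(4, 9, 2))  # -> 2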

Pre-processing step

# join_idx1: for each region, 0 before the first week with non-missing SDD_Orders,
# then 1, 2, 3, ... from that week on
raw_data <- raw_data %>%
  group_by(REGION_NAME) %>%
  mutate(join_idx1 = +(row_number() >= custom_min(which(!is.na(SDD_Orders))))) %>%
  mutate(join_idx1 = cumsum(join_idx1))

# join_idx2: for each region, a countdown to the last week with missing SDD_Orders
# (that week = -1, the week before = -2, ...), and 0 afterwards
raw_data <- raw_data %>%
  group_by(REGION_NAME) %>%
  mutate(join_idx2 = -(row_number() <= max(which(is.na(SDD_Orders))))) %>%
  mutate(join_idx2 = rev(cumsum(rev(join_idx2))))

# join_idx: relative week index around each region's SDD launch
# (negative = pre-period, positive = post-period; gap regions are fixed up below)
raw_data <- raw_data %>%
  group_by(REGION_NAME) %>%
  mutate(join_idx = join_idx1 + join_idx2)
# Removing Las Vegas, Bakersfield, Central Florida, and Cleveland
# 11/4/2022 : Removing E COMMER, PUERTO RICO

raw_data2 <- raw_data %>% filter(!REGION_NAME %in% c('BAKERSFIELD', 'CENTRAL FLORIDA', 'LAS VEGAS', 'CLEVELAND', 'E COMMER', 'PUERTO RICO'))
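Before moving on, here is a minimal sketch of the join_idx construction above on a single made-up region (the TOY region and its values are illustrative, not from the data):

toy <- dplyr::tibble(REGION_NAME = "TOY",
                     SDD_Orders  = c(NA, NA, NA, 5, 8, 7))
toy_idx <- toy %>%
  group_by(REGION_NAME) %>%
  mutate(join_idx1 = cumsum(+(row_number() >= custom_min(which(!is.na(SDD_Orders)))))) %>%
  mutate(join_idx2 = rev(cumsum(rev(-(row_number() <= max(which(is.na(SDD_Orders)))))))) %>%
  mutate(join_idx  = join_idx1 + join_idx2)
# toy_idx$join_idx is -3 -2 -1  1  2  3: the NA weeks count down to the launch,
# and the weeks with SDD orders count up from 1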

Memphis’ SDD start date is 8/18/2022. So I’ll use 202251 as the start week for SDD.

# Replace the hard-coded week-by-week logic below with a sequence-based assignment

# raw_data2[ raw_data2$fyfw == '202151'& raw_data2$REGION_NAME == 'MEMPHIS', 'join_idx'] <- 1
# raw_data2[ raw_data2$fyfw == '202150'& raw_data2$REGION_NAME == 'MEMPHIS', 'join_idx'] <- -1

# number of post-period weeks for Memphis, then assign join_idx 1, 2, 3, ...
n_post <- nrow(raw_data2[raw_data2$fyfw >= '202151' & raw_data2$REGION_NAME == 'MEMPHIS', 'join_idx'])
raw_data2[raw_data2$fyfw >= '202151' & raw_data2$REGION_NAME == 'MEMPHIS', 'join_idx'] <- seq(1, n_post, by = 1)

# number of pre-period weeks for Memphis, then assign join_idx ..., -3, -2, -1
n_pre <- nrow(raw_data2[raw_data2$fyfw <= '202150' & raw_data2$REGION_NAME == 'MEMPHIS', 'join_idx'])
raw_data2[raw_data2$fyfw <= '202150' & raw_data2$REGION_NAME == 'MEMPHIS', 'join_idx'] <- rev(-1 * seq(1, n_pre, by = 1))

# Check the resulting join_idx sequence for Memphis
raw_data2 %>% filter(REGION_NAME == 'MEMPHIS') %>% dplyr::select(REGION_NAME, fyfw, join_idx) %>% dplyr::arrange(join_idx)
## # A tibble: 165 × 3
## # Groups:   REGION_NAME [1]
##    REGION_NAME   fyfw join_idx
##    <chr>        <int>    <int>
##  1 MEMPHIS     202001     -102
##  2 MEMPHIS     202002     -101
##  3 MEMPHIS     202003     -100
##  4 MEMPHIS     202004      -99
##  5 MEMPHIS     202005      -98
##  6 MEMPHIS     202006      -97
##  7 MEMPHIS     202007      -96
##  8 MEMPHIS     202008      -95
##  9 MEMPHIS     202009      -94
## 10 MEMPHIS     202010      -93
## # … with 155 more rows
# Use the following check for gaps and anomalies in SDD_Orders
raw_data2 %>%  filter (join_idx == 0)
## # A tibble: 17 × 19
## # Groups:   REGION_NAME [17]
##      fyfw REGION_NAME    STH_O…¹ BOPUS…² NDD_O…³ SDD_O…⁴ STH_R…⁵ BOPUS…⁶ NDD_R…⁷
##     <int> <chr>            <int>   <int>   <int>   <int>   <dbl>   <dbl>   <dbl>
##  1 202252 PHILADELPHIA       482    1003     290      NA  56297.  89441.  25926.
##  2 202252 PITTSBURGH         245     495     235      NA  26432.  43921.  22852.
##  3 202301 DETROIT            415     800     469      NA  48166.  79659.  48991.
##  4 202301 HARTFORD           316     928     264      NA  40377.  84846.  23622.
##  5 202301 INDIANAPOLIS       350     687     249      NA  35776.  63649.  22523.
##  6 202301 LOUISVILLE         191     477     154      NA  22812.  41256.  12629.
##  7 202301 MILWAUKEE          113     845     102      NA  13635.  77764.  10612.
##  8 202301 MINNEAPOLIS        401     732     241      NA  49542.  66820.  21735.
##  9 202301 NASHVILLE          212     504     139      NA  26356.  39990.  13283.
## 10 202301 NEW JERSEY         369    1158     452      NA  37855.  96610.  39005.
## 11 202301 NEW ORLEANS        191     478     155      NA  21471.  45841.  12745.
## 12 202301 OKLAHOMA CITY      238     499     186      NA  23332.  46261.  19172.
## 13 202301 RICHMOND           350     774     175      NA  43226.  63531.  15367.
## 14 202301 SALT LAKE CITY     385     668     186      NA  51274.  54146   14594.
## 15 202301 WASHINGTON D.…     594    1596     498      NA  70068. 140551.  46063.
## 16 202303 S.CENTRAL TEX…     336     680     442      NA  42967.  59890.  44511.
## 17 202303 SOUTH FLORIDA      328     723     350      57  35533.  60480.  33246.
## # … with 10 more variables: SDD_Retail <dbl>, rest_BOPUS_Orders <int>,
## #   rest_NDD_Orders <int>, rest_STH_Orders <int>, rest_BOPUS_Retail <dbl>,
## #   rest_NDD_Retail <dbl>, rest_STH_Retail <dbl>, join_idx1 <int>,
## #   join_idx2 <int>, join_idx <int>, and abbreviated variable names
## #   ¹​STH_Orders, ²​BOPUS_Orders, ³​NDD_Orders, ⁴​SDD_Orders, ⁵​STH_Retail,
## #   ⁶​BOPUS_Retail, ⁷​NDD_Retail
appendix.1<- c("SOUTH FLORIDA",202152)
appendix.2 <- c("S.CENTRAL TEXAS", 202305)
appendix.3 <- c("WASHINGTON D.C.", 202305)
appendix.4 <- c("SALT LAKE CITY", 202305)
appendix.5 <- c("RICHMOND", 202305)
appendix.6 <- c("OKLAHOMA CITY", 202305)
appendix.7 <- c("NEW ORLEANS", 202305)
appendix.8 <- c("NEW JERSEY", 202305)
appendix.9 <- c("NASHVILLE", 202305)
appendix.10 <-c("MINNEAPOLIS", 202305)
appendix.11 <-c("MILWAUKEE", 202305)
appendix.12 <-c("LOUISVILLE", 202305)
appendix.13 <-c("INDIANAPOLIS", 202305)
appendix.14 <-c("HARTFORD", 202305)
appendix.15 <-c("DETROIT", 202305)
appendix.16 <-c("PITTSBURGH", 202303)
appendix.17 <-c("PHILADELPHIA", 202303)

df <- data.frame(matrix(ncol = 2, nrow = 0))
x <- c("region", "seq_start")
colnames(df) <- x
# df <- rbind(df, appendix.1) # this option doesn't preserve the original data.frame's colnames
# str(appendix.1)
df <- rbindlist(list(df, as.list(appendix.1)))
df <- rbindlist(list(df, as.list(appendix.2)))
df <- rbindlist(list(df, as.list(appendix.3)))
df <- rbindlist(list(df, as.list(appendix.4)))
df <- rbindlist(list(df, as.list(appendix.5)))
df <- rbindlist(list(df, as.list(appendix.6)))
df <- rbindlist(list(df, as.list(appendix.7)))
df <- rbindlist(list(df, as.list(appendix.8)))
df <- rbindlist(list(df, as.list(appendix.9)))
df <- rbindlist(list(df, as.list(appendix.10)))
df <- rbindlist(list(df, as.list(appendix.11)))
df <- rbindlist(list(df, as.list(appendix.12)))
df <- rbindlist(list(df, as.list(appendix.13)))
df <- rbindlist(list(df, as.list(appendix.14)))
df <- rbindlist(list(df, as.list(appendix.15)))
df <- rbindlist(list(df, as.list(appendix.16)))
df <- rbindlist(list(df, as.list(appendix.17)))
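For reference, the same lookup table could be assembled in one step; a sketch only (df_alt is a hypothetical name, and mget() simply collects the appendix.1 through appendix.17 vectors defined above in order):

# df_alt <- rbindlist(lapply(mget(paste0("appendix.", 1:17)), as.list))
# setnames(df_alt, c("region", "seq_start"))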

Defining sequence end ($seq_end)

df$seq_end <- as.character(as.numeric(df$seq_start) - 1)
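# Note (an assumption about the fiscal-week format): for start weeks like 202301 this
# simple "- 1" yields 202300, which is not a real fiscal week, but it is only used as an
# upper bound in a <= comparison below, so the pre/post split still lands correctly.
# e.g. "SOUTH FLORIDA": seq_start 202152 -> seq_end 202151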
# Reset join_idx for one region: weeks at/after seq_start become 1, 2, 3, ... and
# weeks at/before seq_end become ..., -3, -2, -1
pre_post_period_adj <- function(dat1 = raw_data2, dat2 = df) {
  # number of post-period weeks, then assign join_idx 1, 2, 3, ...
  n_post <- nrow(dat1[dat1$fyfw >= dat2$seq_start & dat1$REGION_NAME == dat2$region, 'join_idx'])
  dat1[dat1$fyfw >= dat2$seq_start & dat1$REGION_NAME == dat2$region, 'join_idx'] <- seq(1, n_post, by = 1)
  print(n_post)
  # number of pre-period weeks, then assign join_idx ..., -3, -2, -1
  n_pre <- nrow(dat1[dat1$fyfw <= dat2$seq_end & dat1$REGION_NAME == dat2$region, 'join_idx'])
  dat1[dat1$fyfw <= dat2$seq_end & dat1$REGION_NAME == dat2$region, 'join_idx'] <- rev(-1 * seq(1, n_pre, by = 1))
  print(n_pre)
  return(dat1[dat1$REGION_NAME == dat2$region, ])
}
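# For reference, one iteration of the loop below boils down to a single call such as
# (illustrative region name, not run here):
# pre_post_period_adj(raw_data2 %>% filter(REGION_NAME == 'SOUTH FLORIDA'),
#                     df %>% filter(region == 'SOUTH FLORIDA'))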
# Silence warnings produced by the assignments in the loop below
options(warn = -1)

for (r in 1:nrow(df)) {
  raw_data2[raw_data2['REGION_NAME'] == as.character(df[r, 1]), ] <-
    pre_post_period_adj(raw_data2 %>% filter(REGION_NAME == as.character(df[r, 1])),
                        df %>% filter(region == as.character(df[r, 1])))
}
## [1] 62
## [1] 103
## [1] 5
## [1] 160
## [1] 5
## [1] 160
## [1] 5
## [1] 160
## [1] 5
## [1] 160
## [1] 5
## [1] 160
## [1] 5
## [1] 160
## [1] 5
## [1] 160
## [1] 5
## [1] 160
## [1] 5
## [1] 160
## [1] 5
## [1] 160
## [1] 5
## [1] 160
## [1] 5
## [1] 160
## [1] 5
## [1] 160
## [1] 5
## [1] 160
## [1] 7
## [1] 158
## [1] 7
## [1] 158
# nrow <- nrow(raw_data2[ raw_data2$fyfw >= '202252' & raw_data2$REGION_NAME == 'SOUTH FLORIDA', 'join_idx'])
# 
# raw_data2[ raw_data2$fyfw >= '202252' & raw_data2$REGION_NAME == 'SOUTH FLORIDA', 'join_idx'] <- seq(1,nrow,by =1)
# nrow <- nrow(raw_data2[ raw_data2$fyfw <= '202251' & raw_data2$REGION_NAME == 'SOUTH FLORIDA', 'join_idx'])
# 
# raw_data2[ raw_data2$fyfw <= '202251' & raw_data2$REGION_NAME == 'SOUTH FLORIDA', 'join_idx'] <- rev(-1 * seq(1,nrow,by =1))
# raw_data2 %>%  filter (REGION_NAME == 'SOUTH FLORIDA') %>% dplyr::select(REGION_NAME, fyfw,join_idx) %>% dplyr::arrange (join_idx)

Create a summary table for the metrics we want to evaluate in the causal impact analysis

# Note: contains("BOPUS_Orders") matches both BOPUS_Orders and rest_BOPUS_Orders (and
# likewise for the other metrics), so each row-wise sum adds a region's own volume to the
# rest-of-chain volume for that week; the totals are then aggregated by relative week (join_idx).
summary <- raw_data2 %>%
  group_by(join_idx) %>%
  summarise(bopus_o = sum(rowSums(across(contains("BOPUS_Orders"), ~ coalesce.na(.x, 0)))),
            bopus_r = sum(rowSums(across(contains("BOPUS_Retail"), ~ coalesce.na(.x, 0)))),
            ndd_o   = sum(rowSums(across(contains("NDD_Orders"),   ~ coalesce.na(.x, 0)))),
            ndd_r   = sum(rowSums(across(contains("NDD_Retail"),   ~ coalesce.na(.x, 0)))),
            sth_o   = sum(rowSums(across(contains("STH_Orders"),   ~ coalesce.na(.x, 0)))),
            sth_r   = sum(rowSums(across(contains("STH_Retail"),   ~ coalesce.na(.x, 0))))
           )
# Trim to the smallest common pre-period: East Carolina's post.period starts at week 202247, so the shortest pre-period across regions is 150 weeks

summary <- summary[summary$join_idx >= -150, ]
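# Optional sanity check (not run here): provided no join_idx == 0 rows remain after the
# adjustments above, rows 1..150 of `summary` are the pre-period weeks (join_idx -150..-1)
# and rows 151+ are post-period weeks, which is what pre.period / post.period assume below.
# range(summary$join_idx); nrow(summary)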
# str(raw_data)

# The block below stays commented out until we: a) filter to a common minimum dataset
# using join_idx1 and join_idx2 as parameters, b) decide whether join_idx still needs to
# be hard-coded for Memphis, and c) QA the results.

# summary <- raw_data %>% group_by(join_idx) %>%
# summarise(bopus_o = sum(rowSums( across(contains("BOPUS_Orders"), ~ coalesce.na(.x, 0)))),
#                bopus_r = sum(rowSums( across(contains("BOPUS_Retail"), ~ coalesce.na(.x, 0)))),
#                ndd_o   = sum(rowSums( across(contains("NDD_Orders"), ~ coalesce.na(.x, 0)))),
#                ndd_r   = sum(rowSums( across(contains("NDD_Retail"), ~ coalesce.na(.x, 0)))),
#                sth_o   = sum(rowSums( across(contains("STH_Orders"), ~ coalesce.na(.x, 0)))),
#                sth_r   = sum(rowSums( across(contains("STH_Retail"), ~ coalesce.na(.x, 0))))
#               )

Appendix-level calculation

# raw_data is still grouped by REGION_NAME, so this yields each region's total SDD retail,
# repeated on every row of that region
sdd_summary <- raw_data %>%
  transmute(sdd_r = sum(rowSums(across(contains("SDD_Retail"), ~ coalesce.na(.x, 0)))))

Summarizing the table into useful time series (earlier approaches, kept commented out)

# first method
# data %>% dplyr:: select(contains("BOPUS_Orders"))

# summary<- data %>%
#       # filter(REGION_NAME == 'PHOENIX' ) %>%
# # group_by(REGION_NAME, .groups ) %>%
#       transmute(bopus_o = rowSums( across(contains("BOPUS_Orders"), ~ coalesce.na(.x, 0))),
#                 bopus_r = rowSums( across(contains("BOPUS_Retail"), ~ coalesce.na(.x, 0))),
#                 ndd_o   = rowSums( across(contains("NDD_Orders"), ~ coalesce.na(.x, 0))),
#                 ndd_r   = rowSums( across(contains("NDD_Retail"), ~ coalesce.na(.x, 0))),
#                 sth_o   = rowSums( across(contains("STH_Orders"), ~ coalesce.na(.x, 0))),
#                 sth_r   = rowSums( across(contains("STH_Retail"), ~ coalesce.na(.x, 0)))
#                )

# calculations of summary (summary1) for rest of chain 

# summary1 <- data %>%
#   filter(!REGION_NAME %in% c('BAKERSFIELD', 'LAS VEGAS', 'CENTRAL FLORIDA')  ) %>%
#           group_by(fyfw) %>%
#       summarise(bopus_o = sum(coalesce.na(rest_BOPUS_Orders, 0)),
#                 bopus_r = sum(coalesce.na(rest_BOPUS_Retail, 0)),
#                 ndd_o   = sum(coalesce.na(rest_NDD_Orders, 0)),
#                 ndd_r   = sum(coalesce.na(rest_NDD_Retail,0)),
#                 sth_o   = sum(coalesce.na(rest_STH_Orders,0)),
#                 sth_r   = sum(coalesce.na(rest_STH_Retail,0))
#                )

Show the data

par(cex = 1.3)
matplot(summary$bopus_r, type = "l", lwd= 3)
# matplot(summary1$bopus_r, type = "l", lwd= 3)
abline(v = 150)   # mark the end of the 150-week pre-period

# 152 for Phoenix 
# 55 for LAS VEGAS
# 51 for Bakersfield

Specify pre- and post-period

# post.period assumes NORTH FLORIDA's start week of 202307 and data through 202309, so the post.period end must be adjusted manually whenever the source data changes
pre.period <- c(1, 150)
post.period <- c(151, 153)
# ending for post.period is where we stop before Cleveland starts
# post.period to end at 148 just to account for the shortest time series in Central Florida
# standard ends at 162
# post.period <- c(153, 162)
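# Possible alternative (assumption: every row of `summary` after the 150 pre-period weeks
# is a usable post-period week), so the end no longer needs manual editing on each refresh:
# post.period <- c(151, nrow(summary))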

Causal impact analysis

# summary 
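# Columns of `summary` are positional: 1 = join_idx, 2 = bopus_o, 3 = bopus_r, 4 = ndd_o,
# 5 = ndd_r, 6 = sth_o, 7 = sth_r. CausalImpact() treats the first selected column as the
# response series and any remaining columns as covariates.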
impact_br <- CausalImpact(summary %>% dplyr::select(2,1), pre.period, post.period)
impact_nr <- CausalImpact(summary %>% dplyr::select(4,3), pre.period, post.period)
impact_sr <- CausalImpact(summary %>% dplyr::select(6,5), pre.period, post.period)


# summary1
# impact_br <- CausalImpact(summary1 %>% dplyr::select(3,2) , pre.period, post.period)
# impact_nr <- CausalImpact(summary1 %>% dplyr::select(5,4), pre.period, post.period)
# impact_sr <- CausalImpact(summary1 %>% dplyr::select(7,6), pre.period, post.period)
plot(impact_br)

# plot(impact_bo)
plot(impact_nr)

# plot(impact_no)
plot(impact_sr)

# plot(impact_so)

Printing Results

# print(impact)
print(impact_br)
## Posterior inference {CausalImpact}
## 
##                          Average          Cumulative      
## Actual                   37150            111450          
## Prediction (s.d.)        38709 (2055)     116127 (6165)   
## 95% CI                   [34641, 42908]   [103924, 128725]
##                                                           
## Absolute effect (s.d.)   -1559 (2055)     -4677 (6165)    
## 95% CI                   [-5758, 2509]    [-17275, 7526]  
##                                                           
## Relative effect (s.d.)   -4% (5.3%)       -4% (5.3%)      
## 95% CI                   [-15%, 6.5%]     [-15%, 6.5%]    
## 
## Posterior tail-area probability p:   0.20275
## Posterior prob. of a causal effect:  80%
## 
## For more details, type: summary(impact, "report")
# print(impact_bo) 
print(impact_nr) 
## Posterior inference {CausalImpact}
## 
##                          Average          Cumulative    
## Actual                   12791            38374         
## Prediction (s.d.)        12937 (551)      38811 (1654)  
## 95% CI                   [11799, 14003]   [35396, 42010]
##                                                         
## Absolute effect (s.d.)   -146 (551)       -437 (1654)   
## 95% CI                   [-1212, 993]     [-3636, 2978] 
##                                                         
## Relative effect (s.d.)   -1.1% (4.3%)     -1.1% (4.3%)  
## 95% CI                   [-9.4%, 7.7%]    [-9.4%, 7.7%] 
## 
## Posterior tail-area probability p:   0.38955
## Posterior prob. of a causal effect:  61%
## 
## For more details, type: summary(impact, "report")
# print(impact_no) 
print(impact_sr) 
## Posterior inference {CausalImpact}
## 
##                          Average          Cumulative    
## Actual                   16145            48436         
## Prediction (s.d.)        16400 (615)      49200 (1845)  
## 95% CI                   [15172, 17585]   [45516, 52754]
##                                                         
## Absolute effect (s.d.)   -255 (615)       -764 (1845)   
## 95% CI                   [-1439, 973]     [-4318, 2920] 
##                                                         
## Relative effect (s.d.)   -1.6% (3.8%)     -1.6% (3.8%)  
## 95% CI                   [-8.8%, 5.9%]    [-8.8%, 5.9%] 
## 
## Posterior tail-area probability p:   0.35151
## Posterior prob. of a causal effect:  65%
## 
## For more details, type: summary(impact, "report")
# print(impact_so)

# To print the mean relative effect for one of the models, e.g.:
# mean(impact_br$summary$RelEffect)

Summarizing the results in prose

# summary(impact_bo, "report")
summary(impact_br, "report")
## Analysis report {CausalImpact}
## 
## 
## During the post-intervention period, the response variable had an average value of approx. 37.15K. In the absence of an intervention, we would have expected an average response of 38.71K. The 95% interval of this counterfactual prediction is [34.64K, 42.91K]. Subtracting this prediction from the observed response yields an estimate of the causal effect the intervention had on the response variable. This effect is -1.56K with a 95% interval of [-5.76K, 2.51K]. For a discussion of the significance of this effect, see below.
## 
## Summing up the individual data points during the post-intervention period (which can only sometimes be meaningfully interpreted), the response variable had an overall value of 111.45K. Had the intervention not taken place, we would have expected a sum of 116.13K. The 95% interval of this prediction is [103.92K, 128.73K].
## 
## The above results are given in terms of absolute numbers. In relative terms, the response variable showed a decrease of -4%. The 95% interval of this percentage is [-15%, +6%].
## 
## This means that, although it may look as though the intervention has exerted a negative effect on the response variable when considering the intervention period as a whole, this effect is not statistically significant, and so cannot be meaningfully interpreted. The apparent effect could be the result of random fluctuations that are unrelated to the intervention. This is often the case when the intervention period is very long and includes much of the time when the effect has already worn off. It can also be the case when the intervention period is too short to distinguish the signal from the noise. Finally, failing to find a significant effect can happen when there are not enough control variables or when these variables do not correlate well with the response variable during the learning period.
## 
## The probability of obtaining this effect by chance is p = 0.203. This means the effect may be spurious and would generally not be considered statistically significant.
# summary(impact_no, "report")
summary(impact_nr, "report")
## Analysis report {CausalImpact}
## 
## 
## During the post-intervention period, the response variable had an average value of approx. 12.79K. In the absence of an intervention, we would have expected an average response of 12.94K. The 95% interval of this counterfactual prediction is [11.80K, 14.00K]. Subtracting this prediction from the observed response yields an estimate of the causal effect the intervention had on the response variable. This effect is -0.15K with a 95% interval of [-1.21K, 0.99K]. For a discussion of the significance of this effect, see below.
## 
## Summing up the individual data points during the post-intervention period (which can only sometimes be meaningfully interpreted), the response variable had an overall value of 38.37K. Had the intervention not taken place, we would have expected a sum of 38.81K. The 95% interval of this prediction is [35.40K, 42.01K].
## 
## The above results are given in terms of absolute numbers. In relative terms, the response variable showed a decrease of -1%. The 95% interval of this percentage is [-9%, +8%].
## 
## This means that, although it may look as though the intervention has exerted a negative effect on the response variable when considering the intervention period as a whole, this effect is not statistically significant, and so cannot be meaningfully interpreted. The apparent effect could be the result of random fluctuations that are unrelated to the intervention. This is often the case when the intervention period is very long and includes much of the time when the effect has already worn off. It can also be the case when the intervention period is too short to distinguish the signal from the noise. Finally, failing to find a significant effect can happen when there are not enough control variables or when these variables do not correlate well with the response variable during the learning period.
## 
## The probability of obtaining this effect by chance is p = 0.39. This means the effect may be spurious and would generally not be considered statistically significant.
# summary(impact_so, "report")
summary(impact_sr, "report")
## Analysis report {CausalImpact}
## 
## 
## During the post-intervention period, the response variable had an average value of approx. 16.15K. In the absence of an intervention, we would have expected an average response of 16.40K. The 95% interval of this counterfactual prediction is [15.17K, 17.58K]. Subtracting this prediction from the observed response yields an estimate of the causal effect the intervention had on the response variable. This effect is -0.25K with a 95% interval of [-1.44K, 0.97K]. For a discussion of the significance of this effect, see below.
## 
## Summing up the individual data points during the post-intervention period (which can only sometimes be meaningfully interpreted), the response variable had an overall value of 48.44K. Had the intervention not taken place, we would have expected a sum of 49.20K. The 95% interval of this prediction is [45.52K, 52.75K].
## 
## The above results are given in terms of absolute numbers. In relative terms, the response variable showed a decrease of -2%. The 95% interval of this percentage is [-9%, +6%].
## 
## This means that, although it may look as though the intervention has exerted a negative effect on the response variable when considering the intervention period as a whole, this effect is not statistically significant, and so cannot be meaningfully interpreted. The apparent effect could be the result of random fluctuations that are unrelated to the intervention. This is often the case when the intervention period is very long and includes much of the time when the effect has already worn off. It can also be the case when the intervention period is too short to distinguish the signal from the noise. Finally, failing to find a significant effect can happen when there are not enough control variables or when these variables do not correlate well with the response variable during the learning period.
## 
## The probability of obtaining this effect by chance is p = 0.352. This means the effect may be spurious and would generally not be considered statistically significant.