library(zoo)
library(lares)
library(yfR)
library(readr)
library(tidyr)
library(dplyr)
library(CausalImpact)
library(TTR)
library(ggplot2)
library(reshape2)
library(lubridate)
library(knitr)
Gets a list of the tickers in the S&P 500 and randomly samples 500 of them
n_tickers <- 500
df_sp500 <- yf_index_composition("SP500")
rnd_tickers <- base::sample(df_sp500$ticker, n_tickers)
Extracts historical prices from Yahoo Finance
# this was run once for the analysis, but can be run again if needed
df_yf <- yf_get(tickers = rnd_tickers,
first_date = '2020-01-02',
last_date = Sys.Date())
kable(head(df_yf),col.names = gsub("_", " ", names(df_yf)))
| ticker | ref date | price open | price high | price low | price close | volume | price adjusted | ret adjusted prices | ret closing prices | cumret adjusted prices |
|---|---|---|---|---|---|---|---|---|---|---|
| A | 2020-01-02 | 85.90 | 86.35 | 85.20 | 85.95 | 1410500 | 84.24537 | NA | NA | 1.0000000 |
| A | 2020-01-03 | 84.67 | 85.33 | 84.50 | 84.57 | 1118300 | 82.89276 | -0.0160556 | -0.0160558 | 0.9839444 |
| A | 2020-01-06 | 84.00 | 84.82 | 83.60 | 84.82 | 1993200 | 83.13780 | 0.0029561 | 0.0029561 | 0.9868531 |
| A | 2020-01-07 | 83.96 | 85.26 | 83.94 | 85.08 | 1684700 | 83.39264 | 0.0030652 | 0.0030653 | 0.9898780 |
| A | 2020-01-08 | 85.96 | 86.47 | 85.20 | 85.92 | 1847600 | 84.21597 | 0.0098730 | 0.0098730 | 0.9996511 |
| A | 2020-01-09 | 86.46 | 87.70 | 86.17 | 87.27 | 1912700 | 85.53920 | 0.0157123 | 0.0157123 | 1.0153579 |
Creates Moving Averages
#Removes unnecessary columns and adds a 5, 10, and 20 day moving average. These will come into play later in the code
df_stock_prices_ma <- df_yf %>%
select(ticker, ref_date, price_close) %>%
group_by(ticker) %>%
mutate(mov_avg5 = SMA(price_close, n =5)) %>%
mutate(mov_avg10 = SMA(price_close, n =10)) %>%
mutate(mov_avg20 = SMA(price_close, n =20))%>%
mutate(ref_date = as.Date(ref_date)) %>%
mutate(price_close = as.numeric(price_close)) %>%
ungroup()
kable(tail(df_stock_prices_ma))
| ticker | ref_date | price_close | mov_avg5 | mov_avg10 | mov_avg20 |
|---|---|---|---|---|---|
| ZTS | 2023-03-14 | 164.56 | 165.126 | 166.904 | 168.3245 |
| ZTS | 2023-03-15 | 163.57 | 164.112 | 166.504 | 167.9080 |
| ZTS | 2023-03-16 | 166.31 | 163.876 | 166.277 | 167.4725 |
| ZTS | 2023-03-17 | 164.47 | 164.464 | 165.668 | 166.9995 |
| ZTS | 2023-03-20 | 165.82 | 164.946 | 165.283 | 166.6890 |
| ZTS | 2023-03-21 | 166.24 | 165.282 | 165.204 | 166.5090 |
Stocks are only priced on days the markets are open, so a sequence of dates has been created to account for the days when there wasn’t a closing price.
df_date_list <- df_stock_prices_ma %>%
group_by(ticker) %>%
complete(ref_date = seq.Date(as.Date('2019-12-29'), Sys.Date(), by="day")) %>%
ungroup()
Replace NAs with 10dma
df_stock_prices_ma_clean <- df_date_list %>%
group_by(ticker) %>%
fill(mov_avg10, .direction = "downup") %>%
mutate(price_close = ifelse(is.na(price_close), mov_avg10, price_close)) %>%
select(c(ticker,ref_date,price_close)) %>%
filter(ref_date >= '2020-02-01') %>% # first day of sales data
mutate(price_close = round(price_close,2))%>%
ungroup()
5dma Stock Price data
df_stock_prices_5dma <- df_date_list %>%
group_by(ticker) %>%
fill(mov_avg5, .direction = "downup") %>%
select(c(ticker,ref_date,mov_avg5)) %>%
filter(ref_date >= '2020-02-01') %>% # first day of sales data
mutate(mov_avg5 = round(mov_avg5,2)) %>%
select(ref_date, ticker, mov_avg5) %>%
as.data.frame()
Imports data
df_sales <- read.csv("C:/Users/mikek/Desktop/salesdata.csv")
df_sales$Date <- as.Date(df_sales$Date, format = "%m/%d/%Y")
df_sales_7dma <- df_sales %>%
mutate(mov_avg7 = round(SMA(Sales, n =7),2))
Merges daily sales and daily stock prices
df_final_daily_daily<-merge(df_stock_prices_ma_clean,df_sales,by.x='ref_date',by.y='Date',all.x=TRUE) %>%
pivot_wider(names_from = ticker, values_from = price_close) %>%
filter(Sales >=0) %>%
as.data.frame()
Merges 7dma sales and daily stock prices
df_final_7dma_daily <- merge(df_stock_prices_ma_clean, df_sales_7dma, by.x='ref_date',by.y='Date', all.x=TRUE) %>%
select(-c(Sales)) %>%
pivot_wider(names_from = ticker, values_from = price_close) %>%
filter(mov_avg7 >=0) %>%
as.data.frame()
Merges 7dma sales and 5dma stock prices
df_final_7dma_5dma <- merge(df_stock_prices_5dma, df_sales_7dma, by.x='ref_date',by.y='Date', all.x=TRUE) %>%
select(-c(Sales)) %>%
pivot_wider(names_from = ticker, values_from = mov_avg5) %>%
filter(mov_avg7 >=0)%>%
as.data.frame()
In order to create a synthetic control group (i.e., a baseline), we want to see which securities are most correlated with the number of sales. Actual daily values are used first; log values will be tried as well.
m <- df_final_daily_daily[,-1]
a <- corr_var(m,
Sales,
top = 10,
#max_pvalue = 0.05
)
plot(a, cex = .5,cex.axis=.5)
Correlations don’t look too strong; the most correlated ticker is MOH, with a correlation of -0.54. Log values of sales will be tried next.
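For reference, corr_var() here is essentially ranking each ticker column by its Pearson correlation with Sales. The values behind the plot can also be read numerically from the returned object (its $data slot is used the same way later in this analysis), and a rough base-R equivalent of the ranking looks like the sketch below (an illustration only, not the lares implementation).
# Numeric view of the correlations behind the plot above
head(data.frame(variable = a$data$variables, corr = a$data$corr), 10)
# Rough base-R equivalent of the ranking (corr_var() also handles p-values and plotting)
cors <- sapply(setdiff(names(m), "Sales"),
function(tk) cor(m[[tk]], m[["Sales"]], use = "pairwise.complete.obs"))
head(sort(abs(cors), decreasing = TRUE), 10)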
Log values of daily sales and stock prices
m_log <- log(df_final_daily_daily[,-1])
a_log <- corr_var(m_log,
Sales,
top = 10,
#max_pvalue = 0.05
)
plot(a_log, cex = .5,cex.axis=.5)
These results are actually worse than before; the strongest correlation is only -0.347. Next, the 7dma of sales will be tried, first against daily prices and then against the 5dma of prices.
7dma Sales and daily stock prices
m_7dma_daily <- df_final_7dma_daily[,-1]
a_7dma_daily <- corr_var(m_7dma_daily,
mov_avg7,
top = 10,
#max_pvalue = 0.05
)
plot(a_7dma_daily, cex = .5,cex.axis=.5)
These results look a lot better.
7dma sales and 5dma stock prices
m_7dma_5dma <- df_final_7dma_5dma[,-1]
a_7dma_5dma <- corr_var(m_7dma_5dma,
mov_avg7,
top = 10,
#max_pvalue = 0.05
)
plot(a_7dma_5dma, cex = .5,cex.axis=.5)
The 7dma and 5dma results are similar to the above.
Log 7dma sales and 5dma stock prices
m_7dma_5dma_log <- log(df_final_7dma_5dma[,-1])
a_7dma_5dma_log <- corr_var(m_7dma_5dma_log,
mov_avg7,
top = 10,
#max_pvalue = 0.05
)
plot(a_7dma_5dma_log, cex = .5,cex.axis=.5)
These have provided the best results thus far.
Top 100 for log 7dma and 5dma
m_7dma_5dma_log <- log(df_final_7dma_5dma[,-1])
a_7dma_5dma_log <- corr_var(m_7dma_5dma_log,
mov_avg7,
top = 100,
#max_pvalue = 0.05
)
log_corr <- a_7dma_5dma_log$data$corr
log_var <- a_7dma_5dma_log$data$variables
log_corr_var <- data.frame(col1 = log_var,col2 =log_corr)
kable(tail(log_corr_var))
| | col1 | col2 |
|---|---|---|
| 95 | HIG | -0.683112 |
| 96 | CAT | -0.681362 |
| 97 | AIG | -0.676988 |
| 98 | ACGL | -0.675902 |
| 99 | LKQ | -0.675821 |
| 100 | HSIC | -0.675290 |
All of the top 100 correlations are at least 0.675 in magnitude.
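A quick numeric check on the full top-100 list, using the data frame just created:
# Range of correlations across the top 100 tickers
range(log_corr_var$col2)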
Creates a model dataframe
model_df <- df_final_7dma_5dma
# ptm <- proc.time() #starts the clock
p = c()
pre.start <- "2020-03-01" #start of pre period
pre.end <- "2023-01-14" #end of pre period
post.start <- "2023-01-15" #intervention start
post.end <- "2023-02-27" #end of data/intervention end
pre.period <- as.Date(c(pre.start, pre.end))
post.period <- as.Date(c(post.start,post.end))
df_var_selection <- model_df %>% filter(ref_date >= pre.start) %>% select(-c(ref_date)) %>% log()
time.points <- seq.Date(as.Date(pre.start), as.Date(post.end), by =1)
for (i in 1:100) {
baseline_tickers <- a_7dma_5dma_log$data$variables[1:i] # takes the i most correlated tickers from the correlation created earlier
df_var_selection1 <- as_tibble(df_var_selection[,c('mov_avg7', baseline_tickers)]) # creates a tibble for the model
baseline <- zoo(df_var_selection1, order.by = time.points) #creates a time series for stocks and time.points
mm <- CausalImpact(baseline,
pre.period,
post.period,
model.args = list(niter = 1000)
)
# Model performance
df_p <- mm$summary$p[1]
# Save model performance metrics
p[i] = df_p
}
# proc.time() - ptm
Look at p-values for each number of predictors. Put in groups of 5 and take the average.
results1 <- data.frame(seq(1:100),a_7dma_5dma_log$data$variables[1:100],p) %>%
arrange(p) %>%
rename(Ticker = a_7dma_5dma_log.data.variables.1.100.) %>%
rename(Num_Preds = seq.1.100.)
kable(head(results1))
| Num_Preds | Ticker | p |
|---|---|---|
| 93 | NOC | 0.0512552 |
| 100 | HSIC | 0.0636743 |
| 65 | ODFL | 0.1331269 |
| 17 | DVN | 0.1375770 |
| 45 | MPC | 0.1389760 |
| 22 | HES | 0.1441718 |
intervals <- seq(0, 100, by = 5)
avg <- results1 %>%
mutate(interval = cut(Num_Preds, breaks = intervals, include.lowest = TRUE)) %>%
group_by(interval) %>%
summarize(avg_p_value = mean(p)) %>%
arrange(avg_p_value)
kable(head(avg,10))
| interval | avg_p_value |
|---|---|
| (20,25] | 0.1620595 |
| (15,20] | 0.1650741 |
| (10,15] | 0.1928282 |
| (5,10] | 0.2019473 |
| (40,45] | 0.2073024 |
| (95,100] | 0.2090730 |
| (35,40] | 0.2122784 |
| [0,5] | 0.2176575 |
| (90,95] | 0.2305979 |
| (60,65] | 0.2330736 |
Predictor counts between 5 and 25 appear to produce the lowest average p-values, so the search is narrowed to that range.
# ptm <- proc.time() #starts the clock
p = c()
pre.start <- "2020-03-01" #start of pre period
pre.end <- "2023-01-14" #end of pre period
post.start <- "2023-01-15" #intervention start
post.end <- "2023-02-27" #end of data/intervention end
pre.period <- as.Date(c(pre.start, pre.end))
post.period <- as.Date(c(post.start,post.end))
df_var_selection <- model_df %>% filter(ref_date >= pre.start) %>% select(-c(ref_date)) %>% log()
time.points <- seq.Date(as.Date(pre.start), as.Date(post.end), by =1)
for (i in 1:25) {
baseline_tickers <- a_7dma_5dma_log$data$variables[1:i] # takes the i most correlated tickers from the correlation created earlier
df_var_selection1 <- as_tibble(df_var_selection[,c('mov_avg7', baseline_tickers)]) # creates a tibble for the model
baseline <- zoo(df_var_selection1, order.by = time.points) #creates a time series for stocks and time.points
mm <- CausalImpact(baseline,
pre.period,
post.period,
model.args = list(niter = 5000)
)
# Model performance
df_p <- mm$summary$p[1]
# Save model performance metrics
p[i] = df_p
}
# (proc.time() - ptm)[3] / 60 #stops the clock
Look at p-values for each number of predictors. Put in groups of 5 and take the average.
results1 <- data.frame(seq(1:25),a_7dma_5dma_log$data$variables[1:25],p) %>%
arrange(p) %>%
rename(Ticker = a_7dma_5dma_log.data.variables.1.25.) %>%
rename(Num_Preds = seq.1.25.)
kable(head(results1,10))
| Num_Preds | Ticker | p |
|---|---|---|
| 19 | ON | 0.1808318 |
| 10 | MCK | 0.1925553 |
| 7 | MOH | 0.1965399 |
| 12 | CTVA | 0.1965587 |
| 24 | KR | 0.1982272 |
| 17 | DVN | 0.1984318 |
| 20 | MRO | 0.2020527 |
| 11 | EQT | 0.2026918 |
| 22 | HES | 0.2032945 |
| 23 | EOG | 0.2037037 |
intervals <- seq(0, 25, by = 5)
avg <- results1 %>%
mutate(interval = cut(Num_Preds, breaks = intervals, include.lowest = TRUE)) %>%
group_by(interval) %>%
summarize(avg_p_value = mean(p)) %>%
arrange(avg_p_value)
kable(avg)
| interval | avg_p_value |
|---|---|
| (15,20] | 0.1991989 |
| (20,25] | 0.2032579 |
| (5,10] | 0.2059177 |
| (10,15] | 0.2073060 |
| [0,5] | 0.2225324 |
The 15-20 predictor group has the lowest average p-value, and within that group, 19 predictors gives the lowest individual p-value.
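To confirm that choice programmatically, a small sketch filtering the results for the (15,20] range:
results1 %>%
filter(Num_Preds >= 16, Num_Preds <= 20) %>% # the (15,20] interval
slice_min(p, n = 1) # 19 predictors has the lowest p-value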
We need to establish the pre- and post-intervention periods; these can be shortened later if the model needs tuning. The intervention (a price increase) occurred on January 15th, 2023.
pre.start <- "2020-03-01" #start of pre period
pre.end <- "2023-01-14" #end of pre period
post.start <- "2023-01-15" #intervention start
post.end <- "2023-02-27" #end of data/intervention end
pre.period <- as.Date(c(pre.start, pre.end))
post.period <- as.Date(c(post.start,post.end))
model_df_clean <- model_df %>% filter(ref_date >= pre.start) %>% select(-c(ref_date)) %>% log()
time.points <- seq.Date(as.Date(pre.start), as.Date(post.end), by =1)
19 variables
baseline_tickers <- a_7dma_5dma_log$data$variables[1:19]
model_df_clean1 <- as_tibble(model_df_clean[,c('mov_avg7', baseline_tickers)]) # creates a tibble for the model
baseline <- zoo(model_df_clean1, order.by = time.points) #creates a time series for stocks and time.points
df123 <- data.frame(time = index(baseline), coredata(baseline))
df_melt <- melt(df123 , id.vars = "time")
ggplot(df_melt, aes(x = time, y = value, color = variable)) +
geom_line()
Initial run using default values
impact_defaults <- CausalImpact(baseline,
pre.period,
post.period
)
summary(impact_defaults)
## Posterior inference {CausalImpact}
##
## Average Cumulative
## Actual 7 307
## Prediction (s.d.) 7.1 (0.13) 311.9 (5.74)
## 95% CI [6.8, 7.3] [300.6, 322.7]
##
## Absolute effect (s.d.) -0.12 (0.13) -5.10 (5.74)
## 95% CI [-0.36, 0.14] [-15.87, 6.17]
##
## Relative effect (s.d.) -1.6% (1.8%) -1.6% (1.8%)
## 95% CI [-4.9%, 2.1%] [-4.9%, 2.1%]
##
## Posterior tail-area probability p: 0.18833
## Posterior prob. of a causal effect: 81%
##
## For more details, type: summary(impact, "report")
saveRDS(impact_defaults, 'impact_defaults.rds')
# Load the saved model
impact_defaults_model <- readRDS("impact_defaults.rds")
impact_defaults_model
## Posterior inference {CausalImpact}
##
## Average Cumulative
## Actual 7 307
## Prediction (s.d.) 7.1 (0.13) 311.9 (5.74)
## 95% CI [6.8, 7.3] [300.6, 322.7]
##
## Absolute effect (s.d.) -0.12 (0.13) -5.10 (5.74)
## 95% CI [-0.36, 0.14] [-15.87, 6.17]
##
## Relative effect (s.d.) -1.6% (1.8%) -1.6% (1.8%)
## 95% CI [-4.9%, 2.1%] [-4.9%, 2.1%]
##
## Posterior tail-area probability p: 0.18833
## Posterior prob. of a causal effect: 81%
##
## For more details, type: summary(impact, "report")
The results aren’t significant, but increasing the number of iterations might produce better results.
More Iterations
impact_iter10 <- CausalImpact(baseline,
pre.period,
post.period,
model.args = list(
niter = 10000
# prior.level.sd = 0.01, #default is 0.01
# nseasons = 1, #default is 1
# season.duration = 1, #default is 1
# dynamic.regression = FALSE, #default is FALSE
# max.flips = -1 #default is -1
)
)
summary(impact_iter10)
More Iterations
impact_iter50 <- CausalImpact(baseline,
pre.period,
post.period,
model.args = list(
niter = 50000
# prior.level.sd = 0.01, #default is 0.01
# nseasons = 1, #default is 1
# season.duration = 1, #default is 1
# dynamic.regression = FALSE, #default is FALSE
# max.flips = -1 #default is -1
)
)
summary(impact_iter50)
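Rather than reading each summary separately, the posterior tail-area p-values of the three fits can be collected side by side (a small sketch using the same $summary$p slot accessed in the loops above; 1,000 is the CausalImpact default number of iterations):
data.frame(iterations = c(1000, 10000, 50000),
p_value = c(impact_defaults$summary$p[1],
impact_iter10$summary$p[1],
impact_iter50$summary$p[1]))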
10,000 iterations produced a higher p-value, and 50,000 produced a higher p-value as well (roughly 0.18).
It could be that the price increase did not have an impact on overall sales, but changing some of the hyperparameters may paint a clearer picture.
Creates a grid of various hyperparameters. Weekly seasonality is unlikely, given that sales are already smoothed with a 7-day moving average, so nseasons and season.duration are left at their defaults.
param_grid = list(niter = c(10000), # default is 1000
standardize.data = c(TRUE, FALSE), #default is true
prior.level.sd = c(0.01, 0.1), # default is 0.01
nseasons = c(1), #default is 1
season.duration = c(1), #default is 1
dynamic.regression = c(FALSE), #default is false
max.flips = c(-1,14,28) #default is -1
)
# Generate all combinations of parameters
params = expand.grid(param_grid)
kable(params)
| niter | standardize.data | prior.level.sd | nseasons | season.duration | dynamic.regression | max.flips |
|---|---|---|---|---|---|---|
| 10000 | TRUE | 0.01 | 1 | 1 | FALSE | -1 |
| 10000 | FALSE | 0.01 | 1 | 1 | FALSE | -1 |
| 10000 | TRUE | 0.10 | 1 | 1 | FALSE | -1 |
| 10000 | FALSE | 0.10 | 1 | 1 | FALSE | -1 |
| 10000 | TRUE | 0.01 | 1 | 1 | FALSE | 14 |
| 10000 | FALSE | 0.01 | 1 | 1 | FALSE | 14 |
| 10000 | TRUE | 0.10 | 1 | 1 | FALSE | 14 |
| 10000 | FALSE | 0.10 | 1 | 1 | FALSE | 14 |
| 10000 | TRUE | 0.01 | 1 | 1 | FALSE | 28 |
| 10000 | FALSE | 0.01 | 1 | 1 | FALSE | 28 |
| 10000 | TRUE | 0.10 | 1 | 1 | FALSE | 28 |
| 10000 | FALSE | 0.10 | 1 | 1 | FALSE | 28 |
Runs the various models and stores each p-value
# creates an empty vector to store the p-value of each model
p = c()
for(i in 1:nrow(params)){
# Fit a model using one parameter combination
m = CausalImpact(baseline,
pre.period,
post.period,
#alpha = 0.1, # can adjust alpha if necessary
model.args = list(
niter = params[i,1],
standardize.data = params[i,2],
prior.level.sd = params[i,3],
nseasons = params[i,4],
season.duration = params[i,5],
dynamic.regression = params[i,6],
max.flips = params[i,7]
)
)
# Model performance
df_p <- m$summary$p[1]
# Save model performance metrics
p[i] = df_p
}
results <- data.frame(params,p) %>% arrange(p)
kable(head(results))
| niter | standardize.data | prior.level.sd | nseasons | season.duration | dynamic.regression | max.flips | p |
|---|---|---|---|---|---|---|---|
| 10000 | TRUE | 0.01 | 1 | 1 | FALSE | -1 | 0.2054629 |
| 10000 | TRUE | 0.10 | 1 | 1 | FALSE | 14 | 0.2064354 |
| 10000 | FALSE | 0.01 | 1 | 1 | FALSE | -1 | 0.2093445 |
| 10000 | FALSE | 0.01 | 1 | 1 | FALSE | 28 | 0.2131544 |
| 10000 | TRUE | 0.10 | 1 | 1 | FALSE | -1 | 0.2157431 |
| 10000 | FALSE | 0.01 | 1 | 1 | FALSE | 14 | 0.2193309 |
Notes: it appears that the default values for the model generate the lowest p-value. Although not significant, it’s still worth exploring with a longer final run.
# ptm <- proc.time() #starts the clock
pre.start <- "2020-03-01" #start of pre period
pre.end <- "2023-01-14" #end of pre period
post.start <- "2023-01-15" #intervention start
post.end <- "2023-02-27" #end of data/intervention end
baseline_tickers <- a_7dma_5dma_log$data$variables[1:19]
model_df_clean1 <- as_tibble(model_df_clean[,c('mov_avg7', baseline_tickers)]) # creates a tibble for the model
baseline <- zoo(model_df_clean1, order.by = time.points) #creates a time series for stocks and time.points
pre.period <- as.Date(c(pre.start, pre.end))
post.period <- as.Date(c(post.start,post.end))
impact <- CausalImpact(baseline,
pre.period,
post.period,
alpha = 0.05,
model.args = list(
niter = 250000,
prior.level.sd = 0.01, #default is 0.01
nseasons = 1, #default is 1
season.duration = 1, #default is 1
dynamic.regression = FALSE, #default is FALSE
max.flips = -1 #default is -1
)
)
# time <- proc.time() - ptm #stops the clock
# time[3]/60
summary(impact)
## Posterior inference {CausalImpact}
##
## Average Cumulative
## Actual 7 307
## Prediction (s.d.) 7.1 (0.13) 311.3 (5.65)
## 95% CI [6.8, 7.3] [300.3, 322.4]
##
## Absolute effect (s.d.) -0.1 (0.13) -4.5 (5.65)
## 95% CI [-0.35, 0.15] [-15.62, 6.53]
##
## Relative effect (s.d.) -1.4% (1.8%) -1.4% (1.8%)
## 95% CI [-4.8%, 2.2%] [-4.8%, 2.2%]
##
## Posterior tail-area probability p: 0.21226
## Posterior prob. of a causal effect: 79%
##
## For more details, type: summary(impact, "report")
plot(impact)
plot(impact,"original")
summary(impact,"report")
## Analysis report {CausalImpact}
##
##
## During the post-intervention period, the response variable had an average value of approx. 6.97. In the absence of an intervention, we would have expected an average response of 7.08. The 95% interval of this counterfactual prediction is [6.82, 7.33]. Subtracting this prediction from the observed response yields an estimate of the causal effect the intervention had on the response variable. This effect is -0.10 with a 95% interval of [-0.35, 0.15]. For a discussion of the significance of this effect, see below.
##
## Summing up the individual data points during the post-intervention period (which can only sometimes be meaningfully interpreted), the response variable had an overall value of 306.81. Had the intervention not taken place, we would have expected a sum of 311.33. The 95% interval of this prediction is [300.28, 322.43].
##
## The above results are given in terms of absolute numbers. In relative terms, the response variable showed a decrease of -1%. The 95% interval of this percentage is [-5%, +2%].
##
## This means that, although it may look as though the intervention has exerted a negative effect on the response variable when considering the intervention period as a whole, this effect is not statistically significant, and so cannot be meaningfully interpreted. The apparent effect could be the result of random fluctuations that are unrelated to the intervention. This is often the case when the intervention period is very long and includes much of the time when the effect has already worn off. It can also be the case when the intervention period is too short to distinguish the signal from the noise. Finally, failing to find a significant effect can happen when there are not enough control variables or when these variables do not correlate well with the response variable during the learning period.
##
## The probability of obtaining this effect by chance is p = 0.212. This means the effect may be spurious and would generally not be considered statistically significant.
par(mar = c(5, 8, 4, 2) + 0.1)
plot(impact$model$bsts.model, "coefficients", las = 2, cex.axis = .8, cex.names = 0.5)
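The plot above shows the posterior inclusion of each predictor. Assuming the fitted bsts model stores its MCMC coefficient draws in $coefficients (a draws-by-predictor matrix, which is how bsts regression output is typically structured), the marginal inclusion probabilities can also be pulled out numerically. A sketch:
# Share of MCMC draws in which each coefficient is non-zero (marginal inclusion probability)
beta_draws <- impact$model$bsts.model$coefficients
sort(colMeans(beta_draws != 0), decreasing = TRUE)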
df_series_results <- fortify(impact$series) %>%
na.omit() %>%
select(c(1, 2, 4)) %>% # keeps Index, response, and point.pred
mutate_at(vars(2:3), exp) %>%
rename(Date = Index) %>%
filter(Date >= "2023-01-15") %>%
mutate(week.num = week(Date)) %>%
mutate(week.day = weekdays(Date)) %>%
filter(week.day == 'Saturday') %>%
mutate(point.effect = response - point.pred)
kable(df_series_results)
| Date | response | point.pred | week.num | week.day | point.effect |
|---|---|---|---|---|---|
| 2023-01-21 | 1176.00 | 1182.477 | 3 | Saturday | -6.477137 |
| 2023-01-28 | 1103.29 | 1185.403 | 4 | Saturday | -82.113099 |
| 2023-02-04 | 1000.00 | 1183.484 | 5 | Saturday | -183.484472 |
| 2023-02-11 | 1083.00 | 1183.259 | 6 | Saturday | -100.258861 |
| 2023-02-18 | 980.14 | 1182.673 | 7 | Saturday | -202.532801 |
| 2023-02-25 | 1017.29 | 1178.166 | 8 | Saturday | -160.876315 |
ggplot(df_series_results, aes(x = Date)) +
geom_line(aes(y = response, color = "Actual")) +
geom_line(aes(y = point.pred, color = "Counterfactual")) +
scale_color_manual(values = c("Actual" = "blue", "Counterfactual" = "red")) +
labs(title = "Actual and Counterfactual 7dma on Each Saturday",
x = "Date",
y = "7dma Sales",
color = "") +
scale_x_date(date_labels = "%b %d", breaks = df_series_results$Date)
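Finally, the average Saturday gap between actual and counterfactual sales in the table above can be summarized directly from the same data frame:
df_series_results %>%
summarize(avg_point_effect = mean(point.effect),
total_point_effect = sum(point.effect))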
sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] knitr_1.41 lubridate_1.9.0 timechange_0.1.1
## [4] reshape2_1.4.4 ggplot2_3.4.0 TTR_0.24.3
## [7] CausalImpact_1.3.0 bsts_0.9.9 xts_0.12.2
## [10] BoomSpikeSlab_1.2.5 Boom_0.9.11 dplyr_1.0.10
## [13] tidyr_1.2.1 readr_2.1.3 yfR_1.1.0
## [16] lares_5.1.4 zoo_1.8-11
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.9 rpart.plot_3.1.1 lattice_0.20-45 assertthat_0.2.1
## [5] digest_0.6.30 utf8_1.2.2 R6_2.5.1 plyr_1.8.8
## [9] evaluate_0.18 highr_0.9 httr_1.4.4 pillar_1.8.1
## [13] rlang_1.0.6 curl_4.3.3 rstudioapi_0.14 jquerylib_0.1.4
## [17] rpart_4.1.19 rmarkdown_2.18 labeling_0.4.2 stringr_1.5.0
## [21] RCurl_1.98-1.9 munsell_0.5.0 compiler_4.2.2 xfun_0.35
## [25] pkgconfig_2.0.3 htmltools_0.5.3 tidyselect_1.2.0 tibble_3.1.8
## [29] codetools_0.2-18 fansi_1.0.3 withr_2.5.0 tzdb_0.3.0
## [33] MASS_7.3-58.1 bitops_1.0-7 grid_4.2.2 jsonlite_1.8.3
## [37] gtable_0.3.1 lifecycle_1.0.3 DBI_1.1.3 magrittr_2.0.3
## [41] pROC_1.18.0 scales_1.2.1 zip_2.2.2 cli_3.6.0
## [45] stringi_1.7.8 cachem_1.0.6 farver_2.1.1 xml2_1.3.3
## [49] bslib_0.4.1 ellipsis_0.3.2 generics_0.1.3 vctrs_0.5.1
## [53] openxlsx_4.2.5.1 tools_4.2.2 glue_1.6.2 purrr_0.3.5
## [57] hms_1.1.2 fastmap_1.1.0 yaml_2.3.6 colorspace_2.0-3
## [61] h2o_3.38.0.1 rvest_1.0.3 patchwork_1.1.2 sass_0.4.4