# 2024/11/12 - R SSET Update

# Parameters ----
pnumerator <- 31891    # Total Revenue (Numerator)
pnumerator_sq <- 1696747  # Sum of squared Revenue values
pdenominator <- 83000  # Total Sessions (Denominator)
pconf <- 0.90          # Confidence level (by default 0.90 for 90%)
power <- 0.80          # Statistical power (by default 0.80 for 80%)
pcount_days <- 1       # Number of days over which the data was collected
pimpact_percent <- 0.01  # Desired impact as a fraction (by default 0.01 for 1%)
pvolume_percent <- 0.5   # Proportion of total sample per partition (0 to 1)
# Here 0.5 corresponds to a test with 2 partitions (half of the sample each).

# Fail fast on inputs that would otherwise silently produce NaN/Inf below.
stopifnot(
  pdenominator > 1,  # the sample SD divides by (pdenominator - 1)
  pcount_days > 0,   # the daily volume divides by pcount_days
  pconf > 0, pconf < 1,
  power > 0, power < 1,
  pvolume_percent > 0, pvolume_percent <= 1,
  pnumerator != 0, pimpact_percent != 0  # their product scales the divisor
)

# Mean, variance, and standard deviation ----
mean_num <- pnumerator / pdenominator  # Average Revenue per Session
# Population variance via E[X^2] - E[X]^2. `var` is only a numeric here; calls
# to stats::var() keep working because R skips non-functions when calling.
var <- (pnumerator_sq / pdenominator) - (mean_num^2)
# A negative value means pnumerator_sq is inconsistent with pnumerator.
stopifnot(var >= 0)
sum_diff_squares <- var * pdenominator  # Total sum of squared deviations
# Sample standard deviation (divides by n - 1 instead of n).
pbaseline_sd <- sqrt(sum_diff_squares / (pdenominator - 1))

# Z-scores for confidence (alpha) and power (beta) ----
Zalfa <- qnorm(1 - (1 - pconf) / 2)  # Two-sided critical value for pconf
Zbeta <- qnorm(power)                # Quantile for the requested power

# Baseline metric: Revenue per Session (same quantity as mean_num)
cvr_ses <- mean_num

# Absolute effect size that corresponds to the desired relative impact
delta_ses <- cvr_ses * pimpact_percent

# MDE estimation per week ----
maxWeeks <- 28  # Maximum number of weeks to estimate MDE
stopifnot(maxWeeks >= 0)
# Preallocate the result instead of growing it with c() on every iteration.
mdePerWeek <- numeric(maxWeeks)

# seq_len() yields an empty sequence for maxWeeks == 0, whereas 1:0 would
# iterate over c(1, 0).
for (week in seq_len(maxWeeks)) {
  # Sessions per partition accumulated after `week` weeks: daily sessions in
  # the partition times 7 days times the number of weeks.
  sample_size_ses <- ((pdenominator * pvolume_percent) / pcount_days) * 7 * week

  # Detectable absolute difference divided by delta_ses, i.e. the MDE as a
  # multiple of the desired impact. With pimpact_percent = 0.01 the value
  # reads directly as a relative MDE in percent.
  # NOTE(review): the usual two-sample formula carries a factor of 2 in the
  # variance term (difference of two means); there is none here. Confirm
  # whether that is intentional.
  mde_ses <- sqrt((Zalfa + Zbeta)^2 * pbaseline_sd^2 / sample_size_ses) / delta_ses

  mdePerWeek[week] <- mde_ses
}

# Display the MDE values for each week, rounded to two decimals
data.frame(week_number = seq_len(maxWeeks), mdePerWeek = round(mdePerWeek, 2))
##    week_number mdePerWeek
## 1            1       5.41
## 2            2       3.82
## 3            3       3.12
## 4            4       2.70
## 5            5       2.42
## 6            6       2.21
## 7            7       2.04
## 8            8       1.91
## 9            9       1.80
## 10          10       1.71
## 11          11       1.63
## 12          12       1.56
## 13          13       1.50
## 14          14       1.45
## 15          15       1.40
## 16          16       1.35
## 17          17       1.31
## 18          18       1.27
## 19          19       1.24
## 20          20       1.21
## 21          21       1.18
## 22          22       1.15
## 23          23       1.13
## 24          24       1.10
## 25          25       1.08
## 26          26       1.06
## 27          27       1.04
## 28          28       1.02
# The table above lists each week number with its MDE, rounded to two decimals.
# View(df)  # interactive only; `df` is not defined here -- assign the data frame above to `df` first