# Refer to the in-depth explanation in the word/pdf report
# Experiment - The experiment involves a two-level factorial design with three factors, \n i.e popcorn brand, microwave power level, and cooking time on the percentage of edible popcorn kernels made in a microwave oven. Two popcorn \n types from Black Jewel company are used in lieu of different brands. The power levels selected were 5 and 10. Cooking time levels were \n 5 mins and 7.5 mins. The experiment ultimately involves maximization of the proportion of popped popcorns versus un-popped or burnt popcorns. \n This optimized maximum is determined through the combination of the three factors.
# Experimental unit - Experimental unit in this example is each set of 50 kernels that are experimented upon.
# Independent variable/factor - Cooking time, Microwave Power, and Brand of Popcorn.
# Background variable/lurking variable - i) Positioning in the microwave \n ii) Reuse of glasses that were used to heat the popcorn \n iii) Variance in oil and butter on the popcorn kernels \n iv) Human error. Experiments were carried out in a randomized fashion to
# Dependent variable/response - Proportion of popped popcorns (% of 50 kernels that popped)
# Effect(s) - To understand the interaction of the independent variables (time, power, and popcorn brand) \n upon the the dependent variable (popping proportion of popcorns)
# Replicates - A total of 3 replicates, or 24 runs were performed in a randomized order. \n Wheeler estimate equates of 2 replicates, however a 3rd set of runs was carried out. # vector of pilot runs (6 batches of 10 kernels) - to be used for sigma calculation
pilot_runs <- c(1,2,1,2,4,2)/10
# determine mean popping proportion
avg_pop <- mean(pilot_runs)
# experimental error is estimated as actual % popped from pilot run vs. average % popped from all runs
exp_error <- pilot_runs - avg_pop
# determine the std. dev / sigma
exp_error %>%
var() %>%
sqrt() -> sigma
sigma## [1] 0.1095445
# shift in marginal mean of 0.25
delta = 0.25
# Wheeler estimate (used for efficiency in lieu of precision)
N_runs <- ((8*sigma)/delta)^2
# Wheeler estimate determines > 12 runs
N_runs## [1] 12.288
# N_runs = replicates * 2^(3) => N/8 = replicates
replicates <- N_runs/8
# To approach this symmetrically, at least 2 replicates should be carried out, or 16 runs
# The group has decided to do 3 replicates (low cost experiment affords us that option)
replicates %>% round()## [1] 2
# Levels are selected based a large difference between the levels of increase response sensitivity
# Generate a data.frame of 2^3 levels
experiment <- expand.grid(
time = c(5, 7.5),
power = c(5, 10),
brand = c("regular", "buttery"),
response = ""
)
# Treat the factors as factors rather than numerical
experiment %>%
mutate(time = as.factor(time),
power = as.factor(power)) -> experiment
# Generate Replicates
experiment <- rbind(experiment, experiment, experiment)
experiment# ensure the same randomization
# randomize
set.seed(150)
experiment[order(sample(1:24)), ] -> rand_exp
# preview the spreadsheet
rand_exppopcorn <- read_excel("popcorn_experiment_excel.xlsx")
# batch size for each run is 50 kernels (we use it to find the percentage of popped popcorns)
popcorn_count <- 50
# ensure that all variables are treated as factor levels rather than numeric
popcorn %>%
mutate(time = as.factor(time),
power = as.factor(power),
brand = brand %>% as.factor(),
pct_pop = response/popcorn_count) -> popcorn
# preview the dataset
popcorn# determine the highest reponse (optimum combination) with exploratory approach
popcorn %>%
slice(which.max(popcorn$pct_pop))# pct_pop is regressed on the interaction of time, power, and popcorn brand
model <- lm(pct_pop ~ time*power*brand, data = popcorn)
# Microwave power of 10 is a significant variable at the 95% confidence interval
# Power of 10 also has the largest coeffecient (impact on popping proportion holding other variables constant)
# The interaction of time (7.5 mins) and power (10) is significant
# The interaction of power (10) and brand (2) can be also be considered significant at 90% confidence interval
# Adj. R-squared shows that 94% of the variance in popcorn popping can be explained
summary(model)##
## Call:
## lm.default(formula = pct_pop ~ time * power * brand, data = popcorn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.18667 -0.02167 0.00000 0.03000 0.13333
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.333e-02 4.485e-02 1.189 0.25168
## time7.5 4.000e-02 6.342e-02 0.631 0.53714
## power10 3.733e-01 6.342e-02 5.887 2.3e-05 ***
## brandregular 2.667e-02 6.342e-02 0.420 0.67973
## time7.5:power10 3.267e-01 8.969e-02 3.642 0.00219 **
## time7.5:brandregular 9.624e-17 8.969e-02 0.000 1.00000
## power10:brandregular 1.867e-01 8.969e-02 2.081 0.05383 .
## time7.5:power10:brandregular -1.800e-01 1.268e-01 -1.419 0.17506
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07767 on 16 degrees of freedom
## Multiple R-squared: 0.9607, Adjusted R-squared: 0.9435
## F-statistic: 55.88 on 7 and 16 DF, p-value: 4.623e-10
# Coefficients from the model - Can be used for equation building of the form
# pct_pop (y) = m1x1 + m2x2 + .... + mkxk + intercept
coefficients(model)## (Intercept) time7.5
## 5.333333e-02 4.000000e-02
## power10 brandregular
## 3.733333e-01 2.666667e-02
## time7.5:power10 time7.5:brandregular
## 3.266667e-01 9.623937e-17
## power10:brandregular time7.5:power10:brandregular
## 1.866667e-01 -1.800000e-01
cbind(
Time_Level = popcorn$time,
Power_Level = popcorn$power,
Brand_Level = popcorn$brand,
Percent_Popped = model$fitted.values
) %>%
as_tibble() %>%
arrange(desc(Percent_Popped)) -> optimum_df
optimum_dfplot_ly(optimum_df, x = ~Time_Level, y = ~Power_Level, z = ~ Percent_Popped, color = ~ as_factor(Brand_Level), colors = c('#BF382A', '#0C4B8E')) %>%
add_markers() %>%
layout(scene = list(xaxis = list(title = 'Levels of Time'),
yaxis = list(title = 'Levels of Microwave Power'),
zaxis = list(title = '% Popped')))# an anova summary shows time, power, brand, and the interaction of time and power as significant variables
(model_aov <- aov(pct_pop ~ time*power*brand, data = popcorn))## Call:
## aov.default(formula = pct_pop ~ time * power * brand, data = popcorn)
##
## Terms:
## time power brand time:power time:brand
## Sum of Squares 0.1504167 2.0533500 0.0337500 0.0840167 0.0121500
## Deg. of Freedom 1 1 1 1 1
## power:brand time:power:brand Residuals
## Sum of Squares 0.0140167 0.0121500 0.0965333
## Deg. of Freedom 1 1 16
##
## Residual standard error: 0.07767453
## Estimated effects may be unbalanced
## Df Sum Sq Mean Sq F value Pr(>F)
## time 1 0.1504 0.1504 24.931 0.000133 ***
## power 1 2.0533 2.0533 340.334 3.31e-12 ***
## brand 1 0.0337 0.0337 5.594 0.030992 *
## time:power 1 0.0840 0.0840 13.925 0.001817 **
## time:brand 1 0.0121 0.0121 2.014 0.175061
## power:brand 1 0.0140 0.0140 2.323 0.146978
## time:power:brand 1 0.0121 0.0121 2.014 0.175061
## Residuals 16 0.0965 0.0060
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## time lsmean SE df lower.CL upper.CL
## 5 0.300 0.0224 16 0.252 0.348
## 7.5 0.458 0.0224 16 0.411 0.506
##
## Results are averaged over the levels of: power, brand
## Confidence level used: 0.95
## power lsmean SE df lower.CL upper.CL
## 5 0.0867 0.0224 16 0.0391 0.134
## 10 0.6717 0.0224 16 0.6241 0.719
##
## Results are averaged over the levels of: time, brand
## Confidence level used: 0.95
## brand lsmean SE df lower.CL upper.CL
## buttery 0.342 0.0224 16 0.294 0.389
## regular 0.417 0.0224 16 0.369 0.464
##
## Results are averaged over the levels of: time, power
## Confidence level used: 0.95
# Microwave power vs. percentage of popped popcorns plot shows that microwave power does influence popping of popcorns
# At a power level of 10 - Median pct_pop = 67%. Also notice a maximum of 96%.
# At a power level of 5 - Median pct_pop = 9%. Also notice a minimum of 4%.
ggplotly(
popcorn %>%
ggplot(aes(x = power, y = pct_pop, fill = power)) +
geom_boxplot() + scale_y_continuous(labels = scales::percent_format()) +
labs(
title = "Effect of microwave power on Popping % of popcorns",
caption = "Microwave power has a\n significant impact on pct_popped"
)
)# At a time setting of 7.5 mins - Median pct_pop = 39%. Also notice a maximum of 96%.
# At a time setting of 5 mins - Median pct_pop = 24%. Also notice a minimum of 4%.
ggplotly(
popcorn %>%
ggplot(aes(x = time, y = pct_pop, fill = time)) +
geom_boxplot() + scale_y_continuous(labels = scales::percent_format()) +
labs(
title = "Effect of heating time on Popping % of popcorns",
caption = "Time has a\n significant impact on pct_popped"
)
)# At a power level of 10 - Median pct_pop = 67%
# At a power level of 5 - Median pct_pop = 9%
ggplotly(
popcorn %>%
ggplot(aes(x = brand, y = pct_pop, fill = brand)) +
geom_boxplot() + scale_y_continuous(labels = scales::percent_format()) +
labs(
title = "Effect of popcorn brand/type on Popping % of popcorns",
caption = "Popcorn type in liue of brand has a\n has an impact on pct_popped"
)
)with(popcorn, (interaction.plot(time, power, pct_pop,
main = "Interaction plot of Time and Power",
xlab = "Time", ylab = "Popcorns - Popped")))## NULL
with(popcorn, (interaction.plot(brand, power, pct_pop,
main = "Interaction plot of Brand and Power",
xlab = "Brand", ylab = "Popcorns - Popped")))## NULL
varImp(model) %>%
abs(.) -> vars_df
vars_df %>%
mutate(concept = rownames(vars_df), concept = as_factor(concept) %>% fct_reorder(-Overall)) %>%
mutate(coef_text = scales::scientific(Overall)) %>%
ggplot(aes(x = reorder(concept, -Overall), y = Overall, fill = concept)) +
geom_col() +
geom_label(aes(label = coef_text)) + xlab("") + theme(axis.text.x = element_text(angle = 90)) +
labs(
title = "Importance of variables in Popcorn Experiment",
caption = "Power of 10 is the most significant variable in the percentage of \n popcorns popped followed by the interaction \n of Power of 10 and Time of 7.5 minutes",
y = "Variable Importance - t-value"
)# We consider three factors which are varied in a one_factor_at_a_time (ofat) plan
facts_doe <- 3
# Same number of levels
levels_doe <- 2
# To make results comparable, same number of replicates to give 24 runs
replicates_doe <- 3
# For the same precision, we would have half of 24 runs at one level and half at the other
ofat <- levels_doe^facts_doe*replicates_doe/2
one_fact_approach <- facts_doe*ofat + ofat
# it would take 48 experiments to get the same power for detecting main effects in an ofat plan
one_fact_approach## [1] 48