Introduction

This document details the efforts to replicate the findings presented in the paper titled “Optimizing Conditions in the Acid Tolerance Test for Potential Probiotics Using Response Surface Methodology” by Hye In Ko,Chang Hee Jeong, Sung Wook Hong, Jong-Bang Eun, and Tae-Woon Kim.

Essential libraries

library(readxl)
library(rsm)

Data loading

The data is the Table 1 in the above metioned paper.

setwd('~/repos/RpubPages/')
df <- read_excel("data/rsm.xlsx")

Coded the continuous data as in Table 6

df_coded<- coded.data(df,x1~(pH-3)/0.5,x2~(Time_hours-1.5)/0.5)
df_coded$x3 = factor(df$Pepsin,levels = c("Not added","Added"),labels = c(1,-1),ordered = F)

Model

Various models metioned in the paper can be tried with the following format. Following code illustrates one such model in the paper.

model<- rsm(KCTC21024 ~ FO(x1,x2,x3) +
              TWI(x1,x2) +
              TWI(x1,x3)+
              TWI(x2,x3) + 
              PQ(x1,x2),
            data=df_coded)
summary(model)
## 
## Call:
## rsm(formula = KCTC21024 ~ FO(x1, x2, x3) + TWI(x1, x2) + TWI(x1, 
##     x3) + TWI(x2, x3) + PQ(x1, x2), data = df_coded)
## 
##              Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept)  90.43598    3.46075  26.1319 3.641e-15 ***
## x1           54.28050    4.80460  11.2976 2.517e-09 ***
## x2           -1.35767    4.80460  -0.2826  0.780913    
## x3            2.21069    2.06439   1.0709  0.299196    
## x1:x2         7.17662    1.86081   3.8567  0.001265 ** 
## x1:x3        -7.71767    3.03869  -2.5398  0.021149 *  
## x2:x3        -2.79800    3.03869  -0.9208  0.370043    
## x1^2        -37.64881    2.23938 -16.8121 5.003e-12 ***
## x2^2          0.09119    2.23938   0.0407  0.967993    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Multiple R-squared:  0.9855, Adjusted R-squared:  0.9787 
## F-statistic: 144.6 on 8 and 17 DF,  p-value: 4.286e-14
## 
## Analysis of Variance Table
## 
## Response: KCTC21024
##                Df  Sum Sq Mean Sq    F value    Pr(>F)
## FO(x1, x2, x3)  3 22285.6  7428.5 2.6817e+02 1.645e-14
## TWI(x1, x2)     1   412.0   412.0 1.4874e+01  0.001265
## TWI(x1, x3)     1   178.7   178.7 6.4506e+00  0.021149
## TWI(x2, x3)     1    23.5    23.5 8.4790e-01  0.370043
## PQ(x1, x2)      2  9141.9  4571.0 1.6501e+02 7.341e-12
## Residuals      17   470.9    27.7                     
## Lack of fit     9   470.9    52.3 2.7377e+29 < 2.2e-16
## Pure error      8     0.0     0.0                     
## 
## Stationary point of response surface:
##         x1         x2         x3 
##  0.5576908 -0.7481699  0.8964333 
## 
## Stationary point in original units:
##         pH Time_hours         x3 
##  3.2788454  1.1259150  0.8964333 
## 
## Eigenanalysis:
## eigen() decomposition
## $values
## [1]   2.141614  -1.352486 -38.346748
## 
## $vectors
##          [,1]         [,2]        [,3]
## x1 -0.1311683 -0.005996893  0.99134198
## x2 -0.7050541  0.703542370 -0.08903252
## x3  0.6969172  0.710628012  0.09651058

Survival rate of KCTC 21024 (the result below table 1 in the paper)) can be obtained from summary(model).

ANOVA and model sum of squares

anova_res<- anova(model)
print(anova_res)
## Analysis of Variance Table
## 
## Response: KCTC21024
##                Df  Sum Sq Mean Sq  F value    Pr(>F)    
## FO(x1, x2, x3)  3 22285.6  7428.5 268.1687 1.645e-14 ***
## TWI(x1, x2)     1   412.0   412.0  14.8743  0.001265 ** 
## TWI(x1, x3)     1   178.7   178.7   6.4506  0.021149 *  
## TWI(x2, x3)     1    23.5    23.5   0.8479  0.370043    
## PQ(x1, x2)      2  9141.9  4571.0 165.0114 7.341e-12 ***
## Residuals      17   470.9    27.7                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sum((model$fitted.values - mean(model$model$KCTC21024))^2)  # model sum of squares
## [1] 32041.75

Visualization

for pepsin=1

# plot the response surface
  
persp.lm(
  model,            # Our model 
  ~ x1 + x2  ,    # A formula to obtain the 6 possible graphs
  at= list(x3=1),  # Here try various levels
  col = colorRampPalette(c("red", "blue")), # Color palette
  contours = "colors"     # Include contours with the same color palette
) 

for pepsin=0

# plot the response surface
  
persp.lm(
  model,            # Our model 
  ~x1 + x2  ,    # A formula to obtain the 6 possible graphs
  at= list(x3=0),  # Here try various levels
  col = colorRampPalette(c("red", "blue")), # Color palette
  contours = "colors"     # Include contours with the same color palette
) 

Predicted value

The predicted value from the stationary points on the surface can be obtained by substituing the stationa points (see the output of summary(model) ) in to the predict function

predict(model,data.frame(x1=0.55769084,x2= -0.7481699,x3=0.8964333))
##        1 
## 107.0706

where as the mean and standard deviation of the measured data is

cat(mean(df_coded$KCTC21024), '+-' , sd(df_coded$KCTC21024))
## 76.41773 +- 36.06254