Model Fitting

Lotka-Volterra Competition

In this document we will be fitting the Lotka Volterra Competition model (as seen below).

\[\frac{dS}{dt}=r_S*S(1-\frac{S}{K_S})-\alpha_S*S*C\]

\[\frac{dC}{dt}=r_C*C(1-\frac{C}{K_C})-\alpha_C*S*C\]

In this chunk we read in the data, create two new columns holding the average population count on each day, and then create a day column that runs from day 1 to day 19. Note: for this chunk to work, you need to set the file path to where the data is located on your computer.

# Load the raw cell counts from the Excel workbook (edit the path for your machine).
algae_dilluted_data <- read_excel("~/MathBio/Data/Algae Data Diluted.xlsx")

# The first recorded date is the reference point for the day counter below.
start_date <- algae_dilluted_data$Date[1]

algae_dilluted_data <- algae_dilluted_data %>%
  mutate(
    # Row-wise mean of the eight Scene_Count_* columns.
    Scene_Average = (Scene_Count_1 + Scene_Count_2 + Scene_Count_3 + Scene_Count_4 +
                       Scene_Count_5 + Scene_Count_6 + Scene_Count_7 + Scene_Count_8) / 8,
    # Row-wise mean of the eight Clamy_Count_* columns.
    Clamy_Average = (Clamy_Count_1 + Clamy_Count_2 + Clamy_Count_3 + Clamy_Count_4 +
                       Clamy_Count_5 + Clamy_Count_6 + Clamy_Count_7 + Clamy_Count_8) / 8,
    # Days elapsed since start_date, shifted by one so the first date is day 1.
    day = as.numeric(difftime(Date, start_date, units = "days")) + 1
  )

# Keep only the columns that the rest of the analysis reads.
algae_dilluted_data_subset <- select(
  algae_dilluted_data,
  day, Flask, Scene_Average, Clamy_Average
)

Here we define an R function that describes the differential equations, then test it with some placeholder parameter values and plot the result to make sure everything is working.

# Right-hand side of the two-species Lotka-Volterra competition model, in the
# (t, state, parameters) form that ode() expects.
#
# t:     current time (not used by the equations).
# pop:   state vector; element 1 is S, element 2 is C.
# parms: parameter vector read by position as rS, kS, alphaS, rC, kC, alphaC.
#
# Returns a one-element list holding c(dS, dC).
compderivs <- function(t, pop, parms) {
  with(as.list(c(pop, parms)), {
    # Unpack the state by position.
    S <- pop[1]
    C <- pop[2]

    # Unpack the parameters by position.
    rS <- parms[1]
    kS <- parms[2]
    alphaS <- parms[3]
    rC <- parms[4]
    kC <- parms[5]
    alphaC <- parms[6]

    # Logistic growth of each species on its own ...
    logistic_S <- rS * S * (1 - (S / kS))
    logistic_C <- rC * C * (1 - (C / kC))

    # ... minus the loss caused by the other species.
    loss_S <- alphaS * S * C
    loss_C <- alphaC * C * S

    list(c(logistic_S - loss_S, logistic_C - loss_C))
  })
}


# Smoke test: integrate the model once with arbitrary parameters and plot both
# trajectories to confirm that compderivs() and ode() work together.

yinitcomp <- c(0.01, 0.01) # starting values for the two state variables
# Arbitrary trial values, in the positional order compderivs() reads them:
# rS, kS, alphaS, rC, kC, alphaC.
parms <- c(.0001, 400, .004, .07, .400, .04)
t <- seq(0, 19, length.out = 1000) # 1000 evenly spaced times from 0 to 19

# Solve numerically and convert straight to a tibble for plotting.
outcomp <- as_tibble(ode(y = yinitcomp, times = t, func = compderivs, parms = parms))

# ode() labels the two state columns `1` and `2`; give them readable names.
comp_test <- rename(outcomp, "Scene" = `1`, "Clamy" = `2`)

# Quick visual check: Scene in red, Clamy in blue.
ggplot(comp_test) +
  geom_line(aes(x = time, y = Scene), color = "red") +
  geom_line(aes(x = time, y = Clamy), color = "blue")

Manipulate General Usage

This is the code that we used for finding parameter estimates for all of our models. When we switch flasks we just change the points that we are plotting to the corresponding flask. Note, for this to work you need to add the file path to the data on your computer.

# Load the raw cell counts (edit the path for your machine).
algae_dilluted_data <- read_excel("~/MathBio/Data/Algae Data Diluted.xlsx")

# First recorded date; used as the origin of the day counter.
start_date <- algae_dilluted_data$Date[1]

# Add the per-row averages of the eight replicate counts and a day index.
algae_dilluted_data <- algae_dilluted_data %>%
  mutate(
    Scene_Average = (Scene_Count_1 + Scene_Count_2 + Scene_Count_3 + Scene_Count_4 +
                       Scene_Count_5 + Scene_Count_6 + Scene_Count_7 + Scene_Count_8) / 8,
    Clamy_Average = (Clamy_Count_1 + Clamy_Count_2 + Clamy_Count_3 + Clamy_Count_4 +
                       Clamy_Count_5 + Clamy_Count_6 + Clamy_Count_7 + Clamy_Count_8) / 8,
    # Days since start_date, shifted so the first date is day 1.
    day = as.numeric(difftime(Date, start_date, units = "days")) + 1
  )

# Rows for the flask currently being explored; change the label to switch flasks.
algae_dilluted_data_1 <- algae_dilluted_data %>%
  filter(Flask == "1.7DC") %>%
  select(Scene_Average, Clamy_Average, day)

# All flasks, restricted to the columns used later.
algae_dilluted_data_subset <- select(
  algae_dilluted_data,
  day, Flask, Scene_Average, Clamy_Average
)

# One two-column data set per species for the selected flask.
algae_dilluted_data_1.7DC_S <- select(algae_dilluted_data_1, Scene_Average, day)
algae_dilluted_data_1.7DC_C <- select(algae_dilluted_data_1, Clamy_Average, day)


# Interactive explorer used to hand-pick starting parameter values: each slider
# sets one model parameter, the model is re-solved, and the curves are drawn on
# top of the observed averages for the flask held in algae_dilluted_data_1.
manipulate({
  # Lotka-Volterra competition model in the (t, state, parameters) form used by
  # ode(). pop is c(S, C); parms is c(rS, kS, alphaS, rC, kC, alphaC).
  compderivs <- function(t, pop, parms) {
    with(as.list(c(pop, parms)), {
      S <- pop[1]
      C <- pop[2]
      rS <- parms[1]
      kS <- parms[2]
      alphaS <- parms[3]
      rC <- parms[4]
      kC <- parms[5]
      alphaC <- parms[6]
      # The competition terms must contain BOTH populations (S * C). Without
      # the second factor they act as a plain death rate and the sliders would
      # explore a different model from the one that is fitted afterwards.
      dS <- rS * S * (1 - (S / kS)) - alphaS * S * C
      dC <- rC * C * (1 - (C / kC)) - alphaC * C * S
      list(c(dS, dC))
    })
  }

  yinitcomp <- c(0.01, 0.01) # initial conditions
  parms <- c(rSs, KSs, alphaSs, rCs, KCs, alphaCs) # current slider values
  t <- seq(0, 19, length.out = 1000) # time grid for the prediction curves

  # Solve with the current slider values and name the two state columns.
  outcomp <- as_tibble(ode(y = yinitcomp, times = t, func = compderivs, parms = parms))
  comp_test <- rename(outcomp, "Scene" = `1`, "Clamy" = `2`)

  # Observed averages as points (Scene red, Chlamy blue) ...
  plot(algae_dilluted_data_1$day, algae_dilluted_data_1$Scene_Average,
       col = "red", xlim = c(0, 20), ylim = c(0, 30),
       xlab = "Days", ylab = "Number of Cells *10^4 cells/ml",
       main = "Manipulate For Parameter Esitmates Plot")
  # (points() draws into the existing axes, so it takes no xlim of its own.)
  points(algae_dilluted_data_1$day, algae_dilluted_data_1$Clamy_Average, col = "blue")
  # ... and model predictions as lines in the matching colours.
  lines(comp_test$time, comp_test$Scene, col = "red")
  lines(comp_test$time, comp_test$Clamy, col = "blue")
  legend("topright", col = c("blue", "red"), legend = c("Chlamy", "Scene"),
         pch = c(1, 1), lty = c(1, 1), cex = 0.5)
},
# One slider per model parameter.
rSs = slider(0, 2, step = 0.001, initial = 1.3),
KSs = slider(0, 100, initial = 5, step = 0.1),
alphaSs = slider(0, 2, step = 0.001, initial = 0.029),
rCs = slider(0, 1.5, step = 0.001, initial = .763),
KCs = slider(0, 100, initial = 90, step = 0.1),
alphaCs = slider(0, 1, step = 0.001, initial = 0.07))

1.7DC

Here we fit the model to the 1.7mM diluted and combined sample and plotted the results.

# Observations for the 1.7DC flask only.
algae_dilluted_data_17dc <- algae_dilluted_data %>%
  filter(Flask == "1.7DC") %>%
  select(Scene_Average, Clamy_Average, day, Flask) %>%
  # The last points were entered into the data frame incorrectly, so the day-19
  # averages are overwritten here with the corrected values.
  mutate(
    Scene_Average = ifelse(day == 19, 4.62, Scene_Average),
    Clamy_Average = ifelse(day == 19, 23.8, Clamy_Average)
  )

# One two-column data set per species.
algae_dilluted_data_1.7DC_S <- select(algae_dilluted_data_17dc, Scene_Average, day)
algae_dilluted_data_1.7DC_C <- select(algae_dilluted_data_17dc, Clamy_Average, day)

# Solve the model at exactly the days on which THIS flask was measured. Taking
# the days from the flask subset (rather than from every flask in the file)
# keeps the model output the same length as the observations it is compared to.
tcomp <- unique(algae_dilluted_data_17dc$day)

# Residual function handed to modFit() for the 1.7DC flask.
#
# parms0: numeric vector read by position as rS, kS, alphaS, rC, kC, alphaC.
# Returns: the Scene residuals (model minus data) followed by the Clamy
#          residuals, one per time in tcomp.
#
# The residuals are returned unsquared on purpose: modFit() squares and sums the
# vector it receives, so squaring here as well would make the fit minimise the
# sum of FOURTH powers instead of the sum of squared errors.
sse.comp <- function(parms0) {
  # Solve the model at the observation times with the candidate parameters.
  sim <- ode(y = yinitcomp, times = tcomp, func = compderivs, parms = parms0[1:6])
  # Column 1 of the solution is time; columns 2 and 3 are the two populations.
  c(
    sim[, 2] - algae_dilluted_data_1.7DC_S$Scene_Average,
    sim[, 3] - algae_dilluted_data_1.7DC_C$Clamy_Average
  )
}


# Starting values chosen by eye with the manipulate sliders.
parms17dc <- c(1, 5, 0.029, 1.5, 20, 0.07)

# Box-constrained fit: modFit() searches between `lower` and `upper` for the
# parameter vector that minimises the objective returned by sse.comp().
fitcomp_17dc <- modFit(
  f = sse.comp,
  p = parms17dc,
  lower = rep(0, 6),
  upper = c(5, 100, 2, 2, 50, 2)
)

# Fitted parameter vector.
fitcomp_17dc$par

## [1]  2.69917445  4.54268009  0.04623947  1.99999699 41.98378849  0.34021565

# Re-solve on the fine time grid `t` using the fitted parameters.
out2comp_17dc <- ode(
  y = yinitcomp, times = t, func = compderivs, parms = fitcomp_17dc$par
)

# Colours actually drawn (set in scale_colour_manual below):
#   #0072B2 Scenedesmus
#   #E69F00 Chlamydomonas
# The "red"/"blue" strings inside aes() are only legend keys, not colours.
dc17_plot <- out2comp_17dc %>%
  as_tibble() %>%
  rename("Scene" = `1`, "Clamy" = `2`) %>%
  ggplot() +
  # Fitted curves.
  geom_line(aes(x = time, y = Clamy, color = "red")) +
  geom_line(aes(x = time, y = Scene, color = "blue")) +
  # Observed averages.
  geom_point(data = algae_dilluted_data_1.7DC_C,
             aes(x = day, y = Clamy_Average, color = "red")) +
  geom_point(data = algae_dilluted_data_1.7DC_S,
             aes(x = day, y = Scene_Average, color = "blue")) +
  scale_colour_manual(
    name = 'Population',
    values = c("#0072B2", "#E69F00"),
    labels = c("Scenedesmus", "Chlamydomonas")
  ) +
  labs(
    title = "1.7DC Lotka Volterra",
    x = "Days",
    y = "Number of Cells *10^4 per ml"
  ) +
  theme_light()

dc17_plot

0.34DC

Here we fit the model to the 0.34 mM diluted and combined data.

# Observations for the 0.34DC flask only.
algae_dilluted_data_34dc <- algae_dilluted_data %>%
  filter(Flask == "0.34DC") %>%
  select(Scene_Average, Clamy_Average, day)

# One two-column data set per species.
algae_dilluted_data_34dc_S <- select(algae_dilluted_data_34dc, Scene_Average, day)
algae_dilluted_data_34dc_C <- select(algae_dilluted_data_34dc, Clamy_Average, day)

# Solve the model at exactly the days on which this flask was measured.
tcomp <- unique(algae_dilluted_data_34dc$day)

# Residual function handed to modFit() for the 0.34DC flask.
#
# parms0: numeric vector read by position as rS, kS, alphaS, rC, kC, alphaC.
# Returns: the Scene residuals (model minus data) followed by the Clamy
#          residuals, one per time in tcomp.
#
# The residuals are returned unsquared on purpose: modFit() squares and sums the
# vector it receives, so squaring here as well would make the fit minimise the
# sum of FOURTH powers instead of the sum of squared errors.
sse.comp <- function(parms0) {
  # Solve the model at the observation times with the candidate parameters.
  sim <- ode(y = yinitcomp, times = tcomp, func = compderivs, parms = parms0[1:6])
  # Column 1 of the solution is time; columns 2 and 3 are the two populations.
  c(
    sim[, 2] - algae_dilluted_data_34dc_S$Scene_Average,
    sim[, 3] - algae_dilluted_data_34dc_C$Clamy_Average
  )
}
# Starting values chosen by eye with the manipulate sliders.
parms_34dc <- c(.536, 25, 0.029, 0.87, 8.5, 0.04)

# Box-constrained fit of the six parameters against the 0.34DC observations.
fitcomp_34dc <- modFit(
  f = sse.comp,
  p = parms_34dc,
  lower = rep(0, 6),
  upper = c(2, 100, 2, 2, 15, 2)
)

# Fitted parameter vector.
fitcomp_34dc$par

## [1]  0.95672888 90.05156007  0.16238677  0.86187192 14.19121631  0.05594502

# Re-solve on the fine time grid `t` using the fitted parameters.
out2comp_34dc <- ode(
  y = yinitcomp, times = t, func = compderivs, parms = fitcomp_34dc$par
)

# Fitted curves over the observed averages. The "red"/"blue" strings inside
# aes() are only legend keys; the drawn colours come from scale_colour_manual.
dc34_plot <- out2comp_34dc %>%
  as_tibble() %>%
  rename("Scene" = `1`, "Clamy" = `2`) %>%
  ggplot() +
  # Fitted curves.
  geom_line(aes(x = time, y = Clamy, color = "red")) +
  geom_line(aes(x = time, y = Scene, color = "blue")) +
  # Observed averages.
  geom_point(data = algae_dilluted_data_34dc_C,
             aes(x = day, y = Clamy_Average, color = "red")) +
  geom_point(data = algae_dilluted_data_34dc_S,
             aes(x = day, y = Scene_Average, color = "blue")) +
  scale_colour_manual(
    name = 'Population',
    values = c("#0072B2", "#E69F00"),
    labels = c("Scenedesmus", "Chlamydomonas")
  ) +
  labs(
    title = "0.34DC Lotka Volterra",
    x = "Days",
    y = "Number of Cells *10^4 per ml"
  ) +
  theme_light()
dc34_plot

5DC

Here we fit our model to our 5\(\mu\)M diluted and combined data. The procedure is the same as the first two sets of data that we fit.

# Observations for the 5DC flask only.
algae_dilluted_data_5dc <- algae_dilluted_data %>%
  filter(Flask == "5DC") %>%
  select(Scene_Average, Clamy_Average, day)

# One two-column data set per species.
algae_dilluted_data_5dc_S <- select(algae_dilluted_data_5dc, Scene_Average, day)
algae_dilluted_data_5dc_C <- select(algae_dilluted_data_5dc, Clamy_Average, day)

# Solve the model at exactly the days on which THIS flask was measured. Setting
# tcomp here (as the other flask sections do) stops this section from silently
# reusing whatever time vector the previously run section left behind.
tcomp <- unique(algae_dilluted_data_5dc$day)

# Residual function handed to modFit() for the 5DC flask.
#
# parms0: numeric vector read by position as rS, kS, alphaS, rC, kC, alphaC.
# Returns: the Scene residuals (model minus data) followed by the Clamy
#          residuals, one per time in tcomp.
#
# The residuals are returned unsquared on purpose: modFit() squares and sums the
# vector it receives, so squaring here as well would make the fit minimise the
# sum of FOURTH powers instead of the sum of squared errors.
sse.comp <- function(parms0) {
  # Solve the model at the observation times with the candidate parameters.
  sim <- ode(y = yinitcomp, times = tcomp, func = compderivs, parms = parms0[1:6])
  # Column 1 of the solution is time; columns 2 and 3 are the two populations.
  c(
    sim[, 2] - algae_dilluted_data_5dc_S$Scene_Average,
    sim[, 3] - algae_dilluted_data_5dc_C$Clamy_Average
  )
}
# Starting values for the 5DC fit.
parms_5dc <- c(1.178, 19.3, .979, .555, 5, .0846)

# Box-constrained fit; note the first two parameters have non-zero lower bounds.
fitcomp_5dc <- modFit(
  f = sse.comp,
  p = parms_5dc,
  lower = c(1, 18, 0, 0, 0, 0),
  upper = c(2, 30, 1, 1, 10, .4)
)
# Fitted parameter vector.
fitcomp_5dc$par

## [1]  1.0022246 18.0079104  0.9999964  0.5757929  8.8902125  0.0719992

# Re-solve on the fine time grid `t` using the fitted parameters.
out2comp_5dc <- ode(
  y = yinitcomp, times = t, func = compderivs, parms = fitcomp_5dc$par
)

# Fitted curves over the observed averages. The "red"/"blue" strings inside
# aes() are only legend keys; the drawn colours come from scale_colour_manual.
dc5_plot <- out2comp_5dc %>%
  as_tibble() %>%
  rename("Scene" = `1`, "Clamy" = `2`) %>%
  ggplot() +
  # Fitted curves.
  geom_line(aes(x = time, y = Clamy, color = "red")) +
  geom_line(aes(x = time, y = Scene, color = "blue")) +
  # Observed averages.
  geom_point(data = algae_dilluted_data_5dc_C,
             aes(x = day, y = Clamy_Average, color = "red")) +
  geom_point(data = algae_dilluted_data_5dc_S,
             aes(x = day, y = Scene_Average, color = "blue")) +
  scale_colour_manual(
    name = 'Population',
    values = c("#0072B2", "#E69F00"),
    labels = c("Scenedesmus", "Chlamydomonas")
  ) +
  labs(
    title = "5DC Lotka Volterra",
    x = "Days",
    y = "Number of Cells *10^4 per ml"
  ) +
  theme_light()
dc5_plot

Nice Plot

Here we made the nice plot that you see on our poster.

# Convert each fitted trajectory to a data frame and tag it with its flask
# label so the three can be stacked and faceted later.
out2comp_17dc <- as.data.frame(out2comp_17dc)
out2comp_17dc[["Flask"]] <- "1.7DC"

out2comp_34dc <- as.data.frame(out2comp_34dc)
out2comp_34dc[["Flask"]] <- "0.34DC"

out2comp_5dc <- as.data.frame(out2comp_5dc)
out2comp_5dc[["Flask"]] <- "5DC"

# Colour palette for the poster figure.
cbPalette <- c(
  "#E69F00", "#0072B2", "#999999", "#E69F00",
  "#56B4E9", "#F0E442", "#009E73"
)


# Per-row standard deviation of the eight replicate counts for each species.
# The count columns are picked by NAME (the same Scene_Count_1..8 and
# Clamy_Count_1..8 columns that the averages are built from) rather than by
# position, so the result does not depend on the column order of the spreadsheet.
clamy_count_cols <- paste0("Clamy_Count_", 1:8)
scene_count_cols <- paste0("Scene_Count_", 1:8)

clamy_sd <- tibble(
  clamy_sd = apply(as.matrix(algae_dilluted_data[, clamy_count_cols]), 1, sd)
)

scene_sd <- tibble(
  scene_sd = apply(as.matrix(algae_dilluted_data[, scene_count_cols]), 1, sd)
)

# Drop any SD columns left over from a previous run of this chunk; otherwise
# bind_cols() would create duplicated (auto-renamed) columns and the later
# select(scene_sd, clamy_sd) would no longer find them.
algae_dilluted_data <- algae_dilluted_data[
  , setdiff(names(algae_dilluted_data), c("clamy_sd", "scene_sd"))
]

# Attach both SD columns to the main data (clamy_sd first, then scene_sd).
algae_dilluted_data <- bind_cols(algae_dilluted_data, clamy_sd, scene_sd)



# Observed data for the poster figure, in long format: one row per
# flask / day / species, with both SD columns carried along on every row.
dc_plotting_actual <- algae_dilluted_data %>%
  filter(Flask %in% c("0.34DC", "1.7DC", "5DC")) %>%
  select(Scene_Average, Clamy_Average, scene_sd, clamy_sd, day, Flask) %>%
  # Stack the two average columns (positions 1 and 2) into Species / Count.
  gather("Species", "Count", 1:2) %>%
  # "Scene_Average" -> "Scene" (+ a throw-away "Average" piece), same for Clamy.
  separate(Species, into = c("Species", "Trash"), sep = "_") %>%
  select(-Trash) %>%
  mutate(
    Species = ifelse(Species == "Scene", "Scenedesmus", "Chlamydomonas"),
    # Replace the short flask codes with display labels; the level order sets
    # the facet order.
    Flask = factor(
      Flask,
      levels = c("5DC", "0.34DC", "1.7DC"),
      labels = c(
        "5 uM Diluted and Combined",
        "0.34 mM Diluted and Combined",
        "1.7 mM Diluted and Combined"
      )
    )
  )




# Fitted trajectories for the poster figure, in long format: the three tagged
# solutions are stacked (1.7DC, 0.34DC, 5DC) and the two state columns are
# gathered into Species / Count.
dc_plot_data <- rbind(out2comp_17dc, out2comp_34dc, out2comp_5dc) %>%
  as_tibble() %>%
  rename("Scenedesmus" = `1`, "Chlamydomonas" = `2`) %>%
  # Columns 2 and 3 are the two species; column 1 is time.
  gather("Species", "Count", 2:3) %>%
  mutate(
    # Replace the short flask codes with display labels; the level order sets
    # the facet order.
    Flask = factor(
      Flask,
      levels = c("5DC", "0.34DC", "1.7DC"),
      labels = c(
        "5 uM Diluted and Combined",
        "0.34 mM Diluted and Combined",
        "1.7 mM Diluted and Combined"
      )
    )
  )


# Poster figure: observed averages with standard-error bars, overlaid with the
# fitted model curves, one facet per flask.
ggplot() +
  # Standard error = SD of the eight replicate counts / sqrt(8). Each point must
  # use the SD of its OWN species: scene_sd for Scenedesmus rows and clamy_sd
  # for Chlamydomonas rows (using clamy_sd everywhere would put the wrong error
  # bars on every Scenedesmus point).
  geom_errorbar(
    data = dc_plotting_actual,
    aes(
      x = day,
      ymin = Count - ifelse(Species == "Scenedesmus", scene_sd, clamy_sd) / sqrt(8),
      ymax = Count + ifelse(Species == "Scenedesmus", scene_sd, clamy_sd) / sqrt(8)
    ),
    colour = "grey",
    width = .5
  ) +
  # Observed averages.
  geom_point(data = dc_plotting_actual, aes(x = day, y = Count, color = Species), size = 2) +
  # Best-fit model curves.
  geom_line(data = dc_plot_data, aes(x = time, y = Count, color = Species), size = 2) +
  facet_wrap(~Flask) +
  ylab("Average Count (x10^4 cells/ml)") +
  xlab("Days") +
  theme_minimal() +
  theme(legend.position = "bottom",
        text = element_text(size = 20),
        axis.line = element_line(color = "black")) +
  theme(plot.margin = unit(c(1, 1, 1.5, 1.2), "cm")) +
  scale_colour_manual(values = cbPalette)

Parameter estimates

Here we compare all of the parameter estimates for the diluted and combined data.

# Fitted parameters per flask, printed in the positional order used throughout:
# rS, kS, alphaS, rC, kC, alphaC.

# 1.7DC
fitcomp_17dc[["par"]]

## [1]  2.69917445  4.54268009  0.04623947  1.99999699 41.98378849  0.34021565

# 0.34DC
fitcomp_34dc[["par"]]

## [1]  0.95672888 90.05156007  0.16238677  0.86187192 14.19121631  0.05594502

# 5DC
fitcomp_5dc[["par"]]

## [1]  1.0022246 18.0079104  0.9999964  0.5757929  8.8902125  0.0719992