Assignment week 5: Saturation Model and Comparative Analysis of Growth Models
# Load necessary libraries
library(stats)
# 1. Simulate your own dataset
set.seed(123) # for reproducibility
n <- 50
x <- seq(0, 10, length.out = n)
a_true <- 5
b_true <- 0.5
epsilon <- rnorm(n, mean = 0, sd = 0.2)
y <- a_true * (1 - exp(-b_true * x)) + epsilon
# Combine into a data frame
data <- data.frame(x, y)
# 2. Fit using nls()
model <- nls(y ~ a * (1 - exp(-b * x)),
data = data,
start = list(a = 4, b = 0.4))
# Summary of the model
summary(model)
##
## Formula: y ~ a * (1 - exp(-b * x))
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## a 4.99682 0.04802 104.06 <2e-16 ***
## b 0.50304 0.01835 27.41 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1872 on 48 degrees of freedom
##
## Number of iterations to convergence: 4
## Achieved convergence tolerance: 1.437e-06
# 3. Plot fitted vs observed
plot(x, y, main = "Saturation Model: Fitted vs Observed",
xlab = "x", ylab = "y")
lines(x, predict(model), col = "red", lwd = 2)
# 4. Interpret parameters
coef(model) # estimated a and b
## a b
## 4.9968231 0.5030393
# Load necessary libraries
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
# Set seed for reproducibility
set.seed(42)
# Saturation Model Parameters
a_saturation <- 10
b_saturation <- 0.5
x_saturation <- seq(0, 10, length.out = 100)
epsilon_saturation <- rnorm(length(x_saturation), mean = 0, sd = 0.5)
y_saturation <- a_saturation * (1 - exp(-b_saturation * x_saturation)) + epsilon_saturation
data_saturation <- data.frame(x = x_saturation, y = y_saturation)
# Logistic Model Parameters
a_logistic <- 20
b_logistic <- 0.5
c_logistic <- 5
x_logistic <- seq(0, 10, length.out = 100)
epsilon_logistic <- rnorm(length(x_logistic), mean = 0, sd = 0.5)
y_logistic <- a_logistic / (1 + exp(-b_logistic * (x_logistic - c_logistic))) + epsilon_logistic
data_logistic <- data.frame(x = x_logistic, y = y_logistic)
# Plot the datasets
ggplot() +
geom_point(data = data_saturation, aes(x = x, y = y), color = 'blue') +
geom_point(data = data_logistic, aes(x = x, y = y), color = 'green') +
labs(title = "Simulated Datasets: Saturation (blue) & Logistic (green)", x = "x", y = "y") +
theme_minimal()
# Fit Saturation Model
fit_saturation <- nls(y ~ a * (1 - exp(-b * x)), data = data_saturation,
start = list(a = 10, b = 0.5))
summary(fit_saturation)
##
## Formula: y ~ a * (1 - exp(-b * x))
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## a 10.03780 0.09703 103.45 <2e-16 ***
## b 0.49089 0.01765 27.81 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5229 on 98 degrees of freedom
##
## Number of iterations to convergence: 2
## Achieved convergence tolerance: 9.362e-06
# Fit Logistic Model
fit_logistic <- nls(y ~ a / (1 + exp(-b * (x - c))), data = data_logistic,
start = list(a = 20, b = 0.5, c = 5))
summary(fit_logistic)
##
## Formula: y ~ a/(1 + exp(-b * (x - c)))
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## a 20.02233 0.25166 79.56 <2e-16 ***
## b 0.50546 0.01162 43.52 <2e-16 ***
## c 5.03520 0.07038 71.55 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4534 on 97 degrees of freedom
##
## Number of iterations to convergence: 3
## Achieved convergence tolerance: 3.703e-07
# AIC and BIC for Saturation Model
aic_saturation <- AIC(fit_saturation)
bic_saturation <- BIC(fit_saturation)
# AIC and BIC for Logistic Model
aic_logistic <- AIC(fit_logistic)
bic_logistic <- BIC(fit_logistic)
# Display results
data.frame(Model = c("Saturation", "Logistic"),
AIC = c(aic_saturation, aic_logistic),
BIC = c(bic_saturation, bic_logistic))
## Model AIC BIC
## 1 Saturation 158.0953 165.9108
## 2 Logistic 130.5448 140.9655
# Predicting values
data_saturation$predicted <- predict(fit_saturation)
data_logistic$predicted <- predict(fit_logistic)
# Plot fits
ggplot() +
geom_point(data = data_saturation, aes(x = x, y = y), color = 'blue') +
geom_line(data = data_saturation, aes(x = x, y = predicted), color = 'red') +
geom_point(data = data_logistic, aes(x = x, y = y), color = 'green') +
geom_line(data = data_logistic, aes(x = x, y = predicted), color = 'orange') +
labs(title = "Model Fits: Saturation (red) vs Logistic (orange)",
x = "x", y = "y") +
theme_minimal()
# Load necessary libraries
library(ggplot2)
library(minpack.lm)
## Warning: package 'minpack.lm' was built under R version 4.5.2
# Set seed for reproducibility
set.seed(42)
# Parameters for the Asymptotic model
a1 <- 100
k1 <- 0.1
t <- seq(0, 100, by = 1)
epsilon1 <- rnorm(length(t), mean = 0, sd = 5) # Error term
# Simulate Asymptotic data
y1 <- a1 * (1 - exp(-k1 * t)) + epsilon1
# Parameters for the Three-Parameter Logistic model
a2 <- 100
b2 <- 0.1
c2 <- 50
epsilon2 <- rnorm(length(t), mean = 0, sd = 5) # Error term
# Simulate Logistic data
y2 <- a2 / (1 + exp(-b2 * (t - c2))) + epsilon2
# Combine datasets into a data frame
data_asymptotic <- data.frame(t, y = y1)
data_logistic <- data.frame(t, y = y2)
# Fit Asymptotic model
asymptotic_model <- nlsLM(y ~ a * (1 - exp(-k * t)),
data = data_asymptotic,
start = list(a = 100, k = 0.1))
# Fit Logistic model
logistic_model <- nlsLM(y ~ a / (1 + exp(-b * (t - c))),
data = data_logistic,
start = list(a = 100, b = 0.1, c = 50))
# Summary of the models
summary(asymptotic_model)
##
## Formula: y ~ a * (1 - exp(-k * t))
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## a 99.977503 0.658436 151.84 <2e-16 ***
## k 0.100978 0.003911 25.82 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.242 on 99 degrees of freedom
##
## Number of iterations to convergence: 4
## Achieved convergence tolerance: 1.49e-08
summary(logistic_model)
##
## Formula: y ~ a/(1 + exp(-b * (t - c)))
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## a 99.439932 1.114496 89.22 <2e-16 ***
## b 0.104293 0.004218 24.73 <2e-16 ***
## c 50.196955 0.473581 105.99 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.588 on 98 degrees of freedom
##
## Number of iterations to convergence: 3
## Achieved convergence tolerance: 1.49e-08
# AIC and BIC
aic_asymptotic <- AIC(asymptotic_model)
bic_asymptotic <- BIC(asymptotic_model)
aic_logistic <- AIC(logistic_model)
bic_logistic <- BIC(logistic_model)
# Residual Standard Error
residual_std_error_asymptotic <- summary(asymptotic_model)$sigma
residual_std_error_logistic <- summary(logistic_model)$sigma
# Output Comparison
comparison <- data.frame(
Model = c("Asymptotic", "Logistic"),
AIC = c(aic_asymptotic, aic_logistic),
BIC = c(bic_asymptotic, bic_logistic),
Residual_SE = c(residual_std_error_asymptotic, residual_std_error_logistic)
)
print(comparison)
## Model AIC BIC Residual_SE
## 1 Asymptotic 625.2715 633.1169 5.242311
## 2 Logistic 599.3018 609.7623 4.587680
# Predictions for plotting
data_asymptotic$predicted <- predict(asymptotic_model)
data_logistic$predicted <- predict(logistic_model)
# Plotting both models
ggplot() +
geom_point(data = data_asymptotic, aes(x = t, y = y), color = "blue", alpha = 0.5) +
geom_line(data = data_asymptotic, aes(x = t, y = predicted), color = "blue") +
geom_point(data = data_logistic, aes(x = t, y = y), color = "red", alpha = 0.5) +
geom_line(data = data_logistic, aes(x = t, y = predicted), color = "red") +
labs(title = "Model Fit Comparison", x = "Time", y = "Response") +
theme_minimal()
library(MASS)
## Warning: package 'MASS' was built under R version 4.5.2
# Create a profile likelihood
profile_asymptotic <- profile(asymptotic_model)
# Plot the profile likelihood
plot(profile_asymptotic)
Week6 Assigment:Variance Reduction Using Control Variates
# Set seed for reproducibility
set.seed(123)
# Number of simulations
n <- 10000
# Function definition
f <- function(x) {
return(x^2)
}
# Plain Monte Carlo
plain_samples <- runif(n) # Uniform samples from [0, 1]
plain_estimate <- mean(f(plain_samples))
plain_variance <- var(f(plain_samples))
# Importance Sampling
lambda <- 1 # rate parameter for exponential distribution
importance_samples <- rexp(n, rate = lambda) # Exponential samples
importance_weight <- dexp(importance_samples, rate = lambda) / (1) # PDF of target distribution
# Rescale the integral
importance_estimate <- mean(f(importance_samples) * importance_weight)
importance_variance <- var(f(importance_samples) * importance_weight)
# Reduction factor
reduction_factor <- plain_variance / importance_variance
# Report results
cat("Plain Monte Carlo Estimate: ", plain_estimate, "\n")
## Plain Monte Carlo Estimate: 0.3297405
cat("Plain Monte Carlo Variance: ", plain_variance, "\n")
## Plain Monte Carlo Variance: 0.08711874
cat("Importance Sampling Estimate: ", importance_estimate, "\n")
## Importance Sampling Estimate: 0.2517207
cat("Importance Sampling Variance: ", importance_variance, "\n")
## Importance Sampling Variance: 0.03590007
cat("Reduction Factor: ", reduction_factor, "\n")
## Reduction Factor: 2.426701
CAT
# Load necessary libraries
set.seed(123) # Set seed for reproducibility
# Parameters for the simulation
Y0 <- 100 # Initial population
r <- 0.05 # Growth rate
t <- 0:20 # Time from 0 to 20
# Simulate growth
Y <- Y0 * exp(r * t)
# Add some random noise to simulate observed data
observed_data <- Y + rnorm(length(Y), mean = 0, sd = 10)
# Combine into a data frame
data <- data.frame(Time = t, Observed = observed_data)
# View the first few rows of the data
head(data)
## Time Observed
## 1 0 94.39524
## 2 1 102.82533
## 3 2 126.10417
## 4 3 116.88851
## 5 4 123.43315
## 6 5 145.55319
Assumptions: The population grows continuously at a constant proportional rate The error term has mean zero Errors are normally distributed Errors have constant variance (homoscedasticity) Observations are independent over time
# Non-linear model fitting
model <- nls(Observed ~ Y0 * exp(r * Time), data = data, start = list(Y0 = 100, r = 0.05))
# Summary of the model to see estimated parameters
summary(model)
##
## Formula: Observed ~ Y0 * exp(r * Time)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## Y0 1.034e+02 3.202e+00 32.31 < 2e-16 ***
## r 4.756e-02 2.153e-03 22.08 5.22e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.806 on 19 degrees of freedom
##
## Number of iterations to convergence: 3
## Achieved convergence tolerance: 2.953e-07
# Extracting estimated parameters
estimated_params <- coef(model)
cat("Estimated Y0:", estimated_params['Y0'], "\n")
## Estimated Y0: 103.4482
cat("Estimated r:", estimated_params['r'], "\n")
## Estimated r: 0.04755725
# Load the ggplot2 library for plotting
library(ggplot2)
# Create a data frame for fitted values
data$Fitted <- predict(model)
# Plot observed and fitted values
ggplot(data, aes(x = Time)) +
geom_point(aes(y = Observed), color = 'red', size = 2) +
geom_line(aes(y = Fitted), color = 'blue', size = 1) +
labs(title = "Bacterial Growth Model", y = "Population", x = "Time") +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.