Statistical computing 2 Assignments and CAT

Assignment week 5: Saturation Model and Comparative Analysis of Growth Models

# Load necessary libraries
library(stats)

# 1. Simulate your own dataset
set.seed(123)  # for reproducibility
n <- 50
x <- seq(0, 10, length.out = n)
a_true <- 5
b_true <- 0.5
epsilon <- rnorm(n, mean = 0, sd = 0.2)
y <- a_true * (1 - exp(-b_true * x)) + epsilon

# Combine into a data frame
data <- data.frame(x, y)

# 2. Fit using nls()
model <- nls(y ~ a * (1 - exp(-b * x)),
             data = data,
             start = list(a = 4, b = 0.4))

# Summary of the model
summary(model)

## 
## Formula: y ~ a * (1 - exp(-b * x))
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a  4.99682    0.04802  104.06   <2e-16 ***
## b  0.50304    0.01835   27.41   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1872 on 48 degrees of freedom
## 
## Number of iterations to convergence: 4 
## Achieved convergence tolerance: 1.437e-06

# 3. Plot fitted vs observed
plot(x, y, main = "Saturation Model: Fitted vs Observed",
     xlab = "x", ylab = "y")
lines(x, predict(model), col = "red", lwd = 2)

# 4. Interpret parameters
coef(model)  # estimated a and b

##         a         b 
## 4.9968231 0.5030393

# Load necessary libraries
library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.5.2

# Set seed for reproducibility
set.seed(42)

# Saturation Model Parameters
a_saturation <- 10
b_saturation <- 0.5
x_saturation <- seq(0, 10, length.out = 100)
epsilon_saturation <- rnorm(length(x_saturation), mean = 0, sd = 0.5)
y_saturation <- a_saturation * (1 - exp(-b_saturation * x_saturation)) + epsilon_saturation
data_saturation <- data.frame(x = x_saturation, y = y_saturation)

# Logistic Model Parameters
a_logistic <- 20
b_logistic <- 0.5
c_logistic <- 5
x_logistic <- seq(0, 10, length.out = 100)
epsilon_logistic <- rnorm(length(x_logistic), mean = 0, sd = 0.5)
y_logistic <- a_logistic / (1 + exp(-b_logistic * (x_logistic - c_logistic))) + epsilon_logistic
data_logistic <- data.frame(x = x_logistic, y = y_logistic)

# Plot the datasets
ggplot() +
  geom_point(data = data_saturation, aes(x = x, y = y), color = 'blue') +
  geom_point(data = data_logistic, aes(x = x, y = y), color = 'green') +
  labs(title = "Simulated Datasets: Saturation (blue) & Logistic (green)", x = "x", y = "y") +
  theme_minimal()

# Fit Saturation Model
fit_saturation <- nls(y ~ a * (1 - exp(-b * x)), data = data_saturation, 
                       start = list(a = 10, b = 0.5))
summary(fit_saturation)

## 
## Formula: y ~ a * (1 - exp(-b * x))
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a 10.03780    0.09703  103.45   <2e-16 ***
## b  0.49089    0.01765   27.81   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5229 on 98 degrees of freedom
## 
## Number of iterations to convergence: 2 
## Achieved convergence tolerance: 9.362e-06

# Fit Logistic Model
fit_logistic <- nls(y ~ a / (1 + exp(-b * (x - c))), data = data_logistic, 
                    start = list(a = 20, b = 0.5, c = 5))
summary(fit_logistic)

## 
## Formula: y ~ a/(1 + exp(-b * (x - c)))
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a 20.02233    0.25166   79.56   <2e-16 ***
## b  0.50546    0.01162   43.52   <2e-16 ***
## c  5.03520    0.07038   71.55   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4534 on 97 degrees of freedom
## 
## Number of iterations to convergence: 3 
## Achieved convergence tolerance: 3.703e-07

# AIC and BIC for Saturation Model
aic_saturation <- AIC(fit_saturation)
bic_saturation <- BIC(fit_saturation)

# AIC and BIC for Logistic Model
aic_logistic <- AIC(fit_logistic)
bic_logistic <- BIC(fit_logistic)

# Display results
data.frame(Model = c("Saturation", "Logistic"),
           AIC = c(aic_saturation, aic_logistic),
           BIC = c(bic_saturation, bic_logistic))

##        Model      AIC      BIC
## 1 Saturation 158.0953 165.9108
## 2   Logistic 130.5448 140.9655

# Predicting values
data_saturation$predicted <- predict(fit_saturation)
data_logistic$predicted <- predict(fit_logistic)

# Plot fits
ggplot() +
  geom_point(data = data_saturation, aes(x = x, y = y), color = 'blue') +
  geom_line(data = data_saturation, aes(x = x, y = predicted), color = 'red') +
  geom_point(data = data_logistic, aes(x = x, y = y), color = 'green') +
  geom_line(data = data_logistic, aes(x = x, y = predicted), color = 'orange') +
  labs(title = "Model Fits: Saturation (red) vs Logistic (orange)",
       x = "x", y = "y") +
  theme_minimal()

# Load necessary libraries
library(ggplot2)
library(minpack.lm)

## Warning: package 'minpack.lm' was built under R version 4.5.2

# Set seed for reproducibility
set.seed(42)

# Parameters for the Asymptotic model
a1 <- 100
k1 <- 0.1
t <- seq(0, 100, by = 1)
epsilon1 <- rnorm(length(t), mean = 0, sd = 5)  # Error term

# Simulate Asymptotic data
y1 <- a1 * (1 - exp(-k1 * t)) + epsilon1

# Parameters for the Three-Parameter Logistic model
a2 <- 100
b2 <- 0.1
c2 <- 50
epsilon2 <- rnorm(length(t), mean = 0, sd = 5)  # Error term

# Simulate Logistic data
y2 <- a2 / (1 + exp(-b2 * (t - c2))) + epsilon2

# Combine datasets into a data frame
data_asymptotic <- data.frame(t, y = y1)
data_logistic <- data.frame(t, y = y2)

# Fit Asymptotic model
asymptotic_model <- nlsLM(y ~ a * (1 - exp(-k * t)), 
                           data = data_asymptotic, 
                           start = list(a = 100, k = 0.1))

# Fit Logistic model
logistic_model <- nlsLM(y ~ a / (1 + exp(-b * (t - c))), 
                        data = data_logistic, 
                        start = list(a = 100, b = 0.1, c = 50))

# Summary of the models
summary(asymptotic_model)

## 
## Formula: y ~ a * (1 - exp(-k * t))
## 
## Parameters:
##    Estimate Std. Error t value Pr(>|t|)    
## a 99.977503   0.658436  151.84   <2e-16 ***
## k  0.100978   0.003911   25.82   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.242 on 99 degrees of freedom
## 
## Number of iterations to convergence: 4 
## Achieved convergence tolerance: 1.49e-08

summary(logistic_model)

## 
## Formula: y ~ a/(1 + exp(-b * (t - c)))
## 
## Parameters:
##    Estimate Std. Error t value Pr(>|t|)    
## a 99.439932   1.114496   89.22   <2e-16 ***
## b  0.104293   0.004218   24.73   <2e-16 ***
## c 50.196955   0.473581  105.99   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.588 on 98 degrees of freedom
## 
## Number of iterations to convergence: 3 
## Achieved convergence tolerance: 1.49e-08

# AIC and BIC
aic_asymptotic <- AIC(asymptotic_model)
bic_asymptotic <- BIC(asymptotic_model)

aic_logistic <- AIC(logistic_model)
bic_logistic <- BIC(logistic_model)

# Residual Standard Error
residual_std_error_asymptotic <- summary(asymptotic_model)$sigma
residual_std_error_logistic <- summary(logistic_model)$sigma

# Output Comparison
comparison <- data.frame(
  Model = c("Asymptotic", "Logistic"),
  AIC = c(aic_asymptotic, aic_logistic),
  BIC = c(bic_asymptotic, bic_logistic),
  Residual_SE = c(residual_std_error_asymptotic, residual_std_error_logistic)
)
print(comparison)

##        Model      AIC      BIC Residual_SE
## 1 Asymptotic 625.2715 633.1169    5.242311
## 2   Logistic 599.3018 609.7623    4.587680

# Predictions for plotting
data_asymptotic$predicted <- predict(asymptotic_model)
data_logistic$predicted <- predict(logistic_model)

# Plotting both models
ggplot() +
  geom_point(data = data_asymptotic, aes(x = t, y = y), color = "blue", alpha = 0.5) +
  geom_line(data = data_asymptotic, aes(x = t, y = predicted), color = "blue") +
  geom_point(data = data_logistic, aes(x = t, y = y), color = "red", alpha = 0.5) +
  geom_line(data = data_logistic, aes(x = t, y = predicted), color = "red") +
  labs(title = "Model Fit Comparison", x = "Time", y = "Response") +
  theme_minimal()

library(MASS)

## Warning: package 'MASS' was built under R version 4.5.2

# Create a profile likelihood
profile_asymptotic <- profile(asymptotic_model)

# Plot the profile likelihood
plot(profile_asymptotic)

Week6 Assigment:Variance Reduction Using Control Variates

# Set seed for reproducibility
set.seed(123)

# Number of simulations
n <- 10000

# Function definition
f <- function(x) {
  return(x^2)
}

# Plain Monte Carlo
plain_samples <- runif(n)  # Uniform samples from [0, 1]
plain_estimate <- mean(f(plain_samples))
plain_variance <- var(f(plain_samples))

# Importance Sampling
lambda <- 1  # rate parameter for exponential distribution
importance_samples <- rexp(n, rate = lambda)  # Exponential samples
importance_weight <- dexp(importance_samples, rate = lambda) / (1)  # PDF of target distribution

# Rescale the integral
importance_estimate <- mean(f(importance_samples) * importance_weight)
importance_variance <- var(f(importance_samples) * importance_weight)

# Reduction factor
reduction_factor <- plain_variance / importance_variance

# Report results
cat("Plain Monte Carlo Estimate: ", plain_estimate, "\n")

## Plain Monte Carlo Estimate:  0.3297405

cat("Plain Monte Carlo Variance: ", plain_variance, "\n")

## Plain Monte Carlo Variance:  0.08711874

cat("Importance Sampling Estimate: ", importance_estimate, "\n")

## Importance Sampling Estimate:  0.2517207

cat("Importance Sampling Variance: ", importance_variance, "\n")

## Importance Sampling Variance:  0.03590007

cat("Reduction Factor: ", reduction_factor, "\n")

## Reduction Factor:  2.426701

CAT

# Load necessary libraries
set.seed(123)  # Set seed for reproducibility

# Parameters for the simulation
Y0 <- 100        # Initial population
r <- 0.05        # Growth rate
t <- 0:20        # Time from 0 to 20

# Simulate growth
Y <- Y0 * exp(r * t)

# Add some random noise to simulate observed data
observed_data <- Y + rnorm(length(Y), mean = 0, sd = 10)

# Combine into a data frame
data <- data.frame(Time = t, Observed = observed_data)

# View the first few rows of the data
head(data)

##   Time  Observed
## 1    0  94.39524
## 2    1 102.82533
## 3    2 126.10417
## 4    3 116.88851
## 5    4 123.43315
## 6    5 145.55319

Assumptions: The population grows continuously at a constant proportional rate The error term has mean zero Errors are normally distributed Errors have constant variance (homoscedasticity) Observations are independent over time

# Non-linear model fitting
model <- nls(Observed ~ Y0 * exp(r * Time), data = data, start = list(Y0 = 100, r = 0.05))

# Summary of the model to see estimated parameters
summary(model)

## 
## Formula: Observed ~ Y0 * exp(r * Time)
## 
## Parameters:
##     Estimate Std. Error t value Pr(>|t|)    
## Y0 1.034e+02  3.202e+00   32.31  < 2e-16 ***
## r  4.756e-02  2.153e-03   22.08 5.22e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.806 on 19 degrees of freedom
## 
## Number of iterations to convergence: 3 
## Achieved convergence tolerance: 2.953e-07

# Extracting estimated parameters
estimated_params <- coef(model)
cat("Estimated Y0:", estimated_params['Y0'], "\n")

## Estimated Y0: 103.4482

cat("Estimated r:", estimated_params['r'], "\n")

## Estimated r: 0.04755725

# Load the ggplot2 library for plotting
library(ggplot2)

# Create a data frame for fitted values
data$Fitted <- predict(model)

# Plot observed and fitted values
ggplot(data, aes(x = Time)) +
  geom_point(aes(y = Observed), color = 'red', size = 2) +
  geom_line(aes(y = Fitted), color = 'blue', size = 1) +
  labs(title = "Bacterial Growth Model", y = "Population", x = "Time") +
  theme_minimal()

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Statistical computing 2 Assignments and CAT

Samuel Boniface:SBO7/JR/MN/15102/2022

2026-01-06