################################################################################
#
#
#     Written by: Julius Mugambe
#     Written on: June 2024
#     Institution: CAU
#     Purpose: Stat
#
#
################################################################################
################################################################################
rm(list = ls()) # Clear the working directory / environment
getwd() # Get the working the working directory
## [1] "/Users/kingjulius/Documents/Mostafa"
Sys.setenv(LANG = "en") # Change the langugue ..."eng" 
# Load the necessary packages 
require(readxl);require(tidyverse);require(minpack.lm)
## Loading required package: readxl
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: minpack.lm
# read file 
data <- read_xlsx("02.07.2024-r-data.xlsx")
# quick view of the data
head(data)
## # A tibble: 6 × 3
##   variety  time   gas
##   <chr>   <dbl> <dbl>
## 1 sg1         2  3   
## 2 sg1         2  3   
## 3 sg1         2  4.38
## 4 sg2         2  3   
## 5 sg2         2  3   
## 6 sg2         2  4.2
tail(data,10)
## # A tibble: 10 × 3
##    variety  time   gas
##    <chr>   <dbl> <dbl>
##  1 sg5        48  73.6
##  2 sg6        48   0  
##  3 sg6        48  75.8
##  4 sg6        48  67.6
##  5 maize      48  78.8
##  6 maize      48  81.5
##  7 maize      48  74.0
##  8 wheat      48  71.7
##  9 wheat      48  70.4
## 10 wheat      48  75.2
colnames(data)
## [1] "variety" "time"    "gas"
##########################
## levels
table(data$variety)
## 
## maize   sg1   sg2   sg3   sg4   sg5   sg6 wheat 
##    18    18    18    18    18    18    18    18
table(data$time)
## 
##  2  4  6 12 24 48 
## 24 25 23 24 24 24
##########################
hist(data$gas)

# Plot the data
ggplot(data, aes(x = as.factor(time), y = gas, fill = variety)) +
  geom_boxplot() +
  labs(title = "Gas Values Across Time Points and Varieties",
       x = "Time",
       y = "Gas Value",
       fill = "Variety",
       color = "Variety") +
  theme_minimal()

means <- data |>
  group_by(variety,time)|>
  reframe(
    mean.gas = mean(gas),
    sd.gas  = sd(gas)
  )
means
## # A tibble: 48 × 4
##    variety  time mean.gas sd.gas
##    <chr>   <dbl>    <dbl>  <dbl>
##  1 maize       2     4.27  0.675
##  2 maize       4     9.46  1.79 
##  3 maize       6    17.9   1.05 
##  4 maize      12    34.1   4.78 
##  5 maize      24    52.2  18.7  
##  6 maize      48    78.1   3.77 
##  7 sg1         2     3.46  0.797
##  8 sg1         4     8.48  2.79 
##  9 sg1         6    14.9   0.849
## 10 sg1        12    25.1   1.93 
## # ℹ 38 more rows
# Create bar plot with error bars
ggplot(means, aes(x = as.factor(time), y = mean.gas, fill = variety)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9)) +
  geom_errorbar(aes(ymin = mean.gas - sd.gas, ymax = mean.gas + sd.gas),
                width = 0.2, position = position_dodge(width = 0.9)) +
  labs(title = "Mean Gas Values Across Time Points and Varieties",
       x = "Time",
       y = "Mean Gas Value",
       fill = "Variety") +
  theme_minimal()

# Create a line plot
ggplot(means, aes(x = as.factor(time), y = mean.gas, color = variety, group = variety)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  labs(title = "Mean Gas Values Across Time Points and Varieties",
       x = "Time",
       y = "Mean Gas Value",
       color = "Variety") +
  theme_minimal()

# Ensure time is numeric
data <- data %>% mutate(time = as.numeric(time))
# Define the model function
orskov_model <- function(time, a, b, c) {
a + b * (1 - exp(-c * time))
}
# Fit the model for each variety
results <- data %>%
group_by(variety) %>%
do({
model <- nlsLM(gas ~ orskov_model(time, a, b, c),
data = .,
start = list(a = 0, b = 20, c = 0.1))
data.frame(variety = unique(.$variety),
a = coef(model)["a"],
b = coef(model)["b"],
c = coef(model)["c"])
})
# Print coefficients
print(results)
## # A tibble: 8 × 4
## # Groups:   variety [8]
##   variety     a     b       c
##   <chr>   <dbl> <dbl>   <dbl>
## 1 maize   -2.31  96.4 0.0368 
## 2 sg1     -1.51 111.  0.0240 
## 3 sg2     -2.67 107.  0.0271 
## 4 sg3     -2.88 104.  0.0281 
## 5 sg4     -3.39 109.  0.0262 
## 6 sg5     -3.35 103.  0.0316 
## 7 sg6     -5.53  61.2 0.0461 
## 8 wheat   10.2  481.  0.00286
# Function to predict gas production using fitted parameters
predict_gas <- function(time, a, b, c) {
a + b * (1 - exp(-c * time))
}
# Create a new data frame with the fitted values for plotting
fitted_data <- data %>%
left_join(results, by = "variety") %>%
mutate(fitted_gas = predict_gas(time, a, b, c))
# Plot the data and fitted curves
ggplot(data, aes(x = time, y = gas, color = variety)) +
geom_point() +
geom_line(data = fitted_data, aes(y = fitted_gas, group = variety), linetype = "dotted") +
labs(title = "Gas Production Kinetics According to Ørskov & McDonald Model",
x = "Time",
y = "Gas Production",
color = "Variety") +
theme_minimal()