Reading in the file:

pop_data_1_ <- read.csv("C:/Users/meera/Desktop/R_data/pop_data.csv")

APPENDIX 1 We find the intrinsic growth for species 1 and species 2 by dividing a year’s population by the previous year’s. Intrinsic growth in year 2000 was not used, as previous population data was unavailable. The logs of annual population growth were also taken. To streamline code and have a coherent dataset, the year, temperature, growth rates and logs of growth rate were added into one single dataset using the data.frame function.

# Growth from year t to t+1
species_1_growth <- pop_data_1_$species_1[2:21] / pop_data_1_$species_1[1:20]
species_2_growth <- pop_data_1_$species_2[2:21] / pop_data_1_$species_2[1:20]

log_species_1_growth <- log(species_1_growth)
log_species_2_growth <- log(species_2_growth)

intrinsic_growth <- data.frame(
  year = pop_data_1_$year[1:20],    
  temp = pop_data_1_$temp[1:20],
  species_1_growth_rate = species_1_growth,
  species_2_growth_rate = species_2_growth,
  log_species_1_growth = log_species_1_growth,
  log_species_2_growth = log_species_2_growth
)

intrinsic_growth$log_species_1_growth <- log(intrinsic_growth$species_1_growth_rate)
intrinsic_growth$log_species_2_growth <- log(intrinsic_growth$species_2_growth_rate)

log_intrinsic_growth_species_1 <- intrinsic_growth$log_species_1_growth
log_intrinsic_growth_species_2 <- intrinsic_growth$log_species_2_growth

APPENDIX 2 Finding the mean natural log growth rate of each species:

mean_growth_rate_species_1 <- mean(log_intrinsic_growth_species_1)
mean_growth_rate_species_2 <- mean(log_intrinsic_growth_species_2)

APPENDIX 3 These functions together were used to create the ‘a_func’ and ‘b_func’ parts pf the regression line seen in appendix 4 D and E

  1. Function to find the mean of any vector: this is used in the two subsequent functions
mean_func <- function(vec) {
  mean(vec)
}
  1. Sum of cross products function: in this case ‘vec1’ is temperature, and ‘vec2’ is growth rate
sum_cross_prod_func <- function(vec1, vec2) {
  sum_cp <- 0
  for(i in 1: length(vec1)) {
    cross_prod <- (vec1[i] - mean_func(vec1)) * (vec2[i] - mean_func(vec2))
    sum_cp <- sum_cp + cross_prod
  }
  return(sum_cp)
}
  1. Sum of squares function: this function was applied to the temperature deviation values.
sum_sq_func <- function(vec1) {
  sum_sq <- 0
  for (i in 1:length(vec1)) {
  sum_sq <- sum_sq + (vec1[i] - mean_func(vec1))^2
  }
  return(sum_sq)
}
  1. Function for the ‘b’ coefficient in y=bx+a, using the sum of cross products and sum of squares functions
b_func <- function(vec1, vec2){
  b <- sum_cross_prod_func(vec1, vec2)/sum_sq_func(vec1)
  return(b)
}
  1. Function for the ‘a’ coefficient in y=bx + a
a_func <- function(vec1, vec2) {
  a <- mean_func(vec2) - b_func(vec1, vec2)*mean_func(vec1)
  return(a)
} 

APPENDIX 4 Creating a regression line and plotting temperature against the log intrinsic growth. The complete.cases function had to be used on species 1 after an unexpected problem rmarkdown had with the values of a_species_1 and b_species_1, despite both of them clearly having shown finite and realistic values. This function removes all NA values from the considered dataset.

valid <- complete.cases(intrinsic_growth$temp[1:20],
                        intrinsic_growth$log_species_1_growth,
                        intrinsic_growth$log_species_2_growth)
a_species_1 <- a_func(intrinsic_growth$temp[1:20][valid], intrinsic_growth$log_species_1_growth[valid])
b_species_1 <- b_func(intrinsic_growth$temp[1:20][valid], intrinsic_growth$log_species_1_growth[valid])

plot(intrinsic_growth$temp[1:20][valid], intrinsic_growth$log_species_1_growth[valid],
     pch = 20, col = 'cornflowerblue',
     main = 'Relationship between temperature deviation and growth of species 1', 
     xlab = 'Temperature deviation', ylab = 'Growth of species 1')
abline(a_species_1, b_species_1)

a_species_2 <- a_func(intrinsic_growth$temp[1:20][valid], intrinsic_growth$log_species_2_growth[valid])
b_species_2 <- b_func(intrinsic_growth$temp[1:20][valid], intrinsic_growth$log_species_2_growth[valid])
plot(intrinsic_growth$temp[1:20][valid], intrinsic_growth$log_species_2_growth[valid], 
     pch=20, col='deeppink',
     main='Relationship between temperature deviation and growth of species 2', 
     xlab='Temperature deviation', ylab='Growth of species 2')
abline(a_species_2, b_species_2)

The ‘a_func’ and ‘b_func’ are functions corresponding to the slope (b_func) and intercept (a_func) of the function. The slope has the formula of the least squares regression; the sum of cross products of both variables divided by the sum of squares of the x variable. In this case, the ‘x’ variable is Temperature, while the ‘y’ variable is the growth rate obtained by dividing the current population by the previous year’s population.

While the regression line is not used in the report due to the large p value indicating an insignificant relationship, it can also be visualized here.

APPENDIX 5 Using a T test to find the significance of the relationship between temperature and intrinsic growth rate.

temp <- intrinsic_growth$temp[1:20]
growth_2 <- log_intrinsic_growth_species_2[1:20]
growth_1 <- log_intrinsic_growth_species_1[1:20]
n <- length(growth_1)

standard_error <- function(growth, temp, a, b) {
  df <- length(growth) - 2
  y <- a + b * temp
  ss_intrinsic_growth <- sum((growth - y)^2)
  ss_temperature <- sum((temp - mean(temp))^2)
  standarderror <- sqrt(ss_intrinsic_growth / df) / sqrt(ss_temperature)
  return(standarderror)
}

se_species_2 <- standard_error(growth_2, temp, a_species_2, b_species_2)
t_statistic_species_2 <- b_species_2 / se_species_2

se_species_1 <- standard_error(growth_1, temp, a_species_1, b_species_1)
t_statistic_species_1 <- b_species_1 / se_species_1

print(t_statistic_species_2)
## [1] 0.8256655
print(t_statistic_species_1)
## [1] -7.992189
pt(t_statistic_species_2, n-2)
## [1] 0.7900985
pt(t_statistic_species_1, n-2)
## [1] 1.242523e-07

APPENDIX 6 Species 1 displayed a significant negative relationship with temperature deviations, the nature of which was further explored by using the ‘lm’ (linear model) function. Formulas of different powers were tried on the relationship. The result is that the relationship may be quartic. The I was added since it asks R to interpret ^4 the way we interpret it: to the fourth power. Otherwise it will interpret the temp_no_NA as 4 different interactions, which is not what we want. I had to redefine all the vectors because rmarkdown’s optimum temperature was differing from the optimum temperature that was shown in the knit, so I had to make sure that the rmarkdown was not using any vectors in my existing global environment since the knit would ignore those. I understand that my code seems to have many vectors that contain the same values, but I had to ensure every time that the knit was not using something from my RStudio specifically

species_1 <- pop_data_1_$species_1
temp <- pop_data_1_$temp

species_1_growth <- species_1[2:21] / species_1[1:20]
log_species_1_growth <- log(species_1_growth)
temp_no_NA <- temp[1:20]

species1_temp <- lm(log_species_1_growth ~ temp_no_NA + I(temp_no_NA^4))
summary(species1_temp)
## 
## Call:
## lm(formula = log_species_1_growth ~ temp_no_NA + I(temp_no_NA^4))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.19838 -0.06634  0.01177  0.06396  0.16302 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.13107    0.02566   5.109 8.73e-05 ***
## temp_no_NA      -0.52876    0.07040  -7.511 8.53e-07 ***
## I(temp_no_NA^4) -0.97615    0.27737  -3.519  0.00263 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1024 on 17 degrees of freedom
## Multiple R-squared:  0.8728, Adjusted R-squared:  0.8579 
## F-statistic: 58.33 on 2 and 17 DF,  p-value: 2.442e-08

Quartic curves can have maximums, so the optimal temperature was found by taking the first derivative of the quartic formula. The coef function gives 3 coefficients: the intercept, the linear coefficient and the quartic coefficient. These are then part of the first derivative function, using the basic derivation rules. The linear coefficient loses it’s x value, and the quartic coefficient becomes a coefficient of a cubic term. The uniroot function was used to find the roots where the derivative is equal to zero, thus the optimal temperature. Upper and lower bounds were set as 1 and -1 respectively, for the purpose of being biologically realistic. The optimum temperature does change slightly from the calculated -0.513, however this can occur as uniroot is approximating the root each time.

coefs <- coef(species1_temp)
quartic_derivative <- function(x) coefs[2] + 4 * coefs[3] * x^3

optimum_temperature <- uniroot(quartic_derivative, lower = -1, upper = 1)$root
print(optimum_temperature)
## [1] -0.5135084

The same was done on species 2’s relationship with temperature.

species2_temp <- lm(log_intrinsic_growth_species_2[2:21] ~ 
                   intrinsic_growth$temp[1:20] + 
                   I(intrinsic_growth$temp[1:20]^3))
summary(species2_temp)
## 
## Call:
## lm(formula = log_intrinsic_growth_species_2[2:21] ~ intrinsic_growth$temp[1:20] + 
##     I(intrinsic_growth$temp[1:20]^3))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.21002 -0.10660 -0.01485  0.12435  0.22518 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                       0.07974    0.03548   2.247   0.0391 *
## intrinsic_growth$temp[1:20]      -0.38382    0.19087  -2.011   0.0615 .
## I(intrinsic_growth$temp[1:20]^3)  0.87205    0.48456   1.800   0.0908 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1495 on 16 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2023, Adjusted R-squared:  0.1026 
## F-statistic: 2.029 on 2 and 16 DF,  p-value: 0.1639

None of the equations attempted produced any significant relationship between species 2’s growth rate and temperature deviation, so no formula was used and no regression line was plotted. I made a choice not to attempt overly complex equations and formulas, as formulas with increasing numbers of terms will eventually be able to fit any interaction, even if no real relationship exists between the variables.

APPENDIX 7 The year and species 1’s population vectors were turned into a separate dataset. A logistic model was fitted onto species 1, which it consistently followed. Each respective model had initial populations (N0) set as the first populations recorded in 2000 and the mean log growth rates (r). The carrying capacity (K) was estimated multiple times at values between 300 and 700, all of which caused the function to output the same carrying capacity estimate of 395. Higher or lower inputs resulted in a minFactor error message meaning R was not able to converge on an estimate with the given K, N0 and r inputs.

Again to streamline data and ensure that nothing in my current global environment was being used in rmarkdown, I decided to create another table containing the year (as pop_data$time) and the species 1’s population growth. I chose not to use the data point for the final year, due to the final year not containing a value for the temperature deviation which was shown to affect species 1’s growth rate. As a result, pop_data_fit was created. For consistency, I applied the same to species 2 despite its apparent indifference to temperature changes.

year <- 2000:2020
species_1 <- c(57, 61, 68, 90, 110, 147, 225, 208, 205, 172,
               250, 199, 267, 290, 344, 410, 389, 545, 253, 268, 239)
pop_data <- data.frame(year, species_1)
pop_data$time <- 0:20 
pop_data_fit <- pop_data[1:20, ]

logistic_model <- nls(
  species_1 ~ K / (1 + ((K - N0) / N0) * exp(-r * time)),
  data = pop_data_fit,
  start = list(K = 600, N0 = 57, r = 0.07167061)   #Used the mean growth rate and population in year 2000
)
summary(logistic_model)
## 
## Formula: species_1 ~ K/(1 + ((K - N0)/N0) * exp(-r * time))
## 
## Parameters:
##     Estimate Std. Error t value Pr(>|t|)    
## K  395.66281   71.90185   5.503 3.88e-05 ***
## N0  54.23477   27.41655   1.978   0.0643 .  
## r    0.23973    0.09552   2.510   0.0225 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 68.96 on 17 degrees of freedom
## 
## Number of iterations to convergence: 13 
## Achieved convergence tolerance: 6.984e-06

Since species 2 was not a fit for the logistic curve, an exponential curve was fitted onto the population’s pattern, which it successfully followed. Corresponding values for species 2 such as its mean log growth rate and its initial population were inputted into the function. No carrying capacity was required as this is an exponential growth curve.

year_2 <- 2000:2020
species_2 <- c(38, 51, 67, 62, 84, 84, 112, 123, 104, 122, 
               118, 117, 138, 148, 137, 179, 191, 194, 266, 351, 314)
pop_data_2 <- data.frame(year_2, species_2)
pop_data_2$time <- 0:20
pop_data_fit_2 <- pop_data_2[1:20, ]

exponential_model_2 <- nls(
  species_2 ~ N0 * exp(-r * time),
  data = pop_data_fit_2,
  start = list(N0 = 38, r = 0.1055903)   #Used the mean growth rate and population in year 2000
)
summary(exponential_model_2)
## 
## Formula: species_2 ~ N0 * exp(-r * time)
## 
## Parameters:
##     Estimate Std. Error t value Pr(>|t|)    
## N0 46.077522   6.235921   7.389 7.45e-07 ***
## r  -0.095932   0.008837 -10.856 2.49e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 25.36 on 18 degrees of freedom
## 
## Number of iterations to convergence: 8 
## Achieved convergence tolerance: 1.159e-06

APPENDIX 8 Scatter plots were plotted for each species, including year, population growths and their respective population growth curves. The predict function allows the logistic and exponential models to be plotted onto the scatter plots, giving a smooth curve that the models calculated to fit the given data points best.

plot(2000:2020, species_1, type = 'b', pch=16, col= 'steelblue4', 
     main = 'Population of species 1 from year 2000-2020', xlab='Year', ylab='Population')
lines(pop_data_fit$year, predict(logistic_model), col = "royalblue", lwd=2)

plot(2000:2020, species_2, type = "b", pch = 16, col = "magenta",
     main = "Population of Species 2 from year 2000-2020", ylab = "Population", xlab = "Year")
lines(pop_data_fit_2$year_2, predict(exponential_model_2), col='orchid', lwd=2)

I acknowledge the use of ChatGPT in some of my code. It suggested the complete.cases function to remove the NA values, the uniroot function to find the roots of the first derivative. I mainly used it to help proofread the code for the rmarkdown.