Geomatric vs arithmetic

Create some simulated population data

We’re going to create three simulated population trends with different (slightly variable) growth rates. One is increasing by around 5% per year; one declining by about 1% per year; one declining by 10% per year.

We will simulate these trends from 1970 to 2020 and assume they all start with 1000 individuals.

set.seed(11)
#set.seed(111)

# Population data simulated over 50 years
years = 1971:2020

# Number of years
N = length(years)

# Three initial populations all with initial sizes of 1000
init_pop = c(1000, 1000, 1000)

# Three growth rates, one for each population
r = c(1.05, 0.99, 0.9) # 5% growth rate, 1% decline, 10% decline
r_sd = c(0.1, 0.1, 0.1) # variation in growth rates, here set to just be 0.1

pop_values = data.frame(r = r, r_sd = r_sd, N = N)

# Annual growth rates are generated using mean rate and sd
annual_rates = apply(pop_values, 1, FUN = function(x) { rnorm(x[3], mean = x[1], sd = x[2]) })

# Cumulatively multply these to get cumulative rates
cumulative_rates = apply(annual_rates, 2, cumprod)

# And create trends
pop_trends = rbind(init_pop, init_pop*cumulative_rates)

# Turn into dataframe with year column
pop_trends = data.frame(pop_trends)
pop_trends$year = c(1970, years)

colnames(pop_trends) = c("Pop1", "Pop2", "Pop3", "year")

final_values = as.numeric(tail(pop_trends[, 1:3], 1))
change = 100*(final_values/init_pop[1])
arithmetic_mean = mean(change)
geom_mean = 10^mean(log10(change))

# Combine in dataframe
pop_data = data.frame(r = r, r_sd = r_sd, 
                      N_years = N, N_1970 = init_pop[1], N_2020 = final_values, 
                      Change = format(change, digits = 2),
                      arithmetic_mean = c(arithmetic_mean, NA, NA),
                      geom_mean = c(geom_mean, NA, NA))

pop_data %>%
  kbl(caption = "Population parameters", col.names=c("Growth rate","SD","N~years~", "N~1970~", "N~2020~", "Change", "Arith_mean", "Geom_mean")) %>%
  kable_classic(full_width = F, html_font = "Cambria")

Population parameters
Growth rate	SD	N_years	N₁₉₇₀	N₂₀₂₀	Change	Arith_mean	Geom_mean
1.05	0.1	50	1000	2464.489202	246.45	101.6574	19.03014
0.99	0.1	50	1000	580.415394	58.04	NA	NA
0.90	0.1	50	1000	4.817926	0.48	NA	NA

Plot population trends

Let us have a look at these trends

# Plot populaiton trends (linear scale)
melted_data = melt(pop_trends, id.vars = "year")

ggplot(melted_data, aes(x = year, y = value, group = variable, color = variable)) + 
  geom_point() + 
  geom_line() + 
  ggtitle("Simulated population trends") + 
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Make the trends relative to the initial year

We can normalise these trends so that they all start at 100 in 1970. This makes it easier to see the percentage change in a population in a given year.

# Calculate relative abundance for each year, relative to first year

# Get data without year column
trend_data = dplyr::select(pop_trends, -year)
# Convert to relative to baseline year
rel_abund = apply(trend_data, 2, FUN = function(x) {100*x/x[1]})

# Make a data frame
rel_abundance = data.frame(rel_abund)
rel_abundance$year = c(1970, years)

# Plot relative abundance trends
melted_ra_data = melt(rel_abundance, id.vars = "year")

ggplot(melted_ra_data, aes(x = year, y = value, group = variable, color = variable)) + 
  geom_point() + 
  geom_line() + 
  ggtitle("Simulated population trends, relative to initial year") + 
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Calculate average relative abundance (arithmetic and geometric/LPI)

We can then calculate the arithmetic and geomatric mean (LPI) abundance each year.

# Make a data frame
rel_abundance = data.frame(rel_abund)
rel_abundance$year = c(1970, years)

rel_abundance_data = dplyr::select(rel_abundance, -year)

# Calculate average relative abundance
average_rel_abundance = rowMeans(rel_abundance_data)
# Calculate average log10 relative abundance
average_log_rel_abundance = rowMeans(log10(rel_abundance_data))

# Add to data frame
rel_abundance$av_RA = 100*(average_rel_abundance/average_rel_abundance[1])
#rel_abundance$gav_RA = 100*((10^average_log_rel_abundance)/(10^average_log_rel_abundance[1]))

# Also calculate LPI stat in usual way
pop_trends_log = log10(dplyr::select(pop_trends, -year)) # Log 
pop_trends_diff = rbind(c(0, 0, 0), apply(pop_trends_log, 2, diff)) # Log difference
average_log_diff = rowMeans(pop_trends_diff)

# Add to data frame
rel_abundance$lpi_stat = 100*cumprod(10^average_log_diff)

# Plot trends with indices (linear scale)
melted_ra_data = melt(rel_abundance, id.vars = "year")

Plot the population trends with both indices

Viewing these on a linear-scale axis highlights the difference between the arithmetic mean abundance (which here can look more accurate) and geometric mean abundance.

ggplot(melted_ra_data, aes(x = year, y = value, group = variable, color = variable)) + 
  geom_point(position = position_jitter(width = 0.1, height = 0.01), alpha = 0.4) + 
  geom_line() + 
  ggtitle("Simulated population trends (linear scale)") + 
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Plot the population trends with both indices (log scale)

However, on a log-scale (which is the natural scale for multiplicative data like population trends) it is much easier to see the geometric mean capturing the average of the trends on this scale.

# Plot trends with indices (log10 scale)
ggplot(melted_ra_data, aes(x = year, y = value, group = variable, color = variable)) + 
  geom_point(position = position_jitter(width = 0.1, height = 0.01), alpha = 0.4) + 
  geom_line() + 
  ggtitle("Simulated population trends (log scale)") + 
  scale_y_continuous(trans = "log10") + 
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))