Firstly, leads load in the data. The data shows some errors, this is just formatting errors from turning our data from an excel file to csv. It doesn’t lose any data.

L_11 <- read.csv("11.csv")
L_12 <- read.csv("12.csv")
## Warning in read.table(file = file, header = header, sep = sep, quote = quote, :
## incomplete final line found by readTableHeader on '12.csv'
L_13 <- read.csv("13.csv")
L_14 <- read.csv("14.csv")
L_15 <- read.csv("15.csv")
## Warning in read.table(file = file, header = header, sep = sep, quote = quote, :
## incomplete final line found by readTableHeader on '15.csv'
L_16 <- read.csv("16.csv")
L_17 <- read.csv("17.csv")
L_18 <- read.csv("18.csv")
L_19 <- read.csv("19.csv")
L_20 <- read.csv("20.csv")
L_21 <- read.csv("21.csv")
L_22 <- read.csv("22.csv")
## Warning in read.table(file = file, header = header, sep = sep, quote = quote, :
## incomplete final line found by readTableHeader on '22.csv'
L_23 <- read.csv("23.csv")
## Warning in read.table(file = file, header = header, sep = sep, quote = quote, :
## incomplete final line found by readTableHeader on '23.csv'
L_24 <- read.csv("24.csv")
L_25 <- read.csv("25.csv")
## Warning in read.table(file = file, header = header, sep = sep, quote = quote, :
## incomplete final line found by readTableHeader on '25.csv'
L_26 <- read.csv("26.csv")
L_27 <- read.csv("27.csv")
L_28 <- read.csv("28.csv")
## Warning in read.table(file = file, header = header, sep = sep, quote = quote, :
## incomplete final line found by readTableHeader on '28.csv'
L_29 <- read.csv("29.csv")
## Warning in read.table(file = file, header = header, sep = sep, quote = quote, :
## incomplete final line found by readTableHeader on '29.csv'
L_30 <- read.csv("30.csv")
L_31 <- read.csv("31.csv")
L_32 <- read.csv("32.csv")
L_33 <- read.csv("33.csv")
L_34 <- read.csv("34.csv")
L_35 <- read.csv("35.csv")
L_36 <- read.csv("36.csv")
L_37 <- read.csv("37.csv")
L_38 <- read.csv("38.csv")
L_39 <- read.csv("39.csv")

To get an overview of the data, lets generate a barplot of overall species richness, that is the number of species at each site.

richness <- c(nrow(L_11), nrow(L_12), nrow(L_13), nrow(L_14), nrow(L_15), nrow(L_16), nrow(L_17), nrow(L_18), nrow(L_19), nrow(L_20), nrow(L_21), nrow(L_22), nrow(L_23), nrow(L_24), nrow(L_25), nrow(L_26), nrow(L_27), nrow(L_28), nrow(L_29), nrow(L_30), nrow(L_31), nrow(L_32), nrow(L_33), nrow(L_34), nrow(L_35), nrow(L_36), nrow(L_37), nrow(L_38), nrow(L_39))
latitude <- c("11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39")
species_richness <- data.frame(latitude, richness)

#plotting the data
library(RColorBrewer)

coul <- brewer.pal(5, "Pastel2") 
xx <- barplot(height=species_richness$richness, names.arg = latitude, col=coul, main= "Species Richness By Latitude", xlab= "Latitude", ylab= "Species Richness", ylim=c(0,80))
text(x = xx, y = species_richness$richness, label = species_richness$richness, pos = 3, cex = 0.8, col = "blue")

Lets do the same thing but rather than a barpot we’re going to use a scatterplot.

plot(y=species_richness$richness, x=latitude, main= "Species Richness By Latitude", xlab= "Latitude", ylab= "Species Richness", ylim=c(0,80))

But is there a linear trend between latitude and species richness? We can test this.

model <- lm(latitude ~ species_richness$richness)
model
## 
## Call:
## lm(formula = latitude ~ species_richness$richness)
## 
## Coefficients:
##               (Intercept)  species_richness$richness  
##                  24.56763                    0.01751
plot(model, 1)

What this hows is our data we have does not present a linear trend. In the residual vs fitted plot, if the assumption of linearity was met, data points would be relatively evenly spread with a flat fitted line. Moreover, our linear model presents a beta value of 0.01751 which is essentially 0. Perhaps our data set is not large enough to produce linearity or another factor is confounding with our data.

Now, lets plot the abundance of all our data combined. First we need to separate the data by count of each latitude.

L11 <- L_11$count
L12 <- L_12$count
L13 <- L_13$count
L14 <- L_14$count
L15 <- L_15$count
L16 <- L_16$count
L17 <- L_17$count
L18 <- L_18$count
L19 <- L_19$count
L20 <- L_20$count
L21 <- L_21$count
L22 <- L_22$count
L23 <- L_23$count
L24 <- L_24$count
L25 <- L_25$count
L26 <- L_26$count
L27 <- L_27$count
L28 <- L_28$count
L29 <- L_29$count
L30 <- L_30$count
L31 <- L_31$count
L32 <- L_32$count
L33 <- L_33$count
L34 <- L_34$count
L35 <- L_35$count
L36 <- L_36$count
L37 <- L_37$count
L38 <- L_38$count
L39 <- L_39$count

all_abundance <- c(L11, L12, L13, L14, L15, L16, L17, L18, L19, L20, L21, L22, L23, L24, L25, L26, L27, L27, L28, L29, L30, L30, L31, L32, L33, L34, L35, L36, L37, L38, L39)

all_abundancea <- table(all_abundance)

Next we plot the data. The first plot is all the data, without limiting the x-axis (abudnance rank).

plot(all_abundancea, main='Rank Abundance of All Latitudes Combined', xlab='Abundance Rank', ylab= 'Abundance', col='blue', type = 'l')

Limiting the x-axis to 100 to get a clearer view

plot(all_abundancea, main='Rank Abundance of All Latitudes Combined x<100', xlab='Abundance Rank', ylab= 'Abundance', col='red', type = 'l', xlim = c(0,100))

And now limiting the x-axis to 70

plot(all_abundancea, main='Rank Abundance of All Latitudes Combined x<70', xlab='Abundance Rank', ylab= 'Abundance', col='orange', type = 'l', xlim = c(0,70))

Lets make some boxplots of species abundance, separated by 5 latitudes to visualise our data.

boxplot(L11, L12, L13, L14, L15, col =c("#ff32f8","#ff53d3", "#ff74ad", "#ff9588", "#ffb662"), main = 'Boxplot Of Abundance Latitude 11 to 15')

boxplot(L16, L17, L18, L19, L20, col =c("#65d1ff","#73ddd5", "#80e8ac", "#8ef482", "#9bff58"), main = 'Boxplot Of Abundance Latitude 16 to 20')  

boxplot(L21, L22, L23, L24, L25, col =c("#ffad65","#ffae7b", "#ffb091", "#ffb1a6", "#ffb2bc"), main = 'Boxplot Of Abundance Latitude 21 to 25')

boxplot(L26, L27, L28, L29, L30, col =c("#97ff8b","#a0d4a8", "#a9a9c5", "#b17ee2", "#ba53ff"), main = 'Boxplot Of Abundance Latitude 26 to 30')

boxplot(L31, L32, L33, L34, L35, col =c("#53f9ff","#7eddc7", "#a9c190", "#d4a458", "#ff8820"), main = 'Boxplot Of Abundance Latitude 31 to 35')

boxplot(L36, L37, L38, L39, col =c("#FFFF74","#e8d197", "#d1a3ba", "#b974dc"), main = 'Boxplot Of Abundance Latitude 36 to 39')

Lets visualise the mean species abundance at each site before doing somne hypothesis testing of the means.

means <- c(mean(L11), mean(L12), mean(L13), mean(L14), mean(L15), mean(L16), mean(L17), mean(L18), mean(L19), mean(L20), mean(L22), mean(L22), mean(L23), mean(L24), mean(L25), mean(L26), mean(L27), mean(L28), mean(L29), mean(L30), mean(L31), mean(L32), mean(L33), mean(L34), mean(L35), mean(L36), mean(L37), mean(L38), mean(L39))

latmeans <- data.frame(latitude, means)


plot(data=latmeans, x=latitude, y=means, type = 'l', col="#d56aff", xlab="Latitude", ylab="Mean Spcies Abundance", main="Mean Species Abundance by Latitude")
## Warning in plot.window(...): "data" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "data" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "data" is not a
## graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "data" is not a
## graphical parameter
## Warning in box(...): "data" is not a graphical parameter
## Warning in title(...): "data" is not a graphical parameter

Lastly lets hypothesis test the abundance by latitude. The null is the mean abundance for each latitude is equal and the alternative is the mean species abundance is different. What this means, is if we disprove the null, in our given data set, latitude has an effect of mean species abundance.

First we need to put the data all together and group it by latitude.

Latitude <- c(rep("L11", length(L11)), rep("L12", length(L12)), rep("L13", length(L13)), rep("L14", length(L14)), rep("L15", length(L15)), rep("L16", length(L16)), rep("L17", length(L17)), rep("L18", length(L18)), rep("L19", length(L19)), rep("L20", length(L20)), rep("L21", length(L21)), rep("L22", length(L22)), rep("L23", length(L23)), rep("L24", length(L24)), rep("L25", length(L25)), rep("L26", length(L26)), rep("L27", length(L27)), rep("L28", length(L28)), rep("L29", length(L29)), rep("L30", length(L30)), rep("L31", length(L31)), rep("L32", length(L32)), rep("L33", length(L33)), rep("L34", length(L34)), rep("L35", length(L35)), rep("L36", length(L36)), rep("L37", length(L37)), rep("L38", length(L38)), rep("L39", length(L39)))
df <- data.frame(Latitude = Latitude,
                 Value = c(L11, L12, L13, L14, L15, L16, L17, L18, L19, L20, L21, L22, L23, L24, L25, L26, L27, L28, L29, L30, L31, L32, L33, L34, L35, L36, L37, L38, L39))

Next we perform an ancova test.

ancova_result <- aov(Value ~ Latitude, data=df)
summary(ancova_result)
##              Df  Sum Sq Mean Sq F value   Pr(>F)    
## Latitude     28  126004    4500   2.497 3.72e-05 ***
## Residuals   687 1238123    1802                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The following shows a p-value less than the alpha value of 0.05. What this means is the data we have used supports the alternative hypothesis of the means of each latitude’s abunadance differing. Thereby this means in our data set, latitude seems to have an effect on abundance. However, many other variables such as the biome where the area was taken e.g. forest vs urban area could be plausible for our findings.