Description of the data

# load library
library(ade4)
library(ggplot2)
library(dplyr)
library(stringr)
library(tidyr)
library(plotly)
library(reshape2)

# Load the `meaudret` dataset (provided by the ade4 package attached above)
# into the workspace; it is a list whose `env` and `spe` tables are used below.
data(list = "meaudret")

Exploratory Data Analysis

Here we will clean up the data and create some basic plots.

# Environmental measurements table taken from the meaudret list
env <- meaudret$env

# The sample labels are stored in the row names as "<season>_<site>".
# Copy them into a regular column first so tidyr can split them.
env[["season_site"]] <- rownames(env)

# Split the combined label on "_" into separate `season` and `site` columns
env <- separate(
  data = env,
  col = season_site,
  into = c("season", "site"),
  sep = "_"
)

# Make season a factor with explicit level order wi, sp, su, au so plots
# follow that order instead of the default alphabetical one
env[["season"]] <- factor(
  env[["season"]],
  levels = c("wi", "sp", "su", "au")
)

# Unused alternative kept for reference:
# grouped.env <- env %>% group_by(season)

How does each measurement vary by season?

To answer this question, I have generated boxplots of each variable according to season.

## Create boxplots of each measurement variable according to season

# Reshape the environmental table to long format for faceting: `season` is
# kept as the identifier and every remaining column is stacked into
# `variable` (measurement name) / `value` (reading), which are reshape2::melt's
# default output column names. `site` is removed first so it is not stacked
# in with the numeric measurements.
seasoned <- env %>%
  select(-site) %>%
  # `id.vars` is written out in full; the previous `id.var` only worked
  # through partial argument matching.
  melt(id.vars = "season")

# Boxplots of every measurement, one panel per variable with its own axis
# range (scales = "free"), and one box per season inside each panel.
ggplot(data = seasoned, aes(x = variable, y = value)) +
  geom_boxplot(aes(fill = season)) +
  facet_wrap(~ variable, scales = "free") +
  ggtitle("Changes in measures by season (across all sites)")

How does oxygen level vary by site?

# Bar chart of the `Oxyd` column for each site, drawn as one panel per season.
# stat = "identity" plots the recorded values themselves rather than row counts.
# NOTE(review): the title describes `Oxyd` as oxygen content; confirm against
# ?meaudret what this variable actually measures.
g <- ggplot(env, aes(x = site, y = Oxyd)) +
  geom_bar(stat = "identity") +
  facet_wrap(~ season) +
  ggtitle("Oxygen content by site and season")

# Convert the static ggplot into an interactive plotly figure
ggplotly(g)
###############################
# Species counts table taken from the meaudret list (one column per species)
spe <- meaudret$spe

# Row names hold the sample label as "<season>_<site>"; copy them into a
# column and split that column into `season` and `site`.
spe$season_site <- row.names(spe)
spe <- separate(spe, season_site, c("season", "site"), sep = "_")

# Relevel so that seasons are ordered wi, sp, su, au rather than alphabetically
spe$season <- factor(spe$season, levels = c("wi", "sp", "su", "au"))

# Long format: one row per (sample, species) with the species code in
# `insect_species` and its abundance in `counts`. Species columns are selected
# by excluding the two identifier columns instead of hard-coding positions
# 1:13, so the reshape stays correct if the number of species columns changes.
gathered.spe <- spe %>%
  gather("insect_species", "counts", -season, -site)

# Unused exploration kept for reference:
# grouped.spe <- gathered.spe %>% group_by(insect_species, site, season) %>% summarise(counts = n()) %>%
#        filter(insect_species == "Bni")

# Mean count per species and season, averaged over the sites. ungroup() is
# applied so later operations on `grouped` are not silently done per species.
grouped <- gathered.spe %>%
  group_by(insect_species, season) %>%
  summarise(avg_counts = mean(counts)) %>%
  ungroup()

# Plot the summary computed above (previously the same pipeline was run a
# second time and `grouped` was never used). Bars are stacked by season, so
# each bar's total height is the sum of that species' seasonal means.
gg <- ggplot(grouped, aes(insect_species, avg_counts)) +
  geom_bar(stat = "identity", aes(fill = season)) +
  ggtitle("Average number of Ephemeroptera across all sites by season")

# Interactive version of the plot
ggplotly(gg)

Which sites have higher numbers of species?

Site two experiences a drop-off in the mayfly (Ephemeroptera) population, especially in the fall. This drop in population is likely correlated with the increase in oxygen content observed at site two in the summer and fall.

# Which site has the highest species counts?
#
# Earlier variant kept for reference (sites on the x axis, one panel per season):
# g <- gathered.spe %>% ggplot(aes(site, counts)) + geom_jitter(aes(color = insect_species, size = counts)) + facet_wrap(~season )
# ggplotly(g)

# Current view: one panel per site with seasons along the x axis. Each point
# is one species in one sample, coloured by species and sized by its count;
# jitter spreads overlapping points (positions vary between runs because no
# seed is set). Note that this overwrites the earlier plot object `g`.
g <- ggplot(gathered.spe, aes(x = season, y = counts)) +
  geom_jitter(aes(color = insect_species, size = counts)) +
  facet_wrap(~ site) +
  ggtitle("Site two experiences something awful in the fall")

# Interactive version of the plot
ggplotly(g)