library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)

R Markdown

1. Enter data

To match the steps of the lab handout, we will create two tables: one for each site. In R, these are called data frames, and they are used when we have multiple types of data (such as site categories, and numeric values). To create the data frames, we will use the cbind() function, which binds columns together. The table we create is saved to an object using the <- operator. We then use the print() function to view what we have created.

site1 <- cbind.data.frame(site = "site1", 
                                                    plot = c("plot1", "plot2","plot3"),
                                                    plot_area = c(20, 20, 20),
                                                    Pinus_strobus = c(13, 17, 11),
                                                    Betula_papyrifera = c(16, 19, 23),
                                                    Fagus_grandifolia = c(2, 0, 5))
print(site1)
##    site  plot plot_area Pinus_strobus Betula_papyrifera Fagus_grandifolia
## 1 site1 plot1        20            13                16                 2
## 2 site1 plot2        20            17                19                 0
## 3 site1 plot3        20            11                23                 5

We do the same for site 2, then combine the data frames from each site:

site2 <- cbind.data.frame(site = "site2", 
                                                    plot = c("plot1", "plot2","plot3"),
                                                    plot_area = c(30, 30, 30),
                                                    Pinus_strobus = c(10, 12, 9),
                                                    Betula_papyrifera = c(8, 1, 3),
                                                    Fagus_grandifolia = c(12, 10, 22))

# Combine data from both sites into one data frame using row-bind (`rbind.data.frame`)
sites <- rbind.data.frame(site1, site2)
print(sites)
##    site  plot plot_area Pinus_strobus Betula_papyrifera Fagus_grandifolia
## 1 site1 plot1        20            13                16                 2
## 2 site1 plot2        20            17                19                 0
## 3 site1 plot3        20            11                23                 5
## 4 site2 plot1        30            10                 8                12
## 5 site2 plot2        30            12                 1                10
## 6 site2 plot3        30             9                 3                22

2. Determine Density

Next, we determine the density of each species in each plot on a m2 basis by dividing each species count by the area of the plot. To do this in R, we use functions from the dplyr package, which is one of the most widely-used R packages. It uses a ‘pipe’ operator, %>%, that takes the output of one function and ‘pipes’ it into another function. Below, the sites data frame is piped into the group_by function, which groups the data into sites. This is piped into the summarize() function, which we use to calculate the per-site per-species density.

# Calculate density by dividing counts by plot areas
density <- sites %>% group_by(site) %>% summarize(Pinus_strobus_density = Pinus_strobus/plot_area,
                                                                                                    Betula_papyrifera_density = Betula_papyrifera/plot_area,
                                                                                                    Fagus_grandifolia_density = Fagus_grandifolia/plot_area)
## `summarise()` regrouping output by 'site' (override with `.groups` argument)
print(density)
## # A tibble: 6 x 4
## # Groups:   site [2]
##   site  Pinus_strobus_densi… Betula_papyrifera_dens… Fagus_grandifolia_den…
##   <fct>                <dbl>                   <dbl>                  <dbl>
## 1 site1                0.65                   0.8                     0.1  
## 2 site1                0.85                   0.95                    0    
## 3 site1                0.55                   1.15                    0.25 
## 4 site2                0.333                  0.267                   0.4  
## 5 site2                0.4                    0.0333                  0.333
## 6 site2                0.3                    0.1                     0.733

3. Calculate Average

4. Calculate Standard Deviation

In this section, we will calculate both the average and the standard deviation. First, we reshape the data into “long” format, which makes it easier to plot. We do this using the pivot_longer() function, which is from the tidyr package.

density_long <- density %>% group_by(site) %>% 
    pivot_longer(cols = c(Pinus_strobus_density, 
                                                Betula_papyrifera_density, 
                                                Fagus_grandifolia_density), values_to = "density")
print(density_long)
## # A tibble: 18 x 3
## # Groups:   site [2]
##    site  name                      density
##    <fct> <chr>                       <dbl>
##  1 site1 Pinus_strobus_density      0.65  
##  2 site1 Betula_papyrifera_density  0.8   
##  3 site1 Fagus_grandifolia_density  0.1   
##  4 site1 Pinus_strobus_density      0.85  
##  5 site1 Betula_papyrifera_density  0.95  
##  6 site1 Fagus_grandifolia_density  0     
##  7 site1 Pinus_strobus_density      0.55  
##  8 site1 Betula_papyrifera_density  1.15  
##  9 site1 Fagus_grandifolia_density  0.25  
## 10 site2 Pinus_strobus_density      0.333 
## 11 site2 Betula_papyrifera_density  0.267 
## 12 site2 Fagus_grandifolia_density  0.4   
## 13 site2 Pinus_strobus_density      0.4   
## 14 site2 Betula_papyrifera_density  0.0333
## 15 site2 Fagus_grandifolia_density  0.333 
## 16 site2 Pinus_strobus_density      0.3   
## 17 site2 Betula_papyrifera_density  0.1   
## 18 site2 Fagus_grandifolia_density  0.733

Now, we use the summarize() function from the dplyr package to calculate mean and standard deviation. If there were any empty cells (or NAs) in the data, this could create errors, so we use the na.rm = T argument just in case.

average_density <- density_long %>% group_by(site, name) %>% 
    summarize(mean = mean(density, na.rm = T),
                        sd = sd(density, na.rm = T))
## `summarise()` regrouping output by 'site' (override with `.groups` argument)
print(average_density)
## # A tibble: 6 x 4
## # Groups:   site [2]
##   site  name                       mean     sd
##   <fct> <chr>                     <dbl>  <dbl>
## 1 site1 Betula_papyrifera_density 0.967 0.176 
## 2 site1 Fagus_grandifolia_density 0.117 0.126 
## 3 site1 Pinus_strobus_density     0.683 0.153 
## 4 site2 Betula_papyrifera_density 0.133 0.120 
## 5 site2 Fagus_grandifolia_density 0.489 0.214 
## 6 site2 Pinus_strobus_density     0.344 0.0509

5. Create chart

In the original lab handout, steps 5 through 7 all create a final bar plot. In R, we can combine these steps using the ggplot2 package, which is the most widely-used for graphics and plots.

final_plot <- ggplot(average_density,  aes(x = name, fill = site)) + 
    geom_bar(stat="identity", aes(y = mean), position=position_dodge()) +
    # The line below creates error bars
    geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), width=.2,
                                position=position_dodge(.9)) 
final_plot

Finally, we’ll edit the plot to add a title and caption. To recieve extra credit, modify the code below to add a custom title and caption.

final_plot + labs(title = "Main title", caption = "My caption") + 
    xlab("x-axis label") +
    ylab("y-axis label") +
    theme(plot.caption = element_text(hjust = 0))