library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Plotting with ggplot2

# Read portal_data_joined.csv from the local course folder into a tibble.
surveys <- read_csv(
  file = "C:\\Users\\User\\Desktop\\S's Digital lab\\courseware yr 3\\0310 bioinfo\\Tutorial4Project1\\data_raw\\portal_data_joined.csv"
)
## Rows: 34786 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): species_id, sex, genus, species, taxa, plot_type
## dbl (7): record_id, month, day, year, plot_id, hindfoot_length, weight
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the rows in which weight, hindfoot_length and sex are all present:
# a row is dropped as soon as any one of the three is NA.
surveys_complete <- filter(
  surveys,
  !(is.na(weight) | is.na(hindfoot_length) | is.na(sex))
)

# Scatter plot of hindfoot length against weight.
ggplot(surveys_complete, aes(weight, hindfoot_length)) +
  geom_point()

# Store the data and the aesthetic mapping once, without any layer, so that
# layers can be added to the same base plot afterwards.
surveys_plot <- ggplot(surveys_complete, aes(x = weight, y = hindfoot_length))

# Add a point layer to the stored plot and draw it.
surveys_plot + geom_point()

# When a plot is split over several lines, the `+` must end the line before
# the layer it adds; it must never start a new line.
surveys_plot + geom_point()

Challenge (optional)

# One-time setup if hexbin is not installed yet: install.packages("hexbin")
library(hexbin)
## Warning: package 'hexbin' was built under R version 4.5.2
# Bin the observations into hexagons instead of drawing every single point.
surveys_plot + geom_hex()

# A hexagonal bin plot is better for large datasets because it reduces overplotting and highlights density patterns, making clusters and trends much clearer than in a scatter plot. However, a scatter plot preserves individual points and outliers, which are partly lost when data are aggregated into hexagonal bins.

Building your plots iteratively

# Mostly transparent points, so overlapping observations build up into
# darker regions.
ggplot(surveys_complete, aes(weight, hindfoot_length)) +
  geom_point(alpha = 0.1)

# Same plot with every point drawn in one fixed colour.
ggplot(surveys_complete, aes(weight, hindfoot_length)) +
  geom_point(color = "blue", alpha = 0.1)

# Colour mapped to species_id inside the layer: one colour per species.
ggplot(surveys_complete, aes(weight, hindfoot_length)) +
  geom_point(aes(color = species_id), alpha = 0.1)

Challenge

# Weight for each species, with the points coloured by plot_type.
ggplot(surveys_complete, aes(species_id, weight)) +
  geom_point(aes(color = plot_type), alpha = 0.1)

# No, this scatter plot is not a good way to show this type of data. Because there are many observations for each species, the points overlap heavily, which makes it difficult to see patterns, compare species, or interpret differences between plot types.

Boxplot

# Distribution of weight within each species.
ggplot(surveys_complete, aes(species_id, weight)) +
  geom_boxplot()

# Jittered raw observations with the boxplot drawn on top of them. The
# boxplot's own outlier markers are switched off (outlier.shape = NA).
ggplot(surveys_complete, aes(species_id, weight)) +
  geom_jitter(color = "tomato", alpha = 0.3) +
  geom_boxplot(outlier.shape = NA)

Challenge

# Violin plot of weight per species, drawn over the jittered raw observations.
# geom_violin() draws no outlier markers, so the `outlier.shape` argument that
# was carried over from geom_boxplot() has been removed; it had no effect other
# than raising an "Ignoring unknown parameters" warning.
ggplot(data = surveys_complete, mapping = aes(x = species_id, y = weight)) +
  geom_jitter(alpha = 0.3, color = "tomato") +
  geom_violin()

# Weight per species on a log10 y axis: jittered observations with a boxplot
# (outlier markers hidden) drawn on top.
ggplot(surveys_complete, aes(species_id, weight)) +
  scale_y_log10() +
  geom_jitter(color = "tomato", alpha = 0.3) +
  geom_boxplot(outlier.shape = NA)

# The same kind of plot for hindfoot length, on the default linear y axis.
ggplot(surveys_complete, aes(species_id, hindfoot_length)) +
  geom_jitter(color = "tomato", alpha = 0.3) +
  geom_boxplot(outlier.shape = NA)

# Check how plot_id is stored before using it as a colour.
class(surveys_complete[["plot_id"]])
## [1] "numeric"
# Convert plot_id to a factor on the fly (surveys_complete itself is left
# untouched) and use it to colour the jittered points under the boxplot.
ggplot(
  mutate(surveys_complete, plot_id = as.factor(plot_id)),
  aes(species_id, weight)
) +
  geom_jitter(aes(color = plot_id), alpha = 0.3) +
  geom_boxplot(outlier.shape = NA)

Plotting time series data

# Number of records for every year/genus combination (stored in column n).
yearly_counts <- count(surveys_complete, year, genus)

# No grouping aesthetic: the counts of all genera are joined into one line.
ggplot(yearly_counts, aes(year, n)) +
  geom_line()

# One line per genus.
ggplot(yearly_counts, aes(year, n, group = genus)) +
  geom_line()

# One line per genus, each in its own colour.
ggplot(yearly_counts, aes(year, n, color = genus)) +
  geom_line()

Integrating the pipe operator with ggplot2

# Count and plot in a single pipeline: the counted table is piped straight
# into ggplot() as its data, and the finished plot object is stored.
yearly_counts_graph <- surveys_complete %>%
  count(year, genus) %>%
  ggplot(aes(year, n, color = genus)) +
  geom_line()

# Print the stored plot.
yearly_counts_graph

Faceting

# One panel per genus.
ggplot(yearly_counts, aes(year, n)) +
  geom_line() +
  facet_wrap(vars(genus))

# Number of records for every year/genus/sex combination.
yearly_sex_counts <- count(surveys_complete, year, genus, sex)

# One panel per genus, with a separate coloured line for each sex.
ggplot(yearly_sex_counts, aes(year, n, color = sex)) +
  geom_line() +
  facet_wrap(vars(genus))

# Grid of panels: sex in rows, genus in columns.
ggplot(yearly_sex_counts, aes(year, n, color = sex)) +
  geom_line() +
  facet_grid(rows = vars(sex), cols = vars(genus))

# Facet by rows only: a single column of panels, one row per genus.
ggplot(yearly_sex_counts, aes(year, n, color = sex)) +
  geom_line() +
  facet_grid(rows = vars(genus))

# Facet by columns only: a single row of panels, one column per genus.
ggplot(yearly_sex_counts, aes(year, n, color = sex)) +
  geom_line() +
  facet_grid(cols = vars(genus))

ggplot2 themes

 # Yearly counts per sex, one panel per genus, drawn with theme_bw().
 ggplot(yearly_sex_counts, aes(year, n, color = sex)) +
   geom_line() +
   facet_wrap(vars(genus)) +
   theme_bw()

Challenge

# Mean weight of each species in each year. surveys_complete contains no
# missing weights, so mean() needs no na.rm here.
# `.groups = "drop"` returns an ungrouped tibble; without it the result stays
# grouped by year and summarise() prints a "grouped output" message.
yearly_weight <- surveys_complete %>%
  group_by(year, species_id) %>%
  summarize(avg_weight = mean(weight), .groups = "drop")

# One panel and one coloured line per species, showing mean weight over time.
ggplot(data = yearly_weight,
       mapping = aes(x = year, y = avg_weight, color = species_id)) +
  geom_line() +
  facet_wrap(vars(species_id)) +
  theme_bw()
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

Customization

# Faceted yearly counts with a title and descriptive axis labels.
ggplot(yearly_sex_counts, aes(year, n, color = sex)) +
  geom_line() +
  facet_wrap(vars(genus)) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw()

# The same plot with the base text size set to 16. theme() comes after
# theme_bw() so that the complete theme does not overwrite the setting.
ggplot(yearly_sex_counts, aes(year, n, color = sex)) +
  geom_line() +
  facet_wrap(vars(genus)) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(text = element_text(size = 16))

# Labelled, faceted plot with customised text: x axis labels rotated by 90
# degrees, grey axis text of size 12, italic facet strip labels and a base
# text size of 16.
ggplot(yearly_sex_counts, aes(year, n, color = sex)) +
  geom_line() +
  facet_wrap(vars(genus)) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(
      colour = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
    ),
    axis.text.y = element_text(colour = "grey20", size = 12),
    strip.text = element_text(face = "italic"),
    text = element_text(size = 16)
  )

# Reusable set of theme settings: grey axis text of size 12 (x labels rotated
# by 90 degrees) and a base text size of 16.
grey_theme <- theme(
  axis.text.x = element_text(
    colour = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
  ),
  axis.text.y = element_text(colour = "grey20", size = 12),
  text = element_text(size = 16)
)

# Apply the stored theme settings to a boxplot of hindfoot length per species.
ggplot(surveys_complete, aes(species_id, hindfoot_length)) +
  geom_boxplot() +
  grey_theme

Arranging plots

# One-time setup if patchwork is not installed yet: install.packages("patchwork")

library(patchwork)
## Warning: package 'patchwork' was built under R version 4.5.2
# Boxplot of weight per species on a log10 y axis.
plot_weight <- ggplot(surveys_complete, aes(species_id, weight)) +
  geom_boxplot() +
  scale_y_log10() +
  labs(x = "Species", y = expression(log[10](Weight)))

# Yearly counts, one coloured line per genus.
plot_count <- ggplot(yearly_counts, aes(year, n, color = genus)) +
  geom_line() +
  labs(x = "Year", y = "Abundance")

# Stack the two plots vertically with patchwork's `/` operator, giving the
# upper and lower plot a 3:2 share of the height.
plot_weight / plot_count + plot_layout(heights = c(3, 2))

Exporting plots

# Build the fully labelled and themed faceted plot and keep it as an object so
# that it can be written to disk.
my_plot <- ggplot(yearly_sex_counts, aes(year, n, color = sex)) +
  geom_line() +
  facet_wrap(vars(genus)) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(
      colour = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
    ),
    axis.text.y = element_text(colour = "grey20", size = 12),
    text = element_text(size = 16)
  )

# Write the plot to a PNG file in the working directory, 15 x 10 in ggsave's
# default units.
ggsave(filename = "name_of_file.png", plot = my_plot, width = 15, height = 10)

## This also works for plots combined with patchwork
# Both dimensions are given explicitly. When `height` is omitted, ggsave()
# falls back to the size of the current graphics device, so the saved image
# would depend on the session it was run in. The value 5 is the height the
# original call ended up using (a 10 x 5 image).
plot_combined <- plot_weight / plot_count + plot_layout(heights = c(3, 2))
ggsave("plot_combined.png", plot_combined, width = 10, height = 5, dpi = 300)
# Species, sex and weight of the records with a weight below 5.
surveys_sml <- select(
  filter(surveys_complete, weight < 5),
  species_id, sex, weight
)

# Print the resulting tibble.
surveys_sml
## # A tibble: 15 × 3
##    species_id sex   weight
##    <chr>      <chr>  <dbl>
##  1 PF         F          4
##  2 PF         F          4
##  3 RM         F          4
##  4 RM         M          4
##  5 PP         M          4
##  6 RM         M          4
##  7 RM         M          4
##  8 RM         M          4
##  9 PF         M          4
## 10 PF         F          4
## 11 RM         M          4
## 12 RM         M          4
## 13 RM         F          4
## 14 RM         M          4
## 15 RM         M          4

Challenge

# Records collected before 1995, keeping only the year, sex and weight columns.
# The filter is a plain row-wise condition, so no grouping is needed. The
# former group_by(year) did not change which rows or columns were kept; it only
# left the result grouped by year, which would silently affect later
# summarise()/mutate() calls.
surveys_before_1995 <- surveys %>%
  filter(year < 1995) %>%
  select(year, sex, weight)