library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Plotting with ggplot2
# Read the joined Portal survey records from CSV into `surveys`.
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# script only runs where this exact folder exists - consider a path relative
# to the project directory.
surveys <- read_csv("C:\\Users\\User\\Desktop\\S's Digital lab\\courseware yr 3\\0310 bioinfo\\Tutorial4Project1\\data_raw\\portal_data_joined.csv")
## Rows: 34786 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): species_id, sex, genus, species, taxa, plot_type
## dbl (7): record_id, month, day, year, plot_id, hindfoot_length, weight
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep a record only when none of weight, hindfoot_length or sex is missing.
surveys_complete <- filter(
  surveys,
  !(is.na(weight) | is.na(hindfoot_length) | is.na(sex))
)

# Scatter plot of hindfoot length against weight.
surveys_complete %>%
  ggplot(mapping = aes(x = weight, y = hindfoot_length)) +
  geom_point()

# Store the base plot (data + aesthetic mappings, no layers yet) for reuse.
surveys_plot <- surveys_complete %>%
  ggplot(aes(x = weight, y = hindfoot_length))

# Adding a point layer to the stored object draws the scatter plot.
surveys_plot + geom_point()

# When a plot spans several lines, the `+` must end the line, not start the
# next one, so that R knows the expression continues.
surveys_plot +
  geom_point()

Challenge (optional)
#install.packages("hexbin")
library(hexbin)
## Warning: package 'hexbin' was built under R version 4.5.2
# Same base plot, drawn with hexagonal bins instead of individual points.
surveys_plot + geom_hex()

# A hexagonal bin plot is better for large datasets because it reduces overplotting and highlights density patterns, making clusters and trends much clearer than in a scatter plot. However, a scatter plot preserves individual points and outliers, which are partly lost when data are aggregated into hexagonal bins.
Building your plots iteratively
# Step 1: make the points transparent so overlapping points stay visible.
surveys_complete %>%
  ggplot(aes(x = weight, y = hindfoot_length)) +
  geom_point(alpha = 0.1)

# Step 2: a colour given outside aes() is a constant applied to every point.
surveys_complete %>%
  ggplot(aes(x = weight, y = hindfoot_length)) +
  geom_point(color = "blue", alpha = 0.1)

# Step 3: a colour given inside aes() is mapped from the species_id column.
surveys_complete %>%
  ggplot(aes(x = weight, y = hindfoot_length)) +
  geom_point(mapping = aes(color = species_id), alpha = 0.1)

Challenge
# Weight per species as a scatter plot, with points coloured by plot type.
surveys_complete %>%
  ggplot(aes(x = species_id, y = weight)) +
  geom_point(mapping = aes(color = plot_type), alpha = 0.1)

# No, this scatter plot is not a very good way to show this type of data. Because there are many observations for each species, the points heavily overlap, making it difficult to see patterns, compare species, or interpret differences between plot types.
Boxplot
# Distribution of weight within each species.
surveys_complete %>%
  ggplot(aes(x = species_id, y = weight)) +
  geom_boxplot()

# Jittered raw points underneath, boxplot on top; the boxplot's own outlier
# markers are hidden because the jitter layer already shows every point.
surveys_complete %>%
  ggplot(aes(x = species_id, y = weight)) +
  geom_jitter(color = "tomato", alpha = 0.3) +
  geom_boxplot(outlier.shape = NA)

Challenge
# Violin plot of weight per species, drawn over the jittered raw points.
# geom_violin() has no `outlier.shape` parameter (that argument belongs to
# geom_boxplot()), so it is not passed here; passing it only triggers an
# "Ignoring unknown parameters" warning.
ggplot(data = surveys_complete, mapping = aes(x = species_id, y = weight)) +
  geom_jitter(alpha = 0.3, color = "tomato") +
  geom_violin()

# Weight per species with the y axis on a log10 scale.
surveys_complete %>%
  ggplot(aes(x = species_id, y = weight)) +
  scale_y_log10() +
  geom_jitter(color = "tomato", alpha = 0.3) +
  geom_boxplot(outlier.shape = NA)

# Same layout for hindfoot length (linear y axis).
surveys_complete %>%
  ggplot(aes(x = species_id, y = hindfoot_length)) +
  geom_jitter(color = "tomato", alpha = 0.3) +
  geom_boxplot(outlier.shape = NA)

# Check how plot_id is stored before using it as a colour.
class(surveys_complete[["plot_id"]])
## [1] "numeric"
# Turn plot_id into a factor for this plot only (surveys_complete itself is
# not modified), then colour the jittered points by it.
surveys_complete %>%
  mutate(plot_id = as.factor(plot_id)) %>%
  ggplot(mapping = aes(x = species_id, y = weight)) +
  geom_jitter(mapping = aes(color = plot_id), alpha = 0.3) +
  geom_boxplot(outlier.shape = NA)

Plotting time series data
# Number of records per year and genus (count() stores it in column `n`).
yearly_counts <- count(surveys_complete, year, genus)

# Without a grouping aesthetic, all genera are joined into a single line.
yearly_counts %>%
  ggplot(aes(x = year, y = n)) +
  geom_line()

# group = genus draws one line per genus.
yearly_counts %>%
  ggplot(aes(x = year, y = n, group = genus)) +
  geom_line()

# color = genus draws one line per genus and colours each one.
yearly_counts %>%
  ggplot(aes(x = year, y = n, color = genus)) +
  geom_line()

Integrating the pipe operator with ggplot2
# Count and plot in one pipeline: the counted table is piped straight into
# ggplot() as its data, and the finished plot is stored.
yearly_counts_graph <- surveys_complete %>%
  count(year, genus) %>%
  ggplot(aes(x = year, y = n, color = genus)) +
  geom_line()

# Printing the stored object renders it.
yearly_counts_graph

Faceting
# One panel per genus.
yearly_counts %>%
  ggplot(aes(x = year, y = n)) +
  geom_line() +
  facet_wrap(vars(genus))

# Records per year, genus and sex.
yearly_sex_counts <- count(surveys_complete, year, genus, sex)

# One panel per genus, one coloured line per sex.
yearly_sex_counts %>%
  ggplot(aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_wrap(vars(genus))

# Explicit grid: sex on the rows, genus on the columns.
yearly_sex_counts %>%
  ggplot(aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_grid(cols = vars(genus), rows = vars(sex))

# Single column of panels: facet by rows only.
yearly_sex_counts %>%
  ggplot(aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_grid(rows = vars(genus))

# Single row of panels: facet by columns only.
yearly_sex_counts %>%
  ggplot(aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_grid(cols = vars(genus))

ggplot2 themes
# Same faceted plot as before, with the complete theme theme_bw() applied.
yearly_sex_counts %>%
  ggplot(aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_wrap(facets = vars(genus)) +
  theme_bw()

Challenge
# Mean weight of each species in each year.
# `.groups = "drop"` returns an ungrouped tibble, so later verbs on
# yearly_weight are not silently applied per year, and summarize() no longer
# prints its "grouped output" message.
yearly_weight <- surveys_complete %>%
  group_by(year, species_id) %>%
  summarize(avg_weight = mean(weight), .groups = "drop")

# One panel and one coloured line per species.
ggplot(data = yearly_weight,
       mapping = aes(x = year, y = avg_weight, color = species_id)) +
  geom_line() +
  facet_wrap(vars(species_id)) +
  theme_bw()
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

Customization
# Add a title and readable axis labels.
yearly_sex_counts %>%
  ggplot(aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_wrap(facets = vars(genus)) +
  labs(
    x = "Year of observation",
    y = "Number of individuals",
    title = "Observed genera through time"
  ) +
  theme_bw()

# Same plot with the base text size set to 16.
yearly_sex_counts %>%
  ggplot(aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_wrap(facets = vars(genus)) +
  labs(
    x = "Year of observation",
    y = "Number of individuals",
    title = "Observed genera through time"
  ) +
  theme_bw() +
  theme(text = element_text(size = 16))

# Same plot with grey axis text, x labels rotated by 90 degrees and italic
# facet strip labels.
yearly_sex_counts %>%
  ggplot(aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_wrap(facets = vars(genus)) +
  labs(
    x = "Year of observation",
    y = "Number of individuals",
    title = "Observed genera through time"
  ) +
  theme_bw() +
  theme(
    text = element_text(size = 16),
    strip.text = element_text(face = "italic"),
    axis.text.x = element_text(
      colour = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
    ),
    axis.text.y = element_text(colour = "grey20", size = 12)
  )

# Reusable theme settings: size-16 text, grey size-12 axis text, and x axis
# labels rotated by 90 degrees.
grey_theme <- theme(
  text = element_text(size = 16),
  axis.text.x = element_text(
    colour = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
  ),
  axis.text.y = element_text(colour = "grey20", size = 12)
)

# A stored theme object is added to a plot like any other component.
surveys_complete %>%
  ggplot(aes(x = species_id, y = hindfoot_length)) +
  geom_boxplot() +
  grey_theme

Arranging plots
#install.packages("patchwork")
library(patchwork)
## Warning: package 'patchwork' was built under R version 4.5.2
# Top panel: weight per species on a log10 y axis.
plot_weight <- surveys_complete %>%
  ggplot(aes(x = species_id, y = weight)) +
  geom_boxplot() +
  scale_y_log10() +
  labs(x = "Species", y = expression(log[10](Weight)))

# Bottom panel: yearly record counts, one coloured line per genus.
plot_count <- yearly_counts %>%
  ggplot(aes(x = year, y = n, color = genus)) +
  geom_line() +
  labs(x = "Year", y = "Abundance")

# `/` stacks the two plots vertically; heights are in a 3:2 ratio.
(plot_weight / plot_count) + plot_layout(heights = c(3, 2))

Exporting plots
# Build the fully customised faceted plot and keep it in a variable so it can
# be passed to ggsave().
my_plot <- yearly_sex_counts %>%
  ggplot(aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_wrap(facets = vars(genus)) +
  labs(
    x = "Year of observation",
    y = "Number of individuals",
    title = "Observed genera through time"
  ) +
  theme_bw() +
  theme(
    text = element_text(size = 16),
    axis.text.x = element_text(
      colour = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
    ),
    axis.text.y = element_text(colour = "grey20", size = 12)
  )

# Write the plot to a PNG file with an explicit width and height.
ggsave(filename = "name_of_file.png", plot = my_plot, width = 15, height = 10)

## This also works for plots combined with patchwork
plot_combined <- (plot_weight / plot_count) + plot_layout(heights = c(3, 2))

# Only the width is given here; dpi sets the output resolution.
ggsave(filename = "plot_combined.png", plot = plot_combined, width = 10, dpi = 300)
## Saving 10 x 5 in image
# Narrow to the three columns of interest, then keep the records with a
# weight below 5.
surveys_sml <- surveys_complete %>%
  select(species_id, sex, weight) %>%
  filter(weight < 5)

# Print the result.
surveys_sml
## # A tibble: 15 × 3
## species_id sex weight
## <chr> <chr> <dbl>
## 1 PF F 4
## 2 PF F 4
## 3 RM F 4
## 4 RM M 4
## 5 PP M 4
## 6 RM M 4
## 7 RM M 4
## 8 RM M 4
## 9 PF M 4
## 10 PF F 4
## 11 RM M 4
## 12 RM M 4
## 13 RM F 4
## 14 RM M 4
## 15 RM M 4
Challenge
# Records collected before 1995, reduced to the year, sex and weight columns.
# No group_by() is needed: nothing is summarised per year, and grouping would
# only leave the result as a grouped tibble that affects later verbs.
surveys_before_1995 <- surveys %>%
  filter(year < 1995) %>%
  select(year, sex, weight)