RM+DA Quarto Demo

Author

Shauna Meredith

Published

October 10, 2024

Quarto

Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.

We can write in italics, and also bullet points:

one
two.

Running Code

When you click the Render button a document will be generated that includes both content and the output of embedded code. You can embed code like this:

library(tidyverse)

Include a plot

Code

# Bar chart with one bar per cut; bar height is the number of rows
ggplot(data = diamonds, mapping = aes(x = cut)) +
  geom_bar()

"more descriptive stuff here" — Figure 1: A bar chart of diamond cuts

This is a reference: Figure 1.

Exercises from R Data book for Graduates

library(tidyverse)

## Problem A
# One row per state with summary statistics of poptotal: mean, median,
# max, min, number of distinct values, first value, and two flags telling
# whether any value is below 5,000 or above 2,000,000.
group_by(midwest, state) %>%
  summarise(
    poptotalmean = mean(poptotal),
    poptotalmed = median(poptotal),
    popmax = max(poptotal),
    popmin = min(poptotal),
    popdistinct = n_distinct(poptotal),
    popfirst = first(poptotal),
    popany = any(poptotal < 5000),
    popany2 = any(poptotal > 2000000)
  ) %>%
  ungroup()

# A tibble: 5 × 9
  state poptotalmean poptotalmed  popmax popmin popdistinct popfirst popany
  <chr>        <dbl>       <dbl>   <int>  <int>       <int>    <int> <lgl> 
1 IL         112065.      24486. 5105067   4373         101    66090 TRUE  
2 IN          60263.      30362.  797159   5315          92    31095 FALSE 
3 MI         111992.      37308  2111687   1701          83    10145 TRUE  
4 OH         123263.      54930. 1412140  11098          88    25371 FALSE 
5 WI          67941.      33528   959275   3890          72    15682 TRUE  
# ℹ 1 more variable: popany2 <lgl>

## Problem B
# Per state: how many rows have poptotal below 5,000 (num5k), how many have
# poptotal above 2,000,000 (num2mil), and the total number of rows (numrows).
# Summing a logical vector counts its TRUE values.
group_by(midwest, state) %>%
  summarise(
    num5k = sum(poptotal < 5000),
    num2mil = sum(poptotal > 2000000),
    numrows = n()
  ) %>%
  ungroup()

# A tibble: 5 × 4
  state num5k num2mil numrows
  <chr> <int>   <int>   <int>
1 IL        1       1     102
2 IN        0       0      92
3 MI        1       1      83
4 OH        0       0      88
5 WI        2       0      72

## Problem C
# part I
# For every county name, count the number of distinct states it appears in,
# then list the names shared by the most states first.
group_by(midwest, county) %>%
  summarise(
    x = n_distinct(state)
  ) %>%
  arrange(desc(x)) %>%
  ungroup()

# A tibble: 320 × 2
   county         x
   <chr>      <int>
 1 CRAWFORD       5
 2 JACKSON        5
 3 MONROE         5
 4 ADAMS          4
 5 BROWN          4
 6 CLARK          4
 7 CLINTON        4
 8 JEFFERSON      4
 9 LAKE           4
10 WASHINGTON     4
# ℹ 310 more rows

# part II
# How does n() differ from n_distinct()?
# When would they be the same? different?
# Here x is simply the number of rows carrying each county name.
group_by(midwest, county) %>%
  summarise(
    x = n()
  ) %>%
  ungroup()

# A tibble: 320 × 2
   county        x
   <chr>     <int>
 1 ADAMS         4
 2 ALCONA        1
 3 ALEXANDER     1
 4 ALGER         1
 5 ALLEGAN       1
 6 ALLEN         2
 7 ALPENA        1
 8 ANTRIM        1
 9 ARENAC        1
10 ASHLAND       2
# ℹ 310 more rows

# part III
# hint:
# - How many distinctly different counties are there for each county?
# - Can there be more than 1 (county) county in each county?
# - What if we replace county with 'state'?
# Counting distinct values of the grouping variable itself: every group
# holds exactly one county name, so x is 1 in every row.
group_by(midwest, county) %>%
  summarise(
    x = n_distinct(county)
  ) %>%
  ungroup()

# A tibble: 320 × 2
   county        x
   <chr>     <int>
 1 ADAMS         1
 2 ALCONA        1
 3 ALEXANDER     1
 4 ALGER         1
 5 ALLEGAN       1
 6 ALLEN         1
 7 ALPENA        1
 8 ANTRIM        1
 9 ARENAC        1
10 ASHLAND       1
# ℹ 310 more rows

## Problem D
# Per clarity level: number of distinct colors (a), number of distinct
# prices (b) and number of rows (c).
group_by(diamonds, clarity) %>%
  summarise(
    a = n_distinct(color),
    b = n_distinct(price),
    c = n()
  ) %>%
  ungroup()

# A tibble: 8 × 4
  clarity     a     b     c
  <ord>   <int> <int> <int>
1 I1          7   632   741
2 SI2         7  4904  9194
3 SI1         7  5380 13065
4 VS2         7  5051 12258
5 VS1         7  3926  8171
6 VVS2        7  2409  5066
7 VVS1        7  1623  3655
8 IF          7   902  1790

## Problem E
# part I
# Mean (m) and standard deviation (s) of price for each color/cut pair.
# `.groups = "drop"` makes summarise() return an ungrouped tibble directly,
# so it no longer prints its "grouped output by 'color'" message and a
# trailing ungroup() is not needed. The resulting table is unchanged.
diamonds %>%
  group_by(color, cut) %>%
  summarise(m = mean(price),
            s = sd(price),
            .groups = "drop")

# A tibble: 35 × 4
   color cut           m     s
   <ord> <ord>     <dbl> <dbl>
 1 D     Fair      4291. 3286.
 2 D     Good      3405. 3175.
 3 D     Very Good 3470. 3524.
 4 D     Premium   3631. 3712.
 5 D     Ideal     2629. 3001.
 6 E     Fair      3682. 2977.
 7 E     Good      3424. 3331.
 8 E     Very Good 3215. 3408.
 9 E     Premium   3539. 3795.
10 E     Ideal     2598. 2956.
# ℹ 25 more rows

# part II
# Same statistics with the grouping variables swapped (cut first, then
# color), which changes the column and row order of the result.
# `.groups = "drop"` returns an ungrouped tibble directly, so summarise()
# does not print its "grouped output by 'cut'" message and no trailing
# ungroup() is needed.
diamonds %>%
  group_by(cut, color) %>%
  summarise(m = mean(price),
            s = sd(price),
            .groups = "drop")

# A tibble: 35 × 4
   cut   color     m     s
   <ord> <ord> <dbl> <dbl>
 1 Fair  D     4291. 3286.
 2 Fair  E     3682. 2977.
 3 Fair  F     3827. 3223.
 4 Fair  G     4239. 3610.
 5 Fair  H     5136. 3886.
 6 Fair  I     4685. 3730.
 7 Fair  J     4976. 4050.
 8 Good  D     3405. 3175.
 9 Good  E     3424. 3331.
10 Good  F     3496. 3202.
# ℹ 25 more rows

# part III
# hint:
# - How good is the sale if the price of diamonds equaled msale?
# - e.g. The diamonds are x% off the original price in msale.
# msale is computed from m, the mean defined earlier in the same
# summarise() call. `.groups = "drop"` returns an ungrouped tibble directly,
# so summarise() does not print its "grouped output by 'cut', 'color'"
# message and no trailing ungroup() is needed.
diamonds %>%
  group_by(cut, color, clarity) %>%
  summarise(m = mean(price),
            s = sd(price),
            msale = m * 0.80,
            .groups = "drop")

# A tibble: 276 × 6
   cut   color clarity     m     s msale
   <ord> <ord> <ord>   <dbl> <dbl> <dbl>
 1 Fair  D     I1      7383  5899. 5906.
 2 Fair  D     SI2     4355. 3260. 3484.
 3 Fair  D     SI1     4273. 3019. 3419.
 4 Fair  D     VS2     4513. 3383. 3610.
 5 Fair  D     VS1     2921. 2550. 2337.
 6 Fair  D     VVS2    3607  3629. 2886.
 7 Fair  D     VVS1    4473  5457. 3578.
 8 Fair  D     IF      1620.  525. 1296.
 9 Fair  E     I1      2095.  824. 1676.
10 Fair  E     SI2     4172. 3055. 3338.
# ℹ 266 more rows

## Problem F
# Per cut: mean depth (potato), mean price (pizza), median y (popcorn),
# their difference potato - pizza (pineapple), that difference squared
# (papaya) and the number of rows (peach). Later columns reuse columns
# created earlier in the same summarise() call.
group_by(diamonds, cut) %>%
  summarise(
    potato = mean(depth),
    pizza = mean(price),
    popcorn = median(y),
    pineapple = potato - pizza,
    papaya = pineapple ^ 2,
    peach = n()
  ) %>%
  ungroup()

# A tibble: 5 × 7
  cut       potato pizza popcorn pineapple    papaya peach
  <ord>      <dbl> <dbl>   <dbl>     <dbl>     <dbl> <int>
1 Fair        64.0 4359.    6.1     -4295. 18444586.  1610
2 Good        62.4 3929.    5.99    -3866. 14949811.  4906
3 Very Good   61.8 3982.    5.77    -3920. 15365942. 12082
4 Premium     61.3 4584.    6.06    -4523. 20457466. 13791
5 Ideal       61.7 3458.    5.26    -3396. 11531679. 21551

## Problem G
# part I
# Mean price per color (m), then two columns added to that summary:
# x1, a text label built from the color, and x2, the constant 5.
# ungroup() is the last step of the pipeline here.
diamonds %>%
  group_by(color) %>%
  summarise(m = mean(price)) %>%
  mutate(x1 = str_c("Diamond color ", color),
         x2 = 5) %>%
  ungroup()

# A tibble: 7 × 4
  color     m x1                 x2
  <ord> <dbl> <chr>           <dbl>
1 D     3170. Diamond color D     5
2 E     3077. Diamond color E     5
3 F     3725. Diamond color F     5
4 G     3999. Diamond color G     5
5 H     4487. Diamond color H     5
6 I     5092. Diamond color I     5
7 J     5324. Diamond color J     5

# part II
# What does the first ungroup () do? Is it useful here? Why/why not?
# Why isn't there a closing ungroup() after the mutate()?
# Same steps as part I, except that ungroup() runs before mutate() instead
# of after it; the printed result is identical to part I.
diamonds %>%
  group_by(color) %>%
  summarise(m = mean(price)) %>%
  ungroup() %>%
  mutate(x1 = str_c("Diamond color ", color),
         x2 = 5)

# A tibble: 7 × 4
  color     m x1                 x2
  <ord> <dbl> <chr>           <dbl>
1 D     3170. Diamond color D     5
2 E     3077. Diamond color E     5
3 F     3725. Diamond color F     5
4 G     3999. Diamond color G     5
5 H     4487. Diamond color H     5
6 I     5092. Diamond color I     5
7 J     5324. Diamond color J     5

## Problem H
# part I
# Halve every price (x1), then average x1 while the data is still grouped
# by color: the result has one row per color.
diamonds %>%
  group_by(color) %>%
  mutate(x1 = price * 0.5) %>%
  summarise(m = mean(x1)) %>%
  ungroup()

# A tibble: 7 × 2
  color     m
  <ord> <dbl>
1 D     1585.
2 E     1538.
3 F     1862.
4 G     2000.
5 H     2243.
6 I     2546.
7 J     2662.

# part II
# What's the difference between part I and part II?
# Here ungroup() runs before summarise(), so the mean of x1 is taken over
# all rows at once and the result is a single row.
diamonds%>%
  group_by(color) %>%
  mutate(x1 = price * 0.5) %>%
  ungroup() %>%
  summarise(m = mean(x1))

# A tibble: 1 × 1
      m
  <dbl>
1 1966.

library(tidyverse)
view(diamonds)
# Grouping by price and immediately ungrouping: no summary step runs in
# between, so the printed table is the diamonds data with its 10 columns.
group_by(diamonds, price) %>%
  ungroup()

# A tibble: 53,940 × 10
   carat cut       color clarity depth table price     x     y     z
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43
 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31
 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75
 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48
 7  0.24 Very Good I     VVS1     62.3    57   336  3.95  3.98  2.47
 8  0.26 Very Good H     SI1      61.9    55   337  4.07  4.11  2.53
 9  0.22 Fair      E     VS2      65.1    61   337  3.87  3.78  2.49
10  0.23 Very Good H     VS1      59.4    61   338  4     4.05  2.39
# ℹ 53,930 more rows

# Grouping by an expression rather than a column: group_by() first adds the
# expression as a new column named `desc(price)` (the negated price), and
# that column remains after ungroup(). The rows are not reordered.
group_by(diamonds, desc(price)) %>%
  ungroup()

# A tibble: 53,940 × 11
   carat cut     color clarity depth table price     x     y     z `desc(price)`
   <dbl> <ord>   <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>         <int>
 1  0.23 Ideal   E     SI2      61.5    55   326  3.95  3.98  2.43          -326
 2  0.21 Premium E     SI1      59.8    61   326  3.89  3.84  2.31          -326
 3  0.23 Good    E     VS1      56.9    65   327  4.05  4.07  2.31          -327
 4  0.29 Premium I     VS2      62.4    58   334  4.2   4.23  2.63          -334
 5  0.31 Good    J     SI2      63.3    58   335  4.34  4.35  2.75          -335
 6  0.24 Very G… J     VVS2     62.8    57   336  3.94  3.96  2.48          -336
 7  0.24 Very G… I     VVS1     62.3    57   336  3.95  3.98  2.47          -336
 8  0.26 Very G… H     SI1      61.9    55   337  4.07  4.11  2.53          -337
 9  0.22 Fair    E     VS2      65.1    61   337  3.87  3.78  2.49          -337
10  0.23 Very G… H     VS1      59.4    61   338  4     4.05  2.39          -338
# ℹ 53,930 more rows

# Grouping by two existing columns and ungroupingining nothing else: the
# printed table has the same 10 columns as diamonds.
group_by(diamonds, price, cut) %>%
  ungroup()

# A tibble: 53,940 × 10
   carat cut       color clarity depth table price     x     y     z
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43
 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31
 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75
 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48
 7  0.24 Very Good I     VVS1     62.3    57   336  3.95  3.98  2.47
 8  0.26 Very Good H     SI1      61.9    55   337  4.07  4.11  2.53
 9  0.22 Fair      E     VS2      65.1    61   337  3.87  3.78  2.49
10  0.23 Very Good H     VS1      59.4    61   338  4     4.05  2.39
# ℹ 53,930 more rows

# `-price` is an expression, so group_by() adds it as a new column named
# `-price` (the negated price); the column remains after ungroup().
group_by(diamonds, -price, cut) %>%
  ungroup()

# A tibble: 53,940 × 11
   carat cut       color clarity depth table price     x     y     z `-price`
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>    <int>
 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43     -326
 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31     -326
 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31     -327
 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63     -334
 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75     -335
 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48     -336
 7  0.24 Very Good I     VVS1     62.3    57   336  3.95  3.98  2.47     -336
 8  0.26 Very Good H     SI1      61.9    55   337  4.07  4.11  2.53     -337
 9  0.22 Fair      E     VS2      65.1    61   337  3.87  3.78  2.49     -337
10  0.23 Very Good H     VS1      59.4    61   338  4     4.05  2.39     -338
# ℹ 53,930 more rows

# clarity is an ordered factor, for which unary minus is not meaningful:
# R warns and the added `-clarity` column is NA in every row.
group_by(diamonds, price, -clarity) %>%
  ungroup()

Warning: There was 1 warning in `group_by()`.
ℹ In argument: `-clarity`.
Caused by warning in `Ops.ordered()`:
! '-' is not meaningful for ordered factors

# A tibble: 53,940 × 11
   carat cut       color clarity depth table price     x     y     z `-clarity`
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl> <lgl>     
 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43 NA        
 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31 NA        
 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31 NA        
 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63 NA        
 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75 NA        
 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48 NA        
 7  0.24 Very Good I     VVS1     62.3    57   336  3.95  3.98  2.47 NA        
 8  0.26 Very Good H     SI1      61.9    55   337  4.07  4.11  2.53 NA        
 9  0.22 Fair      E     VS2      65.1    61   337  3.87  3.78  2.49 NA        
10  0.23 Very Good H     VS1      59.4    61   338  4     4.05  2.39 NA        
# ℹ 53,930 more rows

library(tidyverse)
# Add a salePrice column: the price reduced by a flat 250
mutate(
  diamonds,
  salePrice = price - 250
)

# A tibble: 53,940 × 11
   carat cut       color clarity depth table price     x     y     z salePrice
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>     <dbl>
 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43        76
 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31        76
 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31        77
 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63        84
 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75        85
 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48        86
 7  0.24 Very Good I     VVS1     62.3    57   336  3.95  3.98  2.47        86
 8  0.26 Very Good H     SI1      61.9    55   337  4.07  4.11  2.53        87
 9  0.22 Fair      E     VS2      65.1    61   337  3.87  3.78  2.49        87
10  0.23 Very Good H     VS1      59.4    61   338  4     4.05  2.39        88
# ℹ 53,930 more rows

library(tidyverse)
# Drop the x, y and z columns and keep everything else
select(
  diamonds,
  -c(x, y, z)
)

# A tibble: 53,940 × 7
   carat cut       color clarity depth table price
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int>
 1  0.23 Ideal     E     SI2      61.5    55   326
 2  0.21 Premium   E     SI1      59.8    61   326
 3  0.23 Good      E     VS1      56.9    65   327
 4  0.29 Premium   I     VS2      62.4    58   334
 5  0.31 Good      J     SI2      63.3    58   335
 6  0.24 Very Good J     VVS2     62.8    57   336
 7  0.24 Very Good I     VVS1     62.3    57   336
 8  0.26 Very Good H     SI1      61.9    55   337
 9  0.22 Fair      E     VS2      65.1    61   337
10  0.23 Very Good H     VS1      59.4    61   338
# ℹ 53,930 more rows

library(tidyverse)
# Number of rows for each cut
group_by(diamonds, cut) %>%
  summarise(
    count = n()
  )

# A tibble: 5 × 2
  cut       count
  <ord>     <int>
1 Fair       1610
2 Good       4906
3 Very Good 12082
4 Premium   13791
5 Ideal     21551

library(tidyverse)
# Without grouping, n() is the row count of the whole table, so every row
# receives the same totalNum value.
mutate(
  diamonds,
  totalNum = n()
)

# A tibble: 53,940 × 11
   carat cut       color clarity depth table price     x     y     z totalNum
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>    <int>
 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43    53940
 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31    53940
 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31    53940
 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63    53940
 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75    53940
 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48    53940
 7  0.24 Very Good I     VVS1     62.3    57   336  3.95  3.98  2.47    53940
 8  0.26 Very Good H     SI1      61.9    55   337  4.07  4.11  2.53    53940
 9  0.22 Fair      E     VS2      65.1    61   337  3.87  3.78  2.49    53940
10  0.23 Very Good H     VS1      59.4    61   338  4     4.05  2.39    53940
# ℹ 53,930 more rows

Why is grouping data necessary?

It allows you to summarise data for specific categories
It helps to organise data in a way that is meaningful, easy to understand and analyse.
It allows for complex analyses of data by providing a framework for comparing groups side by side. This is essential for exploratory data analysis.
Grouping also makes operations more efficient by allowing R to process subsets of data at a time instead of the entire dataset. This is beneficial when working with a large dataset.
It facilitates the visualisation of data. For example, if you wanted a bar chart showing the count of diamonds for each cut, the data would first have to be grouped and summarised.

In short, grouping data is a crucial step in data manipulation and analysis that enhances clarity, organisation, and the ability to perform specific calculations efficiently.

Why is ungrouping data important?

Ungrouping data is important for a number of reasons:

Returning to the original structure - this helps when you are wanting to perform subsequent analyses or manipulations without the grouping affecting the results.
Avoiding unintended consequences - if you forget to ungroup data after a grouped operation, subsequent calculations or transformations may be inadvertently applied to the group data, leading to unexpected results.
Facilitating other operations - certain functions and operations expect a regular data frame, not a grouped one. Ungrouping allows you to apply functions that may not behave correctly on grouped data.
Simplifying further analysis - once data has been summarised, you may want to perform further analysis on the resulting data frame. Ungrouping ensures that you’re working with the data in a straightforward manner.
Clarity in code - ungrouping can improve code readability. It makes it clear to anyone reading the code that the intention is to work with a regular data frame, rather than one that retains grouping attributes.

In short, ungrouping is a necessary step to ensure that subsequent operations are performed correctly, and to maintain clarity and structure in the data analysis workflow.

When should you ungroup data?

Data should be ungrouped:

After summarising - once data has been summarised, it should be ungrouped so that further operations can be performed that should not be grouped.
Before additional transformations - if transformations are going to be applied (e.g., mutate) that will apply to the entire dataset rather than within groups, it would be best to ungroup data first.
When you no longer need grouping - if your analysis requires a flat data structure (e.g., when plotting or exporting data), ungrouping helps achieve that.
To avoid errors - if you notice that subsequent calculations are behaving unexpectedly or producing errors due to residual grouping, ungrouping can resolve these issues.
To improve code clarity - if your code is becoming complex, ungrouping can make it clearer where the data is being grouped and when it's being treated as a standard data frame.

In short, ungrouping is good practice after you have completed the intended grouped operations, ensuring that workflow remains clear and that results are as expected.

If the code does not contain group_by, do you still need to ungroup at the end?

No, because the data has not been grouped. Ungroup should only be used when the group_by function has been performed.

Andrew’s data

library(tidyverse)
library(modeldata)

# Open the spreadsheet-style data viewer only in an interactive session.
# View() needs a GUI, so calling it while the document is being rendered
# serves no purpose and may fail on a system without a display.
if (interactive()) {
  View(crickets)
}

# The basics

# Scatter plot of chirp rate against temperature with descriptive labels
ggplot(data = crickets, mapping = aes(x = temp, y = rate)) +
  geom_point() +
  labs(
    title = "Cricket chirps",
    caption = "Source: McDonald (2009)",
    x = "Temperature",
    y = "Chirp rate"
  )

# Same scatter plot with the points colored by species (Dark2 palette)
ggplot(
  data = crickets,
  mapping = aes(x = temp, y = rate, color = species)
) +
  geom_point() +
  labs(
    title = "Cricket chirps",
    caption = "Source: McDonald (2009)",
    x = "Temperature",
    y = "Chirp rate",
    color = "Species"
  ) +
  scale_color_brewer(palette = "Dark2")

# Modifying basic properties of the plot

# Fixed point appearance is set inside geom_point(), outside aes(): every
# point is a red, semi-transparent square of size 2.
ggplot(data = crickets, mapping = aes(x = temp, y = rate)) +
  geom_point(
    shape = "square",
    size = 2,
    color = "red",
    alpha = 0.4
  ) +
  labs(
    title = "Cricket chirps",
    caption = "Source: McDonald (2009)",
    x = "Temperature",
    y = "Chirp rate"
  )

# Scatter plot with one fitted straight line and no confidence band.
# The formula is given explicitly so geom_smooth() does not print its
# "using formula = 'y ~ x'" message when the document is rendered; the
# fitted line is the same as with the default.
ggplot(crickets, aes(x = temp,
                     y = rate)) +
  geom_point() +
  geom_smooth(method = "lm",
              formula = y ~ x,
              se = FALSE) +
  labs(x = "Temperature",
       y = "Chirp rate",
       title = "Cricket chirps",
       caption = "Source: McDonald (2009)")

# Scatter plot colored by species; because color is mapped in ggplot(),
# geom_smooth() fits a separate straight line for each species.
# The formula is given explicitly so geom_smooth() does not print its
# "using formula = 'y ~ x'" message when the document is rendered.
ggplot(crickets, aes(x = temp,
                     y = rate,
                     color = species)) +
  geom_point() +
  geom_smooth(method = "lm",
              formula = y ~ x,
              se = FALSE) +
  labs(x = "Temperature",
       y = "Chirp rate",
       color = "Species",
       title = "Cricket chirps",
       caption = "Source: McDonald (2009)") +
  scale_color_brewer(palette = "Dark2")

# Other plots by Andrew

# Histogram of chirp rate in 15 bins (one quantitative variable)
ggplot(data = crickets, mapping = aes(x = rate)) +
  geom_histogram(bins = 15)

# Frequency polygon of chirp rate with the same 15 bins
ggplot(data = crickets, mapping = aes(x = rate)) +
  geom_freqpoly(bins = 15)

# Bar chart of rows per species with fixed fill and outline colors
ggplot(data = crickets, mapping = aes(x = species)) +
  geom_bar(
    fill = "lightblue",
    color = "black"
  )

# Bar chart with the fill mapped to species; the legend is hidden
ggplot(data = crickets, mapping = aes(x = species, fill = species)) +
  geom_bar(show.legend = FALSE) +
  scale_fill_brewer(palette = "Dark2")

# Box plot of chirp rate per species, outlined in the species color
ggplot(
  data = crickets,
  mapping = aes(x = species, y = rate, color = species)
) +
  geom_boxplot(show.legend = FALSE) +
  scale_color_brewer(palette = "Dark2") +
  theme_minimal()

# Faceting

# Not great: both species share a single histogram panel
ggplot(data = crickets, mapping = aes(x = rate, fill = species)) +
  geom_histogram(bins = 15) +
  scale_fill_brewer(palette = "Dark2")

# One histogram panel per species; the legend is hidden
ggplot(data = crickets, mapping = aes(x = rate, fill = species)) +
  geom_histogram(
    bins = 15,
    show.legend = FALSE
  ) +
  facet_wrap(~species) +
  scale_fill_brewer(palette = "Dark2")

# Same panels arranged in a single column, with a minimal theme
ggplot(data = crickets, mapping = aes(x = rate, fill = species)) +
  geom_histogram(
    bins = 15,
    show.legend = FALSE
  ) +
  facet_wrap(
    ~species,
    ncol = 1
  ) +
  scale_fill_brewer(palette = "Dark2") +
  theme_minimal()