You will be using this dataset for the homework.
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(palmerpenguins)
##
## Attaching package: 'palmerpenguins'
## The following objects are masked from 'package:datasets':
##
## penguins, penguins_raw
# Keep only the penguins that have a recorded species, sex, and body mass;
# a row is dropped as soon as any one of the three is NA.
penguins_clean <- penguins %>%
  filter(complete.cases(species, sex, body_mass_g))
#View(penguins_clean)
# library
library(ggplot2)
library(dplyr)
library(palmerpenguins)
# Print a compact overview of the cleaned data: dimensions, column names,
# column types, and the first few values of each column.
str(penguins_clean)
## tibble [333 × 8] (S3: tbl_df/tbl/data.frame)
## $ species : Factor w/ 3 levels "Adelie","Chinstrap",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ island : Factor w/ 3 levels "Biscoe","Dream",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ bill_length_mm : num [1:333] 39.1 39.5 40.3 36.7 39.3 38.9 39.2 41.1 38.6 34.6 ...
## $ bill_depth_mm : num [1:333] 18.7 17.4 18 19.3 20.6 17.8 19.6 17.6 21.2 21.1 ...
## $ flipper_length_mm: int [1:333] 181 186 195 193 190 181 195 182 191 198 ...
## $ body_mass_g : int [1:333] 3750 3800 3250 3450 3650 3625 4675 3200 3800 4400 ...
## $ sex : Factor w/ 2 levels "female","male": 2 1 1 1 2 1 2 1 2 2 ...
## $ year : int [1:333] 2007 2007 2007 2007 2007 2007 2007 2007 2007 2007 ...
# Violin plot of body mass for each species, split by sex, with every
# individual penguin overlaid as a jittered point.
penguins_clean %>%
  ggplot(aes(x = species, y = body_mass_g, fill = sex)) +
  geom_violin(
    # Spell out the dodge width so it is visibly paired with the point layer.
    position = position_dodge(width = 0.9),
    alpha = 0.5
  ) +
  geom_point(
    aes(color = sex),
    position = position_jitterdodge(
      jitter.width = 0.3,
      # Must equal the violin dodge width; a different value shifts each
      # point cloud off the centre line of the violin it belongs to.
      dodge.width = 0.9
    ),
    alpha = 0.7,
    size = 2
  ) +
  labs(
    title = "Body Mass Across Penguin Species by Sex",
    x = "Species",
    y = "Body Mass (g)",
    # Same legend title for fill and color so the two legends merge into one.
    fill = "Sex",
    color = "Sex"
  ) +
  theme_minimal()
#install.packages("ggdist")
library(ggdist)
# Raincloud plot of body mass per species: a half-density "cloud", a narrow
# boxplot, and the raw observations as jittered "rain".
penguins_clean %>%
  ggplot(aes(x = species, y = body_mass_g, fill = species)) +
  stat_halfeye(
    adjust = 0.4,
    justification = -0.1,
    alpha = 0.8
  ) +
  geom_boxplot(
    # Outliers are already drawn by the jitter layer; hide the duplicates.
    outlier.shape = NA,
    width = 0.11
  ) +
  geom_jitter(
    width = 0.06,
    # Jitter sideways only, so every point stays at its measured body mass.
    height = 0,
    alpha = 0.4,
    size = 1.2
  ) +
  labs(
    title = "Raincloud Plot; Body Mass by Species",
    # Labels follow the mapping above: species on x, body mass on y.
    x = "Species",
    y = "Body Mass (g)",
    fill = "Species"
  ) +
  theme_minimal()
# Don't forget, you will need to make summary data.
# Per-species summary of body mass.
# Step 1 aggregates each species to its mean, standard deviation, and count;
# step 2 derives the standard error and the mean +/- 1.96 * SE limits.
penguins_sum <- penguins_clean %>%
  group_by(species) %>%
  summarise(
    mean_mass = mean(body_mass_g),
    sd_mass = sd(body_mass_g),
    n = n()
  ) %>%
  mutate(
    se = sd_mass / sqrt(n),
    lower_lim = mean_mass - 1.96 * se,
    upper_lim = mean_mass + 1.96 * se
  )

# Show the summary table (one row per species).
head(penguins_sum)
## # A tibble: 3 × 7
## species mean_mass sd_mass n se lower_lim upper_lim
## <fct> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 Adelie 3706. 459. 146 38.0 3632. 3781.
## 2 Chinstrap 3733. 384. 68 46.6 3642. 3824.
## 3 Gentoo 5092. 501. 119 46.0 5002. 5183.
# Forest plot: one point per species at its mean body mass, with a horizontal
# bar running from lower_lim to upper_lim of the summary table.
ggplot(penguins_sum, aes(x = mean_mass, y = species)) +
  geom_point(size = 4) +
  geom_errorbar(
    aes(xmin = lower_lim, xmax = upper_lim),
    # Horizontal bars: the interval runs along x, the caps along y.
    orientation = "y",
    # Cap size is a fixed setting, not a data mapping, so it goes outside
    # aes().
    width = 0.3
  ) +
  labs(
    title = "Forest Plot; Mean Body Mass by Species",
    x = "Mean Body Mass (g)",
    y = "species"
  ) +
  theme_minimal()