# This is a code lab from Data Science in a Box on data visualization.
# Load the packages used below.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(palmerpenguins)
# Print a compact, column-by-column preview of the penguins data
penguins %>%
  glimpse()
## Rows: 344
## Columns: 8
## $ species <fct> Adelie, Adelie, Adelie, Adelie, Adelie, Adelie, Adel…
## $ island <fct> Torgersen, Torgersen, Torgersen, Torgersen, Torgerse…
## $ bill_length_mm <dbl> 39.1, 39.5, 40.3, NA, 36.7, 39.3, 38.9, 39.2, 34.1, …
## $ bill_depth_mm <dbl> 18.7, 17.4, 18.0, NA, 19.3, 20.6, 17.8, 19.6, 18.1, …
## $ flipper_length_mm <int> 181, 186, 195, NA, 193, 190, 181, 195, 193, 190, 186…
## $ body_mass_g <int> 3750, 3800, 3250, NA, 3450, 3650, 3625, 4675, 3475, …
## $ sex <fct> male, female, female, NA, female, male, female, male…
## $ year <int> 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007…
# Keep a plain data.frame copy of the penguins data for inspection.
# Evaluate `penguin_data` (or call View(penguin_data)) to look at it.
penguin_data <- as.data.frame(penguins)
# Scatter plot of bill length against bill depth, colored by species
ggplot(
  data = penguins,
  mapping = aes(x = bill_depth_mm, y = bill_length_mm, color = species)
) +
  geom_point() +
  scale_color_viridis_d() +
  labs(
    x = "Bill depth in mm",
    y = "Bill length in mm",
    color = "species",
    title = "Bill length and depth",
    subtitle = "Dimensions for Adelie, Chinstrap, and Gentoo Penguins",
    caption = "Source: Palmer Station LTER / palmerpenguins package"
  )
## Warning: Removed 2 rows containing missing values (geom_point).

# Aesthetic options: map island to the point shape so the plot also shows
# how the penguin species are distributed across the islands
ggplot(
  data = penguins,
  mapping = aes(
    x = bill_depth_mm,
    y = bill_length_mm,
    color = species,
    shape = island
  )
) +
  geom_point() +
  scale_color_viridis_d() +
  labs(
    x = "Bill depth in mm",
    y = "Bill length in mm",
    color = "species",
    title = "Bill length and depth",
    subtitle = "Dimensions for Adelie, Chinstrap, and Gentoo Penguins",
    caption = "Source: Palmer Station LTER / palmerpenguins package"
  )
## Warning: Removed 2 rows containing missing values (geom_point).

# Size aesthetic: map body_mass_g to the point size; species drives both
# the color and the shape of each point
ggplot(
  data = penguins,
  mapping = aes(
    x = bill_depth_mm,
    y = bill_length_mm,
    color = species,
    size = body_mass_g,
    shape = species
  )
) +
  geom_point() +
  scale_color_viridis_d() +
  labs(
    x = "Bill depth in mm",
    y = "Bill length in mm",
    color = "species",
    title = "Bill length and depth",
    subtitle = "Dimensions for Adelie, Chinstrap, and Gentoo Penguins",
    caption = "Source: Palmer Station LTER / palmerpenguins package"
  )
## Warning: Removed 2 rows containing missing values (geom_point).

# Alpha aesthetic: map flipper_length_mm to point transparency, so fainter
# points have shorter flippers and more opaque points have longer flippers
ggplot(
  data = penguins,
  mapping = aes(
    x = bill_depth_mm,
    y = bill_length_mm,
    color = species,
    shape = species,
    size = body_mass_g,
    alpha = flipper_length_mm
  )
) +
  geom_point() +
  scale_color_viridis_d() +
  labs(
    x = "Bill depth in mm",
    y = "Bill length in mm",
    color = "species",
    title = "Bill length and depth",
    subtitle = "Dimensions for Adelie, Chinstrap, and Gentoo Penguins",
    caption = "Source: Palmer Station LTER / palmerpenguins package"
  )
## Warning: Removed 2 rows containing missing values (geom_point).

# Faceting: split the plot into smaller panels, each showing a subset of the
# data. facet_grid() lays the panels out on a 2D grid written as
# rows ~ columns, so species defines the panel rows and island the columns.
ggplot(penguins, aes(bill_depth_mm, bill_length_mm, color = species)) +
  geom_point() +
  facet_grid(species ~ island) +
  labs(
    title = "Bill length and depth",
    subtitle = "Dimensions for Adelie, Chinstrap, and Gentoo Penguins",
    x = "Bill depth in mm", y = "Bill length in mm",
    color = "species",
    caption = "Source: Palmer Station LTER / palmerpenguins package"
  ) +
  scale_color_viridis_d() +
  # The facet rows already label each species, so hide the color legend.
  # "none" replaces the deprecated `guides(color = FALSE)` spelling.
  guides(color = "none")
## Warning: Removed 2 rows containing missing values (geom_point).

# Facet by species and sex: species defines the panel rows and sex defines
# the panel columns (facet_grid() formulas are written as rows ~ columns).
ggplot(penguins, aes(bill_depth_mm, bill_length_mm, color = species)) +
  geom_point() +
  facet_grid(species ~ sex) +
  labs(
    title = "Bill length and depth",
    subtitle = "Dimensions for Adelie, Chinstrap, and Gentoo Penguins",
    x = "Bill depth in mm", y = "Bill length in mm",
    color = "species",
    caption = "Source: Palmer Station LTER / palmerpenguins package"
  ) +
  scale_color_viridis_d() +
  # The facet rows already label each species, so hide the color legend.
  # "none" replaces the deprecated `guides(color = FALSE)` spelling.
  guides(color = "none")
## Warning: Removed 2 rows containing missing values (geom_point).

# facet_wrap() is useful when subsetting the data by a single variable: it
# builds a 1D ribbon of panels and wraps it into rows and columns.
# Here each island gets its own panel.
ggplot(penguins, aes(bill_depth_mm, bill_length_mm, color = species)) +
  geom_point() +
  facet_wrap(~island) +
  labs(
    title = "Bill length and depth",
    subtitle = "Dimensions for Adelie, Chinstrap, and Gentoo Penguins",
    x = "Bill depth in mm", y = "Bill length in mm",
    color = "species",
    caption = "Source: Palmer Station LTER / palmerpenguins package"
  ) +
  scale_color_viridis_d() +
  # Hide the color legend; "none" replaces the deprecated
  # `guides(color = FALSE)` spelling.
  guides(color = "none")
## Warning: Removed 2 rows containing missing values (geom_point).
