Data Sources

Data was collected and made available by Dr. Kristen Gorman and the Palmer Station, Antarctica LTER, a member of the Long Term Ecological Research Network. The data is available under a CC-0 license in accordance with the Palmer Station LTER Data Policy and the LTER Data Access Policy for Type I data. The palmerpenguins package contains two datasets: penguins_raw and penguins. Both datasets contain data for 344 penguins. There are 3 different species of penguins in this dataset, collected from 3 islands in the Palmer Archipelago, Antarctica, from 2007 to 2009.

Install Packages

install.packages("tidyverse")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
install.packages("dplyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
library(dplyr)
install.packages("ggplot2")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
library(ggplot2)
install.packages("palmerpenguins")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
library(palmerpenguins)
data(penguins)

Taking a closer look at the dataset

# Preview the first six rows to see the columns and their types.
penguins %>% head(n = 6)
## # A tibble: 6 × 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <fct>   <fct>           <dbl>         <dbl>            <int>       <int> <fct>
## 1 Adelie  Torge…           39.1          18.7              181        3750 male 
## 2 Adelie  Torge…           39.5          17.4              186        3800 fema…
## 3 Adelie  Torge…           40.3          18                195        3250 fema…
## 4 Adelie  Torge…           NA            NA                 NA          NA <NA> 
## 5 Adelie  Torge…           36.7          19.3              193        3450 fema…
## 6 Adelie  Torge…           39.3          20.6              190        3650 male 
## # … with 1 more variable: year <int>

How many penguins of each species?

# Tally penguins per species; .drop = FALSE keeps factor levels with no rows.
count(penguins, species, .drop = FALSE)
## # A tibble: 3 × 2
##   species       n
##   <fct>     <int>
## 1 Adelie      152
## 2 Chinstrap    68
## 3 Gentoo      124
# Bar chart of penguin counts per species, one fill colour per species.
ggplot(penguins, aes(x = species, fill = species)) +
  geom_bar()

How many penguins of each species are on each island?

# Tally every species/island pair; .drop = FALSE keeps pairs that have no
# penguins so they show up with n = 0.
count(penguins, species, island, .drop = FALSE)
## # A tibble: 9 × 3
##   species   island        n
##   <fct>     <fct>     <int>
## 1 Adelie    Biscoe       44
## 2 Adelie    Dream        56
## 3 Adelie    Torgersen    52
## 4 Chinstrap Biscoe        0
## 5 Chinstrap Dream        68
## 6 Chinstrap Torgersen     0
## 7 Gentoo    Biscoe      124
## 8 Gentoo    Dream         0
## 9 Gentoo    Torgersen     0
# Penguins per island, drawn as one panel per species.
ggplot(penguins, aes(x = island, fill = species)) +
  geom_bar() +
  facet_wrap(vars(species)) +
  labs(title = "Island Penguin Species")

Count penguins of each sex within each species

# Tally penguins by species and sex; rows with a missing sex are counted
# under <NA>.
count(penguins, species, sex, .drop = FALSE)
## # A tibble: 8 × 3
##   species   sex        n
##   <fct>     <fct>  <int>
## 1 Adelie    female    73
## 2 Adelie    male      73
## 3 Adelie    <NA>       6
## 4 Chinstrap female    34
## 5 Chinstrap male      34
## 6 Gentoo    female    58
## 7 Gentoo    male      61
## 8 Gentoo    <NA>       5
# Penguins per sex, drawn as one panel per species.
ggplot(penguins, aes(x = sex, fill = species)) +
  geom_bar() +
  facet_wrap(vars(species)) +
  labs(title = "Penguin Species Sex")

Group by island and summarize mean bill length

# Mean bill length (mm) per island. Rows containing any missing value are
# dropped first, so mean() never receives an NA. The summarised column is
# bill_length_mm, matching the output column name.
penguins %>%
  group_by(island) %>%
  drop_na() %>%
  summarize(mean_bill_length_mm = mean(bill_length_mm))
## # A tibble: 3 × 2
##   island    mean_bill_length_mm
##   <fct>                   <dbl>
## 1 Biscoe                   45.2
## 2 Dream                    44.2
## 3 Torgersen                39.0

Group by island and find max bill length

# Longest bill (mm) recorded on each island, using complete rows only.
penguins %>%
  drop_na() %>%
  group_by(island) %>%
  summarise(max_bill_length_mm = max(bill_length_mm))
## # A tibble: 3 × 2
##   island    max_bill_length_mm
##   <fct>                  <dbl>
## 1 Biscoe                  59.6
## 2 Dream                   58  
## 3 Torgersen               46

Group by species and find max bill length

# Longest bill (mm) recorded for each species, using complete rows only.
penguins %>%
  drop_na() %>%
  group_by(species) %>%
  summarise(max_bill_length_mm = max(bill_length_mm))
## # A tibble: 3 × 2
##   species   max_bill_length_mm
##   <fct>                  <dbl>
## 1 Adelie                  46  
## 2 Chinstrap               58  
## 3 Gentoo                  59.6

Taking a closer look at body mass

# Heaviest body mass (g) recorded for each species, using complete rows only.
penguins %>%
  drop_na() %>%
  group_by(species) %>%
  summarise(max_body_mass_g = max(body_mass_g))
## # A tibble: 3 × 2
##   species   max_body_mass_g
##   <fct>               <int>
## 1 Adelie               4775
## 2 Chinstrap            4800
## 3 Gentoo               6300

Filter Gentoo penguins and arrange by body mass and by bill length in descending order

# Gentoo penguins only, heaviest first.
penguins %>%
  filter(species == "Gentoo") %>%
  arrange(desc(body_mass_g))
## # A tibble: 124 × 8
##    species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
##    <fct>   <fct>           <dbl>         <dbl>             <int>       <int>
##  1 Gentoo  Biscoe           49.2          15.2               221        6300
##  2 Gentoo  Biscoe           59.6          17                 230        6050
##  3 Gentoo  Biscoe           51.1          16.3               220        6000
##  4 Gentoo  Biscoe           48.8          16.2               222        6000
##  5 Gentoo  Biscoe           45.2          16.4               223        5950
##  6 Gentoo  Biscoe           49.8          15.9               229        5950
##  7 Gentoo  Biscoe           48.4          14.6               213        5850
##  8 Gentoo  Biscoe           49.3          15.7               217        5850
##  9 Gentoo  Biscoe           55.1          16                 230        5850
## 10 Gentoo  Biscoe           49.5          16.2               229        5800
## # … with 114 more rows, and 2 more variables: sex <fct>, year <int>
# Gentoo penguins only, longest bill first.
penguins %>%
  filter(species == "Gentoo") %>%
  arrange(desc(bill_length_mm))
## # A tibble: 124 × 8
##    species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
##    <fct>   <fct>           <dbl>         <dbl>             <int>       <int>
##  1 Gentoo  Biscoe           59.6          17                 230        6050
##  2 Gentoo  Biscoe           55.9          17                 228        5600
##  3 Gentoo  Biscoe           55.1          16                 230        5850
##  4 Gentoo  Biscoe           54.3          15.7               231        5650
##  5 Gentoo  Biscoe           53.4          15.8               219        5500
##  6 Gentoo  Biscoe           52.5          15.6               221        5450
##  7 Gentoo  Biscoe           52.2          17.1               228        5400
##  8 Gentoo  Biscoe           52.1          17                 230        5550
##  9 Gentoo  Biscoe           51.5          16.3               230        5500
## 10 Gentoo  Biscoe           51.3          14.2               218        5300
## # … with 114 more rows, and 2 more variables: sex <fct>, year <int>

Relationship between flipper length and body mass

# Body mass against flipper length: a smoothed trend per species (told apart
# by line type) drawn first, with the raw observations layered on top.
# x and y are shared by both layers, so they are mapped once in ggplot().
ggplot(penguins, aes(x = flipper_length_mm, y = body_mass_g)) +
  geom_smooth(aes(linetype = species)) +
  geom_point() +
  labs(title = "Flipper length vs. Body mass")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).

# Scatter of body mass against flipper length; species is encoded twice,
# by point colour and by point shape.
ggplot(penguins) +
  geom_point(
    aes(
      x = flipper_length_mm,
      y = body_mass_g,
      color = species,
      shape = species
    )
  )
## Warning: Removed 2 rows containing missing values (geom_point).

# Body mass against flipper length, one panel per species; colour repeats the
# species so each panel is easy to identify.
ggplot(
  penguins,
  aes(x = flipper_length_mm, y = body_mass_g, color = species)
) +
  geom_point() +
  facet_wrap(~species) +
  labs(title = "Relationship between Species, Flipper length and Body mass")
## Warning: Removed 2 rows containing missing values (geom_point).

# Body mass against flipper length in a grid of panels: one row per sex,
# one column per species.
ggplot(
  penguins,
  aes(x = flipper_length_mm, y = body_mass_g, color = species)
) +
  geom_point() +
  facet_grid(rows = vars(sex), cols = vars(species)) +
  labs(title = "Relationship between Sex, Flipper length and Body mass")
## Warning: Removed 2 rows containing missing values (geom_point).