Set up my environment

# Install palmerpenguins only when it is missing, so re-running this script
# does not reinstall the package every time.
if (!requireNamespace("palmerpenguins", quietly = TRUE)) {
  install.packages("palmerpenguins")
}

library(palmerpenguins) # provides the `penguins` data set
library(tidyverse) # data-analysis and plotting functions used below

View the data

Let’s check that the data set loaded properly

# Compact overview: one line per column showing its type and first values
dplyr::glimpse(penguins)
## Rows: 344
## Columns: 8
## $ species           <fct> Adelie, Adelie, Adelie, Adelie, Adelie, Adelie, Adel~
## $ island            <fct> Torgersen, Torgersen, Torgersen, Torgersen, Torgerse~
## $ bill_length_mm    <dbl> 39.1, 39.5, 40.3, NA, 36.7, 39.3, 38.9, 39.2, 34.1, ~
## $ bill_depth_mm     <dbl> 18.7, 17.4, 18.0, NA, 19.3, 20.6, 17.8, 19.6, 18.1, ~
## $ flipper_length_mm <int> 181, 186, 195, NA, 193, 190, 181, 195, 193, 190, 186~
## $ body_mass_g       <int> 3750, 3800, 3250, NA, 3450, 3650, 3625, 4675, 3475, ~
## $ sex               <fct> male, female, female, NA, female, male, female, male~
## $ year              <int> 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007~
# Print the first six rows of the data set
head(penguins, n = 6)
## # A tibble: 6 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_~ body_mass_g sex  
##   <fct>   <fct>           <dbl>         <dbl>            <int>       <int> <fct>
## 1 Adelie  Torge~           39.1          18.7              181        3750 male 
## 2 Adelie  Torge~           39.5          17.4              186        3800 fema~
## 3 Adelie  Torge~           40.3          18                195        3250 fema~
## 4 Adelie  Torge~           NA            NA                 NA          NA <NA> 
## 5 Adelie  Torge~           36.7          19.3              193        3450 fema~
## 6 Adelie  Torge~           39.3          20.6              190        3650 male 
## # ... with 1 more variable: year <int>
# Pull up the table in the data viewer. Only do so in an interactive session:
# when the script is run non-interactively or knitted there is no viewer to
# open.
if (interactive()) {
  View(penguins)
}

Pipe operator (ctrl+shift+m)

# Sort the penguins from shortest to longest bill (ascending is the default)
penguins %>% arrange(bill_length_mm)
## # A tibble: 344 x 8
##    species island    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
##    <fct>   <fct>              <dbl>         <dbl>             <int>       <int>
##  1 Adelie  Dream               32.1          15.5               188        3050
##  2 Adelie  Dream               33.1          16.1               178        2900
##  3 Adelie  Torgersen           33.5          19                 190        3600
##  4 Adelie  Dream               34            17.1               185        3400
##  5 Adelie  Torgersen           34.1          18.1               193        3475
##  6 Adelie  Torgersen           34.4          18.4               184        3325
##  7 Adelie  Biscoe              34.5          18.1               187        2900
##  8 Adelie  Torgersen           34.6          21.1               198        4400
##  9 Adelie  Torgersen           34.6          17.2               189        3200
## 10 Adelie  Biscoe              35            17.9               190        3450
## # ... with 334 more rows, and 2 more variables: sex <fct>, year <int>
# Sort the penguins from longest to shortest bill
penguins %>% arrange(desc(bill_length_mm))
## # A tibble: 344 x 8
##    species   island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
##    <fct>     <fct>           <dbl>         <dbl>             <int>       <int>
##  1 Gentoo    Biscoe           59.6          17                 230        6050
##  2 Chinstrap Dream            58            17.8               181        3700
##  3 Gentoo    Biscoe           55.9          17                 228        5600
##  4 Chinstrap Dream            55.8          19.8               207        4000
##  5 Gentoo    Biscoe           55.1          16                 230        5850
##  6 Gentoo    Biscoe           54.3          15.7               231        5650
##  7 Chinstrap Dream            54.2          20.8               201        4300
##  8 Chinstrap Dream            53.5          19.9               205        4500
##  9 Gentoo    Biscoe           53.4          15.8               219        5500
## 10 Chinstrap Dream            52.8          20                 205        4550
## # ... with 334 more rows, and 2 more variables: sex <fct>, year <int>
# Longest bill recorded on each island.
# Only rows with a missing bill length are dropped; a bare drop_na() would also
# discard penguins whose bill was measured but whose sex (or any other column)
# is missing.
penguins %>%
  drop_na(bill_length_mm) %>%
  group_by(island) %>%
  summarize(max_bill_length_mm = max(bill_length_mm))
## # A tibble: 3 x 2
##   island    max_bill_length_mm
##   <fct>                  <dbl>
## 1 Biscoe                  59.6
## 2 Dream                   58  
## 3 Torgersen               46
# Longest bill for every species/island combination.
# Only rows with a missing bill length are dropped; a bare drop_na() would also
# discard penguins whose bill was measured but whose sex (or any other column)
# is missing.
# The result is still grouped by species; call ungroup() before doing further
# work on it that should not be per-species.
penguins %>%
  drop_na(bill_length_mm) %>%
  group_by(species, island) %>%
  summarize(max_bill_length_mm = max(bill_length_mm))
## `summarise()` has grouped output by 'species'. You can override using the `.groups` argument.
## # A tibble: 5 x 3
## # Groups:   species [3]
##   species   island    max_bill_length_mm
##   <fct>     <fct>                  <dbl>
## 1 Adelie    Biscoe                  45.6
## 2 Adelie    Dream                   44.1
## 3 Adelie    Torgersen               46  
## 4 Chinstrap Dream                   58  
## 5 Gentoo    Biscoe                  59.6

Using ggplot2 and creating different charts

# Scatter plot of body mass against flipper length
ggplot(penguins, aes(x = flipper_length_mm, y = body_mass_g)) +
  geom_point()
## Warning: Removed 2 rows containing missing values (geom_point).

# Scatter plot of bill depth against bill length
ggplot(penguins, aes(x = bill_length_mm, y = bill_depth_mm)) +
  geom_point()
## Warning: Removed 2 rows containing missing values (geom_point).

# Map both color and shape to species so each species is drawn with its own
# color and point shape
ggplot(
  penguins,
  aes(
    x = flipper_length_mm,
    y = body_mass_g,
    color = species,
    shape = species
  )
) +
  geom_point()
## Warning: Removed 2 rows containing missing values (geom_point).

# Map alpha to species so each species gets its own level of transparency
ggplot(
  penguins,
  aes(x = flipper_length_mm, y = body_mass_g, alpha = species)
) +
  geom_point()
## Warning: Using alpha for a discrete variable is not advised.
## Warning: Removed 2 rows containing missing values (geom_point).

# Setting color outside aes() applies one fixed color to every point instead
# of mapping color to a variable
ggplot(penguins, aes(x = flipper_length_mm, y = body_mass_g)) +
  geom_point(color = "purple")
## Warning: Removed 2 rows containing missing values (geom_point).

Facet functions

# facet_wrap() splits the plot into one panel per species, which makes the
# groups easier to compare side by side
ggplot(
  penguins,
  aes(
    x = flipper_length_mm,
    y = body_mass_g,
    color = species,
    shape = species
  )
) +
  geom_point() +
  facet_wrap(vars(species))
## Warning: Removed 2 rows containing missing values (geom_point).

# facet_grid() draws one panel per sex (rows) and species (columns); with this
# many panels the chart gets busy and is harder to analyze
ggplot(
  penguins,
  aes(x = flipper_length_mm, y = body_mass_g, color = species)
) +
  geom_point() +
  facet_grid(rows = vars(sex), cols = vars(species))
## Warning: Removed 2 rows containing missing values (geom_point).

geom_smooth()

# Draw a smoothed trend line of body mass against flipper length
ggplot(penguins, aes(x = flipper_length_mm, y = body_mass_g)) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).

labs()

# Add a title, subtitle, caption and a text annotation to the plot.
# x and y are shared by both layers; color is mapped only for the points, so
# the smooth line is fitted across all species together.
ggplot(penguins, aes(x = flipper_length_mm, y = body_mass_g)) +
  geom_point(aes(color = species)) +
  geom_smooth() +
  labs(
    title = "Palmer Penguins: Body Mass vs. Flipper Length",
    subtitle = "Sample of Three Penguin Species",
    caption = "Data Collected By Dr. Kristen Gorman"
  ) +
  annotate(
    "text",
    x = 220,
    y = 3500,
    label = "The Gentoos are the largest",
    color = "purple",
    fontface = "bold",
    size = 4.5,
    angle = 25
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).

# Save the most recently displayed plot as a .png.
# The size is given explicitly: without width/height ggsave() uses the size of
# the current graphics device, so the saved image would otherwise depend on how
# large the plot pane happens to be.
ggsave("Three Penguins Species.png", width = 8, height = 5, units = "in")