# Install palmerpenguins only when it is missing (needed the first time only).
if (!requireNamespace("palmerpenguins", quietly = TRUE)) {
  install.packages("palmerpenguins")
}
library(palmerpenguins) # provides the pre-loaded `penguins` data set
library(tidyverse) # packages for data analysis
# Let's check if the data was installed properly
# Compact overview of the data set: one line per column.
penguins %>%
  glimpse()
## Rows: 344
## Columns: 8
## $ species <fct> Adelie, Adelie, Adelie, Adelie, Adelie, Adelie, Adel~
## $ island <fct> Torgersen, Torgersen, Torgersen, Torgersen, Torgerse~
## $ bill_length_mm <dbl> 39.1, 39.5, 40.3, NA, 36.7, 39.3, 38.9, 39.2, 34.1, ~
## $ bill_depth_mm <dbl> 18.7, 17.4, 18.0, NA, 19.3, 20.6, 17.8, 19.6, 18.1, ~
## $ flipper_length_mm <int> 181, 186, 195, NA, 193, 190, 181, 195, 193, 190, 186~
## $ body_mass_g <int> 3750, 3800, 3250, NA, 3450, 3650, 3625, 4675, 3475, ~
## $ sex <fct> male, female, female, NA, female, male, female, male~
## $ year <int> 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007~
# Preview the first six rows of the data set.
penguins %>%
  head(n = 6L)
## # A tibble: 6 x 8
## species island bill_length_mm bill_depth_mm flipper_length_~ body_mass_g sex
## <fct> <fct> <dbl> <dbl> <int> <int> <fct>
## 1 Adelie Torge~ 39.1 18.7 181 3750 male
## 2 Adelie Torge~ 39.5 17.4 186 3800 fema~
## 3 Adelie Torge~ 40.3 18 195 3250 fema~
## 4 Adelie Torge~ NA NA NA NA <NA>
## 5 Adelie Torge~ 36.7 19.3 193 3450 fema~
## 6 Adelie Torge~ 39.3 20.6 190 3650 male
## # ... with 1 more variable: year <int>
# Pull up the full table in the data viewer. The viewer is only useful in an
# interactive session, so skip it when the script is run or knitted
# non-interactively.
if (interactive()) {
  View(penguins)
}
# Sort the penguins from shortest to longest bill.
arrange(penguins, bill_length_mm)
## # A tibble: 344 x 8
## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## <fct> <fct> <dbl> <dbl> <int> <int>
## 1 Adelie Dream 32.1 15.5 188 3050
## 2 Adelie Dream 33.1 16.1 178 2900
## 3 Adelie Torgersen 33.5 19 190 3600
## 4 Adelie Dream 34 17.1 185 3400
## 5 Adelie Torgersen 34.1 18.1 193 3475
## 6 Adelie Torgersen 34.4 18.4 184 3325
## 7 Adelie Biscoe 34.5 18.1 187 2900
## 8 Adelie Torgersen 34.6 21.1 198 4400
## 9 Adelie Torgersen 34.6 17.2 189 3200
## 10 Adelie Biscoe 35 17.9 190 3450
## # ... with 334 more rows, and 2 more variables: sex <fct>, year <int>
# Sort the penguins from longest to shortest bill.
arrange(penguins, desc(bill_length_mm))
## # A tibble: 344 x 8
## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## <fct> <fct> <dbl> <dbl> <int> <int>
## 1 Gentoo Biscoe 59.6 17 230 6050
## 2 Chinstrap Dream 58 17.8 181 3700
## 3 Gentoo Biscoe 55.9 17 228 5600
## 4 Chinstrap Dream 55.8 19.8 207 4000
## 5 Gentoo Biscoe 55.1 16 230 5850
## 6 Gentoo Biscoe 54.3 15.7 231 5650
## 7 Chinstrap Dream 54.2 20.8 201 4300
## 8 Chinstrap Dream 53.5 19.9 205 4500
## 9 Gentoo Biscoe 53.4 15.8 219 5500
## 10 Chinstrap Dream 52.8 20 205 4550
## # ... with 334 more rows, and 2 more variables: sex <fct>, year <int>
# Longest bill recorded on each island.
# Only rows missing bill_length_mm are dropped: a bare drop_na() would also
# discard penguins whose bill was measured but whose sex (or any other
# column) is NA.
penguins %>%
  drop_na(bill_length_mm) %>%
  group_by(island) %>%
  summarize(max_bill_length_mm = max(bill_length_mm))
## # A tibble: 3 x 2
## island max_bill_length_mm
## <fct> <dbl>
## 1 Biscoe 59.6
## 2 Dream 58
## 3 Torgersen 46
# Longest bill recorded for each species on each island.
# Only rows missing bill_length_mm are dropped: a bare drop_na() would also
# discard penguins whose bill was measured but whose sex (or any other
# column) is NA.
penguins %>%
  drop_na(bill_length_mm) %>%
  group_by(species, island) %>%
  summarize(max_bill_length_mm = max(bill_length_mm))
## `summarise()` has grouped output by 'species'. You can override using the `.groups` argument.
## # A tibble: 5 x 3
## # Groups: species [3]
## species island max_bill_length_mm
## <fct> <fct> <dbl>
## 1 Adelie Biscoe 45.6
## 2 Adelie Dream 44.1
## 3 Adelie Torgersen 46
## 4 Chinstrap Dream 58
## 5 Gentoo Biscoe 59.6
# Scatter plot of body mass against flipper length.
penguins %>%
  ggplot(aes(x = flipper_length_mm, y = body_mass_g)) +
  geom_point()
## Warning: Removed 2 rows containing missing values (geom_point).
# Scatter plot of bill depth against bill length.
penguins %>%
  ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
  geom_point()
## Warning: Removed 2 rows containing missing values (geom_point).
# Scatter plot whose points show the different species by both color and shape.
penguins %>%
  ggplot(
    aes(
      x = flipper_length_mm,
      y = body_mass_g,
      color = species,
      shape = species
    )
  ) +
  geom_point()
## Warning: Removed 2 rows containing missing values (geom_point).
# Mapping alpha to species gives each species its own level of transparency.
penguins %>%
  ggplot(aes(x = flipper_length_mm, y = body_mass_g, alpha = species)) +
  geom_point()
## Warning: Using alpha for a discrete variable is not advised.
## Warning: Removed 2 rows containing missing values (geom_point).
# Setting the color outside of aes() applies one fixed color (purple) to every
# data point instead of mapping color to a variable.
penguins %>%
  ggplot(aes(x = flipper_length_mm, y = body_mass_g)) +
  geom_point(color = "purple")
## Warning: Removed 2 rows containing missing values (geom_point).
# The facet function is very helpful here: it splits the plot into a separate
# panel for each species.
penguins %>%
  ggplot(
    aes(
      x = flipper_length_mm,
      y = body_mass_g,
      color = species,
      shape = species
    )
  ) +
  geom_point() +
  facet_wrap(~ species)
## Warning: Removed 2 rows containing missing values (geom_point).
# Facet by sex (rows) and species (columns). The result is too busy: with so
# many panels it becomes harder to draw an analysis from the charts.
penguins %>%
  ggplot(aes(x = flipper_length_mm, y = body_mass_g, color = species)) +
  geom_point() +
  facet_grid(sex ~ species)
## Warning: Removed 2 rows containing missing values (geom_point).
# Draw a smoothed trend line of body mass against flipper length.
penguins %>%
  ggplot(aes(x = flipper_length_mm, y = body_mass_g)) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
# Combine the scatter plot and the smoothed trend line, then add a title,
# subtitle, caption and a text annotation.
# x and y are shared by both layers, so they are mapped once in ggplot();
# color is mapped only for the points, leaving a single overall trend line.
penguins %>%
  ggplot(aes(x = flipper_length_mm, y = body_mass_g)) +
  geom_point(aes(color = species)) +
  geom_smooth() +
  labs(
    title = "Palmer Penguins: Body Mass vs. Flipper Length",
    subtitle = "Sample of Three Penguin Species",
    caption = "Data Collected By Dr. Kristen Gorman"
  ) +
  annotate(
    "text",
    x = 220,
    y = 3500,
    label = "The Gentoos are the largest",
    color = "purple",
    fontface = "bold",
    size = 4.5,
    angle = 25
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
# Save the most recently displayed plot as a .png file.
# No width/height is given, so ggsave() uses the size of the current graphics
# device: enlarge the plot pane (e.g. collapse the Environment pane) first to
# capture the complete graph.
ggsave("Three Penguins Species.png")