library(tidyverse)
library(openintro)
library(palmerpenguins)
library(gridExtra)
Exercise 1
How many penguins of each species were observed? Show a graphic of
your choice.
# Load the penguins data explicitly from palmerpenguins. R >= 4.5 also ships a
# `penguins` dataset in the base `datasets` package whose columns are named
# differently (e.g. `bill_len` rather than `bill_length_mm`), so naming the
# package removes any ambiguity about which dataset the code below relies on.
data("penguins", package = "palmerpenguins")

# Preview the first six rows and the column types.
head(penguins)
## # A tibble: 6 × 8
## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## <fct> <fct> <dbl> <dbl> <int> <int>
## 1 Adelie Torgersen 39.1 18.7 181 3750
## 2 Adelie Torgersen 39.5 17.4 186 3800
## 3 Adelie Torgersen 40.3 18 195 3250
## 4 Adelie Torgersen NA NA NA NA
## 5 Adelie Torgersen 36.7 19.3 193 3450
## 6 Adelie Torgersen 39.3 20.6 190 3650
## # ℹ 2 more variables: sex <fct>, year <int>
# Bar chart of the number of penguins observed per species; each bar is
# filled with its own species colour.
ggplot(penguins, aes(x = species, fill = species)) +
  geom_bar() +
  theme_classic() +
  labs(title = "Palmer Penguins, by Species")

Exercise 2
What are the means and standard deviations of the penguins’
bill_length_mm and bill_depth_mm, by species? Show summary statistics
and the appropriate boxplots.
# Per-species mean and standard deviation of bill length and bill depth.
# na.rm = TRUE drops missing measurements from each statistic, whereas n
# counts every row in the group (rows with missing measurements included).
penguins %>%
  group_by(species) %>%
  summarise(
    mean.bl = mean(bill_length_mm, na.rm = TRUE),
    sd.bl = sd(bill_length_mm, na.rm = TRUE),
    mean.bd = mean(bill_depth_mm, na.rm = TRUE),
    sd.bd = sd(bill_depth_mm, na.rm = TRUE),
    n = n()
  ) %>%
  # Print as a plain data frame so that digits = 3 controls the rounding.
  as.data.frame() %>%
  print(digits = 3)
## species mean.bl sd.bl mean.bd sd.bd n
## 1 Adelie 38.8 2.66 18.3 1.217 152
## 2 Chinstrap 48.8 3.34 18.4 1.135 68
## 3 Gentoo 47.5 3.08 15.0 0.981 124
# Boxplots of bill length and bill depth by species (Exercise 2 asks for both
# variables). The jittered points overlay the individual observations:
# 1 penguin, 1 dot. Note that geom_jitter() draws every observation, so a point
# flagged as an outlier by geom_boxplot() (blue) also appears as a jittered dot.
ggplot(penguins, aes(x = species, y = bill_length_mm, fill = species)) +
  geom_boxplot(outlier.colour = "blue", outlier.size = 2) +
  geom_jitter(width = 0.2) +
  labs(title = "Palmer Penguins Bill Length ") +
  theme_light()

ggplot(penguins, aes(x = species, y = bill_depth_mm, fill = species)) +
  geom_boxplot(outlier.colour = "blue", outlier.size = 2) +
  geom_jitter(width = 0.2) +
  labs(title = "Palmer Penguins Bill Depth") +
  theme_light()

Exercise 3
It seems reasonable to think that bill_length_mm and bill_depth_mm
might be associated, as might flipper_length_mm and body_mass_g.
Calculate the correlation between these two pairs of variables, by
species.
# Within-species correlation (cor() default method) for the two variable
# pairs: bill length vs bill depth, and flipper length vs body mass. Rows with
# a missing value in either variable of a pair are left out of that pair.
penguins %>%
  group_by(species) %>%
  summarise(
    r.bill = cor(
      bill_length_mm, bill_depth_mm,
      use = "pairwise.complete.obs"
    ),
    r.flipper = cor(
      flipper_length_mm, body_mass_g,
      use = "pairwise.complete.obs"
    )
  ) %>%
  # Print as a plain data frame so that digits = 3 controls the rounding.
  as.data.frame() %>%
  print(digits = 3)
## species r.bill r.flipper
## 1 Adelie 0.391 0.468
## 2 Chinstrap 0.654 0.642
## 3 Gentoo 0.643 0.703
Exercise 4
Numerical correlations are nice, but nothing speaks like a picture.
Plot each pair of variables from the previous question as a scatterplot
to get a visual idea of the relationship. For the flipper_length_mm vs
body_mass_g scatterplot, use the facet_wrap() layer to plot each
species in a separate facet.
# Scatterplot of bill depth against bill length, coloured by species.
gbill_length <- ggplot(
  penguins,
  aes(x = bill_length_mm, y = bill_depth_mm, colour = species)
) +
  geom_point(size = 2.5) +
  theme_light() +
  labs(title = "Bill Length vs Bill Depth")

# Scatterplot of body mass against flipper length, coloured by species and
# split into one facet per species, laid out in a single row.
gflipper_length <- ggplot(
  penguins,
  aes(x = flipper_length_mm, y = body_mass_g, colour = species)
) +
  geom_point(size = 2.5) +
  facet_wrap(vars(species), nrow = 1) +
  theme_light() +
  labs(title = "Flipper Length vs Body Mass")

# Stack the two figures vertically (2 rows, 1 column) rather than side by side.
grid.arrange(
  grobs = list(gbill_length, gflipper_length),
  nrow = 2,
  ncol = 1
)

Exercise 5
Classification algorithms work on the idea of “clustering,” where
different groups are concentrated in different regions of a graph of
variable values. Is there any clear concentration of the 3 penguin
species in the bill_length_mm vs bill_depth_mm scatterplot? (No
additional computation needed here; just look and think.)
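Yes, there are clear concentrations of the 3 penguin species in the
bill_length_mm vs bill_depth_mm scatterplot. Consistent with the species
means from Exercise 2, Adelie penguins cluster at short bill lengths with
deep bills, Gentoo at long bill lengths with shallow bills, and Chinstrap
at long bill lengths with deep bills.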
LS0tDQp0aXRsZTogIk1lZXQgdGhlIFBhbG1lciBQZW5ndWlucyINCmF1dGhvcjogIlRpZW4gVm8iDQpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiDQpvdXRwdXQ6IG9wZW5pbnRybzo6bGFiX3JlcG9ydA0KLS0tDQoNCmBgYHtyIGxvYWQtcGFja2FnZXMsIG1lc3NhZ2U9RkFMU0V9DQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkob3BlbmludHJvKQ0KbGlicmFyeShwYWxtZXJwZW5ndWlucykNCmxpYnJhcnkoZ3JpZEV4dHJhKQ0KYGBgDQoNCiMjIyBFeGVyY2lzZSAxDQoNCiMjIyBIb3cgbWFueSBwZW5ndWlucyBvZiBlYWNoIHNwZWNpZXMgd2VyZSBvYnNlcnZlZD8gU2hvdyBhIGdyYXBoaWMgb2YgeW91ciBjaG9pY2UuDQoNCmBgYHtyIGNvZGUtY2h1bmstbGFiZWx9DQpkYXRhKCJwZW5ndWlucyIpDQpoZWFkKHBlbmd1aW5zKQ0KDQpnZ3Bsb3QoZGF0YT1wZW5ndWlucywgYWVzKHNwZWNpZXMpKSArDQogIGdlb21fYmFyKGFlcyhmaWxsPXNwZWNpZXMpKSArDQogIGxhYnModGl0bGU9IlBhbG1lciBQZW5ndWlucywgYnkgU3BlY2llcyIpICsNCiAgdGhlbWVfY2xhc3NpYygpDQpgYGANCg0KIyMjIEV4ZXJjaXNlIDIgDQoNCiMjIyBXaGF0IGFyZSB0aGUgbWVhbnMgYW5kIHN0YW5kYXJkIGRldmlhdGlvbnMgb2YgdGhlIHBlbmd1aW5z4oCZIGJpbGxfbGVuZ3RoX21tIGFuZCBiaWxsX2RlcHRoX21tLCBieSBzcGVjaWVzPyBTaG93IHN1bW1hcnkgc3RhdGlzdGljcyBhbmQgdGhlIGFwcHJvcHJpYXRlIGJveHBsb3RzLg0KDQpgYGB7ciBzdXBwcmVzc3dhcm5pbmcsIHdhcm5pbmc9RkFMU0UsICBtZXNzYWdlPUZBTFNFfQ0KcGVuZ3VpbnMgJT4lDQogIGdyb3VwX2J5KHNwZWNpZXMpICU+JQ0KICBzdW1tYXJpemUobWVhbi5ibD1tZWFuKGJpbGxfbGVuZ3RoX21tLCBuYS5ybT1UUlVFKSwgc2QuYmw9c2QoYmlsbF9sZW5ndGhfbW0sIG5hLnJtPVRSVUUpLCANCiAgICAgICAgICAgIG1lYW4uYmQ9bWVhbihiaWxsX2RlcHRoX21tLCBuYS5ybT1UUlVFKSwgc2QuYmQ9c2QoYmlsbF9kZXB0aF9tbSwgbmEucm09VFJVRSksDQogICAgICAgICAgICBuPW4oKSkgJT4lDQogIHByaW50LmRhdGEuZnJhbWUoLiwgZGlnaXRzPTMpDQoNCmdncGxvdChwZW5ndWlucywgYWVzKHg9c3BlY2llcywgeT1iaWxsX2xlbmd0aF9tbSwgZmlsbD1zcGVjaWVzKSkgKw0KICBnZW9tX2JveHBsb3Qob3V0bGllci5jb2xvdXIgPSAiYmx1ZSIsIG91dGxpZXIuc2l6ZT0yKSArDQogIGdlb21faml0dGVyKHdpZHRoPTAuMikgKyAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgMSBmbG93ZXIsIDEgZG90DQogIGxhYnModGl0bGU9IlBhbG1lciBQZW5ndWlucyBCaWxsIExlbmd0aCAiKSArDQogIHRoZW1lX2xpZ2h0KCkNCmBgYA0KDQojIyMgRXhlcmNpc2UgMw0KDQojIyMgSXQgc2VlbXMgcmVhc29uYWJsZSB0byB0aGluayB0aGF0IGJpbGxfbGVuZ3RoX21tIGFuZCBiaWxsX2RlcHRoX21tIG1pZ2h0IGJlIGFzc29jaWF0ZWQsIGFzIG1pZ2h0IGZsaXBwZXJfbGVuZ3RoX21t
IGFuZCBib2R5X21hc3NfZy4gQ2FsY3VsYXRlIHRoZSBjb3JyZWxhdGlvbiBiZXR3ZWVuIHRoZXNlIHR3byBwYWlycyBvZiB2YXJpYWJsZXMsIGJ5IHNwZWNpZXMuDQoNCmBgYHtyIGV4ZXJjaXNlMiwgd2FybmluZz1GQUxTRSwgbWVzc2FnZT1GQUxTRX0NCg0KcGVuZ3VpbnMgJT4lDQogIGdyb3VwX2J5KHNwZWNpZXMpICU+JQ0KICBzdW1tYXJpemUoci5iaWxsID1jb3IoYmlsbF9sZW5ndGhfbW0sIGJpbGxfZGVwdGhfbW0sIHVzZT0icGFpcndpc2UuY29tcGxldGUub2JzIiksIHIuZmxpcHBlcj1jb3IoZmxpcHBlcl9sZW5ndGhfbW0sIGJvZHlfbWFzc19nLCB1c2U9InBhaXJ3aXNlLmNvbXBsZXRlLm9icyIpKSAgJT4lDQogIHByaW50LmRhdGEuZnJhbWUoLiwgZGlnaXRzPTMpDQpgYGANCg0KIyMjIEV4ZXJjaXNlIDQNCg0KIyMjIE51bWVyaWNhbCBjb3JyZWxhdGlvbnMgYXJlIG5pY2UsIGJ1dCBub3RoaW5nIHNwZWFrcyBsaWtlIGEgcGljdHVyZS4gUGxvdCBlYWNoIHBhaXIgb2YgdmFyaWFibGVzIGZyb20gdGhlIHByZXZpb3VzIHF1ZXN0aW9uIGFzIGEgc2NhdHRlcnBsb3QgdG8gZ2V0IGEgdmlzdWFsIGlkZWEgb2YgdGhlIHJlbGF0aW9uc2hpcC4gRm9yIHRoZSBmbGlwcGVyX2xlbmd0aF9tbSB2cyBib2R5X21hc3NfZywgc2NhdHRlcnBsb3QsIHVzZSB0aGUgZmFjZXRfcmliYm9uKCkgbGF5ZXIgdG8gcGxvdCBlYWNoIFNwZWNpZXMgaW4gYSBzZXBhcmF0ZSBmYWNldC4NCg0KYGBge3IsIGV4ZXJjaXNlNCwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCg0KZ2JpbGxfbGVuZ3RoIDwtIGdncGxvdChwZW5ndWlucywgYWVzKHg9YmlsbF9sZW5ndGhfbW0sIHk9YmlsbF9kZXB0aF9tbSkgKSArDQogIGdlb21fcG9pbnQoIGFlcyhjb2xvdXI9c3BlY2llcyksIHNpemU9Mi41ICkgKw0KICBsYWJzKHRpdGxlPSJCaWxsIExlbmd0aCB2cyBCaWxsIERlcHRoIikgKyANCiAgdGhlbWVfbGlnaHQoKQ0KDQpnZmxpcHBlcl9sZW5ndGggPC0gZ2dwbG90KHBlbmd1aW5zLCBhZXMoeD1mbGlwcGVyX2xlbmd0aF9tbSwgeT1ib2R5X21hc3NfZykgKSArDQogIGdlb21fcG9pbnQoIGFlcyhjb2xvdXI9c3BlY2llcyksIHNpemU9Mi41ICkgKw0KICBmYWNldF93cmFwKHZhcnMoc3BlY2llcyksIG5yb3c9MSkgKw0KICBsYWJzKHRpdGxlPSJGbGlwcGVyIExlbmd0aCB2cyBCb2R5IE1hc3MiKSArIA0KICB0aGVtZV9saWdodCgpDQoNCmdyaWQuYXJyYW5nZShnYmlsbF9sZW5ndGgsIGdmbGlwcGVyX2xlbmd0aCwgbnJvdz0yLCBuY29sPTEpICMgc3RhY2tlZCwgbm90IHNpZGUtYnktc2lkZQ0KYGBgDQoNCiMjIyBFeGVyY2lzZSA1DQoNCiMjIyBDbGFzc2lmaWNhdGlvbiBhbGdvcml0aG1zIHdvcmsgb24gdGhlIGlkZWEgb2Yg4oCcY2x1c3RlcmluZyzigJ0gd2hlcmUgZGlmZmVyZW50IGdyb3VwcyBhcmUgY29uY2VudHJhdGVkIGluIGRpZmZlcmVudCByZWdpb25zIG9mIGEgZ3JhcGggb2YgdmFyaWFibGUgdmFsdWVzLiBJcyB0aGVyZSBhbnkgY2xlYXIgY29u
Y2VudHJhdGlvbiBvZiB0aGUgMyBwZW5ndWluIHNwZWNpZXMgaW4gdGhlIGJpbGxfbGVuZ3RoX21tIHZzIGJpbGxfZGVwdGhfbW0gc2NhdHRlcnBsb3Q/IChubyBhZGRpdGlvbmFsIGNvbXB1dGF0aW9uIG5lZWRlZCBoZXJlLCBqdXN0IGxvb2sgYW5kIHRoaW5rDQoNClllcywgdGhlcmUgYXJlIHNvbWUgY2xlYXIgY29uY2VudHJhdGlvbiBvZiB0aGUgMyBwZW5ndWluIHNwZWNpZXMgaW4gdGhlIGJpbGxfbGVuZ3RoX21tIHZzIGJpbGxfZGVwdGhfbW0gc2NhdHRlcnBsb3QuDQoNCg0KDQo=