library(tidyverse)
library(openintro)
library(palmerpenguins)
library(gridExtra)

Exercise 1

How many penguins of each species were observed? Show a graphic of your choice.

# Load the Palmer penguins data and preview the first rows.
# The package is named explicitly: R >= 4.5 also ships a `penguins` dataset in
# the base `datasets` package with different column names, so an unqualified
# data("penguins") depends on the order in which packages are attached.
data("penguins", package = "palmerpenguins")
head(penguins)
## # A tibble: 6 × 8
##   species island    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
##   <fct>   <fct>              <dbl>         <dbl>             <int>       <int>
## 1 Adelie  Torgersen           39.1          18.7               181        3750
## 2 Adelie  Torgersen           39.5          17.4               186        3800
## 3 Adelie  Torgersen           40.3          18                 195        3250
## 4 Adelie  Torgersen           NA            NA                  NA          NA
## 5 Adelie  Torgersen           36.7          19.3               193        3450
## 6 Adelie  Torgersen           39.3          20.6               190        3650
## # ℹ 2 more variables: sex <fct>, year <int>
# Bar chart with one bar per species; bar height is the number of rows
# (penguins) recorded for that species.
penguins %>%
  ggplot(mapping = aes(x = species, fill = species)) +
  geom_bar() +
  labs(title = "Palmer Penguins, by Species") +
  theme_classic()

Exercise 2

What are the means and standard deviations of the penguins’ bill_length_mm and bill_depth_mm, by species? Show summary statistics and the appropriate boxplots.

# Per-species mean and standard deviation of bill length (".bl") and bill
# depth (".bd"), with missing measurements dropped. `n` counts every row in
# the species group, including rows whose measurements are missing.
# The result is converted to a plain data frame so that it prints as a
# data-frame table with 3 significant digits.
penguins %>%
  group_by(species) %>%
  summarise(
    mean.bl = mean(bill_length_mm, na.rm = TRUE),
    sd.bl = sd(bill_length_mm, na.rm = TRUE),
    mean.bd = mean(bill_depth_mm, na.rm = TRUE),
    sd.bd = sd(bill_depth_mm, na.rm = TRUE),
    n = n()
  ) %>%
  as.data.frame() %>%
  print(digits = 3)
##     species mean.bl sd.bl mean.bd sd.bd   n
## 1    Adelie    38.8  2.66    18.3 1.217 152
## 2 Chinstrap    48.8  3.34    18.4 1.135  68
## 3    Gentoo    47.5  3.08    15.0 0.981 124
# Boxplots of bill length and bill depth by species (Exercise 2 asks for both
# measurements). Every penguin is additionally drawn as one jittered dot, and
# boxplot outliers are highlighted in blue.
# height = 0 restricts the jitter to the horizontal direction so the dots stay
# at their measured values; by default geom_jitter() also jitters vertically.
gbox_length <- ggplot(
  penguins,
  aes(x = species, y = bill_length_mm, fill = species)
) +
  geom_boxplot(outlier.colour = "blue", outlier.size = 2) +
  geom_jitter(width = 0.2, height = 0) +
  labs(title = "Palmer Penguins Bill Length") +
  theme_light()

gbox_depth <- ggplot(
  penguins,
  aes(x = species, y = bill_depth_mm, fill = species)
) +
  geom_boxplot(outlier.colour = "blue", outlier.size = 2) +
  geom_jitter(width = 0.2, height = 0) +
  labs(title = "Palmer Penguins Bill Depth") +
  theme_light()

# Show the two boxplots side by side.
grid.arrange(gbox_length, gbox_depth, nrow = 1, ncol = 2)

Exercise 3

It seems reasonable to think that bill_length_mm and bill_depth_mm might be associated, as might flipper_length_mm and body_mass_g. Calculate the correlation between these two pairs of variables, by species.

# Per-species Pearson correlation (the default method of cor()) for the two
# variable pairs: bill length vs bill depth (`r.bill`) and flipper length vs
# body mass (`r.flipper`). Observations with a missing value in a pair are
# excluded from that pair's correlation.
penguins %>%
  group_by(species) %>%
  summarise(
    r.bill = cor(
      bill_length_mm, bill_depth_mm,
      use = "pairwise.complete.obs"
    ),
    r.flipper = cor(
      flipper_length_mm, body_mass_g,
      use = "pairwise.complete.obs"
    )
  ) %>%
  as.data.frame() %>%
  print(digits = 3)
##     species r.bill r.flipper
## 1    Adelie  0.391     0.468
## 2 Chinstrap  0.654     0.642
## 3    Gentoo  0.643     0.703

Exercise 4

Numerical correlations are nice, but nothing speaks like a picture. Plot each pair of variables from the previous question as a scatterplot to get a visual idea of the relationship. For the flipper_length_mm vs body_mass_g scatterplot, use the facet_wrap() layer to plot each species in a separate facet.

# Scatterplot of bill depth against bill length, coloured by species.
gbill_length <- penguins %>%
  ggplot(aes(x = bill_length_mm, y = bill_depth_mm, colour = species)) +
  geom_point(size = 2.5) +
  labs(title = "Bill Length vs Bill Depth") +
  theme_light()

# Scatterplot of body mass against flipper length, coloured by species and
# split into one facet per species, laid out in a single row.
gflipper_length <- penguins %>%
  ggplot(aes(x = flipper_length_mm, y = body_mass_g, colour = species)) +
  geom_point(size = 2.5) +
  facet_wrap(~species, nrow = 1) +
  labs(title = "Flipper Length vs Body Mass") +
  theme_light()

# Stack the two figures vertically (two rows, one column).
grid.arrange(gbill_length, gflipper_length, nrow = 2, ncol = 1)

Exercise 5

Classification algorithms work on the idea of “clustering,” where different groups are concentrated in different regions of a graph of variable values. Is there any clear concentration of the 3 penguin species in the bill_length_mm vs bill_depth_mm scatterplot? (No additional computation needed here, just look and think.)

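Yes, there is a clear concentration of each of the 3 penguin species in the bill_length_mm vs bill_depth_mm scatterplot. Consistent with the summary statistics from Exercise 2, Adelie penguins cluster at short bill lengths with deep bills, Chinstrap penguins at long bill lengths with deep bills, and Gentoo penguins at long bill lengths with shallow bills.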

LS0tDQp0aXRsZTogIk1lZXQgdGhlIFBhbG1lciBQZW5ndWlucyINCmF1dGhvcjogIlRpZW4gVm8iDQpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiDQpvdXRwdXQ6IG9wZW5pbnRybzo6bGFiX3JlcG9ydA0KLS0tDQoNCmBgYHtyIGxvYWQtcGFja2FnZXMsIG1lc3NhZ2U9RkFMU0V9DQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkob3BlbmludHJvKQ0KbGlicmFyeShwYWxtZXJwZW5ndWlucykNCmxpYnJhcnkoZ3JpZEV4dHJhKQ0KYGBgDQoNCiMjIyBFeGVyY2lzZSAxDQoNCiMjIyBIb3cgbWFueSBwZW5ndWlucyBvZiBlYWNoIHNwZWNpZXMgd2VyZSBvYnNlcnZlZD8gU2hvdyBhIGdyYXBoaWMgb2YgeW91ciBjaG9pY2UuDQoNCmBgYHtyIGNvZGUtY2h1bmstbGFiZWx9DQpkYXRhKCJwZW5ndWlucyIpDQpoZWFkKHBlbmd1aW5zKQ0KDQpnZ3Bsb3QoZGF0YT1wZW5ndWlucywgYWVzKHNwZWNpZXMpKSArDQogIGdlb21fYmFyKGFlcyhmaWxsPXNwZWNpZXMpKSArDQogIGxhYnModGl0bGU9IlBhbG1lciBQZW5ndWlucywgYnkgU3BlY2llcyIpICsNCiAgdGhlbWVfY2xhc3NpYygpDQpgYGANCg0KIyMjIEV4ZXJjaXNlIDIgDQoNCiMjIyBXaGF0IGFyZSB0aGUgbWVhbnMgYW5kIHN0YW5kYXJkIGRldmlhdGlvbnMgb2YgdGhlIHBlbmd1aW5z4oCZIGJpbGxfbGVuZ3RoX21tIGFuZCBiaWxsX2RlcHRoX21tLCBieSBzcGVjaWVzPyBTaG93IHN1bW1hcnkgc3RhdGlzdGljcyBhbmQgdGhlIGFwcHJvcHJpYXRlIGJveHBsb3RzLg0KDQpgYGB7ciBzdXBwcmVzc3dhcm5pbmcsIHdhcm5pbmc9RkFMU0UsICBtZXNzYWdlPUZBTFNFfQ0KcGVuZ3VpbnMgJT4lDQogIGdyb3VwX2J5KHNwZWNpZXMpICU+JQ0KICBzdW1tYXJpemUobWVhbi5ibD1tZWFuKGJpbGxfbGVuZ3RoX21tLCBuYS5ybT1UUlVFKSwgc2QuYmw9c2QoYmlsbF9sZW5ndGhfbW0sIG5hLnJtPVRSVUUpLCANCiAgICAgICAgICAgIG1lYW4uYmQ9bWVhbihiaWxsX2RlcHRoX21tLCBuYS5ybT1UUlVFKSwgc2QuYmQ9c2QoYmlsbF9kZXB0aF9tbSwgbmEucm09VFJVRSksDQogICAgICAgICAgICBuPW4oKSkgJT4lDQogIHByaW50LmRhdGEuZnJhbWUoLiwgZGlnaXRzPTMpDQoNCmdncGxvdChwZW5ndWlucywgYWVzKHg9c3BlY2llcywgeT1iaWxsX2xlbmd0aF9tbSwgZmlsbD1zcGVjaWVzKSkgKw0KICBnZW9tX2JveHBsb3Qob3V0bGllci5jb2xvdXIgPSAiYmx1ZSIsIG91dGxpZXIuc2l6ZT0yKSArDQogIGdlb21faml0dGVyKHdpZHRoPTAuMikgKyAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgMSBmbG93ZXIsIDEgZG90DQogIGxhYnModGl0bGU9IlBhbG1lciBQZW5ndWlucyBCaWxsIExlbmd0aCAiKSArDQogIHRoZW1lX2xpZ2h0KCkNCmBgYA0KDQojIyMgRXhlcmNpc2UgMw0KDQojIyMgSXQgc2VlbXMgcmVhc29uYWJsZSB0byB0aGluayB0aGF0IGJpbGxfbGVuZ3RoX21tIGFuZCBiaWxsX2RlcHRoX21tIG1pZ2h0IGJlIGFzc29jaWF0ZWQsIGFzIG1pZ2h0IGZsaXBwZXJfbGVuZ3RoX21t
IGFuZCBib2R5X21hc3NfZy4gQ2FsY3VsYXRlIHRoZSBjb3JyZWxhdGlvbiBiZXR3ZWVuIHRoZXNlIHR3byBwYWlycyBvZiB2YXJpYWJsZXMsIGJ5IHNwZWNpZXMuDQoNCmBgYHtyIGV4ZXJjaXNlMiwgd2FybmluZz1GQUxTRSwgbWVzc2FnZT1GQUxTRX0NCg0KcGVuZ3VpbnMgJT4lDQogIGdyb3VwX2J5KHNwZWNpZXMpICU+JQ0KICBzdW1tYXJpemUoci5iaWxsID1jb3IoYmlsbF9sZW5ndGhfbW0sIGJpbGxfZGVwdGhfbW0sIHVzZT0icGFpcndpc2UuY29tcGxldGUub2JzIiksIHIuZmxpcHBlcj1jb3IoZmxpcHBlcl9sZW5ndGhfbW0sIGJvZHlfbWFzc19nLCB1c2U9InBhaXJ3aXNlLmNvbXBsZXRlLm9icyIpKSAgJT4lDQogIHByaW50LmRhdGEuZnJhbWUoLiwgZGlnaXRzPTMpDQpgYGANCg0KIyMjIEV4ZXJjaXNlIDQNCg0KIyMjIE51bWVyaWNhbCBjb3JyZWxhdGlvbnMgYXJlIG5pY2UsIGJ1dCBub3RoaW5nIHNwZWFrcyBsaWtlIGEgcGljdHVyZS4gUGxvdCBlYWNoIHBhaXIgb2YgdmFyaWFibGVzIGZyb20gdGhlIHByZXZpb3VzIHF1ZXN0aW9uIGFzIGEgc2NhdHRlcnBsb3QgdG8gZ2V0IGEgdmlzdWFsIGlkZWEgb2YgdGhlIHJlbGF0aW9uc2hpcC4gRm9yIHRoZSBmbGlwcGVyX2xlbmd0aF9tbSB2cyBib2R5X21hc3NfZywgc2NhdHRlcnBsb3QsIHVzZSB0aGUgZmFjZXRfcmliYm9uKCkgbGF5ZXIgdG8gcGxvdCBlYWNoIFNwZWNpZXMgaW4gYSBzZXBhcmF0ZSBmYWNldC4NCg0KYGBge3IsIGV4ZXJjaXNlNCwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCg0KZ2JpbGxfbGVuZ3RoIDwtIGdncGxvdChwZW5ndWlucywgYWVzKHg9YmlsbF9sZW5ndGhfbW0sIHk9YmlsbF9kZXB0aF9tbSkgKSArDQogIGdlb21fcG9pbnQoIGFlcyhjb2xvdXI9c3BlY2llcyksIHNpemU9Mi41ICkgKw0KICBsYWJzKHRpdGxlPSJCaWxsIExlbmd0aCB2cyBCaWxsIERlcHRoIikgKyANCiAgdGhlbWVfbGlnaHQoKQ0KDQpnZmxpcHBlcl9sZW5ndGggPC0gZ2dwbG90KHBlbmd1aW5zLCBhZXMoeD1mbGlwcGVyX2xlbmd0aF9tbSwgeT1ib2R5X21hc3NfZykgKSArDQogIGdlb21fcG9pbnQoIGFlcyhjb2xvdXI9c3BlY2llcyksIHNpemU9Mi41ICkgKw0KICBmYWNldF93cmFwKHZhcnMoc3BlY2llcyksIG5yb3c9MSkgKw0KICBsYWJzKHRpdGxlPSJGbGlwcGVyIExlbmd0aCB2cyBCb2R5IE1hc3MiKSArIA0KICB0aGVtZV9saWdodCgpDQoNCmdyaWQuYXJyYW5nZShnYmlsbF9sZW5ndGgsIGdmbGlwcGVyX2xlbmd0aCwgbnJvdz0yLCBuY29sPTEpICMgc3RhY2tlZCwgbm90IHNpZGUtYnktc2lkZQ0KYGBgDQoNCiMjIyBFeGVyY2lzZSA1DQoNCiMjIyBDbGFzc2lmaWNhdGlvbiBhbGdvcml0aG1zIHdvcmsgb24gdGhlIGlkZWEgb2Yg4oCcY2x1c3RlcmluZyzigJ0gd2hlcmUgZGlmZmVyZW50IGdyb3VwcyBhcmUgY29uY2VudHJhdGVkIGluIGRpZmZlcmVudCByZWdpb25zIG9mIGEgZ3JhcGggb2YgdmFyaWFibGUgdmFsdWVzLiBJcyB0aGVyZSBhbnkgY2xlYXIgY29u
Y2VudHJhdGlvbiBvZiB0aGUgMyBwZW5ndWluIHNwZWNpZXMgaW4gdGhlIGJpbGxfbGVuZ3RoX21tIHZzIGJpbGxfZGVwdGhfbW0gc2NhdHRlcnBsb3Q/IChubyBhZGRpdGlvbmFsIGNvbXB1dGF0aW9uIG5lZWRlZCBoZXJlLCBqdXN0IGxvb2sgYW5kIHRoaW5rDQoNClllcywgdGhlcmUgYXJlIHNvbWUgY2xlYXIgY29uY2VudHJhdGlvbiBvZiB0aGUgMyBwZW5ndWluIHNwZWNpZXMgaW4gdGhlIGJpbGxfbGVuZ3RoX21tIHZzIGJpbGxfZGVwdGhfbW0gc2NhdHRlcnBsb3QuDQoNCg0KDQo=