library(tidyverse)
library(openintro)
# Load the arbuthnot data set shipped with the openintro package.
data("arbuthnot", package = "openintro")

Exercise 1

# Reload the data set, then print the girls column as a plain vector.
data("arbuthnot", package = "openintro")
arbuthnot %>%
  pull(girls)
##  [1] 4683 4457 4102 4590 4839 4820 4928 4605 4457 4952 4784 5332 5200 4910 4617
## [16] 3997 3919 3395 3536 3181 2746 2722 2840 2908 2959 3179 3349 3382 3289 3013
## [31] 2781 3247 4107 4803 4881 5681 4858 4319 5322 5560 5829 5719 6061 6120 5822
## [46] 5738 5717 5847 6203 6033 6041 6299 6533 6744 7158 7127 7246 7119 7214 7101
## [61] 7167 7302 7392 7316 7483 6647 6713 7229 7767 7626 7452 7061 7514 7656 7683
## [76] 5738 7779 7417 7687 7623 7380 7288

Exercise 2

The number of girls trends upward overall, with a noticeable dip from about 1640 to 1660.

# Scatter plot of the girls count (y) against year (x).
ggplot(arbuthnot, aes(year, girls)) +
  geom_point()

Exercise 3

The proportion of boys is somewhat volatile, oscillating between roughly 51% and 53%.

# Add the yearly total and the share of boys in one mutate() call
# (boy_ratio can use total because mutate() evaluates columns in order),
# then draw that share over time as a line.
arbuthnot <- arbuthnot %>%
  mutate(
    total = boys + girls,
    boy_ratio = boys / total
  )
ggplot(arbuthnot, aes(year, boy_ratio)) +
  geom_line()

Exercise 4

The data cover the years 1940–2002, and I think the counts are the number of children born. The dimensions are 63 × 3, and the column names are “year”, “boys”, and “girls”.

# Load the present data set from openintro and print it; the tibble header
# reports its dimensions and column names.
data("present", package = "openintro")
present
## # A tibble: 63 × 3
##     year    boys   girls
##    <dbl>   <dbl>   <dbl>
##  1  1940 1211684 1148715
##  2  1941 1289734 1223693
##  3  1942 1444365 1364631
##  4  1943 1508959 1427901
##  5  1944 1435301 1359499
##  6  1945 1404587 1330869
##  7  1946 1691220 1597452
##  8  1947 1899876 1800064
##  9  1948 1813852 1721216
## 10  1949 1826352 1733177
## # … with 53 more rows
## # ℹ Use `print(n = ...)` to see more rows
# Smallest and largest yearly boys count in the present data.
summarise(present, min = min(boys), max = max(boys))
## # A tibble: 1 × 2
##       min     max
##     <dbl>   <dbl>
## 1 1211684 2186274

Exercise 5

The counts here are much larger: in Arbuthnot's data we were discussing thousands of children, whereas here we are discussing millions.

Insert any text here.

Exercise 6

His observation seems plausible here as well, since the proportion of boys never dips below 51%.

# Add the yearly total and the share of boys to the present data in one
# mutate() call (columns are evaluated in order, so boy_ratio can use total).
present <- present %>%
  mutate(
    total = boys + girls,
    boy_ratio = boys / total
  )

# Line plot of the share of boys over time.
ggplot(present, aes(year, boy_ratio)) +
  geom_line()

Exercise 7

1961 is the year with the highest total number of births.

# Sort years by total (boys + girls), largest first; the top row is the maximum.
arrange(present, desc(total))
## # A tibble: 63 × 5
##     year    boys   girls   total boy_ratio
##    <dbl>   <dbl>   <dbl>   <dbl>     <dbl>
##  1  1961 2186274 2082052 4268326     0.512
##  2  1960 2179708 2078142 4257850     0.512
##  3  1957 2179960 2074824 4254784     0.512
##  4  1959 2173638 2071158 4244796     0.512
##  5  1958 2152546 2051266 4203812     0.512
##  6  1962 2132466 2034896 4167362     0.512
##  7  1956 2133588 2029502 4163090     0.513
##  8  1990 2129495 2028717 4158212     0.512
##  9  1991 2101518 2009389 4110907     0.511
## 10  1963 2101632 1996388 4098020     0.513
## # … with 53 more rows
## # ℹ Use `print(n = ...)` to see more rows
LS0tDQp0aXRsZTogIkxhYiAxOiBJbnRybyB0byBSIg0KYXV0aG9yOiAiQXV0aG9yIE5hbWUiDQpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiDQpvdXRwdXQ6IG9wZW5pbnRybzo6bGFiX3JlcG9ydA0KLS0tDQoNCmBgYHtyIGxvYWQtcGFja2FnZXMsIG1lc3NhZ2U9RkFMU0V9DQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkob3BlbmludHJvKQ0KZGF0YSgnYXJidXRobm90JywgcGFja2FnZT0nb3BlbmludHJvJykNCmBgYA0KDQojIyMgRXhlcmNpc2UgMQ0KDQpgYGB7ciB2aWV3LWdpcmxzLWNvdW50c30NCmRhdGEoJ2FyYnV0aG5vdCcsIHBhY2thZ2U9J29wZW5pbnRybycpDQphcmJ1dGhub3QkZ2lybHMNCmBgYA0KDQoNCiMjIyBFeGVyY2lzZSAyDQpUcmVuZGxpbmUgaXMgaW5jcmVhc2luZyB3aXRoIGEgZGVjcmVhc2UgZnJvbSAxNjQwIHRvIDE2NjAgYnV0IG92ZXJhbGwgaW5jcmVhc2luZw0KYGBge3IgdHJlbmQtZ2lybHN9DQpnZ3Bsb3QoZGF0YSA9IGFyYnV0aG5vdCwgYWVzKHggPSB5ZWFyLCB5ID0gZ2lybHMpKSArIA0KICBnZW9tX3BvaW50KCkNCmBgYA0KDQoNCiMjIyBFeGVyY2lzZSAzDQpUaGUgcHJvcG9ydGlvbiBpcyBzb21ld2hhdCB2b2xhdGlsZSwgb3NjaWxsYXRpbmcgYmV0d2VlbiA1MyUgYW5kIDUxJQ0KYGBge3IgcGxvdC1wcm9wLWJveXMtYXJidXRobm90fQ0KYXJidXRobm90IDwtIGFyYnV0aG5vdCAlPiUNCiAgbXV0YXRlKHRvdGFsID0gYm95cyArIGdpcmxzKQ0KYXJidXRobm90IDwtIGFyYnV0aG5vdCAlPiUNCiAgbXV0YXRlKGJveV9yYXRpbyA9IGJveXMgLyB0b3RhbCkNCmdncGxvdChkYXRhID0gYXJidXRobm90LCBhZXMoeCA9IHllYXIsIHkgPSBib3lfcmF0aW8pKSArIA0KICBnZW9tX2xpbmUoKQ0KDQpgYGANCg0KDQojIyMgRXhlcmNpc2UgNA0KDQpZZWFycyAxOTQwIC0gMjAwMiwgSSB0aGluayBpdCBpcyB0aGUgbnVtYmVyIG9mIGtpZHMgYm9ybiANCmRpbWVuc2lvbnMgNjMgWCAzLCBjb2x1bW4gbmFtZXMgYXJlICJ5ZWFyIiwgImJveXMiIGFuZCAiZ2lybHMiDQoNCmBgYHtyIGRpbS1wcmVzZW50fQ0KZGF0YSgncHJlc2VudCcsIHBhY2thZ2U9J29wZW5pbnRybycpDQpwcmVzZW50DQpwcmVzZW50ICU+JSBzdW1tYXJpemUobWluID0gbWluKGJveXMpLCBtYXggPSBtYXgoYm95cykpDQoNCmBgYA0KDQoNCiMjIyBUaGUgY291bnRzIGhlcmUgYXJlIG11Y2ggbXVjaCBsYXJnZXIsIGJhY2sgdGhlbiB3ZSB3ZXJlIGRpc2N1c3NpbmcgdGhvdXNhbmRzIG9mIGNoaWxkcmVuLCBoZXJlIHdlIGFyZSBkaXNjdXNzaW5nIG1pbGxpb25zIA0KDQpJbnNlcnQgYW55IHRleHQgaGVyZS4NCg0KYGBge3IgY291bnQtY29tcGFyZX0NCg0KYGBgDQoNCg0KIyMjIEV4ZXJjaXNlIDYNCg0KaGlzIG9ic2VydmF0aW9uIHNlZW1zIHNvbWV3aGF0IGxpa2VseSwgYXMgdGhlIHByb3BvcnRpb24gbmV2ZXIgZGlwcyBiZWxvdyA1MSUsIHNvIGl0IGlzIHBvc3NpYmxlIA0KDQpgYGB7ciBwbG90LXByb3AtYm95cy1wcmVzZW50fQ0KcHJlc2Vu
dCA8LSBwcmVzZW50ICU+JQ0KICBtdXRhdGUodG90YWwgPSBib3lzICsgZ2lybHMpDQpwcmVzZW50IDwtIHByZXNlbnQgJT4lDQogIG11dGF0ZShib3lfcmF0aW8gPSBib3lzIC8gdG90YWwpDQoNCmdncGxvdChkYXRhID0gcHJlc2VudCwgYWVzKHggPSB5ZWFyLCB5ID0gYm95X3JhdGlvKSkgKyANCiAgZ2VvbV9saW5lKCkNCg0KYGBgDQoNCg0KIyMjIEV4ZXJjaXNlIDcNCg0KMTk2MSBpcyB0aGUgaGlnaGVzdCB0b3RhbCB5ZWFyDQoNCmBgYHtyIGZpbmQtbWF4LXRvdGFsfQ0KcHJlc2VudCAlPiUgYXJyYW5nZShkZXNjKHRvdGFsKSkNCg0KYGBgDQoNCg==