library(tidyverse)
library(openintro)
# Print the arbuthnot tibble to inspect its columns (year, boys, girls).
arbuthnot
## # A tibble: 82 × 3
##     year  boys girls
##    <int> <int> <int>
##  1  1629  5218  4683
##  2  1630  4858  4457
##  3  1631  4422  4102
##  4  1632  4994  4590
##  5  1633  5158  4839
##  6  1634  5035  4820
##  7  1635  5106  4928
##  8  1636  4917  4605
##  9  1637  4703  4457
## 10  1638  5359  4952
## # ℹ 72 more rows

Exercise 1

These are the counts of baptized girls only, one value per year.

# Pull the girls column out of the tibble as a plain vector.
arbuthnot[["girls"]]
##  [1] 4683 4457 4102 4590 4839 4820 4928 4605 4457 4952 4784 5332 5200 4910 4617
## [16] 3997 3919 3395 3536 3181 2746 2722 2840 2908 2959 3179 3349 3382 3289 3013
## [31] 2781 3247 4107 4803 4881 5681 4858 4319 5322 5560 5829 5719 6061 6120 5822
## [46] 5738 5717 5847 6203 6033 6041 6299 6533 6744 7158 7127 7246 7119 7214 7101
## [61] 7167 7302 7392 7316 7483 6647 6713 7229 7767 7626 7452 7061 7514 7656 7683
## [76] 5738 7779 7417 7687 7623 7380 7288

Exercise 2

The plot shows that the number of baptized girls increased slightly before decreasing significantly. Then, after 1660, there was a large and then steady increase in baptized girls, with one sharp drop in the 1700s, before the count increased again.

# Scatterplot of the girls count against year.
arbuthnot %>%
  ggplot(aes(x = year, y = girls)) +
  geom_point()

# The same series drawn as a connected line.
arbuthnot %>%
  ggplot(aes(x = year, y = girls)) +
  geom_line()

Exercise 3

The trend for the ratio of baptized boys to total baptisms shows multiple repeated jumps between increasing and decreasing as the years continue.

# Element-wise sum of the boys and girls columns, one value per row.
with(arbuthnot, boys + girls)
##  [1]  9901  9315  8524  9584  9997  9855 10034  9522  9160 10311 10150 10850
## [13] 10670 10370  9410  8104  7966  7163  7332  6544  5825  5612  6071  6128
## [25]  6155  6620  7004  7050  6685  6170  5990  6971  8855 10019 10292 11722
## [37]  9972  8997 10938 11633 12335 11997 12510 12563 11895 11851 11775 12399
## [49] 12626 12601 12288 12847 13355 13653 14735 14702 14730 14694 14951 14588
## [61] 14771 15211 15054 14918 15159 13632 13976 14861 15829 16052 15363 14639
## [73] 15616 15687 15448 11851 16145 15369 16066 15862 15220 14928
# Store the combined boys + girls count as a `total` column, then plot it by year.
arbuthnot <- mutate(arbuthnot, total = boys + girls)
arbuthnot %>%
  ggplot(aes(x = year, y = total)) +
  geom_line()

# Derive both ratios in one step: boys relative to girls, and boys as a
# share of the existing `total` column.
arbuthnot <- arbuthnot %>%
  mutate(
    boy_to_girl_ratio = boys / girls,
    boy_ratio = boys / total
  )
# Line plot of the boys' share of the total across years.
ggplot(arbuthnot, aes(x = year, y = boy_ratio)) +
  geom_line()

# Logical flag per row: TRUE when the boys count exceeds the girls count.
arbuthnot <- mutate(arbuthnot, more_boys = boys > girls)

Exercise 4

The years included in the data are 1940 to 2002, the dimensions are 63 rows by 3 columns, and the variable names are “year”, “boys”, and “girls”.

# Print the present tibble to inspect its columns (year, boys, girls).
present
## # A tibble: 63 × 3
##     year    boys   girls
##    <dbl>   <dbl>   <dbl>
##  1  1940 1211684 1148715
##  2  1941 1289734 1223693
##  3  1942 1444365 1364631
##  4  1943 1508959 1427901
##  5  1944 1435301 1359499
##  6  1945 1404587 1330869
##  7  1946 1691220 1597452
##  8  1947 1899876 1800064
##  9  1948 1813852 1721216
## 10  1949 1826352 1733177
## # ℹ 53 more rows
# Row count followed by column count.
c(nrow(present), ncol(present))
## [1] 63  3
# Column names of the data set.
colnames(present)
## [1] "year"  "boys"  "girls"

Exercise 5

The plot of the “Present” data shows counts on a much larger scale than Arbuthnot’s. The two data sets are not of the same magnitude because of this massive difference in scale.

# Line plot of the girls count by year for the present data.
present %>%
  ggplot(aes(x = year, y = girls)) +
  geom_line()

Exercise 6

There seemed to be a greater proportion of boys being born in 1940, but it slowly declined until the mid-1960s, then increased until the mid-1970s, and then decreased again. It seems that Arbuthnot’s observation only applied to the 1940s and does not hold up as the years go by.

# Element-wise sum of the boys and girls columns, one value per row.
with(present, boys + girls)
##  [1] 2360399 2513427 2808996 2936860 2794800 2735456 3288672 3699940 3535068
## [10] 3559529 3554149 3750850 3846986 3902120 4017362 4047295 4163090 4254784
## [19] 4203812 4244796 4257850 4268326 4167362 4098020 4027490 3760358 3606274
## [28] 3520959 3501564 3600206 3731386 3555970 3258411 3136965 3159958 3144198
## [37] 3167788 3326632 3333279 3494398 3612258 3629238 3680537 3638933 3669141
## [46] 3760561 3756547 3809394 3909510 4040958 4158212 4110907 4065014 4000240
## [55] 3952767 3899589 3891494 3880894 3941553 3959417 4058814 4025933 4021726
# Add all three derived columns in a single mutate(); `boy_ratio` reuses the
# `total` column created earlier in the same call.
present <- mutate(
  present,
  total = boys + girls,
  boy_to_girl_ratio = boys / girls,
  boy_ratio = boys / total
)
# Line plot of the boys' share of the total across years.
present %>%
  ggplot(aes(x = year, y = boy_ratio)) +
  geom_line()

Exercise 7

The year 1961 saw the largest number of total births in the U.S.

# Sort rows from the largest `total` to the smallest so the top row is the maximum.
arrange(present, desc(total))
## # A tibble: 63 × 6
##     year    boys   girls   total boy_to_girl_ratio boy_ratio
##    <dbl>   <dbl>   <dbl>   <dbl>             <dbl>     <dbl>
##  1  1961 2186274 2082052 4268326              1.05     0.512
##  2  1960 2179708 2078142 4257850              1.05     0.512
##  3  1957 2179960 2074824 4254784              1.05     0.512
##  4  1959 2173638 2071158 4244796              1.05     0.512
##  5  1958 2152546 2051266 4203812              1.05     0.512
##  6  1962 2132466 2034896 4167362              1.05     0.512
##  7  1956 2133588 2029502 4163090              1.05     0.513
##  8  1990 2129495 2028717 4158212              1.05     0.512
##  9  1991 2101518 2009389 4110907              1.05     0.511
## 10  1963 2101632 1996388 4098020              1.05     0.513
## # ℹ 53 more rows
LS0tDQp0aXRsZTogIkxhYiAxOiBJbnRybyB0byBSIg0KYXV0aG9yOiAiUmlsZXkgWWF0ZXMiDQpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiDQpvdXRwdXQ6IG9wZW5pbnRybzo6bGFiX3JlcG9ydA0KLS0tDQoNCmBgYHtyIGxvYWQtcGFja2FnZXMsIG1lc3NhZ2U9RkFMU0V9DQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkob3BlbmludHJvKQ0KYXJidXRobm90DQpgYGANCiMjIyBFeGVyY2lzZSAxDQoNClRoZXNlIGFyZSB0aGUgY291bnRzIG9mIGp1c3QgYmFwdGl6ZWQgZ2lybHMuDQoNCmBgYHtyIHZpZXctZ2lybHMtY291bnRzfQ0KYXJidXRobm90JGdpcmxzDQpgYGANCg0KDQojIyMgRXhlcmNpc2UgMg0KDQpUaGUgdHJlbmQgc2VlbiBpbiB0aGUgcGxvdCBpcyB0aGF0IHRoZSBhbW91bnQgb2YgYmFwdGl6ZWQgZ2lybHMgaW5jcmVhc2VkIHNsaWdodGx5IGJlZm9yZSBkZWNyZWFzaW5nIHNpZ25pZmljYW50bHkuVGhlbiBhZnRlciAxNjYwLCB0aGVyZSB3YXMgYSBsYXJnZSBhbmQgdGhlbiBzdGVhZHkgaW5jcmVhc2Ugb2YgYmFwdGl6ZWQgZ2lybHMgYmVmb3JlIG9uZSBwbHVtbWV0IGluIHRoZSAxNzAwJ3MsIGJlZm9yZSBpbmNyZWFzaW5nIGFnYWluLg0KDQpgYGB7ciB0cmVuZC1naXJsc30NCmdncGxvdChkYXRhID0gYXJidXRobm90LCBhZXMoeCA9IHllYXIsIHkgPSBnaXJscykpICsgDQogIGdlb21fcG9pbnQoKQ0KZ2dwbG90KGRhdGEgPSBhcmJ1dGhub3QsIGFlcyh4ID0geWVhciwgeSA9IGdpcmxzKSkgKw0KICBnZW9tX2xpbmUoKQ0KYGBgDQoNCg0KIyMjIEV4ZXJjaXNlIDMNCg0KVGhlIHRyZW5kIGZvciByYXRpbyBvZiBiYXB0aXplZCBib3lzIHNlZXMgbXVsdHVwbGUgcmVwZWF0ZWQganVtcHMgYmV0d2VlbiBpbmNyZWFzaW5nIGFuZCBkZWNyZWFzaW5nIGFzIHRoZSB5ZWFycyBjb250aW51ZS4uDQoNCmBgYHtyIHBsb3QtcHJvcC1ib3lzLWFyYnV0aG5vdH0NCmFyYnV0aG5vdCRib3lzICsgYXJidXRobm90JGdpcmxzDQphcmJ1dGhub3QgPC0gYXJidXRobm90ICU+JQ0KICBtdXRhdGUodG90YWwgPSBib3lzICsgZ2lybHMpDQpnZ3Bsb3QoZGF0YSA9IGFyYnV0aG5vdCwgYWVzKHggPSB5ZWFyLCB5ID0gdG90YWwpKSArIA0KICBnZW9tX2xpbmUoKQ0KYXJidXRobm90IDwtIGFyYnV0aG5vdCAlPiUNCiAgbXV0YXRlKGJveV90b19naXJsX3JhdGlvID0gYm95cyAvIGdpcmxzKQ0KYXJidXRobm90IDwtIGFyYnV0aG5vdCAlPiUNCiAgbXV0YXRlKGJveV9yYXRpbyA9IGJveXMgLyB0b3RhbCkNCmdncGxvdChkYXRhID0gYXJidXRobm90LCBhZXMoeCA9IHllYXIsIHkgPSBib3lfcmF0aW8pKSArIA0KICBnZW9tX2xpbmUoKQ0KYXJidXRobm90IDwtIGFyYnV0aG5vdCAlPiUNCiAgbXV0YXRlKG1vcmVfYm95cyA9IGJveXMgPiBnaXJscykNCg0KYGBgDQoNCg0KIyMjIEV4ZXJjaXNlIDQNCg0KVGhlIHllYXJzIHByZXNlbnQgYXJlIDE5NDAgdG8gMjAwMiwgdGhlIGRpbWVuc2lvbnMgYXJlIDYzIHJvd3MgYnkgMyBjb2x1bW5zLCBhbmQgdGhl
IHZhcmlhYmxlIG5hbWVzIGFyZSAieWVhciIsICJib3lzLCBhbmQgImdpcmxzIi4NCg0KYGBge3IgZGltLXByZXNlbnR9DQpwcmVzZW50DQpkaW0ocHJlc2VudCkNCm5hbWVzKHByZXNlbnQpDQpgYGANCg0KDQojIyMgRXhlcmNpc2UgNQ0KDQpUaGUgcGxvdCBvZiB0aGUgIlByZXNlbnQiIGRhdGEgaXMgb2YgYSBtdWNoIGxhcmdlciBzY2FsZSBhbmQgY291bnQgdGhhbiB0aGF0IG9mIEFidXRobm90J3MuIFRoZXkgYXJlIG5vdCBvZiB0aGUgc2FtZSBtYWduaXR1ZGUgZHVlIHRvIHRoZSBtYXNzaXZlIGRpZmZlcmVuY2UgaW4gc2NhbGUuDQoNCmBgYHtyIGNvdW50LWNvbXBhcmV9DQpnZ3Bsb3QoZGF0YSA9IHByZXNlbnQsIGFlcyh4ID0geWVhciwgeSA9IGdpcmxzKSkgKw0KICBnZW9tX2xpbmUoKQ0KYGBgDQoNCg0KIyMjIEV4ZXJjaXNlIDYNCg0KVGhlcmUgc2VlbWVkIHRvIGJlIGEgZ3JlYXRlciBwcm9wb3J0aW9uIG9mIGJveXMgYmVpbmcgYm9ybiBpbiAxOTQwIGJ1dCBzbG93bHkgYmVnaW4gdG8gZGVjbGluZSBkdXJpbmcgdGhlIG1pZC0xOTYwcyBidXQgaW5jcmVhc2VkIHVudGlsIG1pZC0xOTcwcyBidXQgdGhlbiBkZWNyZWFzZWQgYWdhaW4uIEl0IHNlZW1zIHRoYXQgQXJidXRobm904oCZcyBvYnNlcnZhdGlvbiBvbmx5IGFwcGxpZWQgdG8gdGhlIDE5NDDigJlzIGJ1dCBpcyBub3QgaGVsZCB1cCBhcyB0aGUgeWVhcnMgZ28gYnkuDQoNCmBgYHtyIHBsb3QtcHJvcC1ib3lzLXByZXNlbnR9DQpwcmVzZW50JGJveXMgKyBwcmVzZW50JGdpcmxzDQpwcmVzZW50IDwtIHByZXNlbnQgJT4lDQogIG11dGF0ZSh0b3RhbCA9IGJveXMgKyBnaXJscykNCnByZXNlbnQgPC0gcHJlc2VudCAlPiUNCiAgbXV0YXRlKGJveV90b19naXJsX3JhdGlvID0gYm95cyAvIGdpcmxzKQ0KcHJlc2VudCA8LSBwcmVzZW50ICU+JQ0KICBtdXRhdGUoYm95X3JhdGlvID0gYm95cyAvIHRvdGFsKQ0KZ2dwbG90KGRhdGEgPSBwcmVzZW50LCBhZXMoeCA9IHllYXIsIHkgPSBib3lfcmF0aW8pKSArIA0KICBnZW9tX2xpbmUoKQ0KYGBgDQoNCg0KIyMjIEV4ZXJjaXNlIDcNCg0KVGhlIHllYXIgMTk2MSBzYXcgdGhlIG1vc3QgYW1vdW50IG9mIHRvdGFsIGJpcnRocyBpbiB0aGUgVS5TLg0KDQpgYGB7ciBmaW5kLW1heC10b3RhbH0NCnByZXNlbnQgJT4lDQogIGFycmFuZ2UoZGVzYyh0b3RhbCkpDQpgYGANCg0K