library(tidyverse)
library(openintro)
library(dplyr)

# Load the arbuthnot data set shipped with the openintro package.
data("arbuthnot", package = "openintro")

Exercise 1

# Extract the girls column as a plain vector and print it.
arbuthnot[["girls"]]
##  [1] 4683 4457 4102 4590 4839 4820 4928 4605 4457 4952 4784 5332 5200 4910 4617
## [16] 3997 3919 3395 3536 3181 2746 2722 2840 2908 2959 3179 3349 3382 3289 3013
## [31] 2781 3247 4107 4803 4881 5681 4858 4319 5322 5560 5829 5719 6061 6120 5822
## [46] 5738 5717 5847 6203 6033 6041 6299 6533 6744 7158 7127 7246 7119 7214 7101
## [61] 7167 7302 7392 7316 7483 6647 6713 7229 7767 7626 7452 7061 7514 7656 7683
## [76] 5738 7779 7417 7687 7623 7380 7288

Exercise 2

The number of girls baptized tends to rise consistently over the years, with the exception of the 20 years between 1640 and 1660, where we observe a drop in baptisms.

# Scatterplot of the girls count (y) against year (x).
arbuthnot %>%
  ggplot(aes(x = year, y = girls)) +
  geom_point()

Exercise 3

Over the time period represented by the data, there are consistently more boys than girls born in each year. The percentage of boys born in a single year varies from just over 50% to over 53%.

# Add two derived columns in one step: the yearly total (boys + girls) and
# the share of boys in that total. mutate() evaluates its arguments in order,
# so boy_ratio can refer to the total column created just before it.
arbuthnot <- mutate(
  arbuthnot,
  total = boys + girls,
  boy_ratio = boys / total
)

# Scatterplot of the proportion of boys (y) against year (x).
arbuthnot %>%
  ggplot(aes(x = year, y = boy_ratio)) +
  geom_point()

Exercise 4

The data set includes the years 1940 to 2002, inclusive. The dimensions of the data set are 3 columns with 63 rows of data.

# Load the present data set shipped with the openintro package and display it.
data("present", package = "openintro")
print(present)
## # A tibble: 63 × 3
##     year    boys   girls
##    <dbl>   <dbl>   <dbl>
##  1  1940 1211684 1148715
##  2  1941 1289734 1223693
##  3  1942 1444365 1364631
##  4  1943 1508959 1427901
##  5  1944 1435301 1359499
##  6  1945 1404587 1330869
##  7  1946 1691220 1597452
##  8  1947 1899876 1800064
##  9  1948 1813852 1721216
## 10  1949 1826352 1733177
## # … with 53 more rows
# First and last year covered by present, as a one-row summary.
summarize(present, min = min(year), max = max(year))
## # A tibble: 1 × 2
##     min   max
##   <dbl> <dbl>
## 1  1940  2002

Exercise 5

The data set includes the years 1629 to 1710, inclusive. The dimensions of the original data set are 3 columns with 82 rows of data.

# Display arbuthnot (by this point it also carries total and boy_ratio).
print(arbuthnot)
## # A tibble: 82 × 5
##     year  boys girls total boy_ratio
##    <int> <int> <int> <int>     <dbl>
##  1  1629  5218  4683  9901     0.527
##  2  1630  4858  4457  9315     0.522
##  3  1631  4422  4102  8524     0.519
##  4  1632  4994  4590  9584     0.521
##  5  1633  5158  4839  9997     0.516
##  6  1634  5035  4820  9855     0.511
##  7  1635  5106  4928 10034     0.509
##  8  1636  4917  4605  9522     0.516
##  9  1637  4703  4457  9160     0.513
## 10  1638  5359  4952 10311     0.520
## # … with 72 more rows
# First and last year covered by arbuthnot, as a one-row summary.
summarize(arbuthnot, min = min(year), max = max(year))
## # A tibble: 1 × 2
##     min   max
##   <int> <int>
## 1  1629  1710

Exercise 6

The author's observations do hold up: there were more boys than girls born over the time period represented in the present data set.

# Add two derived columns in one step: the yearly total (boys + girls) and
# the share of boys in that total. mutate() evaluates its arguments in order,
# so boy_ratio can refer to the total column created just before it.
present <- mutate(
  present,
  total = boys + girls,
  boy_ratio = boys / total
)

# Scatterplot of the proportion of boys (y) against year (x).
present %>%
  ggplot(aes(x = year, y = boy_ratio)) +
  geom_point()

Exercise 7

In the data set, the most births were recorded in 1961.

# Sort rows from the largest to the smallest total, so the year with the
# most births appears in the first row.
arrange(present, desc(total))
## # A tibble: 63 × 5
##     year    boys   girls   total boy_ratio
##    <dbl>   <dbl>   <dbl>   <dbl>     <dbl>
##  1  1961 2186274 2082052 4268326     0.512
##  2  1960 2179708 2078142 4257850     0.512
##  3  1957 2179960 2074824 4254784     0.512
##  4  1959 2173638 2071158 4244796     0.512
##  5  1958 2152546 2051266 4203812     0.512
##  6  1962 2132466 2034896 4167362     0.512
##  7  1956 2133588 2029502 4163090     0.513
##  8  1990 2129495 2028717 4158212     0.512
##  9  1991 2101518 2009389 4110907     0.511
## 10  1963 2101632 1996388 4098020     0.513
## # … with 53 more rows
LS0tCnRpdGxlOiAiTGFiIDE6IEludHJvIHRvIFIiCmF1dGhvcjogIkRhdmlkIFNpbWJhbmR1bXdlIgpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiCm91dHB1dDogb3BlbmludHJvOjpsYWJfcmVwb3J0Ci0tLQoKYGBge3IgbG9hZC1wYWNrYWdlcywgbWVzc2FnZT1GQUxTRX0KbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkob3BlbmludHJvKQpsaWJyYXJ5KGRwbHlyKQoKZGF0YSgnYXJidXRobm90JywgcGFja2FnZT0nb3BlbmludHJvJykKCmBgYAoKIyMjIEV4ZXJjaXNlIDEKCmBgYHtyIHZpZXctZ2lybHMtY291bnRzfQphcmJ1dGhub3QkZ2lybHMKYGBgCgoKIyMjIEV4ZXJjaXNlIDIKClRoZSBudW1iZXIgb2YgZ2lybHMgYmFwdGl6ZWQgb3ZlciB0aGUgeWVhcnMgdHJlbmRzIHRvIHJpc2UgY29uc2lzdGVubHR5IHdpdGggdGhlIGV4Y2VwdGlvbiBvZiAyMCB5ZWFycyBiZXR3ZWVuIDE2NDAgYW5kIDE2NjAgd2hlcmUgd2Ugb2JzZXJ2ZSBhIGRyb3AgaW4gYmFwdGl6bXMuIAoKYGBge3IgdHJlbmQtZ2lybHN9CmdncGxvdChkYXRhID0gYXJidXRobm90LCBhZXMoeCA9IHllYXIsIHkgPSBnaXJscykpICsgCiAgZ2VvbV9wb2ludCgpCmBgYAoKCiMjIyBFeGVyY2lzZSAzCgpPdmVyIHRoZSB0aW1lIHBlcmlvZCByZXByZXNlbnRlZCBieSB0aGUgZGF0YSB0aGVyZSBhcmUgY29uc2lzdGVudGx5IG1vcmUgYm95cyB0aGFuIGdpcmxzIGJvcm4gaW4gYSBzaW5nbGUgeWVhci4gVGhlIHBlcmNlbnRhZ2Ugb2YgYm95cyBib3JuIGlzIGEgc2luZ2xlIHllYXIgdmFyaWVzIGJldHdlZW4ganVzdCBvdmVyIDUwJSB0byBvdmVyIDUzJS4KCmBgYHtyIHBsb3QtcHJvcC1ib3lzLWFyYnV0aG5vdH0KCmFyYnV0aG5vdCA8LSBhcmJ1dGhub3QgJT4lCiAgbXV0YXRlKHRvdGFsID0gYm95cyArIGdpcmxzKQoKYXJidXRobm90IDwtIGFyYnV0aG5vdCAlPiUKICBtdXRhdGUoYm95X3JhdGlvID0gYm95cyAvIHRvdGFsKQoKZ2dwbG90KGRhdGEgPSBhcmJ1dGhub3QsIGFlcyh4ID0geWVhciwgeSA9IGJveV9yYXRpbykpICsgCiAgZ2VvbV9wb2ludCgpCgpgYGAKCgojIyMgRXhlcmNpc2UgNAoKVGhlIGRhdGEgc2V0cyBpbmNsdWRlcyB0aGUgeWVhcnMgMTk0MCB0byAyMDAyIGluY2x1c2l2ZWx5LiBUaGUgZGltZW5zaW9ucyBvZiB0aGUgZGF0YXNldCBhcmUgMyBjb2x1bW5zIHdpdGggNjMgcm93cyBvZiBkYXRhLiAKCmBgYHtyIGRpbS1wcmVzZW50fQpkYXRhKCdwcmVzZW50JywgcGFja2FnZT0nb3BlbmludHJvJykKcHJlc2VudAoKcHJlc2VudCAlPiUgc3VtbWFyaXplKG1pbiA9IG1pbih5ZWFyKSwgbWF4ID0gbWF4KHllYXIpKQoKYGBgCgoKIyMjIEV4ZXJjaXNlIDUKClRoZSBkYXRhIHNldHMgaW5jbHVkZXMgdGhlIHllYXJzIDE2MjAgdG8gMTcxMCBpbmNsdXNpdmVseS4gVGhlIGRpbWVuc2lvbnMgb2YgdGhlIG9yaWdpbmFsIGRhdGEgc2V0IGFyZSAzIGNvbHVtbnMgd2l0aCA4MiByb3dzIG9mIGRhdGEuIAoKYGBge3IgY291bnQtY29tcGFyZX0KCmFyYnV0aG5vdAoKYXJi
dXRobm90ICU+JSBzdW1tYXJpemUobWluID0gbWluKHllYXIpLCBtYXggPSBtYXgoeWVhcikpCgoKYGBgCgoKIyMjIEV4ZXJjaXNlIDYKClRoZSBhdXRob3JzIG9ic2VydmF0aW9ucyBkbyBob2xkIHVwIHRoZXJlIHdlcmUgbW9yZSBib3lzIGJvcm4gaW4gb3ZlciB0aGUgdGltZSBwZXJpb2QgcmVwcmVzZW50ZWQgaW4gdGhlIHByZXNlbnQgZGF0YSBzZXQuIAoKYGBge3IgcGxvdC1wcm9wLWJveXMtcHJlc2VudH0KCnByZXNlbnQgPC0gcHJlc2VudCAlPiUKICBtdXRhdGUodG90YWwgPSBib3lzICsgZ2lybHMpCgpwcmVzZW50IDwtIHByZXNlbnQgJT4lCiAgbXV0YXRlKGJveV9yYXRpbyA9IGJveXMgLyB0b3RhbCkKCmdncGxvdChkYXRhID0gcHJlc2VudCwgYWVzKHggPSB5ZWFyLCB5ID0gYm95X3JhdGlvKSkgKyAKICBnZW9tX3BvaW50KCkKCmBgYAoKCiMjIyBFeGVyY2lzZSA3CgpJbiB0aGUgZGF0YSBzZXQgdGhlIG1vc3QgYmlydGhzIHdlcmUgcmVjb3JkZWQgaW4gMTk2MQoKYGBge3IgZmluZC1tYXgtdG90YWx9CgpwcmVzZW50ICU+JQogIGFycmFuZ2UoZGVzYyh0b3RhbCkpCgoKCmBgYAoK