library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.1
## Warning: package 'tibble' was built under R version 4.1.1
## Warning: package 'tidyr' was built under R version 4.1.1
## Warning: package 'readr' was built under R version 4.1.1
## Warning: package 'purrr' was built under R version 4.1.1
## Warning: package 'stringr' was built under R version 4.1.1
## Warning: package 'forcats' was built under R version 4.1.1
library(openintro)
## Warning: package 'openintro' was built under R version 4.1.1
## Warning: package 'airports' was built under R version 4.1.1
## Warning: package 'cherryblossom' was built under R version 4.1.1
## Warning: package 'usdata' was built under R version 4.1.1
# Print the arbuthnot tibble (columns: year, boys, girls) to preview the data.
arbuthnot
## # A tibble: 82 x 3
##     year  boys girls
##    <int> <int> <int>
##  1  1629  5218  4683
##  2  1630  4858  4457
##  3  1631  4422  4102
##  4  1632  4994  4590
##  5  1633  5158  4839
##  6  1634  5035  4820
##  7  1635  5106  4928
##  8  1636  4917  4605
##  9  1637  4703  4457
## 10  1638  5359  4952
## # ... with 72 more rows

Exercise 1

# Extract the girls column as a plain vector of yearly counts.
arbuthnot %>%
  pull(girls)
##  [1] 4683 4457 4102 4590 4839 4820 4928 4605 4457 4952 4784 5332 5200 4910 4617
## [16] 3997 3919 3395 3536 3181 2746 2722 2840 2908 2959 3179 3349 3382 3289 3013
## [31] 2781 3247 4107 4803 4881 5681 4858 4319 5322 5560 5829 5719 6061 6120 5822
## [46] 5738 5717 5847 6203 6033 6041 6299 6533 6744 7158 7127 7246 7119 7214 7101
## [61] 7167 7302 7392 7316 7483 6647 6713 7229 7767 7626 7452 7061 7514 7656 7683
## [76] 5738 7779 7417 7687 7623 7380 7288

Exercise 2

It appears that the overall trend of girls getting baptized increased over time. It also appears that there may have been some events that triggered a drastic drop in baptized girls from the years 1649 until 1659, and in the year 1704.

# Line chart of the number of girls recorded in each year.
arbuthnot %>%
  ggplot(aes(x = year, y = girls)) +
  geom_line()

Exercise 3

The first thing I notice is that there is not a difference in the birth rate for boys over time. I also notice that the birth rate for boys over time looks similar to a wave function. I would assume that the birth rate for girls over time would be a wave function that mirrors the birth rate of boys over time.

# Add the yearly total and the share of boys in one step; boy_ratio can refer
# to total because mutate() evaluates its arguments in order.
arbuthnot <- arbuthnot %>%
  mutate(
    total = boys + girls,
    boy_ratio = boys / total
  )

# Plot the proportion of boys over time.
arbuthnot %>%
  ggplot(aes(x = year, y = boy_ratio)) +
  geom_line()

Exercise 4

The years are from 1940 until 2002. The dimensions are 63 x 3. The variables are year, boys, girls.

# Load the present dataset from the openintro package, then print it.
data("present", package = "openintro")
present
## # A tibble: 63 x 3
##     year    boys   girls
##    <dbl>   <dbl>   <dbl>
##  1  1940 1211684 1148715
##  2  1941 1289734 1223693
##  3  1942 1444365 1364631
##  4  1943 1508959 1427901
##  5  1944 1435301 1359499
##  6  1945 1404587 1330869
##  7  1946 1691220 1597452
##  8  1947 1899876 1800064
##  9  1948 1813852 1721216
## 10  1949 1826352 1733177
## # ... with 53 more rows
# Per-column summary statistics (min, quartiles, mean, max) for present.
summary(present)
##       year           boys             girls        
##  Min.   :1940   Min.   :1211684   Min.   :1148715  
##  1st Qu.:1956   1st Qu.:1799857   1st Qu.:1711405  
##  Median :1971   Median :1924868   Median :1831679  
##  Mean   :1971   Mean   :1885600   Mean   :1793915  
##  3rd Qu.:1986   3rd Qu.:2058524   3rd Qu.:1965538  
##  Max.   :2002   Max.   :2186274   Max.   :2082052

Exercise 5

Clearly, present has much larger counts than Arbuthnot’s data. Looking at the minimum and maximum of boy births in a year, we can see how staggering the difference between the two datasets is. Arbuthnot saw a maximum of 8426 boys being born in one year, while present has a maximum of 2186274 boys being born in one year. The minimum in present is 1211684, in comparison to Arbuthnot’s, which has a minimum of 2890 boys being born in one year.

# Smallest and largest yearly boy counts in arbuthnot.
summarise(arbuthnot, min = min(boys), max = max(boys))
## # A tibble: 1 x 2
##     min   max
##   <int> <int>
## 1  2890  8426
# Smallest and largest yearly boy counts in present.
summarise(present, min = min(boys), max = max(boys))
## # A tibble: 1 x 2
##       min     max
##     <dbl>   <dbl>
## 1 1211684 2186274

Exercise 6

Actually, the present data shows the opposite trend. In fact, the proportion of boys being born is declining over time.

# Add the yearly total and the share of boys in one step; boy_ratio can refer
# to total because mutate() evaluates its arguments in order.
present <- present %>%
  mutate(
    total = boys + girls,
    boy_ratio = boys / total
  )

# Plot the proportion of boys over time.
present %>%
  ggplot(aes(x = year, y = boy_ratio)) +
  geom_line()

Exercise 7

1961 saw the most births.

# total was already added to present when computing boy_ratio for Exercise 6,
# so it is not recomputed here. Sort the years from most to fewest total
# births; the first row is the year with the most births.
present %>%
  arrange(desc(total))
## # A tibble: 63 x 5
##     year    boys   girls   total boy_ratio
##    <dbl>   <dbl>   <dbl>   <dbl>     <dbl>
##  1  1961 2186274 2082052 4268326     0.512
##  2  1960 2179708 2078142 4257850     0.512
##  3  1957 2179960 2074824 4254784     0.512
##  4  1959 2173638 2071158 4244796     0.512
##  5  1958 2152546 2051266 4203812     0.512
##  6  1962 2132466 2034896 4167362     0.512
##  7  1956 2133588 2029502 4163090     0.513
##  8  1990 2129495 2028717 4158212     0.512
##  9  1991 2101518 2009389 4110907     0.511
## 10  1963 2101632 1996388 4098020     0.513
## # ... with 53 more rows
LS0tDQp0aXRsZTogIkxhYiAxOiBJbnRybyB0byBSIg0KYXV0aG9yOiAiVHlsZXIgQmFrZXIiDQpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiDQpvdXRwdXQ6IG9wZW5pbnRybzo6bGFiX3JlcG9ydA0KLS0tDQoNCmBgYHtyIGxvYWQtcGFja2FnZXMsIG1lc3NhZ2U9RkFMU0V9DQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkob3BlbmludHJvKQ0KYXJidXRobm90DQpgYGANCg0KIyMjIEV4ZXJjaXNlIDENCg0KYGBge3Igdmlldy1naXJscy1jb3VudHN9DQphcmJ1dGhub3QkZ2lybHMNCmBgYA0KDQoNCiMjIyBFeGVyY2lzZSAyDQoNCkl0IGFwcGVhcnMgdGhhdCB0aGUgb3ZlcmFsbCB0cmVuZCBvZiBnaXJscyBnZXR0aW5nIGJhcHRpemVkIGluY3JlYXNlZCBvdmVyIHRpbWUuIEl0IGFsc28gYXBwZWFycyB0aGF0IHRoZXJlIG1heSBoYXZlIGJlZW4gc29tZSAgZXZlbnRzIHRoYXQgdHJpZ2dlcmVkIGEgZHJhc3RpYyBkcm9wIGluIGJhcHRpemVkIGdpcmxzIGZyb20gdGhlIHllYXJzIDE2NDkgdW50aWwgMTY1OSwgYW5kIHRoZSB5ZWFyIDE3MDMuDQoNCmBgYHtyIHRyZW5kLWdpcmxzfQ0KZ2dwbG90KGRhdGEgPSBhcmJ1dGhub3QsIGFlcyh4ID0geWVhciwgeSA9IGdpcmxzKSkgKyANCiAgZ2VvbV9saW5lKCkNCmBgYA0KDQoNCiMjIyBFeGVyY2lzZSAzDQoNClRoZSBmaXJzdCB0aGluZyBJIG5vdGljZSBpcyB0aGF0IHRoZXJlIGlzIG5vdCBhIGRpZmZlcmVuY2UgYmlydGggcmF0ZSBmb3IgYm95cyBvdmVyIHRpbWUuIEkgYWxzbyBub3RpY2UgdGhhdCB0aGUgYmlydGggcmF0ZSBmb3IgYm95cyBvdmVyIHRpbWUgbG9va3Mgc2ltaWxhciB0byBhIHdhdmUgZnVuY3Rpb24uIEkgd291bGQgYXNzdW1lIHRoYXQgdGhlIGJpcnRoIHJhdGUgZm9yIGdpcmxzIG92ZXIgdGltZSB3b3VsZCBiZSBhIHdhdmUgZnVuY3Rpb24gdGhhdCBtaXJyb3JzIHRoZSBiaXJ0aCByYXRlIG9mIGJveXMgb3ZlciB0aW1lLg0KDQpgYGB7ciBwbG90LXByb3AtYm95cy1hcmJ1dGhub3R9DQphcmJ1dGhub3QgPC0gYXJidXRobm90ICU+JQ0KICBtdXRhdGUodG90YWwgPSBib3lzICsgZ2lybHMpDQphcmJ1dGhub3QgPC0gYXJidXRobm90ICU+JQ0KICBtdXRhdGUoYm95X3JhdGlvID0gYm95cyAvIHRvdGFsKQ0KZ2dwbG90KGRhdGEgPSBhcmJ1dGhub3QsIGFlcyh4ID0geWVhciwgeT0gYm95X3JhdGlvKSkrZ2VvbV9saW5lKCkNCmBgYA0KDQoNCiMjIyBFeGVyY2lzZSA0DQoNClRoZSB5ZWFycyBhcmUgZnJvbSAxOTQwIHVudGlsIDIwMDIuIFRoZSBkaW1lbnNpb25zIGFyZSA2MyB4IDMuIFRoZSB2YXJpYWJsZXMgYXJlIHllYXIsIGJveXMsIGdpcmxzLg0KDQoNCmBgYHtyIGRpbS1wcmVzZW50fQ0KZGF0YSgncHJlc2VudCcsIHBhY2thZ2U9J29wZW5pbnRybycpDQpwcmVzZW50DQpzdW1tYXJ5KHByZXNlbnQpDQoNCmBgYA0KDQoNCiMjIyBFeGVyY2lzZSA1DQoNCkNsZWFybHksIHByZXNlbnQgaGFzIG11Y2ggbW9yZSBvYnNlcnZhdGlvbnMgdGhhbiBBcmJ1dGhub3Qncy4gTG9va2lu
ZyBhdCB0aGUgbWluaW11biBhbmQgbWF4aW11biBvZiBib3kgYmlydGhzIGluIGEgeWVhciwgd2UgY2FuIHNlZSBob3cgc3RhZ2VyaW5nIHRoZSBkaWZmZXJlbmNlIGJldHdlZW4gdGhlIHR3byBkYXRhc2V0cyBhcmUuIEFyYnV0aG5vdCBzYXcgYSBtYXhpbXVtIG9mIDg0MjYgYm95cyBiZWluZyBib3JuIGluIG9uZSB5ZWFyLCB3aGlsZSBwcmVzZW50IGhhcyBhIG1heGltdW0gb2YgMjE4NjI3NCBib3lzIGJlaW5nIGJvcm4gaW4gb25lIHllYXIuIFRoZSBtaW5pbXVtIGluIHByZXNlc250IGlzIDEyMTE2ODQsIGluIGNvbXBhcmlzb24gdG8gQXJidXJ0aG5vdCB3aGljaCBoYXMgYSBtaW5pbXVtIG9mIDI4OTAgYm95cyBiZWluZyBib3JuIGluIG9uZSB5ZWFyLg0KYGBge3IgY291bnQtY29tcGFyZX0NCmFyYnV0aG5vdCAlPiUNCiAgc3VtbWFyaXplKG1pbiA9IG1pbihib3lzKSwgbWF4ID0gbWF4KGJveXMpKQ0KcHJlc2VudCAlPiUNCiAgc3VtbWFyaXplKG1pbiA9IG1pbihib3lzKSwgbWF4ID0gbWF4KGJveXMpKQ0KYGBgDQoNCg0KIyMjIEV4ZXJjaXNlIDYNCg0KQWN0dWFsbHksIHRoZSBwcmVzZW50IGRhdGEgc2hvd3MgdGhlIG9wcG9zaXRlIHRyZW5kLiBJbiBmYWN0LCBib3lzIGJlaW5nIGJvcm4gaXMgZGVjbGluaW5nIG92ZXIgdGltZS4NCg0KYGBge3IgcGxvdC1wcm9wLWJveXMtcHJlc2VudH0NCnByZXNlbnQgPC0gcHJlc2VudCAlPiUNCiAgbXV0YXRlKHRvdGFsID0gYm95cyArIGdpcmxzKQ0KcHJlc2VudCA8LSBwcmVzZW50ICU+JQ0KICBtdXRhdGUoYm95X3JhdGlvID0gYm95cyAvIHRvdGFsKQ0KZ2dwbG90KGRhdGEgPSBwcmVzZW50LCBhZXMoeCA9IHllYXIsIHk9IGJveV9yYXRpbykpK2dlb21fbGluZSgpDQpgYGANCg0KDQojIyMgRXhlcmNpc2UgNw0KDQoxOTYxIHNhdyB0aGUgbW9zdCBiaXJ0aHMuDQoNCmBgYHtyIGZpbmQtbWF4LXRvdGFsfQ0KcHJlc2VudCA8LSBwcmVzZW50ICU+JQ0KICBtdXRhdGUodG90YWwgPSBib3lzICsgZ2lybHMpDQpwcmVzZW50ICU+JQ0KICBhcnJhbmdlKGRlc2ModG90YWwpKQ0KYGBgDQoNCg==