library(tidyverse)
library(openintro)
library(ggplot2)
library(dplyr)

Exercise 1

arbuthnot$girls
##  [1] 4683 4457 4102 4590 4839 4820 4928 4605 4457 4952 4784 5332 5200 4910 4617
## [16] 3997 3919 3395 3536 3181 2746 2722 2840 2908 2959 3179 3349 3382 3289 3013
## [31] 2781 3247 4107 4803 4881 5681 4858 4319 5322 5560 5829 5719 6061 6120 5822
## [46] 5738 5717 5847 6203 6033 6041 6299 6533 6744 7158 7127 7246 7119 7214 7101
## [61] 7167 7302 7392 7316 7483 6647 6713 7229 7767 7626 7452 7061 7514 7656 7683
## [76] 5738 7779 7417 7687 7623 7380 7288
data('arbuthnot', package='openintro')
arbuthnot
## # A tibble: 82 × 3
##     year  boys girls
##    <int> <int> <int>
##  1  1629  5218  4683
##  2  1630  4858  4457
##  3  1631  4422  4102
##  4  1632  4994  4590
##  5  1633  5158  4839
##  6  1634  5035  4820
##  7  1635  5106  4928
##  8  1636  4917  4605
##  9  1637  4703  4457
## 10  1638  5359  4952
## # ℹ 72 more rows
glimpse(arbuthnot)
## Rows: 82
## Columns: 3
## $ year  <int> 1629, 1630, 1631, 1632, 1633, 1634, 1635, 1636, 1637, 1638, 1639…
## $ boys  <int> 5218, 4858, 4422, 4994, 5158, 5035, 5106, 4917, 4703, 5359, 5366…
## $ girls <int> 4683, 4457, 4102, 4590, 4839, 4820, 4928, 4605, 4457, 4952, 4784…
arbuthnot$girls
##  [1] 4683 4457 4102 4590 4839 4820 4928 4605 4457 4952 4784 5332 5200 4910 4617
## [16] 3997 3919 3395 3536 3181 2746 2722 2840 2908 2959 3179 3349 3382 3289 3013
## [31] 2781 3247 4107 4803 4881 5681 4858 4319 5322 5560 5829 5719 6061 6120 5822
## [46] 5738 5717 5847 6203 6033 6041 6299 6533 6744 7158 7127 7246 7119 7214 7101
## [61] 7167 7302 7392 7316 7483 6647 6713 7229 7767 7626 7452 7061 7514 7656 7683
## [76] 5738 7779 7417 7687 7623 7380 7288

Exercise 2

There has decrease number in year 1640-1660. And huge increase number of girls baptized during 1680-1700.

# Insert code for Exercise 2 here
ggplot(data = arbuthnot, aes(x = year, y = girls,
                             colour= girls)) + 
  geom_point(size=3, alpha=0.5)+
  geom_smooth(method = lm,
              se=F)+
  labs(title="Number of Girls Baptized Over The Years",
     x="Year",
     y="Number of Girls")+
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

Exercise 3

Boys baptized rate decrease.

# Insert code for Exercise 3 here
arbuthnot <- arbuthnot %>%
  mutate(boy_to_girl_ratio = boys / girls)
arbuthnot <- arbuthnot %>%
  mutate(total = boys + girls)
arbuthnot <- arbuthnot %>%
  mutate(boy_ratio = boys / total)

arbuthnot <- arbuthnot %>% 
  mutate(boy_ratio=boys/total)
  ggplot(arbuthnot, aes(x= year,
                      y= boy_ratio,
                      colour = boy_ratio))+
  geom_point(size= 3, alpha= 0.5)+
  geom_smooth(method= lm,
              se=F)+
  labs(title="Proportion of Boys Baptized Over Time",
       x="Year",
       y="Proportion of Boys")+
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

Exercise 4

The years included in this data set is year 1940- 2002. The data frame is num. The variable’s name are year, boys, and girls.

# Insert code for Exercise 4 here
data('present', package='openintro')
str(present)
## tibble [63 × 3] (S3: tbl_df/tbl/data.frame)
##  $ year : num [1:63] 1940 1941 1942 1943 1944 ...
##  $ boys : num [1:63] 1211684 1289734 1444365 1508959 1435301 ...
##  $ girls: num [1:63] 1148715 1223693 1364631 1427901 1359499 ...
summary(present)
##       year           boys             girls        
##  Min.   :1940   Min.   :1211684   Min.   :1148715  
##  1st Qu.:1956   1st Qu.:1799857   1st Qu.:1711405  
##  Median :1971   Median :1924868   Median :1831679  
##  Mean   :1971   Mean   :1885600   Mean   :1793915  
##  3rd Qu.:1986   3rd Qu.:2058524   3rd Qu.:1965538  
##  Max.   :2002   Max.   :2186274   Max.   :2082052

Exercise 5

The number of birth records is more then number of baptism.

# Insert code for Exercise 5 here
glimpse(present)
## Rows: 63
## Columns: 3
## $ year  <dbl> 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950…
## $ boys  <dbl> 1211684, 1289734, 1444365, 1508959, 1435301, 1404587, 1691220, 1…
## $ girls <dbl> 1148715, 1223693, 1364631, 1427901, 1359499, 1330869, 1597452, 1…
glimpse(arbuthnot)
## Rows: 82
## Columns: 6
## $ year              <int> 1629, 1630, 1631, 1632, 1633, 1634, 1635, 1636, 1637…
## $ boys              <int> 5218, 4858, 4422, 4994, 5158, 5035, 5106, 4917, 4703…
## $ girls             <int> 4683, 4457, 4102, 4590, 4839, 4820, 4928, 4605, 4457…
## $ boy_to_girl_ratio <dbl> 1.114243, 1.089971, 1.078011, 1.088017, 1.065923, 1.…
## $ total             <int> 9901, 9315, 8524, 9584, 9997, 9855, 10034, 9522, 916…
## $ boy_ratio         <dbl> 0.5270175, 0.5215244, 0.5187705, 0.5210768, 0.515954…

Exercise 6

Boy’s birth decrease over the time. The Arbuthnot’s observation about boys being born in greater proportion than girls does not hold up in the U.S.

# Insert code for Exercise 6 here
present<- present %>%
  mutate(boy_to_girl_ratio = boys / girls)
present <- present %>%
  mutate(total = boys + girls)
present <- present %>%
  mutate(boy_ratio = boys / total)

present <- present %>% 
  mutate(boy_ratio=boys/total)
  ggplot(present, aes(x= year,
                      y= boy_ratio,
                      colour = boy_ratio))+
  geom_point(size= 3, alpha= 0.5)+
  geom_smooth(method= lm,
              se=F)+
  labs(title="Proportion of Boys Over Time",
       x="Year",
       y="Proportion of Boys")+
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

Exercise 7

Year 1961 has most total number of births in the U.S.

# Insert code for Exercise 7 here
present %>%
  arrange(desc(total))
## # A tibble: 63 × 6
##     year    boys   girls boy_to_girl_ratio   total boy_ratio
##    <dbl>   <dbl>   <dbl>             <dbl>   <dbl>     <dbl>
##  1  1961 2186274 2082052              1.05 4268326     0.512
##  2  1960 2179708 2078142              1.05 4257850     0.512
##  3  1957 2179960 2074824              1.05 4254784     0.512
##  4  1959 2173638 2071158              1.05 4244796     0.512
##  5  1958 2152546 2051266              1.05 4203812     0.512
##  6  1962 2132466 2034896              1.05 4167362     0.512
##  7  1956 2133588 2029502              1.05 4163090     0.513
##  8  1990 2129495 2028717              1.05 4158212     0.512
##  9  1991 2101518 2009389              1.05 4110907     0.511
## 10  1963 2101632 1996388              1.05 4098020     0.513
## # ℹ 53 more rows
LS0tDQp0aXRsZTogIkxhYiAxOiBJbnRybyB0byBSIg0KYXV0aG9yOiAiSmlheGluIFpoZW5nIg0KZGF0ZTogIjA5LzA2LzIwMjQiDQpvdXRwdXQ6IG9wZW5pbnRybzo6bGFiX3JlcG9ydA0KLS0tDQoNCmBgYHtyIGxvYWQtcGFja2FnZXMsIG1lc3NhZ2U9RkFMU0V9DQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkob3BlbmludHJvKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShkcGx5cikNCmBgYA0KDQojIyMgRXhlcmNpc2UgMQ0KDQpgYGB7ciB2aWV3LWdpcmxzLWNvdW50c30NCmFyYnV0aG5vdCRnaXJscw0KZGF0YSgnYXJidXRobm90JywgcGFja2FnZT0nb3BlbmludHJvJykNCmFyYnV0aG5vdA0KDQpnbGltcHNlKGFyYnV0aG5vdCkNCmFyYnV0aG5vdCRnaXJscw0KDQpgYGANCg0KDQojIyMgRXhlcmNpc2UgMg0KDQpUaGVyZSBoYXMgZGVjcmVhc2UgbnVtYmVyIGluIHllYXIgMTY0MC0xNjYwLiANCkFuZCBodWdlIGluY3JlYXNlIG51bWJlciBvZiBnaXJscyBiYXB0aXplZCBkdXJpbmcgMTY4MC0xNzAwLg0KDQpgYGB7ciB0cmVuZC1naXJsc30NCiMgSW5zZXJ0IGNvZGUgZm9yIEV4ZXJjaXNlIDIgaGVyZQ0KZ2dwbG90KGRhdGEgPSBhcmJ1dGhub3QsIGFlcyh4ID0geWVhciwgeSA9IGdpcmxzLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjb2xvdXI9IGdpcmxzKSkgKyANCiAgZ2VvbV9wb2ludChzaXplPTMsIGFscGhhPTAuNSkrDQogIGdlb21fc21vb3RoKG1ldGhvZCA9IGxtLA0KICAgICAgICAgICAgICBzZT1GKSsNCiAgbGFicyh0aXRsZT0iTnVtYmVyIG9mIEdpcmxzIEJhcHRpemVkIE92ZXIgVGhlIFllYXJzIiwNCiAgICAgeD0iWWVhciIsDQogICAgIHk9Ik51bWJlciBvZiBHaXJscyIpKw0KICB0aGVtZV9idygpDQpgYGANCg0KDQojIyMgRXhlcmNpc2UgMw0KDQpCb3lzIGJhcHRpemVkIHJhdGUgZGVjcmVhc2UuDQoNCmBgYHtyIHBsb3QtcHJvcC1ib3lzLWFyYnV0aG5vdH0NCiMgSW5zZXJ0IGNvZGUgZm9yIEV4ZXJjaXNlIDMgaGVyZQ0KYXJidXRobm90IDwtIGFyYnV0aG5vdCAlPiUNCiAgbXV0YXRlKGJveV90b19naXJsX3JhdGlvID0gYm95cyAvIGdpcmxzKQ0KYXJidXRobm90IDwtIGFyYnV0aG5vdCAlPiUNCiAgbXV0YXRlKHRvdGFsID0gYm95cyArIGdpcmxzKQ0KYXJidXRobm90IDwtIGFyYnV0aG5vdCAlPiUNCiAgbXV0YXRlKGJveV9yYXRpbyA9IGJveXMgLyB0b3RhbCkNCg0KYXJidXRobm90IDwtIGFyYnV0aG5vdCAlPiUgDQogIG11dGF0ZShib3lfcmF0aW89Ym95cy90b3RhbCkNCiAgZ2dwbG90KGFyYnV0aG5vdCwgYWVzKHg9IHllYXIsDQogICAgICAgICAgICAgICAgICAgICAgeT0gYm95X3JhdGlvLA0KICAgICAgICAgICAgICAgICAgICAgIGNvbG91ciA9IGJveV9yYXRpbykpKw0KICBnZW9tX3BvaW50KHNpemU9IDMsIGFscGhhPSAwLjUpKw0KICBnZW9tX3Ntb290aChtZXRob2Q9IGxtLA0KICAgICAgICAgICAgICBzZT1GKSsNCiAgbGFicyh0aXRsZT0iUHJvcG9ydGlvbiBvZiBCb3lzIEJhcHRpemVkIE92ZXIgVGltZSIsDQogICAgICAgeD0iWWVhciIsDQogICAgICAgeT0iUHJvcG9ydGlvbiBvZiBCb3lzIikrDQogIHRoZW1lX2J3KCkNCmBgYA0KDQoNCiMjIyBFeGVyY2lzZSA0DQoNClRoZSB5ZWFycyBpbmNsdWRlZCBpbiB0aGlzIGRhdGEgc2V0IGlzIHllYXIgMTk0MC0gMjAwMi4NClRoZSBkYXRhIGZyYW1lIGlzIG51bS4NClRoZSB2YXJpYWJsZSdzIG5hbWUgYXJlIHllYXIsIGJveXMsIGFuZCBnaXJscy4NCg0KYGBge3IgZGltLXByZXNlbnR9DQojIEluc2VydCBjb2RlIGZvciBFeGVyY2lzZSA0IGhlcmUNCmRhdGEoJ3ByZXNlbnQnLCBwYWNrYWdlPSdvcGVuaW50cm8nKQ0Kc3RyKHByZXNlbnQpDQpzdW1tYXJ5KHByZXNlbnQpDQpgYGANCg0KDQojIyMgRXhlcmNpc2UgNQ0KDQpUaGUgbnVtYmVyIG9mIGJpcnRoIHJlY29yZHMgaXMgbW9yZSB0aGVuIG51bWJlciBvZiBiYXB0aXNtLg0KDQpgYGB7ciBjb3VudC1jb21wYXJlfQ0KIyBJbnNlcnQgY29kZSBmb3IgRXhlcmNpc2UgNSBoZXJlDQpnbGltcHNlKHByZXNlbnQpDQpnbGltcHNlKGFyYnV0aG5vdCkNCmBgYA0KDQoNCiMjIyBFeGVyY2lzZSA2DQoNCkJveSdzIGJpcnRoIGRlY3JlYXNlIG92ZXIgdGhlIHRpbWUuIFRoZSBBcmJ1dGhub3TigJlzIG9ic2VydmF0aW9uIGFib3V0IGJveXMgYmVpbmcgYm9ybiBpbiBncmVhdGVyIHByb3BvcnRpb24gdGhhbiBnaXJscyBkb2VzIG5vdCBob2xkIHVwIGluIHRoZSBVLlMuDQoNCmBgYHtyIHBsb3QtcHJvcC1ib3lzLXByZXNlbnR9DQojIEluc2VydCBjb2RlIGZvciBFeGVyY2lzZSA2IGhlcmUNCnByZXNlbnQ8LSBwcmVzZW50ICU+JQ0KICBtdXRhdGUoYm95X3RvX2dpcmxfcmF0aW8gPSBib3lzIC8gZ2lybHMpDQpwcmVzZW50IDwtIHByZXNlbnQgJT4lDQogIG11dGF0ZSh0b3RhbCA9IGJveXMgKyBnaXJscykNCnByZXNlbnQgPC0gcHJlc2VudCAlPiUNCiAgbXV0YXRlKGJveV9yYXRpbyA9IGJveXMgLyB0b3RhbCkNCg0KcHJlc2VudCA8LSBwcmVzZW50ICU+JSANCiAgbXV0YXRlKGJveV9yYXRpbz1ib3lzL3RvdGFsKQ0KICBnZ3Bsb3QocHJlc2VudCwgYWVzKHg9IHllYXIsDQogICAgICAgICAgICAgICAgICAgICAgeT0gYm95X3JhdGlvLA0KICAgICAgICAgICAgICAgICAgICAgIGNvbG91ciA9IGJveV9yYXRpbykpKw0KICBnZW9tX3BvaW50KHNpemU9IDMsIGFscGhhPSAwLjUpKw0KICBnZW9tX3Ntb290aChtZXRob2Q9IGxtLA0KICAgICAgICAgICAgICBzZT1GKSsNCiAgbGFicyh0aXRsZT0iUHJvcG9ydGlvbiBvZiBCb3lzIE92ZXIgVGltZSIsDQogICAgICAgeD0iWWVhciIsDQogICAgICAgeT0iUHJvcG9ydGlvbiBvZiBCb3lzIikrDQogIHRoZW1lX2J3KCkNCmBgYA0KDQoNCiMjIyBFeGVyY2lzZSA3DQoNClllYXIgMTk2MSBoYXMgbW9zdCB0b3RhbCBudW1iZXIgb2YgYmlydGhzIGluIHRoZSBVLlMuDQoNCmBgYHtyIGZpbmQtbWF4LXRvdGFsfQ0KIyBJbnNlcnQgY29kZSBmb3IgRXhlcmNpc2UgNyBoZXJlDQpwcmVzZW50ICU+JQ0KICBhcnJhbmdlKGRlc2ModG90YWwpKQ0KYGBgDQoNCg==