library(tidyverse)
library(openintro)

Exercise 1

# Extract the yearly counts of baptized girls as a plain vector.
arbuthnot[["girls"]]
##  [1] 4683 4457 4102 4590 4839 4820 4928 4605 4457 4952 4784 5332 5200 4910 4617
## [16] 3997 3919 3395 3536 3181 2746 2722 2840 2908 2959 3179 3349 3382 3289 3013
## [31] 2781 3247 4107 4803 4881 5681 4858 4319 5322 5560 5829 5719 6061 6120 5822
## [46] 5738 5717 5847 6203 6033 6041 6299 6533 6744 7158 7127 7246 7119 7214 7101
## [61] 7167 7302 7392 7316 7483 6647 6713 7229 7767 7626 7452 7061 7514 7656 7683
## [76] 5738 7779 7417 7687 7623 7380 7288

Exercise 2

Overall, the number of baptized girls shows an upward trend. Before surging more than 3-fold from around 2500 in the 1660s to around 7500 in the 1700s, the number had dropped significantly, by about half, from approximately 5000 in 1630.

# Yearly count of baptized girls: red points joined by a line.
arbuthnot %>%
  ggplot(mapping = aes(x = year, y = girls)) +
  geom_point(colour = "red") +
  geom_line()

Exercise 3

There was a slight decline over time in the proportion of newborn boys during this period. However, baby boys still outnumbered baby girls.

Are there more boys than girls? The answer is TRUE.

# Derive, in one mutate() call:
#   total    - all baptisms in the year (boys + girls)
#   boyprob  - proportion of baptisms that were boys
#   moreboys - TRUE for years in which boys outnumber girls
arbuthnot <- mutate(
  arbuthnot,
  total = boys + girls,
  boyprob = boys / total,
  moreboys = boys > girls
)


# Preview the first six rows, including the newly derived columns.
arbuthnot %>%
  head(n = 6)
## # A tibble: 6 x 6
##    year  boys girls total boyprob moreboys
##   <int> <int> <int> <int>   <dbl> <lgl>   
## 1  1629  5218  4683  9901   0.527 TRUE    
## 2  1630  4858  4457  9315   0.522 TRUE    
## 3  1631  4422  4102  8524   0.519 TRUE    
## 4  1632  4994  4590  9584   0.521 TRUE    
## 5  1633  5158  4839  9997   0.516 TRUE    
## 6  1634  5035  4820  9855   0.511 TRUE
# Line chart of the yearly proportion of boys among all baptisms.
ggplot(data = arbuthnot, mapping = aes(x = year, y = boyprob)) +
  geom_line(colour = "green")

Exercise 4

The ‘present’ data set reports data from 1940 to 2002 in a crosstab (tabular) format with 3 columns and 63 rows.

# Which years does the present data set cover? Summarise the year column,
# selected by name rather than by position.
summary(present["year"])
##       year     
##  Min.   :1940  
##  1st Qu.:1956  
##  Median :1971  
##  Mean   :1971  
##  3rd Qu.:1986  
##  Max.   :2002
# Dimensions of the data frame, reported as (rows, columns).
c(nrow(present), ncol(present))
## [1] 63  3
# Variable (column) names of the data frame.
colnames(present)
## [1] "year"  "boys"  "girls"

Exercise 5

In the ‘present’ data set, the pattern is the same as in ‘arbuthnot’ (boys outnumber girls), while the counts in the present data set are much higher in magnitude.

# Flag each year in which more boys than girls were born.
present <- mutate(present, more_boys = boys > girls)

# Compare the pattern with arbuthnot: do boys outnumber girls overall?
sum(present$girls) < sum(present$boys)
## [1] TRUE
# Preview the first six rows, including the more_boys flag.
present %>%
  head(n = 6)
## # A tibble: 6 x 4
##    year    boys   girls more_boys
##   <dbl>   <dbl>   <dbl> <lgl>    
## 1  1940 1211684 1148715 TRUE     
## 2  1941 1289734 1223693 TRUE     
## 3  1942 1444365 1364631 TRUE     
## 4  1943 1508959 1427901 TRUE     
## 5  1944 1435301 1359499 TRUE     
## 6  1945 1404587 1330869 TRUE
# Compare magnitude with arbuthnot: total births across all years in each
# data set (x = arbuthnot, y = present).
x <- with(arbuthnot, sum(boys) + sum(girls))
y <- with(present, sum(boys) + sum(girls))

# TRUE when the present data set records more births overall.
x < y
## [1] TRUE

Exercise 6

The proportion of newborn boys has been gradually falling, but boys have still outnumbered newborn girls so far.

Arbuthnot's observation has continued to hold true since then.

# Add the yearly total births (total1) and the proportion of those births
# that were boys (boyprob1) in a single mutate() call.
present <- mutate(
  present,
  total1 = boys + girls,
  boyprob1 = boys / total1
)

# Preview the first six rows, including total1 and boyprob1.
present %>%
  head(n = 6)
## # A tibble: 6 x 6
##    year    boys   girls more_boys  total1 boyprob1
##   <dbl>   <dbl>   <dbl> <lgl>       <dbl>    <dbl>
## 1  1940 1211684 1148715 TRUE      2360399    0.513
## 2  1941 1289734 1223693 TRUE      2513427    0.513
## 3  1942 1444365 1364631 TRUE      2808996    0.514
## 4  1943 1508959 1427901 TRUE      2936860    0.514
## 5  1944 1435301 1359499 TRUE      2794800    0.514
## 6  1945 1404587 1330869 TRUE      2735456    0.513
# Line chart of the yearly proportion of boys among all births.
ggplot(data = present, mapping = aes(x = year, y = boyprob1)) +
  geom_line()

Exercise 7

The year 1961 had the highest number of newborns.

# In what year did we see the most total number of births in the U.S.?
#
# Sort by total births (largest first) and keep only the top row. The sorted
# result is printed rather than assigned back to `present`, so the data set
# keeps its original row order for any later use.
present %>%
  arrange(desc(total1)) %>%
  head(1)
## # A tibble: 1 x 6
##    year    boys   girls more_boys  total1 boyprob1
##   <dbl>   <dbl>   <dbl> <lgl>       <dbl>    <dbl>
## 1  1961 2186274 2082052 TRUE      4268326    0.512
LS0tDQp0aXRsZTogIkxhYiAxOiBJbnRybyB0byBSIg0KYXV0aG9yOiAiTGFiMDFfSUQzMDUyMjA4MCINCmRhdGU6ICJgciBTeXMuRGF0ZSgpYCINCm91dHB1dDogb3BlbmludHJvOjpsYWJfcmVwb3J0DQotLS0NCg0KYGBge3IgbG9hZC1wYWNrYWdlcywgbWVzc2FnZT1GQUxTRX0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShvcGVuaW50cm8pDQpgYGANCg0KIyMjIEV4ZXJjaXNlIDENCg0KYGBge3Igdmlldy1naXJscy1jb3VudHN9DQphcmJ1dGhub3QkZ2lybHMNCmBgYA0KDQoNCiMjIyBFeGVyY2lzZSAyDQoNCk92ZXJhbGwsIHRoZSBudW1iZXIgb2YgYmFwdGl6ZWQgZ2lybHMgc2F3IGFuIHVwd2FyZCB0cmVuZC4gQmVmb3JlIHN1cmdpbmcgbW9yZSB0aGFuIDMtZm9sZCBmcm9tIGFyb3VuZCAyNTAwIGluIDE2NjBzIHRvIGFyb3VuZCA3NTAwIGluIDE3MDBzLCB0aGUgbnVtYmVyIGhhZCBzaWduaWdpY2FudGx5IGRyb3BwZWQgYnkgYSBoYWxmIGZyb20gYXBwcm94aW1hdGUgNTAwMCBpbiAxOTMwLg0KDQpgYGB7ciB0cmVuZC1naXJsc30NCmdncGxvdChhcmJ1dGhub3QsYWVzKHg9eWVhcix5PWdpcmxzKSkrDQogIGdlb21fcG9pbnQoY29sb3VyPSJyZWQiKSsNCiAgZ2VvbV9saW5lKCkNCmBgYA0KDQoNCiMjIyBFeGVyY2lzZSAzDQoNClRoZXJlIGhhZCBiZWVuIGEgc2xpZ2h0IGRlY2xpbmUgb3ZlcnRpbWUgZm9yIG51bWJlciBvZiBuZXdib3JuIG1hbGUgZHVyaW5nIHRoaXMgdGltZSBwZXJpb2QuIEhvd2V2ZXIsIGJhYnlib3lzIHN0aWxsIG91dG51bWJlcmVkIGJhYnkgZ2lybHMuDQoNCklzIG1vcmUgYm95cyB0aGFuIGdpcmxzPyBBbnN3ZXIgaXMgYHIgIHN1bShhcmJ1dGhub3QkYm95cyk+c3VtKGFyYnV0aG5vdCRnaXJscylgDQoNCg0KYGBge3IgcGxvdC1wcm9wLWJveXMtYXJidXRobm90fQ0KYXJidXRobm90IDwtIGFyYnV0aG5vdCAlPiUNCiBtdXRhdGUodG90YWwgPSBib3lzICsgZ2lybHMpDQoNCmFyYnV0aG5vdCA8LSBhcmJ1dGhub3QgJT4lDQogbXV0YXRlKGJveXByb2IgPSBib3lzLyB0b3RhbCkNCiAgICAgICAgDQphcmJ1dGhub3QgPC0gYXJidXRobm90ICU+JQ0KICBtdXRhdGUobW9yZWJveXMgPSBib3lzID5naXJscykNCg0KDQpoZWFkKGFyYnV0aG5vdCkNCmdncGxvdChhcmJ1dGhub3QsYWVzKHg9eWVhcix5PWJveXByb2IpKStnZW9tX2xpbmUoY29sb3VyPSAiZ3JlZW4iKQ0KYGBgDQoNCg0KIyMjIEV4ZXJjaXNlIDQNCg0KQSAncHJlc2VudCcgcmVwb3J0cyBkYXRhIGZyb20gMTk0MCB0byAyMDAyIHVuZGVyY3Jvc3N0YWIgZm9ybWF0IHdpdGggMyBjb2x1bW5zIGFuZCA2MyByb3dzLg0KDQpgYGB7ciBkaW0tcHJlc2VudH0NCiNXaGF0IHllYXJzIGFyZSBpbmNsdWRlZCBpbiB0aGUgcHJlc2VudCBkYXRhIHNldA0Kc3VtbWFyeShwcmVzZW50WzFdKQ0KI1doYXQgYXJlIHRoZSBkaW1lbnNpb25zIG9mIHRoZSBkYXRhIGZyYW1lPw0KZGltKHByZXNlbnQpDQojV2hhdCBhcmUgdGhlIHZhcmlhYmxlIChjb2x1bW4pIG5h
bWVzDQpuYW1lcyhwcmVzZW50KQ0KYGBgDQoNCg0KIyMjIEV4ZXJjaXNlIDUNCg0KSW4gJ3ByZXNlbnQnIGRhdGFzZXQsIHRoZSBub3JtIGlzIGFzIHNhbWUgYXMgaW4gJ2FyYnV0aG5vdCcsIHdoaWxlIHRoZSBwcmVzZXQgZGF0YSBzZXQgaGFzIGJlZW0gYSBtdWNoIGhpZ2hlciBpbiBtYWduaXR1ZGUuDQoNCmBgYHtyIGNvdW50LWNvbXBhcmV9DQpwcmVzZW50IDwtIHByZXNlbnQgJT4lDQogIG11dGF0ZShtb3JlX2JveXMgPSBib3lzID4gZ2lybHMpDQoNCiMgI2NvbXBhcmUgbm9ybSB3aXRoIGFyYnVudGhub3QNCnN1bShwcmVzZW50JGJveXMpPnN1bShwcmVzZW50JGdpcmxzKQ0KaGVhZChwcmVzZW50KQ0KDQojIGNvbXBhcmUgbWFnbml0dWRlIHdpdGggYXJidW50aG5vdA0KeTwtc3VtKHByZXNlbnQkYm95cykrc3VtKHByZXNlbnQkZ2lybHMpDQp4PC1zdW0oYXJidXRobm90JGJveXMpK3N1bShhcmJ1dGhub3QkZ2lybHMpDQoNCnk+eA0KDQpgYGANCg0KDQojIyMgRXhlcmNpc2UgNg0KDQpUaGUgcHJvcG9ydGlvbiBvZiBuZXdib3JuIGJveXMgaGFzIGhhZCBncmFkdWFsbHkgZmFsbGluZywgYnV0IHN0aWxsIGRvbWluYXRlZCBuZXdib3JuIGdpcmxzIHNvIGZhci4NCg0KVGhlIG9ic2VydmF0aW9uIG9mIEFyYnV0aG5vdCBzdGlsbCBoZWxkIHRydWUgc2luY2UgdGhlbi4NCg0KYGBge3IgcGxvdC1wcm9wLWJveXMtcHJlc2VudH0NCnByZXNlbnQgPC0gcHJlc2VudCAlPiUNCiAgbXV0YXRlKHRvdGFsMSA9IGJveXMgKyBnaXJscykNCg0KcHJlc2VudCA8LSBwcmVzZW50ICU+JQ0KICBtdXRhdGUoYm95cHJvYjEgPSBib3lzIC8gdG90YWwxKQ0KDQpoZWFkKHByZXNlbnQpDQoNCmdncGxvdChwcmVzZW50LGFlcyh4PXllYXIseT1ib3lwcm9iMSkpK2dlb21fbGluZSgpDQpgYGANCg0KDQojIyMgRXhlcmNpc2UgNw0KDQpZZWFyIDE5NjEgaGFzIHRoZSBtb3N0IGhpZ2hlc3QgbnVtYndlIG9mIG5ld2Jvcm4NCg0KYGBge3IgZmluZC1tYXgtdG90YWx9DQojSW4gd2hhdCB5ZWFyIGRpZCB3ZSBzZWUgdGhlIG1vc3QgdG90YWwgbnVtYmVyIG9mIGJpcnRocyBpbiB0aGUgVS5TLj8NCg0KcHJlc2VudCA8LSBwcmVzZW50ICU+JQ0KICAgICAgICBhcnJhbmdlKGRlc2ModG90YWwxKSkNCmhlYWQocHJlc2VudCwxKQ0KYGBg