library(tidyverse)
library(openintro)

Load and look at the data

# Load the arbuthnot data set that ships with the openintro package.
data("arbuthnot", package = "openintro")

# Peek at the first six rows of the data.
head(arbuthnot, n = 6L)
## # A tibble: 6 x 3
##    year  boys girls
##   <int> <int> <int>
## 1  1629  5218  4683
## 2  1630  4858  4457
## 3  1631  4422  4102
## 4  1632  4994  4590
## 5  1633  5158  4839
## 6  1634  5035  4820

Exercise 1

# Total number of girls baptized, summed over every year in arbuthnot.
girls_total <- colSums(arbuthnot[, "girls"])
girls_total
##  girls 
## 453841
# Build the summary sentence from the data instead of hard-coding the figures,
# so the text cannot drift out of sync with the computed total or year range.
print(sprintf(
  "There were %s girls baptized during the years %d to %d",
  format(girls_total[["girls"]], big.mark = ","),
  min(arbuthnot$year),
  max(arbuthnot$year)
))
## [1] "There were 453,841 girls baptized during the years 1629 to 1710"

Exercise 2

By plotting the data, we can see that the fewest girls were baptized in the years 1649 through 1651.

# Line chart of the number of girls baptized in each year.
arbuthnot %>%
  ggplot(mapping = aes(x = year, y = girls)) +
  geom_line()

Exercise 3

Let's compare the number of boys and girls baptized each year in arbuthnot, this time using a bar plot. Here we see that more boys than girls were baptized in every year of this period.

# Add a logical column that is TRUE for years in which boys outnumbered girls,
# then summarise every column of the augmented data.
arbuthnot_prop <- mutate(arbuthnot, more_boys = boys > girls)

summary(arbuthnot_prop)
##       year           boys          girls      more_boys     
##  Min.   :1629   Min.   :2890   Min.   :2722   Mode:logical  
##  1st Qu.:1649   1st Qu.:4759   1st Qu.:4457   TRUE:82       
##  Median :1670   Median :6073   Median :5718                 
##  Mean   :1670   Mean   :5907   Mean   :5535                 
##  3rd Qu.:1690   3rd Qu.:7576   3rd Qu.:7150                 
##  Max.   :1710   Max.   :8426   Max.   :7779
# Bar chart counting the years by the value of the more_boys flag.
arbuthnot_prop %>%
  ggplot(mapping = aes(x = more_boys)) +
  geom_bar()

Exercise 4

What does the present data frame look like? Using the dim() function, we see that the data frame has 63 rows and 3 columns.

# Load the present data set explicitly from openintro (matching how arbuthnot
# is loaded), so a same-named data set in another attached package cannot be
# picked up instead.
data("present", package = "openintro")

# dim() reports the number of rows followed by the number of columns.
dim(present)
## [1] 63  3

Exercise 5

# Total boys and girls baptized over all years. Columns are selected by name
# rather than by position so a reordered data frame cannot silently sum the
# wrong columns.
sex_totals <- colSums(arbuthnot[, c("boys", "girls")])
sex_totals
##   boys  girls 
## 484382 453841
# Build the summary sentence from the computed totals and the year range in
# the data instead of hard-coding the figures.
print(sprintf(
  "Between the years %d to %d, there were %s boys baptized and %s girls baptized",
  min(arbuthnot$year),
  max(arbuthnot$year),
  format(sex_totals[["boys"]], big.mark = ","),
  format(sex_totals[["girls"]], big.mark = ",")
))
## [1] "Between the years 1629 to 1710, there were 484,382 boys baptized and 453,841 girls baptized"

Exercise 6

What is the proportion of boys to girls baptized in the present?

# Add a logical column that is TRUE for years in which boys outnumbered girls,
# then summarise every column of the augmented data.
present_prop <- mutate(present, more_boys = boys > girls)

summary(present_prop)
##       year           boys             girls         more_boys     
##  Min.   :1940   Min.   :1211684   Min.   :1148715   Mode:logical  
##  1st Qu.:1956   1st Qu.:1799857   1st Qu.:1711405   TRUE:63       
##  Median :1971   Median :1924868   Median :1831679                 
##  Mean   :1971   Mean   :1885600   Mean   :1793915                 
##  3rd Qu.:1986   3rd Qu.:2058524   3rd Qu.:1965538                 
##  Max.   :2002   Max.   :2186274   Max.   :2082052
# Bar chart counting the years by the value of the more_boys flag.
present_prop %>%
  ggplot(mapping = aes(x = more_boys)) +
  geom_bar()

Exercise 7

Find the maximum number in the boys column

# Largest yearly count of boys baptized in the arbuthnot data.
arbuthnot %>%
  summarize(max = max(boys))
## # A tibble: 1 x 1
##     max
##   <int>
## 1  8426
# Report the maximum using the value computed from the data (formatted with a
# thousands separator) rather than a hard-coded figure; also fixes the
# "maximumn" typo in the message.
print(sprintf(
  "The maximum number of boys is %s",
  format(max(arbuthnot$boys), big.mark = ",")
))
## [1] "The maximum number of boys is 8,426"
LS0tDQp0aXRsZTogIkxhYiAxOiBJbnRybyB0byBSIg0KYXV0aG9yOiAiQ2Fyb2wgQ2FtcGJlbGwiDQpkYXRlOiAiRmVicnVhcnkgNCwgMjAyMyINCm91dHB1dDogb3BlbmludHJvOjpsYWJfcmVwb3J0DQotLS0NCg0KYGBge3IgbG9hZC1wYWNrYWdlcywgbWVzc2FnZT1GQUxTRX0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShvcGVuaW50cm8pDQpgYGANCg0KbG9hZC9sb29rIGF0IGRhdGENCmBgYHtyfQ0KI2xvYWQgZGF0YQ0KZGF0YSgnYXJidXRobm90JywgcGFja2FnZSA9ICdvcGVuaW50cm8nKQ0KDQojcGVhayBhdCBkYXRhDQpoZWFkIChhcmJ1dGhub3QpDQpgYGANCg0KIyMjIEV4ZXJjaXNlIDENCg0KYGBge3Igdmlldy1naXJscy1jb3VudHN9DQpjb2xTdW1zKGFyYnV0aG5vdFsgLCAiZ2lybHMiXSkNCg0KcHJpbnQgKCJUaGVyZSB3ZXJlIDQ1Myw4NDEgZ2lybHMgYmFwdGl6ZWQgZHVyaW5nIHRoZSB5ZWFycyAxNjI5IHRvIDE3MTAiKQ0KYGBgDQoNCg0KIyMjIEV4ZXJjaXNlIDINCg0KQnkgcGxvdHRpbmcgdGhlIGRhdGEgd2UgY2FuIHZpc3VhbGx5IHNlZSB0aGF0IHRoZSBsZWFzdCBudW1iZXIgb2YgZ2lybHMgYmFwdGl6ZXMgb2NjdXJyZWQgaW4gMTY0OSB0aHJvdWdoIDE2NTENCg0KYGBge3IgdHJlbmQtZ2lybHN9DQpnZ3Bsb3QoZGF0YSA9IGFyYnV0aG5vdCwgYWVzKHggPSB5ZWFyLCB5ID0gZ2lybHMpICkgKw0KICBnZW9tX2xpbmUoKQ0KYGBgDQoNCg0KIyMjIEV4ZXJjaXNlIDMNCg0KTGV0J3Mgc2VlIHRoZSBwcm9wb3J0aW9uIG9mIGJveXMgYmFwdGl6ZWQgb3ZlciB0aW1lIGZvciBhcmJ1dGhub3QgICwgdGhpcyB0aW1lIHVzaW5nIGEgYmFyX3Bsb3QuIEhlcmUgd2Ugc2VlIHRoYXQgdGhlcmUgd2VyZSBtb3JlIGJveXMgdGhhbiBnaXJscyBiYXB0aXNlZCBldmVyeSB5ZWFyIGR1cmluZyB0aGlzIHBlcmlvZC4NCg0KYGBge3IgcGxvdC1wcm9wLWJveXMtYXJidXRobm90fQ0KYXJidXRobm90X3Byb3AgPC0gYXJidXRobm90ICU+JQ0KICBtdXRhdGUobW9yZV9ib3lzID0gYm95cyA+IGdpcmxzKQ0KDQpzdW1tYXJ5KGFyYnV0aG5vdF9wcm9wKQ0KDQpnZ3Bsb3QoZGF0YSA9IGFyYnV0aG5vdF9wcm9wLCBhZXMoIHggPSBtb3JlX2JveXMpKSsNCiAgZ2VvbV9iYXIoKQ0KDQpgYGANCg0KDQojIyMgRXhlcmNpc2UgNA0KDQpXaGF0IGRvZXMgcHJlc2VudCBkYXRhIGZyYW1lIGxvb2sgbGlrZS4gVXNpbmcgdGhlIGRpbSBmdW5jdGlvbiwgd2Ugc2VlIHRoZSBkYXRhIGZyYW1lIGhhcyA2MyByb3dzIGFuZCAzIGNvbHVtbnMNCg0KYGBge3IgZGltLXByZXNlbnR9DQpkYXRhKHByZXNlbnQpDQpkaW0ocHJlc2VudCkNCmBgYA0KDQoNCiMjIyBFeGVyY2lzZSA1DQoNCg0KYGBge3IgY291bnQtY29tcGFyZX0NCmNvbFN1bXMoYXJidXRobm90WyAsIGMoMiwzKV0pDQoNCnByaW50ICgiQmV0d2VlbiB0aGUgeWVhcnMgMTYyOSB0byAxNzEwLCB0aGVyZSB3ZXJlIDQ4NCwzODIgYm95cyBiYXB0aXplZCBhbmQgNDUzLDg0MSBnaXJscyBiYXB0
aXplZCIpDQpgYGANCg0KDQojIyMgRXhlcmNpc2UgNg0KDQp3aGF0IGlzIHRoZSBwcm9wb3J0aW9vbiBvZiBib3lzIHRvIGdpcmxzIGJhcHRpemVkIGluIHRoZSBwcmVzZW50Pw0KDQpgYGB7ciBwbG90LXByb3AtYm95cy1wcmVzZW50fQ0KcHJlc2VudF9wcm9wIDwtIHByZXNlbnQgJT4lDQogIG11dGF0ZShtb3JlX2JveXMgPSBib3lzID4gZ2lybHMpDQoNCnN1bW1hcnkocHJlc2VudF9wcm9wKQ0KDQpnZ3Bsb3QoZGF0YSA9IHByZXNlbnRfcHJvcCwgYWVzKCB4ID0gbW9yZV9ib3lzKSkrDQogIGdlb21fYmFyKCkNCmBgYA0KDQoNCiMjIyBFeGVyY2lzZSA3DQoNCkZpbmQgdGhlIG1heGltdW0gbnVtYmVyIGluIHRoZSBib3lzIGNvbHVtbg0KDQpgYGB7ciBmaW5kLW1heC10b3RhbH0NCmFyYnV0aG5vdCAlPiUNCiAgc3VtbWFyaXplIChtYXggPSBtYXgoYm95cykgKQ0KDQpwcmludCgiVGhlIG1heGltdW1uIG51bWJlciBvZiBib3lzIGlzIDgsNDI2IikNCmBgYA0KDQo=