library(tidyverse)
library(openintro)

Exercise 1

# Print the `girls` column of `arbuthnot` as a plain vector.
arbuthnot[["girls"]]
##  [1] 4683 4457 4102 4590 4839 4820 4928 4605 4457 4952 4784 5332 5200 4910 4617
## [16] 3997 3919 3395 3536 3181 2746 2722 2840 2908 2959 3179 3349 3382 3289 3013
## [31] 2781 3247 4107 4803 4881 5681 4858 4319 5322 5560 5829 5719 6061 6120 5822
## [46] 5738 5717 5847 6203 6033 6041 6299 6533 6744 7158 7127 7246 7119 7214 7101
## [61] 7167 7302 7392 7316 7483 6647 6713 7229 7767 7626 7452 7061 7514 7656 7683
## [76] 5738 7779 7417 7687 7623 7380 7288

Exercise 2

There is an overall increasing trend since around 1660. Before that, between 1640 and 1660, we can see a decreasing trend.

# Line plot of `girls` against `year`, overlaid with a linear-model smoother.
arbuthnot %>%
  ggplot(aes(x = year, y = girls)) +
  geom_line() +
  geom_smooth(method = lm)
## `geom_smooth()` using formula 'y ~ x'

Exercise 3

We see a slightly decreasing trend in the proportion of boys to the total.

# Derive the combined count, the boy-to-girl ratio, and the proportion of boys
# in a single pass; `boy_ratio` reads the `total` column created just above it.
arbuthnot <- arbuthnot %>%
  mutate(
    total = boys + girls,
    boy_to_girl_ratio = boys / girls,
    boy_ratio = boys / total
  )

# Plot the proportion of boys per year with a linear-model smoother.
arbuthnot %>%
  ggplot(aes(x = year, y = boy_ratio)) +
  geom_line() +
  geom_smooth(method = lm)
## `geom_smooth()` using formula 'y ~ x'

Exercise 4

The data cover the years 1940 to 2002. The dimensions of the data frame are 63 x 3, and the column names are year, boys, and girls.

# Load `present` from the openintro package, then report its year range,
# its dimensions, and its column names.
data("present", package = "openintro")

# Earliest year, returned as a one-row tibble.
min_year <- summarize(present, min = min(year))
min_year
## # A tibble: 1 x 1
##     min
##   <dbl>
## 1  1940

# Latest year, returned as a one-row tibble.
max_year <- summarize(present, max = max(year))
max_year
## # A tibble: 1 x 1
##     max
##   <dbl>
## 1  2002

# Number of rows, number of columns, and the column names.
nrow(present)
## [1] 63
ncol(present)
## [1] 3
names(present)
## [1] "year"  "boys"  "girls"

Exercise 5

I took the mean of the total in both datasets and then computed their ratio. The mean total was over 300 times higher for the present dataset than for arbuthnot.

# Add the combined count of boys and girls to `present`.
present <- mutate(present, total = boys + girls)

# Ratio of the mean total in `present` to the mean total in `arbuthnot`
# (relies on `arbuthnot` already having its `total` column).
mean(present[["total"]]) / mean(arbuthnot[["total"]])
## [1] 321.5869

Exercise 6

For the present dataset, the proportion of boys to the total has a decreasing trend. This is consistent with what we observed in the arbuthnot data (Exercise 3), where the proportion of boys also showed a slightly decreasing trend.

# Proportion of boys among the total for each row (requires `present$total`).
present <- mutate(present, boy_ratio = boys / total)

# Plot that proportion against year with a linear-model smoother.
present %>%
  ggplot(aes(x = year, y = boy_ratio)) +
  geom_line() +
  geom_smooth(method = lm)
## `geom_smooth()` using formula 'y ~ x'

Exercise 7

By sorting the totals in descending order and then taking the first row of the sorted data frame, we find that the year with the largest total was 1961.

# Order rows from largest to smallest `total`; the first row then holds the
# year with the maximum total.
sorted_present <- arrange(present, desc(total))
head(sorted_present[["year"]], 1)
## [1] 1961
LS0tDQp0aXRsZTogIkxhYiAxOiBJbnRybyB0byBSIg0KYXV0aG9yOiAiSnVhbiBGYWxjayINCmRhdGU6ICJgciBTeXMuRGF0ZSgpYCINCm91dHB1dDogb3BlbmludHJvOjpsYWJfcmVwb3J0DQotLS0NCg0KYGBge3IgbG9hZC1wYWNrYWdlcywgbWVzc2FnZT1GQUxTRX0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShvcGVuaW50cm8pDQpgYGANCg0KIyMjIEV4ZXJjaXNlIDENCg0KYGBge3Igdmlldy1naXJscy1jb3VudHN9DQphcmJ1dGhub3QkZ2lybHMNCmBgYA0KDQoNCiMjIyBFeGVyY2lzZSAyDQoNClRoZXJlIGlzIGFuIG92ZXJhbGwgaW5jcmVhc2luZyB0cmVuZCBzaW5jZSBhcm91bmQgMTY2MC4gV2UgY291bGQgc2VlIGEgZGVjcmVhc2luZyB0cmVuZCBiZWZvcmUgdGhhdCBiZXR3ZWVuIDE2NDAgYW5kIDE2NjANCg0KYGBge3IgdHJlbmQtZ2lybHN9DQpnZ3Bsb3QoZGF0YSA9IGFyYnV0aG5vdCwgYWVzKHggPSB5ZWFyLCB5ID0gZ2lybHMpKSArIA0KICAgICBnZW9tX2xpbmUoKSArDQogICAgIGdlb21fc21vb3RoKG1ldGhvZD1sbSkNCmBgYA0KDQoNCiMjIyBFeGVyY2lzZSAzDQoNCldlIHNlZSBhIHNsaWdodGx5IGRlY3JlYXNpbmcgdHJlbmQgaW4gdGhlIHByb3BvcnRpb24gb2YgYm95IHRvIHRoZSB0b3RhbC4NCg0KYGBge3IgcGxvdC1wcm9wLWJveXMtYXJidXRobm90fQ0KYXJidXRobm90IDwtIGFyYnV0aG5vdCAlPiUNCiAgbXV0YXRlKHRvdGFsID0gYm95cyArIGdpcmxzKQ0KDQphcmJ1dGhub3QgPC0gYXJidXRobm90ICU+JQ0KICBtdXRhdGUoYm95X3RvX2dpcmxfcmF0aW8gPSBib3lzIC8gZ2lybHMpDQoNCmFyYnV0aG5vdCA8LSBhcmJ1dGhub3QgJT4lDQogIG11dGF0ZShib3lfcmF0aW8gPSBib3lzIC8gdG90YWwpDQoNCmdncGxvdChkYXRhID0gYXJidXRobm90LCBhZXMoeCA9IHllYXIsIHkgPSBib3lfcmF0aW8pKSArIA0KICAgICBnZW9tX2xpbmUoKSArDQogICAgIGdlb21fc21vb3RoKG1ldGhvZD1sbSkNCg0KYGBgDQoNCg0KIyMjIEV4ZXJjaXNlIDQNCg0KSW5mb3JtYXRpb24gZ29lcyBmcm9tIHllYXJzIDE5NDAgdG8gMjAwMi4gRGltZW5zaW9ucyBvZiBkYXRhZnJhbWUgYXJlIDYzIHggMy4gQ29sdW1uIG5hbWVzIGFyZSB5ZWFyLCBib3lzLCBnaXJscw0KDQoNCmBgYHtyIGRpbS1wcmVzZW50fQ0KZGF0YSgncHJlc2VudCcsIHBhY2thZ2U9J29wZW5pbnRybycpDQptaW5feWVhciA8LSBwcmVzZW50ICU+JSANCiAgICAgc3VtbWFyaXplKG1pbiA9IG1pbih5ZWFyKSkNCm1pbl95ZWFyDQptYXhfeWVhciA8LSBwcmVzZW50ICU+JSANCiAgICAgc3VtbWFyaXplKG1heCA9IG1heCh5ZWFyKSkNCm1heF95ZWFyDQpucm93KHByZXNlbnQpDQpuY29sKHByZXNlbnQpDQpjb2xuYW1lcyhwcmVzZW50KQ0KYGBgDQoNCg0KIyMjIEV4ZXJjaXNlIDUNCg0KSSB0b29rIHRoZSBtZWFuIG9mIHRoZSB0b3RhbCBpbiBib3RoIGRhdGFzZXRzIGFuZCB0aGUgb2JlcnZlZCBpdHMgcmF0aW8uIFRoZSBvYnNlcnZlZCByYXRpbyB3YXMg
b3ZlciAzMDAgdGltZXMgaGlnaGVyIGZvciB0aGUgcHJlc2VudCBkYXRhc2V0IHZzIGFyYnV0aG5vdC4NCg0KYGBge3IgY291bnQtY29tcGFyZX0NCnByZXNlbnQgPC0gcHJlc2VudCAlPiUNCiAgICBtdXRhdGUodG90YWwgPSBib3lzICsgZ2lybHMpDQoNCm1lYW4ocHJlc2VudCR0b3RhbCkgLyBtZWFuKGFyYnV0aG5vdCR0b3RhbCkNCmBgYA0KDQoNCiMjIyBFeGVyY2lzZSA2DQoNCkZvciB0aGUgcHJlc2VudCBkYXRhc2V0IHRoZSBwcm9wb3J0aW9uIG9mIGJveXMgdG8gdG90YWwgaGFzIGEgZGVjcmVhc2luZyB0cmVuZC4gVGhpcyBpcyBkaWZmZXJlbnQgdG8gd2hhdCB3ZSBvYnNlcnZlZCBpbiB0aGUgYXJidXRobm90IGRhdGEgd2hlcmUgdGhlIGJveXMgdHJlbmQgd2FzIHVwd2FyZHMuDQoNCmBgYHtyIHBsb3QtcHJvcC1ib3lzLXByZXNlbnR9DQpwcmVzZW50IDwtIHByZXNlbnQgJT4lDQogIG11dGF0ZShib3lfcmF0aW8gPSBib3lzIC8gdG90YWwpDQoNCmdncGxvdChkYXRhID0gcHJlc2VudCwgYWVzKHggPSB5ZWFyLCB5ID0gYm95X3JhdGlvKSkgKyANCiAgICAgZ2VvbV9saW5lKCkgKw0KICAgICBnZW9tX3Ntb290aChtZXRob2Q9bG0pDQoNCmBgYA0KDQoNCiMjIyBFeGVyY2lzZSA3DQoNCkJ5IHNvcnRpbmcgdGhlIHRvdGFscyBhbmQgdGhlbiB0YWtpbmcgdGhlIGZpcnN0IHJvdyBpbiB0aCBzb3J0ZWQgZGF0YWZyYW1lLCB0aGUgeWVhciB3aXRoIGxhcmdlc3QgdG90YWwgd2FzIDE5NjENCg0KYGBge3IgZmluZC1tYXgtdG90YWx9DQpzb3J0ZWRfcHJlc2VudCA8LSBwcmVzZW50ICU+JQ0KICAgIGFycmFuZ2UoZGVzYyh0b3RhbCkpDQpzb3J0ZWRfcHJlc2VudCR5ZWFyWzFdDQoNCmBgYA0KDQo=