This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Per-column summary (min, quartiles, median, mean, max) of the `cars` data.
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.2 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(openintro)
## Warning: package 'openintro' was built under R version 4.1.2
## Loading required package: airports
## Warning: package 'airports' was built under R version 4.1.2
## Loading required package: cherryblossom
## Warning: package 'cherryblossom' was built under R version 4.1.2
## Loading required package: usdata
## Warning: package 'usdata' was built under R version 4.1.2
# Load the `arbuthnot` data set shipped with openintro and print it.
data("arbuthnot", package = "openintro")
arbuthnot
## # A tibble: 82 x 3
## year boys girls
## <int> <int> <int>
## 1 1629 5218 4683
## 2 1630 4858 4457
## 3 1631 4422 4102
## 4 1632 4994 4590
## 5 1633 5158 4839
## 6 1634 5035 4820
## 7 1635 5106 4928
## 8 1636 4917 4605
## 9 1637 4703 4457
## 10 1638 5359 4952
## # ... with 72 more rows
# Compact overview: row/column counts plus each column's type and first values.
arbuthnot %>%
  glimpse()
## Rows: 82
## Columns: 3
## $ year <int> 1629, 1630, 1631, 1632, 1633, 1634, 1635, 1636, 1637, 1638, 1639~
## $ boys <int> 5218, 4858, 4422, 4994, 5158, 5035, 5106, 4917, 4703, 5359, 5366~
## $ girls <int> 4683, 4457, 4102, 4590, 4839, 4820, 4928, 4605, 4457, 4952, 4784~
# Pull the `boys` column out of the tibble as a plain vector.
arbuthnot[["boys"]]
## [1] 5218 4858 4422 4994 5158 5035 5106 4917 4703 5359 5366 5518 5470 5460 4793
## [16] 4107 4047 3768 3796 3363 3079 2890 3231 3220 3196 3441 3655 3668 3396 3157
## [31] 3209 3724 4748 5216 5411 6041 5114 4678 5616 6073 6506 6278 6449 6443 6073
## [46] 6113 6058 6552 6423 6568 6247 6548 6822 6909 7577 7575 7484 7575 7737 7487
## [61] 7604 7909 7662 7602 7676 6985 7263 7632 8062 8426 7911 7578 8102 8031 7765
## [76] 6113 8366 7952 8379 8239 7840 7640
# Exercise 1: extract just the counts of girls baptized.
# `$` returns the `girls` column as a plain integer vector, one value per year.
# (dplyr::count() is not the right tool here: it tallies rows per group rather
# than returning the column's values.)
arbuthnot$girls
## [1] 4683 4457 4102 4590 4839 4820 4928 4605 4457 4952 4784 5332 5200 4910 4617
## [16] 3997 3919 3395 3536 3181 2746 2722 2840 2908 2959 3179 3349 3382 3289 3013
## [31] 2781 3247 4107 4803 4881 5681 4858 4319 5322 5560 5829 5719 6061 6120 5822
## [46] 5738 5717 5847 6203 6033 6041 6299 6533 6744 7158 7127 7246 7119 7214 7101
## [61] 7167 7302 7392 7316 7483 6647 6713 7229 7767 7626 7452 7061 7514 7656 7683
## [76] 5738 7779 7417 7687 7623 7380 7288
# Scatterplot of the number of girls baptized in each year.
arbuthnot %>%
  ggplot(aes(x = year, y = girls)) +
  geom_point()

# Simple line plot of the number of girls baptized per year using ggplot.
arbuthnot %>%
  ggplot(aes(x = year, y = girls)) +
  geom_line()
## Exercise 2: From 1640 to 1660 there is a drop in the number of girls baptized per year.
## From 1660 to 1700 there is a prominent increase in the number of girls baptized per year.
# Total baptisms per year: element-wise sum of the boys and girls columns.
with(arbuthnot, boys + girls)
## [1] 9901 9315 8524 9584 9997 9855 10034 9522 9160 10311 10150 10850
## [13] 10670 10370 9410 8104 7966 7163 7332 6544 5825 5612 6071 6128
## [25] 6155 6620 7004 7050 6685 6170 5990 6971 8855 10019 10292 11722
## [37] 9972 8997 10938 11633 12335 11997 12510 12563 11895 11851 11775 12399
## [49] 12626 12601 12288 12847 13355 13653 14735 14702 14730 14694 14951 14588
## [61] 14771 15211 15054 14918 15159 13632 13976 14861 15829 16052 15363 14639
## [73] 15616 15687 15448 11851 16145 15369 16066 15862 15220 14928
# Add a `total` column: boys plus girls baptized in each year.
arbuthnot <- mutate(arbuthnot, total = boys + girls)
arbuthnot
## # A tibble: 82 x 4
## year boys girls total
## <int> <int> <int> <int>
## 1 1629 5218 4683 9901
## 2 1630 4858 4457 9315
## 3 1631 4422 4102 8524
## 4 1632 4994 4590 9584
## 5 1633 5158 4839 9997
## 6 1634 5035 4820 9855
## 7 1635 5106 4928 10034
## 8 1636 4917 4605 9522
## 9 1637 4703 4457 9160
## 10 1638 5359 4952 10311
## # ... with 72 more rows
# Add `boy_ratio`: the share of each year's baptisms that were boys.
arbuthnot <- mutate(arbuthnot, boy_ratio = boys / total)
arbuthnot
## # A tibble: 82 x 5
## year boys girls total boy_ratio
## <int> <int> <int> <int> <dbl>
## 1 1629 5218 4683 9901 0.527
## 2 1630 4858 4457 9315 0.522
## 3 1631 4422 4102 8524 0.519
## 4 1632 4994 4590 9584 0.521
## 5 1633 5158 4839 9997 0.516
## 6 1634 5035 4820 9855 0.511
## 7 1635 5106 4928 10034 0.509
## 8 1636 4917 4605 9522 0.516
## 9 1637 4703 4457 9160 0.513
## 10 1638 5359 4952 10311 0.520
## # ... with 72 more rows
# Proportion of boys among all baptisms, year by year, as a line plot.
arbuthnot %>%
  ggplot(aes(x = year, y = boy_ratio)) +
  geom_line()

# Girls baptized per year as a scatterplot (repeats the earlier girls plot).
arbuthnot %>%
  ggplot(aes(x = year, y = girls)) +
  geom_point()
# Add `boy_to_girl_ratio`: boys baptized per girl baptized in each year.
arbuthnot <- mutate(arbuthnot, boy_to_girl_ratio = boys / girls)
arbuthnot
## # A tibble: 82 x 6
## year boys girls total boy_ratio boy_to_girl_ratio
## <int> <int> <int> <int> <dbl> <dbl>
## 1 1629 5218 4683 9901 0.527 1.11
## 2 1630 4858 4457 9315 0.522 1.09
## 3 1631 4422 4102 8524 0.519 1.08
## 4 1632 4994 4590 9584 0.521 1.09
## 5 1633 5158 4839 9997 0.516 1.07
## 6 1634 5035 4820 9855 0.511 1.04
## 7 1635 5106 4928 10034 0.509 1.04
## 8 1636 4917 4605 9522 0.516 1.07
## 9 1637 4703 4457 9160 0.513 1.06
## 10 1638 5359 4952 10311 0.520 1.08
## # ... with 72 more rows
# Add `more_boys`: TRUE for years in which boys outnumbered girls.
arbuthnot <- mutate(arbuthnot, more_boys = boys > girls)
arbuthnot
## # A tibble: 82 x 7
## year boys girls total boy_ratio boy_to_girl_ratio more_boys
## <int> <int> <int> <int> <dbl> <dbl> <lgl>
## 1 1629 5218 4683 9901 0.527 1.11 TRUE
## 2 1630 4858 4457 9315 0.522 1.09 TRUE
## 3 1631 4422 4102 8524 0.519 1.08 TRUE
## 4 1632 4994 4590 9584 0.521 1.09 TRUE
## 5 1633 5158 4839 9997 0.516 1.07 TRUE
## 6 1634 5035 4820 9855 0.511 1.04 TRUE
## 7 1635 5106 4928 10034 0.509 1.04 TRUE
## 8 1636 4917 4605 9522 0.516 1.07 TRUE
## 9 1637 4703 4457 9160 0.513 1.06 TRUE
## 10 1638 5359 4952 10311 0.520 1.08 TRUE
## # ... with 72 more rows
# Smallest and largest yearly count of boys baptized, as a one-row tibble.
summarise(arbuthnot, min = min(boys), max = max(boys))
## # A tibble: 1 x 2
## min max
## <int> <int>
## 1 2890 8426
# Load the `present` data set shipped with openintro and print it.
data("present", package = "openintro")
present
## # A tibble: 63 x 3
## year boys girls
## <dbl> <dbl> <dbl>
## 1 1940 1211684 1148715
## 2 1941 1289734 1223693
## 3 1942 1444365 1364631
## 4 1943 1508959 1427901
## 5 1944 1435301 1359499
## 6 1945 1404587 1330869
## 7 1946 1691220 1597452
## 8 1947 1899876 1800064
## 9 1948 1813852 1721216
## 10 1949 1826352 1733177
## # ... with 53 more rows
Note that the echo = FALSE
parameter was added to the code chunk to prevent printing of the R code that generated the plot.