library(openintro)
## Please visit openintro.org for free statistics materials
##
## Attaching package: 'openintro'
## The following objects are masked from 'package:datasets':
##
## cars, trees
# Run the arbuthnot.R script hosted on the OpenIntro website. The statements
# below use the `arbuthnot` object, presumably defined by this script.
# NOTE(review): this executes remote code fetched over plain HTTP -- consider
# an HTTPS URL or a vetted local copy; confirm which is available.
source("http://www.openintro.org/stat/data/arbuthnot.R")
# Show the full object to look at the raw records.
arbuthnot
# Size of `arbuthnot` as (rows, columns)
c(nrow(arbuthnot), ncol(arbuthnot))
## [1] 82 3
# Variables recorded in `arbuthnot`
colnames(arbuthnot)
## [1] "year" "boys" "girls"
# Yearly counts stored in the `boys` column
arbuthnot[["boys"]]
## [1] 5218 4858 4422 4994 5158 5035 5106 4917 4703 5359 5366 5518 5470 5460
## [15] 4793 4107 4047 3768 3796 3363 3079 2890 3231 3220 3196 3441 3655 3668
## [29] 3396 3157 3209 3724 4748 5216 5411 6041 5114 4678 5616 6073 6506 6278
## [43] 6449 6443 6073 6113 6058 6552 6423 6568 6247 6548 6822 6909 7577 7575
## [57] 7484 7575 7737 7487 7604 7909 7662 7602 7676 6985 7263 7632 8062 8426
## [71] 7911 7578 8102 8031 7765 6113 8366 7952 8379 8239 7840 7640
# Yearly counts stored in the `girls` column
arbuthnot[["girls"]]
## [1] 4683 4457 4102 4590 4839 4820 4928 4605 4457 4952 4784 5332 5200 4910
## [15] 4617 3997 3919 3395 3536 3181 2746 2722 2840 2908 2959 3179 3349 3382
## [29] 3289 3013 2781 3247 4107 4803 4881 5681 4858 4319 5322 5560 5829 5719
## [43] 6061 6120 5822 5738 5717 5847 6203 6033 6041 6299 6533 6744 7158 7127
## [57] 7246 7119 7214 7101 7167 7302 7392 7316 7483 6647 6713 7229 7767 7626
## [71] 7452 7061 7514 7656 7683 5738 7779 7417 7687 7623 7380 7288
# Scatter plot of the number of girls baptized per year.
# Axis labels are given explicitly; otherwise plot() labels the axes with the
# raw expressions `arbuthnot$year` and `arbuthnot$girls`.
plot(
  x = arbuthnot$year, y = arbuthnot$girls,
  xlab = "Year", ylab = "Girls baptized"
)
# Exercise 1 ----
# Line plot of the number of girls baptized per year.
# Axis labels are given explicitly; otherwise plot() labels the axes with the
# raw expressions `arbuthnot$year` and `arbuthnot$girls`.
plot(
  x = arbuthnot$year, y = arbuthnot$girls, type = "l",
  xlab = "Year", ylab = "Girls baptized"
)
# Exercise 2 ----
# Total baptisms per year: element-wise sum of the boys and girls columns.
with(arbuthnot, boys + girls)
## [1] 9901 9315 8524 9584 9997 9855 10034 9522 9160 10311 10150
## [12] 10850 10670 10370 9410 8104 7966 7163 7332 6544 5825 5612
## [23] 6071 6128 6155 6620 7004 7050 6685 6170 5990 6971 8855
## [34] 10019 10292 11722 9972 8997 10938 11633 12335 11997 12510 12563
## [45] 11895 11851 11775 12399 12626 12601 12288 12847 13355 13653 14735
## [56] 14702 14730 14694 14951 14588 14771 15211 15054 14918 15159 13632
## [67] 13976 14861 15829 16052 15363 14639 15616 15687 15448 11851 16145
## [78] 15369 16066 15862 15220 14928
# Line plot of total baptisms (boys + girls) per year.
# Axis labels are given explicitly; otherwise the y axis is labelled with the
# raw expression `arbuthnot$boys + arbuthnot$girls`.
plot(
  arbuthnot$year, arbuthnot$boys + arbuthnot$girls,
  type = "l", xlab = "Year", ylab = "Total baptisms"
)
# Ratio of the number of boys to the number of girls baptized in 1629.
# The counts are looked up in the 1629 row instead of being retyped by hand,
# so the result always agrees with the data.
with(arbuthnot[arbuthnot$year == 1629, ], boys / girls)
## [1] 1.114243
# Boys-to-girls ratio for every year in `arbuthnot`
with(arbuthnot, boys / girls)
## [1] 1.114243 1.089971 1.078011 1.088017 1.065923 1.044606 1.036120
## [8] 1.067752 1.055194 1.082189 1.121656 1.034884 1.051923 1.112016
## [15] 1.038120 1.027521 1.032661 1.109867 1.073529 1.057215 1.121267
## [22] 1.061719 1.137676 1.107290 1.080095 1.082416 1.091371 1.084565
## [29] 1.032533 1.047793 1.153901 1.146905 1.156075 1.085988 1.108584
## [36] 1.063369 1.052697 1.083121 1.055242 1.092266 1.116143 1.097744
## [43] 1.064016 1.052778 1.043112 1.065354 1.059647 1.120575 1.035467
## [50] 1.088679 1.034100 1.039530 1.044237 1.024466 1.058536 1.062860
## [57] 1.032846 1.064054 1.072498 1.054359 1.060974 1.083128 1.036526
## [64] 1.039092 1.025792 1.050850 1.081931 1.055748 1.037981 1.104904
## [71] 1.061594 1.073219 1.078254 1.048981 1.010673 1.065354 1.075460
## [78] 1.072132 1.090022 1.080808 1.062331 1.048299
# Share of boys among all children baptized in 1629.
# The counts are looked up in the 1629 row instead of being retyped by hand,
# so the result always agrees with the data.
with(arbuthnot[arbuthnot$year == 1629, ], boys / (boys + girls))
## [1] 0.5270175
# Share of boys among all baptisms, for every year in `arbuthnot`
with(arbuthnot, boys / (boys + girls))
## [1] 0.5270175 0.5215244 0.5187705 0.5210768 0.5159548 0.5109082 0.5088698
## [8] 0.5163831 0.5134279 0.5197362 0.5286700 0.5085714 0.5126523 0.5265188
## [15] 0.5093518 0.5067868 0.5080341 0.5260366 0.5177305 0.5139059 0.5285837
## [22] 0.5149679 0.5322023 0.5254569 0.5192526 0.5197885 0.5218447 0.5202837
## [29] 0.5080030 0.5116694 0.5357262 0.5342132 0.5361942 0.5206108 0.5257482
## [36] 0.5153557 0.5128359 0.5199511 0.5134394 0.5220493 0.5274422 0.5232975
## [43] 0.5155076 0.5128552 0.5105507 0.5158214 0.5144798 0.5284297 0.5087122
## [50] 0.5212285 0.5083822 0.5096910 0.5108199 0.5060426 0.5142178 0.5152360
## [57] 0.5080788 0.5155165 0.5174905 0.5132301 0.5147925 0.5199527 0.5089677
## [64] 0.5095857 0.5063659 0.5123973 0.5196766 0.5135590 0.5093183 0.5249190
## [71] 0.5149385 0.5176583 0.5188268 0.5119526 0.5026541 0.5158214 0.5181790
## [78] 0.5174052 0.5215362 0.5194175 0.5151117 0.5117899
# Exercise 3 ----
# Line plot of the yearly share of boys among all baptisms.
# Axis labels are given explicitly; otherwise the y axis is labelled with the
# raw expression `arbuthnot$boys/(arbuthnot$boys + arbuthnot$girls)`.
plot(
  arbuthnot$year, arbuthnot$boys / (arbuthnot$boys + arbuthnot$girls),
  type = "l", xlab = "Year", ylab = "Proportion of boys baptized"
)
# Element-wise comparison: TRUE for each year in which the boys count exceeds
# the girls count.
with(arbuthnot, boys > girls)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [15] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [29] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [43] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [57] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [71] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Run the present.R script hosted on the OpenIntro website. The statements
# below use the `present` object, presumably defined by this script.
# NOTE(review): this executes remote code fetched over plain HTTP -- consider
# an HTTPS URL or a vetted local copy; confirm which is available.
source("http://www.openintro.org/stat/data/present.R")
# Show the full object to look at the raw records.
present
# Values of the `year` column in `present`
present[["year"]]
## [1] 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953
## [15] 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967
## [29] 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981
## [43] 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995
## [57] 1996 1997 1998 1999 2000 2001 2002
# Size of `present` as (rows, columns)
c(nrow(present), ncol(present))
## [1] 63 3
# Variables recorded in `present`
colnames(present)
## [1] "year" "boys" "girls"
# Smallest and largest yearly totals (boys + girls) in each dataset
with(arbuthnot, range(boys + girls))
## [1] 5612 16145
with(present, range(boys + girls))
## [1] 2360399 4268326
# Boys per girl for every year in `present`
with(present, boys / girls)
## [1] 1.054817 1.053969 1.058429 1.056767 1.055757 1.055391 1.058698
## [8] 1.055449 1.053820 1.053760 1.053716 1.052078 1.050934 1.053399
## [15] 1.051460 1.050742 1.051286 1.050672 1.049374 1.049480 1.048873
## [22] 1.050057 1.047948 1.052717 1.047188 1.051137 1.048540 1.049964
## [29] 1.053417 1.052997 1.054719 1.051845 1.051271 1.052129 1.054797
## [36] 1.053605 1.052538 1.052569 1.052657 1.051749 1.052837 1.051615
## [43] 1.050597 1.051976 1.050199 1.052061 1.050876 1.050000 1.049991
## [50] 1.049720 1.049676 1.045849 1.050017 1.049955 1.047877 1.048928
## [57] 1.047062 1.047643 1.047190 1.048791 1.047998 1.045686 1.047986
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:openintro':
##
## diamonds
# Line chart of the yearly boys-to-girls ratio in `present`
ggplot(data = present, mapping = aes(x = year, y = boys / girls)) +
  geom_line()
# Element-wise check of whether the boys count exceeds the girls count
with(present, boys > girls)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [15] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [29] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [43] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [57] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Year with the largest combined count of boys and girls
with(present, year[which.max(boys + girls)])
## [1] 1961