library(tidyverse)
library(openintro)

# Load and look at the data ----

# Load the arbuthnot data set from the openintro package.
data("arbuthnot", package = "openintro")

# Peek at the first rows of the data.
head(arbuthnot)
## # A tibble: 6 x 3
## year boys girls
## <int> <int> <int>
## 1 1629 5218 4683
## 2 1630 4858 4457
## 3 1631 4422 4102
## 4 1632 4994 4590
## 5 1633 5158 4839
## 6 1634 5035 4820
# Total number of girls baptized across all years in the data.
colSums(arbuthnot[, "girls"])
## girls
## 453841

# Build the summary sentence from the data instead of hard-coding the
# figures, so the message cannot drift from the values computed above.
girls_total <- format(sum(arbuthnot$girls), big.mark = ",")
year_span <- range(arbuthnot$year)
print(paste0(
  "There were ", girls_total, " girls baptized during the years ",
  year_span[1], " to ", year_span[2]
))
## [1] "There were 453,841 girls baptized during the years 1629 to 1710"
# By plotting the data we can see that the fewest girls were baptized in
# 1649 through 1651.
ggplot(data = arbuthnot, aes(x = year, y = girls)) +
  geom_line()

# Next, let's check year by year whether more boys than girls were baptized
# in arbuthnot, this time using a bar plot. Here we see that there were more
# boys than girls baptized every year during this period.
# Add a logical column flagging the years in which boys outnumber girls.
arbuthnot_prop <- arbuthnot %>%
  mutate(more_boys = boys > girls)

summary(arbuthnot_prop)
## year boys girls more_boys
## Min. :1629 Min. :2890 Min. :2722 Mode:logical
## 1st Qu.:1649 1st Qu.:4759 1st Qu.:4457 TRUE:82
## Median :1670 Median :6073 Median :5718
## Mean :1670 Mean :5907 Mean :5535
## 3rd Qu.:1690 3rd Qu.:7576 3rd Qu.:7150
## Max. :1710 Max. :8426 Max. :7779

# Count the years by the more_boys flag.
ggplot(data = arbuthnot_prop, aes(x = more_boys)) +
  geom_bar()

# What does the present data frame look like? Using the dim function, we see
# the data frame has 63 rows and 3 columns.
# Load the present data set the same way arbuthnot was loaded, naming the
# package explicitly.
data("present", package = "openintro")
dim(present)
## [1] 63 3

# Totals for arbuthnot, selecting the columns by name rather than by position
# so the result does not depend on column order.
baptism_totals <- colSums(arbuthnot[, c("boys", "girls")])
print(baptism_totals)
## boys girls
## 484382 453841

# Derive the sentence from the computed totals and the observed year range
# instead of hard-coding the figures.
baptism_years <- range(arbuthnot$year)
print(paste0(
  "Between the years ", baptism_years[1], " to ", baptism_years[2],
  ", there were ", format(baptism_totals[["boys"]], big.mark = ","),
  " boys baptized and ", format(baptism_totals[["girls"]], big.mark = ","),
  " girls baptized"
))
## [1] "Between the years 1629 to 1710, there were 484,382 boys baptized and 453,841 girls baptized"
# Do boys outnumber girls in every year of the present data as well?
present_prop <- present %>%
  mutate(more_boys = boys > girls)

summary(present_prop)
## year boys girls more_boys
## Min. :1940 Min. :1211684 Min. :1148715 Mode:logical
## 1st Qu.:1956 1st Qu.:1799857 1st Qu.:1711405 TRUE:63
## Median :1971 Median :1924868 Median :1831679
## Mean :1971 Mean :1885600 Mean :1793915
## 3rd Qu.:1986 3rd Qu.:2058524 3rd Qu.:1965538
## Max. :2002 Max. :2186274 Max. :2082052

# Count the years by the more_boys flag.
ggplot(data = present_prop, aes(x = more_boys)) +
  geom_bar()

# Find the maximum number in the boys column.
# Largest yearly count in the boys column of arbuthnot.
arbuthnot %>%
  summarize(max = max(boys))
## # A tibble: 1 x 1
## max
## <int>
## 1 8426

# Report the maximum using the computed value (and fix the "maximumn" typo in
# the printed message).
max_boys <- format(max(arbuthnot$boys), big.mark = ",")
print(paste0("The maximum number of boys is ", max_boys))
## [1] "The maximum number of boys is 8,426"