library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'tibble' was built under R version 4.5.2
## Warning: package 'tidyr' was built under R version 4.5.2
## Warning: package 'readr' was built under R version 4.5.2
## Warning: package 'purrr' was built under R version 4.5.2
## Warning: package 'dplyr' was built under R version 4.5.2
library(openintro)

Exercise 1

# Exercise 1: load Arbuthnot's baptism records and print the girls column.
data(list = "arbuthnot", package = "openintro")
arbuthnot[["girls"]]
##  [1] 4683 4457 4102 4590 4839 4820 4928 4605 4457 4952 4784 5332 5200 4910 4617
## [16] 3997 3919 3395 3536 3181 2746 2722 2840 2908 2959 3179 3349 3382 3289 3013
## [31] 2781 3247 4107 4803 4881 5681 4858 4319 5322 5560 5829 5719 6061 6120 5822
## [46] 5738 5717 5847 6203 6033 6041 6299 6533 6744 7158 7127 7246 7119 7214 7101
## [61] 7167 7302 7392 7316 7483 6647 6713 7229 7767 7626 7452 7061 7514 7656 7683
## [76] 5738 7779 7417 7687 7623 7380 7288

Exercise 2

Answer: The number of girls baptized generally increases over time, with noticeable year-to-year variability.

# Exercise 2: line plot of the number of girls baptized in each year.
arbuthnot %>%
  ggplot(aes(x = year, y = girls)) +
  geom_line()

# Observation: the number of girls baptized generally increases over time,
# with noticeable year-to-year variability.
# Add the derived columns in one step: yearly total, boys-per-girl ratio,
# and the proportion of boys (which reuses the freshly created `total`).
arbuthnot <- arbuthnot %>%
  mutate(
    total = boys + girls,
    boy_to_girl_ratio = boys / girls,
    boy_ratio = boys / total
  )

# Line plot of total baptisms per year.
arbuthnot %>%
  ggplot(aes(x = year, y = total)) +
  geom_line()

Exercise 3

Answer: The proportion of boys remains consistently above 0.5, indicating that more boys than girls were baptized in most years.

# Exercise 3: flag the years in which boys outnumber girls, then plot the
# proportion of boys over time.
arbuthnot <- mutate(arbuthnot, more_boys = boys > girls)

arbuthnot %>%
  ggplot(aes(x = year, y = boy_ratio)) +
  geom_line()

Exercise 4

Answer: The years included are 1940 onward. The data frame has 63 rows and 3 columns, and the variables are year, boys, and girls.

# Exercise 4: inspect the structure of the `present` birth records.
# Load the dataset explicitly *before* its first use, so the chunk does not
# depend on openintro's lazy-loaded data being reachable via library().
data("present", package = "openintro")
glimpse(present)
## Rows: 63
## Columns: 3
## $ year  <dbl> 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950…
## $ boys  <dbl> 1211684, 1289734, 1444365, 1508959, 1435301, 1404587, 1691220, 1…
## $ girls <dbl> 1148715, 1223693, 1364631, 1427901, 1359499, 1330869, 1597452, 1…

# Smallest and largest yearly boy counts in `arbuthnot`.
# NOTE(review): presumably kept here as a magnitude reference for comparing
# against `present` — confirm it belongs to this exercise.
arbuthnot %>%
  summarize(min = min(boys), max = max(boys))

Exercise 5

Answer: Birth counts in the present dataset are much larger than Arbuthnot's (millions per year rather than thousands), reflecting population growth over time.

# Exercise 5: load the `present` data and show its columns and first values.
data(list = "present", package = "openintro")
dplyr::glimpse(present)
## Rows: 63
## Columns: 3
## $ year  <dbl> 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950…
## $ boys  <dbl> 1211684, 1289734, 1444365, 1508959, 1435301, 1404587, 1691220, 1…
## $ girls <dbl> 1148715, 1223693, 1364631, 1427901, 1359499, 1330869, 1597452, 1…

Exercise 6

Answer: Based on the graph, the proportion of boys decreases over time, meaning girls make up a growing share of births.

# Exercise 6: add total births and the proportion of boys to `present`,
# then plot that proportion against the year.
present <- present %>%
  mutate(total = boys + girls) %>%
  mutate(boy_ratio = boys / total)

present %>%
  ggplot(aes(x = year, y = boy_ratio)) +
  geom_line()

Exercise 7

Answer: The highest total number of births in the USA occurred in 1961, with a total of 4,268,326 births.

# Exercise 7: compute total births per year and list the years from the
# largest total to the smallest, so the peak year is the first row.
present %>%
  mutate(total = girls + boys) %>%
  arrange(-total)