17.5 Exercises 1. Load the GaltonFamilies data from the HistData package. The children in each family are listed by gender and then by height. Create a dataset called galton_heights by picking one male and one female child at random from each family.

library(tidyverse)
## Warning: package 'stringr' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(HistData)
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.3.3
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
# Load Galton's family height records shipped with the HistData package.
data("GaltonFamilies")

# Fix the RNG seed so the random draw below is reproducible.
set.seed(1951)

# Exercise 1 asks for a male and a female child picked at random, so keep a
# single randomly chosen child per family/gender combination instead of every
# child. A family with no child of a given gender contributes no row for it.
# NOTE(review): numeric outputs recorded elsewhere in this document were
# produced from the unsampled data and must be regenerated after this change.
galton_heights <- GaltonFamilies |>
  slice_sample(n = 1, by = c(family, gender))

2. Make a scatterplot for heights between mothers and daughters, mothers and sons, fathers and daughters, and fathers and sons.

# Parent/child height pairs, one table per parent x child-gender combination.
# Each table keeps the parent's height and the child's height, with the child
# column renamed to `son` or `daughter`.
fs <- galton_heights |>
  filter(gender == "male") |>
  select(father, son = childHeight)

fd <- galton_heights |>
  filter(gender == "female") |>
  select(father, daughter = childHeight)

md <- galton_heights |>
  filter(gender == "female") |>
  select(mother, daughter = childHeight)

ms <- galton_heights |>
  filter(gender == "male") |>
  select(mother, son = childHeight)
# Mean and standard deviation of the parent and child heights in each pairing.
# The summary columns are deliberately left unnamed, so each one is labelled
# with the text of the expression that produced it.
statfs <- summarise(fs, mean(father), sd(father), mean(son), sd(son))
statfd <- summarise(
  fd,
  mean(father), sd(father), mean(daughter), sd(daughter)
)
statmd <- summarise(
  md,
  mean(mother), sd(mother), mean(daughter), sd(daughter)
)
statms <- summarise(ms, mean(mother), sd(mother), mean(son), sd(son))
# One scatterplot per parent/child pairing: parent height on the x axis,
# child height on the y axis, drawn with half-transparent points.
fsplot <- ggplot(fs, aes(x = father, y = son)) +
  geom_point(alpha = 0.5)
fdplot <- ggplot(fd, aes(x = father, y = daughter)) +
  geom_point(alpha = 0.5)
mdplot <- ggplot(md, aes(x = mother, y = daughter)) +
  geom_point(alpha = 0.5)
msplot <- ggplot(ms, aes(x = mother, y = son)) +
  geom_point(alpha = 0.5)

# Show the four scatterplots together, arranged in two rows.
grid.arrange(mdplot, msplot, fdplot, fsplot, nrow = 2)

3. Compute the correlation in heights between mothers and daughters, mothers and sons, fathers and daughters, and fathers and sons.

# Correlation between parent and child heights for each pairing, printed as a
# single number. The `## [1]` lines are the recorded console output.
with(fs, cor(father, son))
## [1] 0.3923835
with(fd, cor(father, daughter))
## [1] 0.428433
with(md, cor(mother, daughter))
## [1] 0.3051645
with(ms, cor(mother, son))
## [1] 0.323005