17.5 Exercises 1. Load the GaltonFamilies data from the HistData package. The children in each family are listed by gender and then by height. Create a dataset called galton_heights by picking one male and one female child at random from each family.
library(tidyverse)
## Warning: package 'stringr' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(HistData)
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.3.3
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
# Load Galton's family height records (shipped with the HistData package).
data("GaltonFamilies")
# Fix the RNG seed so the random draw below is reproducible.
set.seed(1951)
# Keep one randomly chosen son and one randomly chosen daughter per family, as
# the exercise asks. Previously the full table was copied unchanged, so the
# seed had no effect and every child of every family was retained. A family
# with no child of a given gender simply contributes no row for that gender.
# NOTE(review): results computed earlier from the full table will change once
# this is re-run.
galton_heights <- GaltonFamilies |>
  group_by(family, gender) |>
  slice_sample(n = 1) |>
  ungroup()
2. Make a scatterplot for heights between mothers and daughters, mothers and sons, fathers and daughters, and fathers and sons.
# Parent/child height pairs, one table per parent-child gender combination.
# The child's height column is renamed to `son` or `daughter` while selecting.

# Fathers paired with sons.
fs <- galton_heights |>
  filter(gender == "male") |>
  select(father, son = childHeight)

# Fathers paired with daughters.
fd <- galton_heights |>
  filter(gender == "female") |>
  select(father, daughter = childHeight)

# Mothers paired with daughters.
md <- galton_heights |>
  filter(gender == "female") |>
  select(mother, daughter = childHeight)

# Mothers paired with sons.
ms <- galton_heights |>
  filter(gender == "male") |>
  select(mother, son = childHeight)
# Mean and standard deviation of the parent and child heights in each pairing.
# The summary columns are left unnamed on purpose so they keep their
# auto-generated names (e.g. `mean(father)`).
statfs <- summarise(
  fs,
  mean(father), sd(father),
  mean(son), sd(son)
)
statfd <- summarise(
  fd,
  mean(father), sd(father),
  mean(daughter), sd(daughter)
)
statmd <- summarise(
  md,
  mean(mother), sd(mother),
  mean(daughter), sd(daughter)
)
statms <- summarise(
  ms,
  mean(mother), sd(mother),
  mean(son), sd(son)
)
# One semi-transparent scatterplot per parent-child pairing
# (parent height on x, child height on y).
fsplot <- ggplot(fs, aes(x = father, y = son)) +
  geom_point(alpha = 0.5)
fdplot <- ggplot(fd, aes(x = father, y = daughter)) +
  geom_point(alpha = 0.5)
mdplot <- ggplot(md, aes(x = mother, y = daughter)) +
  geom_point(alpha = 0.5)
msplot <- ggplot(ms, aes(x = mother, y = son)) +
  geom_point(alpha = 0.5)

# Draw the four panels in a 2 x 2 grid: mothers on the top row, fathers below.
grid.arrange(mdplot, msplot, fdplot, fsplot, nrow = 2)
3. Compute the correlation in heights between mothers and daughters, mothers and sons, fathers and daughters, and fathers and sons.
# Pearson correlation between parent and child heights for each pairing.
# Father vs. son
cor(fs$father, fs$son)
## [1] 0.3923835
# Father vs. daughter
cor(fd$father, fd$daughter)
## [1] 0.428433
# Mother vs. daughter
cor(md$mother, md$daughter)
## [1] 0.3051645
# Mother vs. son
cor(ms$mother, ms$son)
## [1] 0.323005