Quintin Sargent Donner Exam

# load libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
## 
##     extract
library(knitr)
library(ggplot2)
library(ggsci)
library(ggpubr)
library(vcd)
## Loading required package: grid
library(vcdExtra)
## Loading required package: gnm
## 
## Attaching package: 'vcdExtra'
## The following object is masked from 'package:dplyr':
## 
##     summarise
library(stringr)
head(Donner, n=15L)
##                     family age    sex survived      death
## Antoine              Other  23   Male        0 1846-12-29
## Breen, Edward        Breen  13   Male        1       <NA>
## Breen, Margaret I.   Breen   1 Female        1       <NA>
## Breen, James         Breen   5   Male        1       <NA>
## Breen, John          Breen  14   Male        1       <NA>
## Breen, Mary          Breen  40 Female        1       <NA>
## Breen, Patrick       Breen  51   Male        1       <NA>
## Breen, Patrick Jr.   Breen   9   Male        1       <NA>
## Breen, Peter         Breen   3   Male        1       <NA>
## Breen, Simon         Breen   8   Male        1       <NA>
## Burger, Charles      Other  30   Male        0 1846-12-27
## Denton, John         Other  28   Male        0 1847-02-26
## Dolan, Patrick       Other  40   Male        0 1846-12-27
## Donner, Elitha Cumi Donner  13 Female        1       <NA>
## Donner, Eliza Poor  Donner   3 Female        1       <NA>

Question 1

# count of the family members in the named families
named_families <- Donner %>%
select(family) %>%
filter(family != "Other")

named_families %>%
count(family, sort = TRUE)
##      family  n
## 1    Donner 14
## 2 MurFosPik 12
## 3    Graves 10
## 4     Breen  9
## 5      Reed  7
## 6      Eddy  4
## 7  FosdWolf  4
## 8  Keseberg  4
## 9 McCutchen  3
# ratio of men to women
m_2_w <- Donner %>%
select(sex) 

num_m <- m_2_w %>%
count(sex == "Male")

num_f <- m_2_w %>%
count(sex == "Female")


ratio <- num_m / num_f
ratio
##   sex == "Male"         n
## 1           NaN 0.6363636
## 2             1 1.5714286

This shows that the ratio of men to women is 1.5714 to 1.

55 males to 35 females

# get the amount of ppl younger than 18 years old

below18 <- Donner %>%
filter(age < 18)
below18
##                         family age    sex survived      death
## Breen, Edward            Breen  13   Male        1       <NA>
## Breen, Margaret I.       Breen   1 Female        1       <NA>
## Breen, James             Breen   5   Male        1       <NA>
## Breen, John              Breen  14   Male        1       <NA>
## Breen, Patrick Jr.       Breen   9   Male        1       <NA>
## Breen, Peter             Breen   3   Male        1       <NA>
## Breen, Simon             Breen   8   Male        1       <NA>
## Donner, Elitha Cumi     Donner  13 Female        1       <NA>
## Donner, Eliza Poor      Donner   3 Female        1       <NA>
## Donner, Francis E.      Donner   6 Female        1       <NA>
## Donner, George Jr.      Donner   9   Male        1       <NA>
## Donner, Georgia Ann     Donner   4 Female        1       <NA>
## Donner, Isaac           Donner   5   Male        0 1847-03-06
## Donner, Leanna          Donner  11 Female        1       <NA>
## Donner, Lewis           Donner   3   Male        0 1847-02-14
## Donner, Mary            Donner   7 Female        1       <NA>
## Donner, Samuel          Donner   4   Male        0 1846-12-21
## Eddy, James               Eddy   3   Male        0 1847-03-13
## Eddy, Margaret            Eddy   1 Female        0 1847-02-04
## Foster, Jeremiah     MurFosPik   1   Male        0 1847-03-13
## Graves, Eleanor         Graves  15 Female        1       <NA>
## Graves, Elizabeth       Graves   1 Female        1       <NA>
## Graves, Franklin W J    Graves   5   Male        0 1847-03-12
## Graves, Jonathan        Graves   7   Male        1       <NA>
## Graves, Lovina          Graves  12 Female        1       <NA>
## Graves, Nancy           Graves   9 Female        1       <NA>
## Hook, William            Other  12   Male        0 1847-02-28
## Hook, Solomon            Other  14   Male        1       <NA>
## Keseberg, Ada         Keseberg   3 Female        0 1847-02-25
## Keseberg, Lewis Jr.   Keseberg   1   Male        0 1847-01-24
## McCutchen, Harriet   McCutchen   1 Female        0 1847-02-02
## Murphy, John Landrum MurFosPik  15   Male        0 1847-01-31
## Murphy, Lemuel       MurFosPik  12   Male        0 1846-12-27
## Murphy, Mary M.      MurFosPik  14 Female        1       <NA>
## Murphy, Simon Peter  MurFosPik   8   Male        1       <NA>
## Pike, Catherine      MurFosPik   1 Female        0 1847-02-20
## Pike, Naomi          MurFosPik   2 Female        1       <NA>
## Reed, James Jr.           Reed   6   Male        1       <NA>
## Reed, Martha Jane         Reed   9 Female        1       <NA>
## Reed, Thomas Keyes        Reed   4   Male        1       <NA>
## Reed, Virginia E.         Reed  13 Female        1       <NA>
nrow(below18)
## [1] 41

There were 41 children below the age of 18 in the party.

# Most common lady first names
ladies <- Donner %>%
subset(sex == "Female")

lady_names <- rownames(ladies)
#lady_names
f_names <- gsub(".*, ", "", lady_names)
f_names_only <- gsub("\\ [A-z]*", "", f_names)
f_names_only <- gsub("\\.", "", f_names_only)

df <- data.frame(f_names_only) 
table(unlist(df))
## 
##        Ada    Almanda  Catherine      Doris    Eleanor     Elitha      Eliza 
##          1          1          1          1          2          1          2 
##  Elizabeth    Francis    Georgia    Harriet     Lavina     Leanna     Lovina 
##          3          1          1          2          1          1          1 
##   Margaret     Martha       Mary      Nancy      Naomi Phillipine      Sarah 
##          3          1          4          1          1          1          3 
##     Tamsen   Virginia 
##          1          1

We can see, Mary was the most common female first name with 4, followed by Elizabeth, Margaret, and Sarah with 3 each.

# get average age of males vs females
males <- Donner %>%
filter(sex == "Male") %>%
select(age) 
avg_m_age = mean(males$age)
print(paste("Average male age: ", avg_m_age))
## [1] "Average male age:  22.2909090909091"
females <- Donner %>%
filter(sex == "Female") %>%
select(age)
avg_f_age = mean(females$age)
print(paste("Average female age: ", avg_f_age))
## [1] "Average female age:  18.6285714285714"

Males of the party tended to be few years older.

# get mortality by gender
males <- Donner %>%
filter(sex == "Male") %>%
select(survived)

males_lived <- colSums(males == 1)
tot_m <- nrow(males)
percent_m_survived <- males_lived / tot_m
print(paste("Percent of men that survived: ", percent_m_survived *100))
## [1] "Percent of men that survived:  41.8181818181818"
females <- Donner %>%
filter(sex == "Female") %>%
select(survived)


females_lived <- colSums(females == 1)
tot_f = nrow(females)
percent_f_survived <- females_lived / tot_f
print(paste("Percent of women that survived: ", percent_f_survived*100))
## [1] "Percent of women that survived:  71.4285714285714"

So they appeared to value the survival of women over men. Makes sense with the traditional gender roles of the time. Also, women most likely had kids to take care of.

# get mortality by age
kids <- Donner %>%
filter(age < 18) %>%
select(survived)

kids_lived <- colSums(kids == 1)
tot_kids <- nrow(kids)
percent_kids_lived <- kids_lived / tot_kids

print(paste("Percent of kids that survived: ", percent_kids_lived *100))
## [1] "Percent of kids that survived:  65.8536585365854"
adults <- Donner %>%
filter(age >= 18) %>%
select(survived)

adults_lived <- colSums(adults == 1)
tot_adults <- nrow(adults)
percent_adults_lived <- adults_lived / tot_adults
print(paste("Percent of adults that survived: ", percent_adults_lived *100))
## [1] "Percent of adults that survived:  42.8571428571429"

So they tried their best to save as many children as possible. A majority of the adults died which makes sense as they put their children before themselves.

Question 2

family_tot <- Donner %>%
count(family) %>%
rename(fam = family)
#mutate(percent_survived = total / fam_size)

family_survival <- Donner %>%
count(family, survived) %>%
rename(total = n) %>%
#mutate(percent_survived = n / family_tot) %>%
filter(survived == 1) %>%
select(family, total)

combined_df <- cbind(family_survival, family_tot) %>%
mutate(percent_fam_survived = (total / n)*100) %>%
select(family, total, percent_fam_survived)
combined_df
##       family total percent_fam_survived
## 1      Breen     9            100.00000
## 2     Donner     7             50.00000
## 3       Eddy     1             25.00000
## 4   FosdWolf     2             50.00000
## 5     Graves     7             70.00000
## 6   Keseberg     2             50.00000
## 7  McCutchen     2             66.66667
## 8  MurFosPik     6             50.00000
## 9      Other     6             26.08696
## 10      Reed     6             85.71429

total = number of family members that survived

Question 3

myDonner <- combined_df %>%
arrange(desc(total)) %>%
mutate(prop = round(total * 100 / sum(total), 1), lab.ypos = cumsum(prop) - 0.5 * prop)
myDonner
##       family total percent_fam_survived prop lab.ypos
## 1      Breen     9            100.00000 18.8     9.40
## 2     Donner     7             50.00000 14.6    26.10
## 3     Graves     7             70.00000 14.6    40.70
## 4  MurFosPik     6             50.00000 12.5    54.25
## 5      Other     6             26.08696 12.5    66.75
## 6       Reed     6             85.71429 12.5    79.25
## 7   FosdWolf     2             50.00000  4.2    87.60
## 8   Keseberg     2             50.00000  4.2    91.80
## 9  McCutchen     2             66.66667  4.2    96.00
## 10      Eddy     1             25.00000  2.1    99.15
ggpie(
    myDonner, x = "prop", label = "total",
    lab.pos = "in", lab.font = list(color = "white"),
    fill = "family", color = "white", label_info = "all", label_type = "circle",
    label_split = NULL, label_size = 4, label_pos = "in", legend = "right",
    palette = "jco"
) +
ggtitle("Number of Survivors by Family")

Question 4

survived <- table(Donner$survived)

df_survived <- data.frame("Status" = c("Died", "Survived"),
                         "Count" = survived) %>%
                          rename(Count = Count.Freq) %>%
                          select(Status, Count)

df_survived
##     Status Count
## 1     Died    42
## 2 Survived    48

Question 5

q <- ggbarplot(df_survived, x = "Status", y = "Count", 
              color = "black", fill = "Status", palette = "jco",
              legend = "right") +
ggtitle("Donner Survival")
q

42 died, 48 survived of the Donner party

Question 6

random20 <- Donner[sample(nrow(Donner), 20), ]
random20 <- random20 %>%
mutate(names = row.names(random20)) %>%
mutate(survived = factor(survived))
random20
##                         family age    sex survived      death
## Donner, Leanna          Donner  11 Female        1       <NA>
## Reed, Thomas Keyes        Reed   4   Male        1       <NA>
## Reed, James               Reed  46   Male        1       <NA>
## Pike, Harriet        MurFosPik  21 Female        1       <NA>
## Donner, Tamsen          Donner  44 Female        0 1847-03-28
## McCutchen, William   McCutchen  30   Male        1       <NA>
## Donner, Elitha Cumi     Donner  13 Female        1       <NA>
## McCutchen, Harriet   McCutchen   1 Female        0 1847-02-02
## Graves, William         Graves  18   Male        1       <NA>
## Spitzer, Augustus        Other  30   Male        0 1847-02-08
## Keseberg, Lewis       Keseberg  32   Male        1       <NA>
## Donner, Isaac           Donner   5   Male        0 1847-03-06
## Dolan, Patrick           Other  40   Male        0 1846-12-27
## Murphy, Lavina       MurFosPik  36 Female        0 1847-03-12
## Burger, Charles          Other  30   Male        0 1846-12-27
## Pike, Naomi          MurFosPik   2 Female        1       <NA>
## Graves, Franklin W J    Graves   5   Male        0 1847-03-12
## Reed, James Jr.           Reed   6   Male        1       <NA>
## Breen, Patrick           Breen  51   Male        1       <NA>
## Reed, Virginia E.         Reed  13 Female        1       <NA>
##                                     names
## Donner, Leanna             Donner, Leanna
## Reed, Thomas Keyes     Reed, Thomas Keyes
## Reed, James                   Reed, James
## Pike, Harriet               Pike, Harriet
## Donner, Tamsen             Donner, Tamsen
## McCutchen, William     McCutchen, William
## Donner, Elitha Cumi   Donner, Elitha Cumi
## McCutchen, Harriet     McCutchen, Harriet
## Graves, William           Graves, William
## Spitzer, Augustus       Spitzer, Augustus
## Keseberg, Lewis           Keseberg, Lewis
## Donner, Isaac               Donner, Isaac
## Dolan, Patrick             Dolan, Patrick
## Murphy, Lavina             Murphy, Lavina
## Burger, Charles           Burger, Charles
## Pike, Naomi                   Pike, Naomi
## Graves, Franklin W J Graves, Franklin W J
## Reed, James Jr.           Reed, James Jr.
## Breen, Patrick             Breen, Patrick
## Reed, Virginia E.       Reed, Virginia E.
t <- ggbarplot(random20, x = "names", y = "age", 
              color = "black",
              fill = "survived",
              palette = "jco",
              sort.val = "desc",
              sort.by.groups = FALSE,
              x.text.angle = 90
             ) +
ggtitle("20 Random Donner Members Age and Survival")
t

blue = died, yellow = survived

Question 7

random20 <- Donner[sample(nrow(Donner), 20), ]
random20 <- random20 %>%
mutate(names = row.names(random20)) %>%
mutate(survived = factor(survived))
random20
##                        family age    sex survived      death
## Breen, Patrick          Breen  51   Male        1       <NA>
## Williams, Baylis        Other  24   Male        0 1846-12-15
## Donner, Georgia Ann    Donner   4 Female        1       <NA>
## Graves, Elizabeth      Graves   1 Female        1       <NA>
## Donner, George         Donner  62   Male        0 1847-03-18
## Reed, James              Reed  46   Male        1       <NA>
## Donner, Francis E.     Donner   6 Female        1       <NA>
## Donner, Tamsen         Donner  44 Female        0 1847-03-28
## Stanton, Charles        Other  35   Male        0 1846-12-23
## Snyder, John            Other  25   Male        0 1846-10-05
## Graves, Lovina         Graves  12 Female        1       <NA>
## Hook, Solomon           Other  14   Male        1       <NA>
## Reed, Virginia E.        Reed  13 Female        1       <NA>
## Murphy, Mary M.     MurFosPik  14 Female        1       <NA>
## Dolan, Patrick          Other  40   Male        0 1846-12-27
## Reinhardt, Joseph       Other  30   Male        0 1846-12-21
## Graves, William        Graves  18   Male        1       <NA>
## Donner, Samuel         Donner   4   Male        0 1846-12-21
## Pike, William       MurFosPik  25   Male        0 1846-10-20
## Antoine                 Other  23   Male        0 1846-12-29
##                                   names
## Breen, Patrick           Breen, Patrick
## Williams, Baylis       Williams, Baylis
## Donner, Georgia Ann Donner, Georgia Ann
## Graves, Elizabeth     Graves, Elizabeth
## Donner, George           Donner, George
## Reed, James                 Reed, James
## Donner, Francis E.   Donner, Francis E.
## Donner, Tamsen           Donner, Tamsen
## Stanton, Charles       Stanton, Charles
## Snyder, John               Snyder, John
## Graves, Lovina           Graves, Lovina
## Hook, Solomon             Hook, Solomon
## Reed, Virginia E.     Reed, Virginia E.
## Murphy, Mary M.         Murphy, Mary M.
## Dolan, Patrick           Dolan, Patrick
## Reinhardt, Joseph     Reinhardt, Joseph
## Graves, William         Graves, William
## Donner, Samuel           Donner, Samuel
## Pike, William             Pike, William
## Antoine                         Antoine
g <- ggbarplot(random20, x = "names", y = "age", 
              color = "black",
              fill = "survived",
              palette = "jco",
              sort.val = "asc",
              sort.by.groups = TRUE,
              x.text.angle = 90
             ) +
ggtitle("20 Random Donner Members Age and Survival")
g

blue = died, yellow = survived

Question 8

ggdotchart(combined_df, x = "family", y = "percent_fam_survived", 
          color = "family", palette = "jco", size = 10, label = round(combined_df$percent_fam_survived),
          font.label = list(color = "white", size = 9, vjust = 0.5),
          add = "segment", add.params = list(color = "grey", size = 1.5),
          position = position_dodge(0.5), ggtheme = theme_pubclean()) +
ggtitle("Family Survival Percentage")

Question 9

survivors_by_fam <- Donner %>%
count(family, survived) %>%
rename(total = n) %>%
filter(survived == 1)%>%
select(family, total)

mean_survived <- mean(survivors_by_fam$total)
std_dev_survived <- sd(survivors_by_fam$total)

new_survivors <- survivors_by_fam %>%
mutate(zscore = (total - mean_survived) / std_dev_survived) %>%
arrange(zscore)
new_survivors
##       family total     zscore
## 1       Eddy     1 -1.3664704
## 2   FosdWolf     2 -1.0068729
## 3   Keseberg     2 -1.0068729
## 4  McCutchen     2 -1.0068729
## 5  MurFosPik     6  0.4315170
## 6      Other     6  0.4315170
## 7       Reed     6  0.4315170
## 8     Donner     7  0.7911144
## 9     Graves     7  0.7911144
## 10     Breen     9  1.5103094
colors <- c("coral1", "dodgerblue")
z <- ggdotchart(new_survivors, x = "family", y = "zscore",
                color = ifelse(new_survivors$zscore > 0, colors[2], colors[1]), palette = colors, sorting = "ascending",
                add = "segments", add.params = list(color = "grey", size = 3),
                dot.size = 12, label = round(new_survivors$zscore, 2),
                font.label = list(color = "white", size = 9, vjust = 0.5), 
                ggtheme = theme_pubr()
               ) +
geom_hline(yintercept = 0, linetype = 2, color = "grey") + 
ggtitle("Family Survival by Zscore")
z

coral = zscore below 0, dodger blue = zscore above 0

Question 10

colors <- c("coral1", "dodgerblue")

h <- ggdotchart(new_survivors, x = "family", y = "zscore", 
               color = ifelse(new_survivors$zscore > 0, colors[2], colors[1]),
               palette = colors, sorting = "descending", rotate = TRUE,
                dot.size = 6,
                y.text.col = TRUE,
                ggtheme = theme_pubr()
    ) +
theme_cleveland() +
ggtitle("Cleveland Zscore of Family Survivors")
## Warning: Vectorized input to `element_text()` is not officially supported.
## ℹ Results may be unexpected or may change in future versions of ggplot2.
h

coral = zscore below 0, dodger blue = zscore above 0