# load libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
##
## extract
library(knitr)
library(ggplot2)
library(ggsci)
library(ggpubr)
library(vcd)
## Loading required package: grid
library(vcdExtra)
## Loading required package: gnm
##
## Attaching package: 'vcdExtra'
## The following object is masked from 'package:dplyr':
##
## summarise
library(stringr)
head(Donner, n=15L)
## family age sex survived death
## Antoine Other 23 Male 0 1846-12-29
## Breen, Edward Breen 13 Male 1 <NA>
## Breen, Margaret I. Breen 1 Female 1 <NA>
## Breen, James Breen 5 Male 1 <NA>
## Breen, John Breen 14 Male 1 <NA>
## Breen, Mary Breen 40 Female 1 <NA>
## Breen, Patrick Breen 51 Male 1 <NA>
## Breen, Patrick Jr. Breen 9 Male 1 <NA>
## Breen, Peter Breen 3 Male 1 <NA>
## Breen, Simon Breen 8 Male 1 <NA>
## Burger, Charles Other 30 Male 0 1846-12-27
## Denton, John Other 28 Male 0 1847-02-26
## Dolan, Patrick Other 40 Male 0 1846-12-27
## Donner, Elitha Cumi Donner 13 Female 1 <NA>
## Donner, Eliza Poor Donner 3 Female 1 <NA>
# count of the family members in the named families
named_families <- Donner %>%
select(family) %>%
filter(family != "Other")
named_families %>%
count(family, sort = TRUE)
## family n
## 1 Donner 14
## 2 MurFosPik 12
## 3 Graves 10
## 4 Breen 9
## 5 Reed 7
## 6 Eddy 4
## 7 FosdWolf 4
## 8 Keseberg 4
## 9 McCutchen 3
# ratio of men to women
m_2_w <- Donner %>%
select(sex)
num_m <- m_2_w %>%
count(sex == "Male")
num_f <- m_2_w %>%
count(sex == "Female")
ratio <- num_m / num_f
ratio
## sex == "Male" n
## 1 NaN 0.6363636
## 2 1 1.5714286
This shows that the ratio of men to women is 1.5714 to 1.
55 males to 35 females
# get the amount of ppl younger than 18 years old
below18 <- Donner %>%
filter(age < 18)
below18
## family age sex survived death
## Breen, Edward Breen 13 Male 1 <NA>
## Breen, Margaret I. Breen 1 Female 1 <NA>
## Breen, James Breen 5 Male 1 <NA>
## Breen, John Breen 14 Male 1 <NA>
## Breen, Patrick Jr. Breen 9 Male 1 <NA>
## Breen, Peter Breen 3 Male 1 <NA>
## Breen, Simon Breen 8 Male 1 <NA>
## Donner, Elitha Cumi Donner 13 Female 1 <NA>
## Donner, Eliza Poor Donner 3 Female 1 <NA>
## Donner, Francis E. Donner 6 Female 1 <NA>
## Donner, George Jr. Donner 9 Male 1 <NA>
## Donner, Georgia Ann Donner 4 Female 1 <NA>
## Donner, Isaac Donner 5 Male 0 1847-03-06
## Donner, Leanna Donner 11 Female 1 <NA>
## Donner, Lewis Donner 3 Male 0 1847-02-14
## Donner, Mary Donner 7 Female 1 <NA>
## Donner, Samuel Donner 4 Male 0 1846-12-21
## Eddy, James Eddy 3 Male 0 1847-03-13
## Eddy, Margaret Eddy 1 Female 0 1847-02-04
## Foster, Jeremiah MurFosPik 1 Male 0 1847-03-13
## Graves, Eleanor Graves 15 Female 1 <NA>
## Graves, Elizabeth Graves 1 Female 1 <NA>
## Graves, Franklin W J Graves 5 Male 0 1847-03-12
## Graves, Jonathan Graves 7 Male 1 <NA>
## Graves, Lovina Graves 12 Female 1 <NA>
## Graves, Nancy Graves 9 Female 1 <NA>
## Hook, William Other 12 Male 0 1847-02-28
## Hook, Solomon Other 14 Male 1 <NA>
## Keseberg, Ada Keseberg 3 Female 0 1847-02-25
## Keseberg, Lewis Jr. Keseberg 1 Male 0 1847-01-24
## McCutchen, Harriet McCutchen 1 Female 0 1847-02-02
## Murphy, John Landrum MurFosPik 15 Male 0 1847-01-31
## Murphy, Lemuel MurFosPik 12 Male 0 1846-12-27
## Murphy, Mary M. MurFosPik 14 Female 1 <NA>
## Murphy, Simon Peter MurFosPik 8 Male 1 <NA>
## Pike, Catherine MurFosPik 1 Female 0 1847-02-20
## Pike, Naomi MurFosPik 2 Female 1 <NA>
## Reed, James Jr. Reed 6 Male 1 <NA>
## Reed, Martha Jane Reed 9 Female 1 <NA>
## Reed, Thomas Keyes Reed 4 Male 1 <NA>
## Reed, Virginia E. Reed 13 Female 1 <NA>
nrow(below18)
## [1] 41
There were 41 children below the age of 18 in the party.
# Most common lady first names
ladies <- Donner %>%
subset(sex == "Female")
lady_names <- rownames(ladies)
#lady_names
f_names <- gsub(".*, ", "", lady_names)
f_names_only <- gsub("\\ [A-z]*", "", f_names)
f_names_only <- gsub("\\.", "", f_names_only)
df <- data.frame(f_names_only)
table(unlist(df))
##
## Ada Almanda Catherine Doris Eleanor Elitha Eliza
## 1 1 1 1 2 1 2
## Elizabeth Francis Georgia Harriet Lavina Leanna Lovina
## 3 1 1 2 1 1 1
## Margaret Martha Mary Nancy Naomi Phillipine Sarah
## 3 1 4 1 1 1 3
## Tamsen Virginia
## 1 1
We can see, Mary was the most common female first name with 4, followed by Elizabeth, Margaret, and Sarah with 3 each.
# get average age of males vs females
males <- Donner %>%
filter(sex == "Male") %>%
select(age)
avg_m_age = mean(males$age)
print(paste("Average male age: ", avg_m_age))
## [1] "Average male age: 22.2909090909091"
females <- Donner %>%
filter(sex == "Female") %>%
select(age)
avg_f_age = mean(females$age)
print(paste("Average female age: ", avg_f_age))
## [1] "Average female age: 18.6285714285714"
Males of the party tended to be few years older.
# get mortality by gender
males <- Donner %>%
filter(sex == "Male") %>%
select(survived)
males_lived <- colSums(males == 1)
tot_m <- nrow(males)
percent_m_survived <- males_lived / tot_m
print(paste("Percent of men that survived: ", percent_m_survived *100))
## [1] "Percent of men that survived: 41.8181818181818"
females <- Donner %>%
filter(sex == "Female") %>%
select(survived)
females_lived <- colSums(females == 1)
tot_f = nrow(females)
percent_f_survived <- females_lived / tot_f
print(paste("Percent of women that survived: ", percent_f_survived*100))
## [1] "Percent of women that survived: 71.4285714285714"
So they appeared to value the survival of women over men. Makes sense with the traditional gender roles of the time. Also, women most likely had kids to take care of.
# get mortality by age
kids <- Donner %>%
filter(age < 18) %>%
select(survived)
kids_lived <- colSums(kids == 1)
tot_kids <- nrow(kids)
percent_kids_lived <- kids_lived / tot_kids
print(paste("Percent of kids that survived: ", percent_kids_lived *100))
## [1] "Percent of kids that survived: 65.8536585365854"
adults <- Donner %>%
filter(age >= 18) %>%
select(survived)
adults_lived <- colSums(adults == 1)
tot_adults <- nrow(adults)
percent_adults_lived <- adults_lived / tot_adults
print(paste("Percent of adults that survived: ", percent_adults_lived *100))
## [1] "Percent of adults that survived: 42.8571428571429"
So they tried their best to save as many children as possible. A majority of the adults died which makes sense as they put their children before themselves.
family_tot <- Donner %>%
count(family) %>%
rename(fam = family)
#mutate(percent_survived = total / fam_size)
family_survival <- Donner %>%
count(family, survived) %>%
rename(total = n) %>%
#mutate(percent_survived = n / family_tot) %>%
filter(survived == 1) %>%
select(family, total)
combined_df <- cbind(family_survival, family_tot) %>%
mutate(percent_fam_survived = (total / n)*100) %>%
select(family, total, percent_fam_survived)
combined_df
## family total percent_fam_survived
## 1 Breen 9 100.00000
## 2 Donner 7 50.00000
## 3 Eddy 1 25.00000
## 4 FosdWolf 2 50.00000
## 5 Graves 7 70.00000
## 6 Keseberg 2 50.00000
## 7 McCutchen 2 66.66667
## 8 MurFosPik 6 50.00000
## 9 Other 6 26.08696
## 10 Reed 6 85.71429
total = number of family members that survived
myDonner <- combined_df %>%
arrange(desc(total)) %>%
mutate(prop = round(total * 100 / sum(total), 1), lab.ypos = cumsum(prop) - 0.5 * prop)
myDonner
## family total percent_fam_survived prop lab.ypos
## 1 Breen 9 100.00000 18.8 9.40
## 2 Donner 7 50.00000 14.6 26.10
## 3 Graves 7 70.00000 14.6 40.70
## 4 MurFosPik 6 50.00000 12.5 54.25
## 5 Other 6 26.08696 12.5 66.75
## 6 Reed 6 85.71429 12.5 79.25
## 7 FosdWolf 2 50.00000 4.2 87.60
## 8 Keseberg 2 50.00000 4.2 91.80
## 9 McCutchen 2 66.66667 4.2 96.00
## 10 Eddy 1 25.00000 2.1 99.15
ggpie(
myDonner, x = "prop", label = "total",
lab.pos = "in", lab.font = list(color = "white"),
fill = "family", color = "white", label_info = "all", label_type = "circle",
label_split = NULL, label_size = 4, label_pos = "in", legend = "right",
palette = "jco"
) +
ggtitle("Number of Survivors by Family")
survived <- table(Donner$survived)
df_survived <- data.frame("Status" = c("Died", "Survived"),
"Count" = survived) %>%
rename(Count = Count.Freq) %>%
select(Status, Count)
df_survived
## Status Count
## 1 Died 42
## 2 Survived 48
q <- ggbarplot(df_survived, x = "Status", y = "Count",
color = "black", fill = "Status", palette = "jco",
legend = "right") +
ggtitle("Donner Survival")
q
42 died, 48 survived of the Donner party
random20 <- Donner[sample(nrow(Donner), 20), ]
random20 <- random20 %>%
mutate(names = row.names(random20)) %>%
mutate(survived = factor(survived))
random20
## family age sex survived death
## Donner, Leanna Donner 11 Female 1 <NA>
## Reed, Thomas Keyes Reed 4 Male 1 <NA>
## Reed, James Reed 46 Male 1 <NA>
## Pike, Harriet MurFosPik 21 Female 1 <NA>
## Donner, Tamsen Donner 44 Female 0 1847-03-28
## McCutchen, William McCutchen 30 Male 1 <NA>
## Donner, Elitha Cumi Donner 13 Female 1 <NA>
## McCutchen, Harriet McCutchen 1 Female 0 1847-02-02
## Graves, William Graves 18 Male 1 <NA>
## Spitzer, Augustus Other 30 Male 0 1847-02-08
## Keseberg, Lewis Keseberg 32 Male 1 <NA>
## Donner, Isaac Donner 5 Male 0 1847-03-06
## Dolan, Patrick Other 40 Male 0 1846-12-27
## Murphy, Lavina MurFosPik 36 Female 0 1847-03-12
## Burger, Charles Other 30 Male 0 1846-12-27
## Pike, Naomi MurFosPik 2 Female 1 <NA>
## Graves, Franklin W J Graves 5 Male 0 1847-03-12
## Reed, James Jr. Reed 6 Male 1 <NA>
## Breen, Patrick Breen 51 Male 1 <NA>
## Reed, Virginia E. Reed 13 Female 1 <NA>
## names
## Donner, Leanna Donner, Leanna
## Reed, Thomas Keyes Reed, Thomas Keyes
## Reed, James Reed, James
## Pike, Harriet Pike, Harriet
## Donner, Tamsen Donner, Tamsen
## McCutchen, William McCutchen, William
## Donner, Elitha Cumi Donner, Elitha Cumi
## McCutchen, Harriet McCutchen, Harriet
## Graves, William Graves, William
## Spitzer, Augustus Spitzer, Augustus
## Keseberg, Lewis Keseberg, Lewis
## Donner, Isaac Donner, Isaac
## Dolan, Patrick Dolan, Patrick
## Murphy, Lavina Murphy, Lavina
## Burger, Charles Burger, Charles
## Pike, Naomi Pike, Naomi
## Graves, Franklin W J Graves, Franklin W J
## Reed, James Jr. Reed, James Jr.
## Breen, Patrick Breen, Patrick
## Reed, Virginia E. Reed, Virginia E.
t <- ggbarplot(random20, x = "names", y = "age",
color = "black",
fill = "survived",
palette = "jco",
sort.val = "desc",
sort.by.groups = FALSE,
x.text.angle = 90
) +
ggtitle("20 Random Donner Members Age and Survival")
t
blue = died, yellow = survived
random20 <- Donner[sample(nrow(Donner), 20), ]
random20 <- random20 %>%
mutate(names = row.names(random20)) %>%
mutate(survived = factor(survived))
random20
## family age sex survived death
## Breen, Patrick Breen 51 Male 1 <NA>
## Williams, Baylis Other 24 Male 0 1846-12-15
## Donner, Georgia Ann Donner 4 Female 1 <NA>
## Graves, Elizabeth Graves 1 Female 1 <NA>
## Donner, George Donner 62 Male 0 1847-03-18
## Reed, James Reed 46 Male 1 <NA>
## Donner, Francis E. Donner 6 Female 1 <NA>
## Donner, Tamsen Donner 44 Female 0 1847-03-28
## Stanton, Charles Other 35 Male 0 1846-12-23
## Snyder, John Other 25 Male 0 1846-10-05
## Graves, Lovina Graves 12 Female 1 <NA>
## Hook, Solomon Other 14 Male 1 <NA>
## Reed, Virginia E. Reed 13 Female 1 <NA>
## Murphy, Mary M. MurFosPik 14 Female 1 <NA>
## Dolan, Patrick Other 40 Male 0 1846-12-27
## Reinhardt, Joseph Other 30 Male 0 1846-12-21
## Graves, William Graves 18 Male 1 <NA>
## Donner, Samuel Donner 4 Male 0 1846-12-21
## Pike, William MurFosPik 25 Male 0 1846-10-20
## Antoine Other 23 Male 0 1846-12-29
## names
## Breen, Patrick Breen, Patrick
## Williams, Baylis Williams, Baylis
## Donner, Georgia Ann Donner, Georgia Ann
## Graves, Elizabeth Graves, Elizabeth
## Donner, George Donner, George
## Reed, James Reed, James
## Donner, Francis E. Donner, Francis E.
## Donner, Tamsen Donner, Tamsen
## Stanton, Charles Stanton, Charles
## Snyder, John Snyder, John
## Graves, Lovina Graves, Lovina
## Hook, Solomon Hook, Solomon
## Reed, Virginia E. Reed, Virginia E.
## Murphy, Mary M. Murphy, Mary M.
## Dolan, Patrick Dolan, Patrick
## Reinhardt, Joseph Reinhardt, Joseph
## Graves, William Graves, William
## Donner, Samuel Donner, Samuel
## Pike, William Pike, William
## Antoine Antoine
g <- ggbarplot(random20, x = "names", y = "age",
color = "black",
fill = "survived",
palette = "jco",
sort.val = "asc",
sort.by.groups = TRUE,
x.text.angle = 90
) +
ggtitle("20 Random Donner Members Age and Survival")
g
blue = died, yellow = survived
ggdotchart(combined_df, x = "family", y = "percent_fam_survived",
color = "family", palette = "jco", size = 10, label = round(combined_df$percent_fam_survived),
font.label = list(color = "white", size = 9, vjust = 0.5),
add = "segment", add.params = list(color = "grey", size = 1.5),
position = position_dodge(0.5), ggtheme = theme_pubclean()) +
ggtitle("Family Survival Percentage")
survivors_by_fam <- Donner %>%
count(family, survived) %>%
rename(total = n) %>%
filter(survived == 1)%>%
select(family, total)
mean_survived <- mean(survivors_by_fam$total)
std_dev_survived <- sd(survivors_by_fam$total)
new_survivors <- survivors_by_fam %>%
mutate(zscore = (total - mean_survived) / std_dev_survived) %>%
arrange(zscore)
new_survivors
## family total zscore
## 1 Eddy 1 -1.3664704
## 2 FosdWolf 2 -1.0068729
## 3 Keseberg 2 -1.0068729
## 4 McCutchen 2 -1.0068729
## 5 MurFosPik 6 0.4315170
## 6 Other 6 0.4315170
## 7 Reed 6 0.4315170
## 8 Donner 7 0.7911144
## 9 Graves 7 0.7911144
## 10 Breen 9 1.5103094
colors <- c("coral1", "dodgerblue")
z <- ggdotchart(new_survivors, x = "family", y = "zscore",
color = ifelse(new_survivors$zscore > 0, colors[2], colors[1]), palette = colors, sorting = "ascending",
add = "segments", add.params = list(color = "grey", size = 3),
dot.size = 12, label = round(new_survivors$zscore, 2),
font.label = list(color = "white", size = 9, vjust = 0.5),
ggtheme = theme_pubr()
) +
geom_hline(yintercept = 0, linetype = 2, color = "grey") +
ggtitle("Family Survival by Zscore")
z
coral = zscore below 0, dodger blue = zscore above 0
colors <- c("coral1", "dodgerblue")
h <- ggdotchart(new_survivors, x = "family", y = "zscore",
color = ifelse(new_survivors$zscore > 0, colors[2], colors[1]),
palette = colors, sorting = "descending", rotate = TRUE,
dot.size = 6,
y.text.col = TRUE,
ggtheme = theme_pubr()
) +
theme_cleveland() +
ggtitle("Cleveland Zscore of Family Survivors")
## Warning: Vectorized input to `element_text()` is not officially supported.
## ℹ Results may be unexpected or may change in future versions of ggplot2.
h
coral = zscore below 0, dodger blue = zscore above 0