#I know it's bad to load all the packages like this, but it's the only way I could get the knit to work, so here we are.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
## 
##     extract
library(knitr)
library(ggplot2)
library(vcd)
## Loading required package: grid
library(vcdExtra)
## Loading required package: gnm
## 
## Attaching package: 'vcdExtra'
## The following object is masked from 'package:dplyr':
## 
##     summarise
library(ggsci)
library(ggpubr)

#Question 1

#This first chunk looks at the number of members per family

# Preview the first 60 rows of the Donner data set.
Donner %>% head(n = 60L)
##                         family age    sex survived      death
## Antoine                  Other  23   Male        0 1846-12-29
## Breen, Edward            Breen  13   Male        1       <NA>
## Breen, Margaret I.       Breen   1 Female        1       <NA>
## Breen, James             Breen   5   Male        1       <NA>
## Breen, John              Breen  14   Male        1       <NA>
## Breen, Mary              Breen  40 Female        1       <NA>
## Breen, Patrick           Breen  51   Male        1       <NA>
## Breen, Patrick Jr.       Breen   9   Male        1       <NA>
## Breen, Peter             Breen   3   Male        1       <NA>
## Breen, Simon             Breen   8   Male        1       <NA>
## Burger, Charles          Other  30   Male        0 1846-12-27
## Denton, John             Other  28   Male        0 1847-02-26
## Dolan, Patrick           Other  40   Male        0 1846-12-27
## Donner, Elitha Cumi     Donner  13 Female        1       <NA>
## Donner, Eliza Poor      Donner   3 Female        1       <NA>
## Donner, Elizabeth       Donner  45 Female        0 1847-03-14
## Donner, Francis E.      Donner   6 Female        1       <NA>
## Donner, George          Donner  62   Male        0 1847-03-18
## Donner, George Jr.      Donner   9   Male        1       <NA>
## Donner, Georgia Ann     Donner   4 Female        1       <NA>
## Donner, Isaac           Donner   5   Male        0 1847-03-06
## Donner, Jacob           Donner  65   Male        0 1846-12-21
## Donner, Leanna          Donner  11 Female        1       <NA>
## Donner, Lewis           Donner   3   Male        0 1847-02-14
## Donner, Mary            Donner   7 Female        1       <NA>
## Donner, Samuel          Donner   4   Male        0 1846-12-21
## Donner, Tamsen          Donner  44 Female        0 1847-03-28
## Eddy, Eleanor             Eddy  25 Female        0 1847-02-07
## Eddy, James               Eddy   3   Male        0 1847-03-13
## Eddy, Margaret            Eddy   1 Female        0 1847-02-04
## Eddy, William             Eddy  28   Male        1       <NA>
## Elliot, Milton           Other  28   Male        0 1847-02-09
## Fosdick, Jay          FosdWolf  23   Male        0 1847-01-18
## Fosdick, Sarah        FosdWolf  22 Female        1       <NA>
## Foster, Sarah        MurFosPik  23 Female        1       <NA>
## Foster, William      MurFosPik  28   Male        1       <NA>
## Foster, Jeremiah     MurFosPik   1   Male        0 1847-03-13
## Graves, Eleanor         Graves  15 Female        1       <NA>
## Graves, Elizabeth       Graves   1 Female        1       <NA>
## Graves, Elizabeth C.    Graves  47 Female        0 1847-03-12
## Graves, Franklin W      Graves  57   Male        0 1846-12-27
## Graves, Franklin W J    Graves   5   Male        0 1847-03-12
## Graves, Jonathan        Graves   7   Male        1       <NA>
## Graves, Lovina          Graves  12 Female        1       <NA>
## Graves, Mary Ann        Graves  20 Female        1       <NA>
## Graves, Nancy           Graves   9 Female        1       <NA>
## Graves, William         Graves  18   Male        1       <NA>
## Halloran, Luke           Other  25   Male        0 1846-08-25
## Hardkoop, Mr.            Other  60   Male        0 1846-10-10
## Herron, William          Other  25   Male        1       <NA>
## Hook, William            Other  12   Male        0 1847-02-28
## Hook, Solomon            Other  14   Male        1       <NA>
## James, Noah              Other  20   Male        1       <NA>
## Keseberg, Ada         Keseberg   3 Female        0 1847-02-25
## Keseberg, Lewis       Keseberg  32   Male        1       <NA>
## Keseberg, Lewis Jr.   Keseberg   1   Male        0 1847-01-24
## Keseberg, Phillipine  Keseberg  32 Female        1       <NA>
## Keyes, Sarah              Reed  70 Female        0 1846-05-29
## Luis                     Other  27   Male        0 1847-01-18
## McCutchen, Almanda   McCutchen  24 Female        1       <NA>
# Tally party members by family.
count(Donner, family)
##       family  n
## 1      Breen  9
## 2     Donner 14
## 3       Eddy  4
## 4   FosdWolf  4
## 5     Graves 10
## 6   Keseberg  4
## 7  McCutchen  3
## 8  MurFosPik 12
## 9      Other 23
## 10      Reed  7

#Another simple code chunk looks at the number of males and females; the two below it look at the eldest women and the youngest men in the group

# Tally party members by sex.
count(Donner, sex)
##      sex  n
## 1 Female 35
## 2   Male 55
# Five oldest women in the party.
Donner %>%
  filter(sex == "Female") %>%
  select(family, age, sex) %>%
  slice_max(age, n = 5)
##                      family age    sex
## Keyes, Sarah           Reed  70 Female
## Graves, Elizabeth C. Graves  47 Female
## Donner, Elizabeth    Donner  45 Female
## Donner, Tamsen       Donner  44 Female
## Breen, Mary           Breen  40 Female
# Five youngest men in the party.
Donner %>%
  filter(sex == "Male") %>%
  select(family, age, sex) %>%
  slice_min(age, n = 5)
##                        family age  sex
## Foster, Jeremiah    MurFosPik   1 Male
## Keseberg, Lewis Jr.  Keseberg   1 Male
## Breen, Peter            Breen   3 Male
## Donner, Lewis          Donner   3 Male
## Eddy, James              Eddy   3 Male

#The code below looks at when people started dying, so you can see that a few people died early on, but things started to pick up in October of 1846

# List everyone ordered by date of death.
Donner %>%
  select(family, age, death) %>%
  arrange(death)
##                         family age      death
## Keyes, Sarah              Reed  70 1846-05-29
## Halloran, Luke           Other  25 1846-08-25
## Snyder, John             Other  25 1846-10-05
## Wolfinger, Mr.        FosdWolf  24 1846-10-08
## Hardkoop, Mr.            Other  60 1846-10-10
## Pike, William        MurFosPik  25 1846-10-20
## Williams, Baylis         Other  24 1846-12-15
## Donner, Jacob           Donner  65 1846-12-21
## Donner, Samuel          Donner   4 1846-12-21
## Reinhardt, Joseph        Other  30 1846-12-21
## Shoemaker, Samuel        Other  25 1846-12-21
## Smith, James             Other  25 1846-12-21
## Stanton, Charles         Other  35 1846-12-23
## Burger, Charles          Other  30 1846-12-27
## Dolan, Patrick           Other  40 1846-12-27
## Graves, Franklin W      Graves  57 1846-12-27
## Murphy, Lemuel       MurFosPik  12 1846-12-27
## Antoine                  Other  23 1846-12-29
## Fosdick, Jay          FosdWolf  23 1847-01-18
## Luis                     Other  27 1847-01-18
## Salvador                 Other  23 1847-01-18
## Keseberg, Lewis Jr.   Keseberg   1 1847-01-24
## Murphy, John Landrum MurFosPik  15 1847-01-31
## McCutchen, Harriet   McCutchen   1 1847-02-02
## Eddy, Margaret            Eddy   1 1847-02-04
## Eddy, Eleanor             Eddy  25 1847-02-07
## Spitzer, Augustus        Other  30 1847-02-08
## Elliot, Milton           Other  28 1847-02-09
## Donner, Lewis           Donner   3 1847-02-14
## Pike, Catherine      MurFosPik   1 1847-02-20
## Keseberg, Ada         Keseberg   3 1847-02-25
## Denton, John             Other  28 1847-02-26
## Hook, William            Other  12 1847-02-28
## Donner, Isaac           Donner   5 1847-03-06
## Graves, Elizabeth C.    Graves  47 1847-03-12
## Graves, Franklin W J    Graves   5 1847-03-12
## Murphy, Lavina       MurFosPik  36 1847-03-12
## Eddy, James               Eddy   3 1847-03-13
## Foster, Jeremiah     MurFosPik   1 1847-03-13
## Donner, Elizabeth       Donner  45 1847-03-14
## Donner, George          Donner  62 1847-03-18
## Donner, Tamsen          Donner  44 1847-03-28
## Breen, Edward            Breen  13       <NA>
## Breen, Margaret I.       Breen   1       <NA>
## Breen, James             Breen   5       <NA>
## Breen, John              Breen  14       <NA>
## Breen, Mary              Breen  40       <NA>
## Breen, Patrick           Breen  51       <NA>
## Breen, Patrick Jr.       Breen   9       <NA>
## Breen, Peter             Breen   3       <NA>
## Breen, Simon             Breen   8       <NA>
## Donner, Elitha Cumi     Donner  13       <NA>
## Donner, Eliza Poor      Donner   3       <NA>
## Donner, Francis E.      Donner   6       <NA>
## Donner, George Jr.      Donner   9       <NA>
## Donner, Georgia Ann     Donner   4       <NA>
## Donner, Leanna          Donner  11       <NA>
## Donner, Mary            Donner   7       <NA>
## Eddy, William             Eddy  28       <NA>
## Fosdick, Sarah        FosdWolf  22       <NA>
## Foster, Sarah        MurFosPik  23       <NA>
## Foster, William      MurFosPik  28       <NA>
## Graves, Eleanor         Graves  15       <NA>
## Graves, Elizabeth       Graves   1       <NA>
## Graves, Jonathan        Graves   7       <NA>
## Graves, Lovina          Graves  12       <NA>
## Graves, Mary Ann        Graves  20       <NA>
## Graves, Nancy           Graves   9       <NA>
## Graves, William         Graves  18       <NA>
## Herron, William          Other  25       <NA>
## Hook, Solomon            Other  14       <NA>
## James, Noah              Other  20       <NA>
## Keseberg, Lewis       Keseberg  32       <NA>
## Keseberg, Phillipine  Keseberg  32       <NA>
## McCutchen, Almanda   McCutchen  24       <NA>
## McCutchen, William   McCutchen  30       <NA>
## Miller, Hiram O.         Other  30       <NA>
## Murphy, Mary M.      MurFosPik  14       <NA>
## Murphy, Simon Peter  MurFosPik   8       <NA>
## Pike, Harriet        MurFosPik  21       <NA>
## Pike, Naomi          MurFosPik   2       <NA>
## Reed, James               Reed  46       <NA>
## Reed, James Jr.           Reed   6       <NA>
## Reed, Margaret            Reed  32       <NA>
## Reed, Martha Jane         Reed   9       <NA>
## Reed, Thomas Keyes        Reed   4       <NA>
## Reed, Virginia E.         Reed  13       <NA>
## Trubode, Jean B.         Other  23       <NA>
## Williams, Eliza          Other  25       <NA>
## Wolfinger, Doris      FosdWolf  20       <NA>

#These last two lines show us how many people were under the age of 18 and how many of them survived vs. did not make it

# Everyone under 18, with their survival flag.
Donner %>%
  filter(age < 18) %>%
  select(family, age, survived)
##                         family age survived
## Breen, Edward            Breen  13        1
## Breen, Margaret I.       Breen   1        1
## Breen, James             Breen   5        1
## Breen, John              Breen  14        1
## Breen, Patrick Jr.       Breen   9        1
## Breen, Peter             Breen   3        1
## Breen, Simon             Breen   8        1
## Donner, Elitha Cumi     Donner  13        1
## Donner, Eliza Poor      Donner   3        1
## Donner, Francis E.      Donner   6        1
## Donner, George Jr.      Donner   9        1
## Donner, Georgia Ann     Donner   4        1
## Donner, Isaac           Donner   5        0
## Donner, Leanna          Donner  11        1
## Donner, Lewis           Donner   3        0
## Donner, Mary            Donner   7        1
## Donner, Samuel          Donner   4        0
## Eddy, James               Eddy   3        0
## Eddy, Margaret            Eddy   1        0
## Foster, Jeremiah     MurFosPik   1        0
## Graves, Eleanor         Graves  15        1
## Graves, Elizabeth       Graves   1        1
## Graves, Franklin W J    Graves   5        0
## Graves, Jonathan        Graves   7        1
## Graves, Lovina          Graves  12        1
## Graves, Nancy           Graves   9        1
## Hook, William            Other  12        0
## Hook, Solomon            Other  14        1
## Keseberg, Ada         Keseberg   3        0
## Keseberg, Lewis Jr.   Keseberg   1        0
## McCutchen, Harriet   McCutchen   1        0
## Murphy, John Landrum MurFosPik  15        0
## Murphy, Lemuel       MurFosPik  12        0
## Murphy, Mary M.      MurFosPik  14        1
## Murphy, Simon Peter  MurFosPik   8        1
## Pike, Catherine      MurFosPik   1        0
## Pike, Naomi          MurFosPik   2        1
## Reed, James Jr.           Reed   6        1
## Reed, Martha Jane         Reed   9        1
## Reed, Thomas Keyes        Reed   4        1
## Reed, Virginia E.         Reed  13        1
# Survival tally for everyone under 18.
Donner %>%
  filter(age < 18) %>%
  count(survived)
##   survived  n
## 1        0 14
## 2        1 27

#Question 2

#For this question I used the cbind function to combine two separate data frames and performed simple math on the values in the tables to display the family name, the total number of survivors from the family, and the percentage of the family that survived.

# Survivors per family. Weighting the count by the 0/1 `survived` flag (instead
# of filtering to survivors first) keeps a row for every family, even one with
# no survivors, so SD always lines up row-for-row with TD below.
SD <- Donner %>% count(family, wt = survived)

# Total members per family.
TD <- Donner %>% count(family)

# Percentage of each family that survived, as a whole percent. The ratio is
# scaled to 0-100 *before* rounding; rounding the ratio to one decimal first
# would snap every value to the nearest 10% (e.g. 1/4 would show as 20).
Percent <- cbind(SD[1], round(100 * SD[-1] / TD[-1]))

TotMem <- cbind(SD[1], SD[2], Percent[2])
colnames(TotMem) <- c("Family", "NumberSurvived", "PercentSurvived")

TotMem
##       Family NumberSurvived PercentSurvived
## 1      Breen              9             100
## 2     Donner              7              50
## 3       Eddy              1              25
## 4   FosdWolf              2              50
## 5     Graves              7              70
## 6   Keseberg              2              50
## 7  McCutchen              2              67
## 8  MurFosPik              6              50
## 9      Other              6              26
## 10      Reed              6              86
SD
##       family n
## 1      Breen 9
## 2     Donner 7
## 3       Eddy 1
## 4   FosdWolf 2
## 5     Graves 7
## 6   Keseberg 2
## 7  McCutchen 2
## 8  MurFosPik 6
## 9      Other 6
## 10      Reed 6

#Question 3

#For this question I used one of the data frames I created in the previous question (SD) and followed the steps in the example pie chart to create the new data frame (NewDonner) and a pie chart to go along with it. I think that the numbers being in the chart and in the position they are in is awkward, but I am not sure how to change it.

# Survivors per family.
SD <- Donner %>%
  filter(survived == 1) %>%
  count(family)

# Largest groups first, with each family's share of all survivors (prop, in
# percent) and the midpoint of its slice along the cumulative share (lab.ypos).
NewDonner <- SD %>%
  arrange(desc(n)) %>%
  mutate(
    prop = round(n * 100 / sum(n), 1),
    lab.ypos = cumsum(prop) - 0.5 * prop
  )

head(NewDonner)
##      family n prop lab.ypos
## 1     Breen 9 18.8     9.40
## 2    Donner 7 14.6    26.10
## 3    Graves 7 14.6    40.70
## 4 MurFosPik 6 12.5    54.25
## 5     Other 6 12.5    66.75
## 6      Reed 6 12.5    79.25
# Pie chart of survivors by family; slices are sized by `prop` and labelled
# with the raw survivor count `n`.
ggpie(
  NewDonner,
  x = "prop",
  label = "n",
  lab.pos = "in",
  lab.font = list(color = "white"),
  fill = "family",
  color = "white",
  palette = "jco",
  legend = "right"
)

#Question 4

#For this question I used dplyr to calculate the total number of people who survived and the total number of those who died in a 2x2 data frame. I definitely took a roundabout way, with too much table manipulation, but I got there in the end.

# One-row summary (Survived, Died), transposed into a two-row matrix whose row
# names are the two status labels.
SurvivorSum <- Donner %>%
  summarize(Survived = sum(survived), Died = n() - sum(survived)) %>%
  t()

# Move the status labels into their own column and reset the row names. The
# count column is passed unnamed, so it ends up named "SurvivorSum".
SurvivorSum <- data.frame(
  Status = row.names(SurvivorSum),
  SurvivorSum,
  row.names = NULL
)

SurvivorSum
##     Status SurvivorSum
## 1 Survived          48
## 2     Died          42

#Question 5

#This is just a simple bar graph of the data we computed in the question above; I did not want to do anything fancy with it.

# Bar chart of the survived/died totals.
ggplot(data = SurvivorSum, mapping = aes(x = Status, y = SurvivorSum)) +
  geom_col(width = 0.6) +
  labs(y = "Count")

#Question 6

#There are many plots attached to this question, so I will describe all of them at the end. However, to get this data set I first had to move the names of the individuals from row names to their own column, as well as change survived from an integer to a factor. The last thing I did was take a random sample of 20 from the entire data frame, so these graphs will look different every time you run the code. #Now to the plots: the first plot is my favorite way of showing what you asked for in question 6. I think that having the names readable on the y axis is best, whereas in the second plot they are less legible due to their orientation. The second plot is much closer to what you asked for in the test question. The third plot is the most informative for me because it shows the age distribution of everyone in the party and whether they survived or not. From the last plot we can tell that many of the elderly and young died. We can also tell that there were hardly any people over 45 in the party, which is something we forget about today with longer lifespans; back in the 1800s people did not live as long, so the 70-year-old in the party would have been considered ancient. The colors are the ones R automatically assigned, and I left them because I think red is a good color for dead and blue for survived. The red does stand out a little from the blue and they are a little bright, but overall I think they are acceptable. I tried changing the alpha but that also looked wonky. I might mess with them more if I had more time and wanted it to look better, however.

# Copy of Donner with each person's name moved from the row names into a
# leading `name` column (row names reset to 1..n), and `survived` as a factor
# so ggplot treats it as a discrete fill.
MyDonner <- data.frame(name = rownames(Donner), Donner, row.names = NULL)
MyDonner$survived <- as.factor(MyDonner$survived)

# Random sample of 20 people; deliberately unseeded, so it differs on each run.
GraphDonner <- MyDonner[sample.int(nrow(MyDonner), 20), ]

head(GraphDonner, n = 20)
##                    name    family age    sex survived      death
## 63     Miller, Hiram O.     Other  30   Male        1       <NA>
## 31        Eddy, William      Eddy  28   Male        1       <NA>
## 82         Smith, James     Other  25   Male        0 1846-12-21
## 88      Williams, Eliza     Other  25 Female        1       <NA>
## 74      Reed, James Jr.      Reed   6   Male        1       <NA>
## 87     Williams, Baylis     Other  24   Male        0 1846-12-15
## 9          Breen, Peter     Breen   3   Male        1       <NA>
## 30       Eddy, Margaret      Eddy   1 Female        0 1847-02-04
## 28        Eddy, Eleanor      Eddy  25 Female        0 1847-02-07
## 43     Graves, Jonathan    Graves   7   Male        1       <NA>
## 32       Elliot, Milton     Other  28   Male        0 1847-02-09
## 80             Salvador     Other  23   Male        0 1847-01-18
## 12         Denton, John     Other  28   Male        0 1847-02-26
## 55      Keseberg, Lewis  Keseberg  32   Male        1       <NA>
## 85     Stanton, Charles     Other  35   Male        0 1846-12-23
## 40 Graves, Elizabeth C.    Graves  47 Female        0 1847-03-12
## 60   McCutchen, Almanda McCutchen  24 Female        1       <NA>
## 77   Reed, Thomas Keyes      Reed   4   Male        1       <NA>
## 41   Graves, Franklin W    Graves  57   Male        0 1846-12-27
## 54        Keseberg, Ada  Keseberg   3 Female        0 1847-02-25
# Plot 1: horizontal bars, one per sampled person, ordered by age.
ggplot(
  data = GraphDonner,
  mapping = aes(x = age, y = reorder(name, -age), fill = survived)
) +
  geom_col() +
  labs(y = "Name (Last, First)") +
  scale_fill_discrete(name = "Status", labels = c("Died", "Survived"))

# Plot 2: the same data as vertical bars, with the names rotated 90 degrees.
ggplot(
  data = GraphDonner,
  mapping = aes(x = reorder(name, -age), y = age, fill = survived)
) +
  geom_col() +
  labs(x = "Name (Last, First)") +
  scale_fill_discrete(name = "Status", labels = c("Died", "Survived")) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# Plot 3: age distribution of the whole party, filled by survival status.
ggplot(data = MyDonner, mapping = aes(x = age, fill = survived)) +
  geom_bar() +
  scale_fill_discrete(name = "Status", labels = c("Died", "Survived"))

#Question 7

#This question got me. I spent a long time (at least 2 hours) looking for a way to group the bars by survived and then arrange them by age, but I found nothing. A reverse image search got me the code for the example in the test question, but that threw an error whenever I ran it. It is in the comment at the bottom of this paragraph, so maybe you can tell me where I went wrong with it. The plots I have below were the closest I could come, but it is extremely junky doing it that way and does not keep the data ordered how I want it. I saw some people saying you should order it using dplyr first, but that didn't seem to work for me either.

#Extra code that I tried but it failed: #ggbarplot(GraphDonner, x = "age", y = "name", fill = "survived", color = "white", palette = "jco", sort.val = "asc", sort.by.groups = TRUE, x.text.angle = 90)

# Copy of Donner with names moved from the row names into a `name` column, plus
# `alive`: a numeric copy of the 0/1 `survived` flag kept for ordering, since
# `survived` itself is converted to a factor for the fill scale.
MyDonner <- data.frame(
  name = rownames(Donner),
  Donner,
  alive = Donner$survived,
  row.names = NULL
)
MyDonner$survived <- as.factor(MyDonner$survived)

head(MyDonner, n = 20)
##                   name family age    sex survived      death alive
## 1              Antoine  Other  23   Male        0 1846-12-29     0
## 2        Breen, Edward  Breen  13   Male        1       <NA>     1
## 3   Breen, Margaret I.  Breen   1 Female        1       <NA>     1
## 4         Breen, James  Breen   5   Male        1       <NA>     1
## 5          Breen, John  Breen  14   Male        1       <NA>     1
## 6          Breen, Mary  Breen  40 Female        1       <NA>     1
## 7       Breen, Patrick  Breen  51   Male        1       <NA>     1
## 8   Breen, Patrick Jr.  Breen   9   Male        1       <NA>     1
## 9         Breen, Peter  Breen   3   Male        1       <NA>     1
## 10        Breen, Simon  Breen   8   Male        1       <NA>     1
## 11     Burger, Charles  Other  30   Male        0 1846-12-27     0
## 12        Denton, John  Other  28   Male        0 1847-02-26     0
## 13      Dolan, Patrick  Other  40   Male        0 1846-12-27     0
## 14 Donner, Elitha Cumi Donner  13 Female        1       <NA>     1
## 15  Donner, Eliza Poor Donner   3 Female        1       <NA>     1
## 16   Donner, Elizabeth Donner  45 Female        0 1847-03-14     0
## 17  Donner, Francis E. Donner   6 Female        1       <NA>     1
## 18      Donner, George Donner  62   Male        0 1847-03-18     0
## 19  Donner, George Jr. Donner   9   Male        1       <NA>     1
## 20 Donner, Georgia Ann Donner   4 Female        1       <NA>     1
# Random sample of 20 people; deliberately unseeded, so it differs on each run.
GraphDonner <- MyDonner[sample.int(nrow(MyDonner), 20), ]

GraphDonner
##                    name    family age    sex survived      death alive
## 79    Reinhardt, Joseph     Other  30   Male        0 1846-12-21     0
## 60   McCutchen, Almanda McCutchen  24 Female        1       <NA>     1
## 52        Hook, Solomon     Other  14   Male        1       <NA>     1
## 48       Halloran, Luke     Other  25   Male        0 1846-08-25     0
## 86     Trubode, Jean B.     Other  23   Male        1       <NA>     1
## 85     Stanton, Charles     Other  35   Male        0 1846-12-23     0
## 74      Reed, James Jr.      Reed   6   Male        1       <NA>     1
## 63     Miller, Hiram O.     Other  30   Male        1       <NA>     1
## 72        Pike, William MurFosPik  25   Male        0 1846-10-20     0
## 41   Graves, Franklin W    Graves  57   Male        0 1846-12-27     0
## 19   Donner, George Jr.    Donner   9   Male        1       <NA>     1
## 18       Donner, George    Donner  62   Male        0 1847-03-18     0
## 16    Donner, Elizabeth    Donner  45 Female        0 1847-03-14     0
## 26       Donner, Samuel    Donner   4   Male        0 1846-12-21     0
## 20  Donner, Georgia Ann    Donner   4 Female        1       <NA>     1
## 71          Pike, Naomi MurFosPik   2 Female        1       <NA>     1
## 1               Antoine     Other  23   Male        0 1846-12-29     0
## 73          Reed, James      Reed  46   Male        1       <NA>     1
## 22        Donner, Jacob    Donner  65   Male        0 1846-12-21     0
## 40 Graves, Elizabeth C.    Graves  47 Female        0 1847-03-12     0
# Bars grouped by survival status and, within each status, ordered by age.
# reorder(name, alive) sorts on status alone, which leaves the bars inside each
# group in alphabetical order. Here the axis order is fixed explicitly by
# building the factor levels from order(alive, age): died first, then survived,
# each group running from youngest to oldest.

# Horizontal version: names readable on the y axis.
ggplot(
  GraphDonner,
  aes(
    x = age,
    y = factor(name, levels = name[order(alive, age)]),
    fill = survived
  )
) +
  geom_col() +
  ylab("Name (Last, First)") +
  scale_fill_discrete(name = "Status", labels = c("Died", "Survived"))

# Vertical version: same ordering, names rotated 90 degrees on the x axis.
ggplot(
  GraphDonner,
  aes(
    x = factor(name, levels = name[order(alive, age)]),
    y = age,
    fill = survived
  )
) +
  geom_col() +
  xlab("Name (Last, First)") +
  scale_fill_discrete(name = "Status", labels = c("Died", "Survived")) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

#Question 8

#For this question I used much of the same code from question 2 to create the data set for the plot. Then I made the plot; I am not the biggest fan of the color palette, but it works and I am not 100% sure how to change it, so there's that. I figured the color should have a purpose, so I mapped it to the number of survivors per family, which meant I had to change that variable's type to a factor instead of an integer because I did not want a color gradient.

# Survivors per family. Weighting the count by the 0/1 `survived` flag keeps a
# row for every family (even one with no survivors), so SD lines up
# row-for-row with TD below.
SD <- Donner %>% count(family, wt = survived)

# Total members per family.
TD <- Donner %>% count(family)

# Whole-percent survival rate. Scale to 0-100 before rounding; rounding the
# ratio to one decimal first would snap every value to the nearest 10%.
Percent <- cbind(SD[1], round(100 * SD[-1] / TD[-1]))

TotMem <- cbind(SD[1], SD[2], Percent[2])
colnames(TotMem) <- c("Family", "NumberSurvived", "PercentSurvived")
# Factor, so the point colour is a discrete legend rather than a gradient.
TotMem$NumberSurvived <- as.factor(TotMem$NumberSurvived)

# Lollipop chart: thin grey stem, coloured point, percent printed on the point.
ggplot(TotMem, aes(x = reorder(Family, -PercentSurvived), y = PercentSurvived)) +
  geom_col(width = 0.1, color = "grey", fill = "grey") +
  geom_point(size = 4, aes(color = NumberSurvived)) +
  geom_text(aes(label = PercentSurvived), color = "white", size = 2) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  xlab("Family") +
  ylab("Survival %")

#Question 9

#For this question I again cannibalized some of the code from question 2, again, to help with the calculation of the z-score. The plot I messed around with for a while and I am quite pleased with how it turned out. As you can see from the z-scores there are not any extreme outliers, but there also are not that many values close to 0.

# Survivors per family. Weighting the count by the 0/1 `survived` flag keeps a
# row for every family (even one with no survivors), so SD lines up
# row-for-row with TD below.
SD <- Donner %>% count(family, wt = survived)

# Total members per family.
TD <- Donner %>% count(family)

# Whole-percent survival rate. Scale to 0-100 before rounding; rounding the
# ratio to one decimal first would snap every value to the nearest 10%.
Percent <- cbind(SD[1], round(100 * SD[-1] / TD[-1]))

# Z-score of each family's survivor count relative to the other families.
num <- SD[[2]]

M <- mean(num)
D <- sd(num)

ZSc <- (num - M) / D
ZSc <- signif(ZSc, digits = 3)

FamData <- cbind(SD[1], SD[2], Percent[2], ZSc)
colnames(FamData) <- c("Family", "NumberSurvived", "PercentSurvived", "ZScore")
FamData <- FamData[order(ZSc), ]

FamData
##       Family NumberSurvived PercentSurvived ZScore
## 3       Eddy              1              25 -1.370
## 4   FosdWolf              2              50 -1.010
## 6   Keseberg              2              50 -1.010
## 7  McCutchen              2              67 -1.010
## 8  MurFosPik              6              50  0.432
## 9      Other              6              26  0.432
## 10      Reed              6              86  0.432
## 2     Donner              7              50  0.791
## 5     Graves              7              70  0.791
## 1      Breen              9             100  1.510
# Lollipop chart of z-scores by family: grey stems, points coloured by the
# sign of the z-score, a reference line at zero, and the value printed on each
# point.
ggplot(
  data = FamData,
  mapping = aes(x = reorder(Family, ZScore), y = ZScore, color = ZScore > 0)
) +
  geom_col(width = 0.1, color = "grey", fill = "grey") +
  geom_point(size = 6, show.legend = FALSE) +
  labs(x = "Family", y = "Z-Score") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  geom_hline(yintercept = 0) +
  geom_text(aes(label = ZScore), color = "black", size = 2)

#Question 10

#I essentially copied and pasted the first 20 lines of code from the last question into this question. I do think that the way I colored the y-axis labels is unsustainable and there has to be an easier way to do it, but the way I have it works. Again, I am pleased with the plot, but there is a lot of blank space in the middle, so I like the plot from question 9 better.

# Survivors per family. Weighting the count by the 0/1 `survived` flag keeps a
# row for every family (even one with no survivors), so SD lines up
# row-for-row with TD below.
SD <- Donner %>% count(family, wt = survived)

# Total members per family.
TD <- Donner %>% count(family)

# Whole-percent survival rate. Scale to 0-100 before rounding; rounding the
# ratio to one decimal first would snap every value to the nearest 10%.
Percent <- cbind(SD[1], round(100 * SD[-1] / TD[-1]))

# Z-score of each family's survivor count relative to the other families.
num <- SD[[2]]

M <- mean(num)
D <- sd(num)

ZSc <- (num - M) / D
ZSc <- signif(ZSc, digits = 3)

FamData <- cbind(SD[1], SD[2], Percent[2], ZSc)
colnames(FamData) <- c("Family", "NumberSurvived", "PercentSurvived", "ZScore")
FamData <- FamData[order(ZSc), ]

# Axis-label colours, one per family, in the same (z-score) order as the rows
# of FamData. The column is referenced by its full name: `FamData$ZSc` only
# resolves through `$` partial matching, which breaks silently as soon as
# another column starting with "ZSc" is added.
a <- ifelse(FamData$ZScore < 0, "red", "lightblue")

# Horizontal dot plot of z-scores with a reference line at zero.
# NOTE: passing a colour vector to element_text() is not officially supported
# by ggplot2 and emits a warning; it is kept because the packages loaded here
# offer no supported alternative.
ggplot(FamData, aes(ZScore, reorder(Family, ZScore), color = ZScore > 0)) +
  geom_point(size = 6, show.legend = FALSE) +
  ylab("Family") +
  xlab("Z-Score") +
  geom_vline(xintercept = 0) +
  geom_text(aes(label = ZScore), color = "black", size = 2) +
  theme(axis.text.y = element_text(hjust = 1, colour = a))
## Warning: Vectorized input to `element_text()` is not officially supported.
## ℹ Results may be unexpected or may change in future versions of ggplot2.