#I know it's bad to load all the packages like this, but it's the only way I could get the knit to work, so here we are.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
##
## extract
library(knitr)
library(ggplot2)
library(vcd)
## Loading required package: grid
library(vcdExtra)
## Loading required package: gnm
##
## Attaching package: 'vcdExtra'
## The following object is masked from 'package:dplyr':
##
## summarise
library(ggsci)
library(ggpubr)
#This first chunk looks at the number of members per family
# Preview the first 60 rows of the Donner data (one row per party member).
Donner %>% head(n = 60L)
## family age sex survived death
## Antoine Other 23 Male 0 1846-12-29
## Breen, Edward Breen 13 Male 1 <NA>
## Breen, Margaret I. Breen 1 Female 1 <NA>
## Breen, James Breen 5 Male 1 <NA>
## Breen, John Breen 14 Male 1 <NA>
## Breen, Mary Breen 40 Female 1 <NA>
## Breen, Patrick Breen 51 Male 1 <NA>
## Breen, Patrick Jr. Breen 9 Male 1 <NA>
## Breen, Peter Breen 3 Male 1 <NA>
## Breen, Simon Breen 8 Male 1 <NA>
## Burger, Charles Other 30 Male 0 1846-12-27
## Denton, John Other 28 Male 0 1847-02-26
## Dolan, Patrick Other 40 Male 0 1846-12-27
## Donner, Elitha Cumi Donner 13 Female 1 <NA>
## Donner, Eliza Poor Donner 3 Female 1 <NA>
## Donner, Elizabeth Donner 45 Female 0 1847-03-14
## Donner, Francis E. Donner 6 Female 1 <NA>
## Donner, George Donner 62 Male 0 1847-03-18
## Donner, George Jr. Donner 9 Male 1 <NA>
## Donner, Georgia Ann Donner 4 Female 1 <NA>
## Donner, Isaac Donner 5 Male 0 1847-03-06
## Donner, Jacob Donner 65 Male 0 1846-12-21
## Donner, Leanna Donner 11 Female 1 <NA>
## Donner, Lewis Donner 3 Male 0 1847-02-14
## Donner, Mary Donner 7 Female 1 <NA>
## Donner, Samuel Donner 4 Male 0 1846-12-21
## Donner, Tamsen Donner 44 Female 0 1847-03-28
## Eddy, Eleanor Eddy 25 Female 0 1847-02-07
## Eddy, James Eddy 3 Male 0 1847-03-13
## Eddy, Margaret Eddy 1 Female 0 1847-02-04
## Eddy, William Eddy 28 Male 1 <NA>
## Elliot, Milton Other 28 Male 0 1847-02-09
## Fosdick, Jay FosdWolf 23 Male 0 1847-01-18
## Fosdick, Sarah FosdWolf 22 Female 1 <NA>
## Foster, Sarah MurFosPik 23 Female 1 <NA>
## Foster, William MurFosPik 28 Male 1 <NA>
## Foster, Jeremiah MurFosPik 1 Male 0 1847-03-13
## Graves, Eleanor Graves 15 Female 1 <NA>
## Graves, Elizabeth Graves 1 Female 1 <NA>
## Graves, Elizabeth C. Graves 47 Female 0 1847-03-12
## Graves, Franklin W Graves 57 Male 0 1846-12-27
## Graves, Franklin W J Graves 5 Male 0 1847-03-12
## Graves, Jonathan Graves 7 Male 1 <NA>
## Graves, Lovina Graves 12 Female 1 <NA>
## Graves, Mary Ann Graves 20 Female 1 <NA>
## Graves, Nancy Graves 9 Female 1 <NA>
## Graves, William Graves 18 Male 1 <NA>
## Halloran, Luke Other 25 Male 0 1846-08-25
## Hardkoop, Mr. Other 60 Male 0 1846-10-10
## Herron, William Other 25 Male 1 <NA>
## Hook, William Other 12 Male 0 1847-02-28
## Hook, Solomon Other 14 Male 1 <NA>
## James, Noah Other 20 Male 1 <NA>
## Keseberg, Ada Keseberg 3 Female 0 1847-02-25
## Keseberg, Lewis Keseberg 32 Male 1 <NA>
## Keseberg, Lewis Jr. Keseberg 1 Male 0 1847-01-24
## Keseberg, Phillipine Keseberg 32 Female 1 <NA>
## Keyes, Sarah Reed 70 Female 0 1846-05-29
## Luis Other 27 Male 0 1847-01-18
## McCutchen, Almanda McCutchen 24 Female 1 <NA>
# Number of party members recorded for each family group.
count(Donner, family)
## family n
## 1 Breen 9
## 2 Donner 14
## 3 Eddy 4
## 4 FosdWolf 4
## 5 Graves 10
## 6 Keseberg 4
## 7 McCutchen 3
## 8 MurFosPik 12
## 9 Other 23
## 10 Reed 7
#Another simple code chunk looks at the number of males and females; the two below it look at the oldest women and the youngest men in the group
# Number of females and males in the party.
count(Donner, sex)
## sex n
## 1 Female 35
## 2 Male 55
# The five oldest women: keep the female rows, trim to the columns of
# interest, then take the five largest ages.
Donner %>%
  filter(sex == "Female") %>%
  select(family, age, sex) %>%
  slice_max(age, n = 5)
## family age sex
## Keyes, Sarah Reed 70 Female
## Graves, Elizabeth C. Graves 47 Female
## Donner, Elizabeth Donner 45 Female
## Donner, Tamsen Donner 44 Female
## Breen, Mary Breen 40 Female
# The five youngest men: keep the male rows, trim to the columns of
# interest, then take the five smallest ages.
Donner %>%
  filter(sex == "Male") %>%
  select(family, age, sex) %>%
  slice_min(age, n = 5)
## family age sex
## Foster, Jeremiah MurFosPik 1 Male
## Keseberg, Lewis Jr. Keseberg 1 Male
## Breen, Peter Breen 3 Male
## Donner, Lewis Donner 3 Male
## Eddy, James Eddy 3 Male
#The code below looks at when people started dying; you can see that a few people died early on, but things started to pick up in October of 1846
# Everyone in the party sorted by date of death; rows with no death date
# (NA) are placed last by arrange().
Donner %>%
  arrange(death) %>%
  select(family, age, death)
## family age death
## Keyes, Sarah Reed 70 1846-05-29
## Halloran, Luke Other 25 1846-08-25
## Snyder, John Other 25 1846-10-05
## Wolfinger, Mr. FosdWolf 24 1846-10-08
## Hardkoop, Mr. Other 60 1846-10-10
## Pike, William MurFosPik 25 1846-10-20
## Williams, Baylis Other 24 1846-12-15
## Donner, Jacob Donner 65 1846-12-21
## Donner, Samuel Donner 4 1846-12-21
## Reinhardt, Joseph Other 30 1846-12-21
## Shoemaker, Samuel Other 25 1846-12-21
## Smith, James Other 25 1846-12-21
## Stanton, Charles Other 35 1846-12-23
## Burger, Charles Other 30 1846-12-27
## Dolan, Patrick Other 40 1846-12-27
## Graves, Franklin W Graves 57 1846-12-27
## Murphy, Lemuel MurFosPik 12 1846-12-27
## Antoine Other 23 1846-12-29
## Fosdick, Jay FosdWolf 23 1847-01-18
## Luis Other 27 1847-01-18
## Salvador Other 23 1847-01-18
## Keseberg, Lewis Jr. Keseberg 1 1847-01-24
## Murphy, John Landrum MurFosPik 15 1847-01-31
## McCutchen, Harriet McCutchen 1 1847-02-02
## Eddy, Margaret Eddy 1 1847-02-04
## Eddy, Eleanor Eddy 25 1847-02-07
## Spitzer, Augustus Other 30 1847-02-08
## Elliot, Milton Other 28 1847-02-09
## Donner, Lewis Donner 3 1847-02-14
## Pike, Catherine MurFosPik 1 1847-02-20
## Keseberg, Ada Keseberg 3 1847-02-25
## Denton, John Other 28 1847-02-26
## Hook, William Other 12 1847-02-28
## Donner, Isaac Donner 5 1847-03-06
## Graves, Elizabeth C. Graves 47 1847-03-12
## Graves, Franklin W J Graves 5 1847-03-12
## Murphy, Lavina MurFosPik 36 1847-03-12
## Eddy, James Eddy 3 1847-03-13
## Foster, Jeremiah MurFosPik 1 1847-03-13
## Donner, Elizabeth Donner 45 1847-03-14
## Donner, George Donner 62 1847-03-18
## Donner, Tamsen Donner 44 1847-03-28
## Breen, Edward Breen 13 <NA>
## Breen, Margaret I. Breen 1 <NA>
## Breen, James Breen 5 <NA>
## Breen, John Breen 14 <NA>
## Breen, Mary Breen 40 <NA>
## Breen, Patrick Breen 51 <NA>
## Breen, Patrick Jr. Breen 9 <NA>
## Breen, Peter Breen 3 <NA>
## Breen, Simon Breen 8 <NA>
## Donner, Elitha Cumi Donner 13 <NA>
## Donner, Eliza Poor Donner 3 <NA>
## Donner, Francis E. Donner 6 <NA>
## Donner, George Jr. Donner 9 <NA>
## Donner, Georgia Ann Donner 4 <NA>
## Donner, Leanna Donner 11 <NA>
## Donner, Mary Donner 7 <NA>
## Eddy, William Eddy 28 <NA>
## Fosdick, Sarah FosdWolf 22 <NA>
## Foster, Sarah MurFosPik 23 <NA>
## Foster, William MurFosPik 28 <NA>
## Graves, Eleanor Graves 15 <NA>
## Graves, Elizabeth Graves 1 <NA>
## Graves, Jonathan Graves 7 <NA>
## Graves, Lovina Graves 12 <NA>
## Graves, Mary Ann Graves 20 <NA>
## Graves, Nancy Graves 9 <NA>
## Graves, William Graves 18 <NA>
## Herron, William Other 25 <NA>
## Hook, Solomon Other 14 <NA>
## James, Noah Other 20 <NA>
## Keseberg, Lewis Keseberg 32 <NA>
## Keseberg, Phillipine Keseberg 32 <NA>
## McCutchen, Almanda McCutchen 24 <NA>
## McCutchen, William McCutchen 30 <NA>
## Miller, Hiram O. Other 30 <NA>
## Murphy, Mary M. MurFosPik 14 <NA>
## Murphy, Simon Peter MurFosPik 8 <NA>
## Pike, Harriet MurFosPik 21 <NA>
## Pike, Naomi MurFosPik 2 <NA>
## Reed, James Reed 46 <NA>
## Reed, James Jr. Reed 6 <NA>
## Reed, Margaret Reed 32 <NA>
## Reed, Martha Jane Reed 9 <NA>
## Reed, Thomas Keyes Reed 4 <NA>
## Reed, Virginia E. Reed 13 <NA>
## Trubode, Jean B. Other 23 <NA>
## Williams, Eliza Other 25 <NA>
## Wolfinger, Doris FosdWolf 20 <NA>
#These last two lines show us how many people were under the age of 18 and how many of them survived versus did not make it
# Everyone younger than 18, with their family and survival flag.
Donner %>%
  filter(age < 18) %>%
  select(family, age, survived)
## family age survived
## Breen, Edward Breen 13 1
## Breen, Margaret I. Breen 1 1
## Breen, James Breen 5 1
## Breen, John Breen 14 1
## Breen, Patrick Jr. Breen 9 1
## Breen, Peter Breen 3 1
## Breen, Simon Breen 8 1
## Donner, Elitha Cumi Donner 13 1
## Donner, Eliza Poor Donner 3 1
## Donner, Francis E. Donner 6 1
## Donner, George Jr. Donner 9 1
## Donner, Georgia Ann Donner 4 1
## Donner, Isaac Donner 5 0
## Donner, Leanna Donner 11 1
## Donner, Lewis Donner 3 0
## Donner, Mary Donner 7 1
## Donner, Samuel Donner 4 0
## Eddy, James Eddy 3 0
## Eddy, Margaret Eddy 1 0
## Foster, Jeremiah MurFosPik 1 0
## Graves, Eleanor Graves 15 1
## Graves, Elizabeth Graves 1 1
## Graves, Franklin W J Graves 5 0
## Graves, Jonathan Graves 7 1
## Graves, Lovina Graves 12 1
## Graves, Nancy Graves 9 1
## Hook, William Other 12 0
## Hook, Solomon Other 14 1
## Keseberg, Ada Keseberg 3 0
## Keseberg, Lewis Jr. Keseberg 1 0
## McCutchen, Harriet McCutchen 1 0
## Murphy, John Landrum MurFosPik 15 0
## Murphy, Lemuel MurFosPik 12 0
## Murphy, Mary M. MurFosPik 14 1
## Murphy, Simon Peter MurFosPik 8 1
## Pike, Catherine MurFosPik 1 0
## Pike, Naomi MurFosPik 2 1
## Reed, James Jr. Reed 6 1
## Reed, Martha Jane Reed 9 1
## Reed, Thomas Keyes Reed 4 1
## Reed, Virginia E. Reed 13 1
# Among those younger than 18: how many died (0) and how many survived (1).
Donner %>%
  filter(age < 18) %>%
  count(survived)
## survived n
## 1 0 14
## 2 1 27
#Question 2
#For this question I counted the survivors and the total members of each family, lined the two tables up by family name, and performed simple math on the counts to display the family name, total number of survivors from the family, and percentage of the family that survived.
# Survivors per family. A family with no survivors would be absent from SD.
SD <- Donner %>% filter(survived == 1) %>% count(family)
# Total members per family.
TD <- Donner %>% count(family)
# Look every family up by name instead of relying on row position, so the
# counts stay aligned even if SD has fewer rows than TD; a family missing
# from SD has 0 survivors.
survivors <- SD$n[match(TD$family, SD$family)]
survivors[is.na(survivors)] <- 0L
# Scale to percent BEFORE rounding: round(x, 1) * 100 rounds the proportion
# first and therefore only reports the nearest 10%.
TotMem <- data.frame(
  Family = TD$family,
  NumberSurvived = survivors,
  PercentSurvived = round(100 * survivors / TD$n, 1)
)
TotMem
## Family NumberSurvived PercentSurvived
## 1 Breen 9 100.0
## 2 Donner 7 50.0
## 3 Eddy 1 25.0
## 4 FosdWolf 2 50.0
## 5 Graves 7 70.0
## 6 Keseberg 2 50.0
## 7 McCutchen 2 66.7
## 8 MurFosPik 6 50.0
## 9 Other 6 26.1
## 10 Reed 6 85.7
# Show the survivors-per-family table.
print(SD)
## family n
## 1 Breen 9
## 2 Donner 7
## 3 Eddy 1
## 4 FosdWolf 2
## 5 Graves 7
## 6 Keseberg 2
## 7 McCutchen 2
## 8 MurFosPik 6
## 9 Other 6
## 10 Reed 6
#Question 3
#For this question I used one of the data frames I created in the previous question (SD) and followed the steps in the example pie chart to create the new data frame (NewDonner) and a pie chart to go along with it. I think that the numbers being in the chart and in the position they are in is awkward, but I am not sure how to change it.
# Survivors per family, largest group first. prop is each family's share of
# all survivors in percent; lab.ypos is the midpoint of that family's slice
# on the cumulative-percent scale.
SD <- count(filter(Donner, survived == 1), family)
NewDonner <- SD %>%
  arrange(desc(n)) %>%
  mutate(
    prop = round(100 * n / sum(n), 1),
    lab.ypos = cumsum(prop) - prop / 2
  )
head(NewDonner)
## family n prop lab.ypos
## 1 Breen 9 18.8 9.40
## 2 Donner 7 14.6 26.10
## 3 Graves 7 14.6 40.70
## 4 MurFosPik 6 12.5 54.25
## 5 Other 6 12.5 66.75
## 6 Reed 6 12.5 79.25
# Pie chart of survivors by family: slice size is the family's share (prop),
# the label inside each slice is the survivor count (n).
ggpie(
  data = NewDonner,
  x = "prop",
  fill = "family",
  color = "white",
  palette = "jco",
  label = "n",
  lab.pos = "in",
  lab.font = list(color = "white"),
  legend = "right"
)
#Question 4
#For this question I used dplyr to calculate the total number of people who survived and the total number of those who died, then laid the two counts out as a two-row data frame (one row per status) so they can be plotted in the next question.
# summarize() (spelled with a z) is dplyr's verb; the summarise() spelling is
# masked by vcdExtra in this session.
totals <- Donner %>%
  summarize(Survived = sum(survived), Died = n() - sum(survived))
# One row per status; the count column is named SurvivorSum because the bar
# chart for question 5 maps y to that name.
SurvivorSum <- data.frame(
  Status = names(totals),
  SurvivorSum = unlist(totals, use.names = FALSE)
)
SurvivorSum
## Status SurvivorSum
## 1 Survived 48
## 2 Died 42
#Question 5
#This is just a simple bar graph of the data we summarized in the question above; I did not want to do anything fancy with it.
# Bar chart of the two status counts from the SurvivorSum table.
ggplot(data = SurvivorSum, mapping = aes(x = Status, y = SurvivorSum)) +
  labs(y = "Count") +
  geom_col(width = 0.6)
#Question 6
#There are many plots attached to this question, so I will describe all of them at the end. To get this data set I first had to move the names of the individuals from the row names into their own column, as well as change survived from an integer to a factor. The last thing I did was take a random sample of 20 from the entire data frame, so these graphs will look different every time you run the code. #Now to the plots: the first plot is my favorite way of showing what you asked for in question 6; I think that having the names readable on the y axis is best, whereas in the second plot they are less legible due to their orientation. The second plot is much closer to what you asked for in the test question. The third plot is the most informative for me because it shows the age distribution of everyone in the party and whether they survived or not. From the last plot we can tell that many of the elderly and young died. We can also tell that there were hardly any people over 45 in the party, which is something we forget about today with longer lifespans; back in the 1800s people did not live as long, so the 70 year old in the party would have been considered ancient. The colors are the ones R automatically assigned, and I left them because I think red is a good color for dead and blue for survived; the red does stand out a little from the blue and they are a little bright, but overall I think they are acceptable. I tried changing the alpha but that also looked wonky. I might mess with them more if I had more time and wanted it to look better, however.
# Copy the data with each person's name moved out of the row names into a
# regular "name" column, and turn the 0/1 survival flag into a factor so
# ggplot treats it as a category.
MyDonner <- data.frame(name = rownames(Donner), Donner)
rownames(MyDonner) <- NULL
MyDonner$survived <- as.factor(MyDonner$survived)
# Random subset of 20 people; no seed is set, so each run draws a new sample.
sampled_rows <- sample(nrow(MyDonner), 20)
GraphDonner <- MyDonner[sampled_rows, ]
head(GraphDonner, n = 20)
## name family age sex survived death
## 63 Miller, Hiram O. Other 30 Male 1 <NA>
## 31 Eddy, William Eddy 28 Male 1 <NA>
## 82 Smith, James Other 25 Male 0 1846-12-21
## 88 Williams, Eliza Other 25 Female 1 <NA>
## 74 Reed, James Jr. Reed 6 Male 1 <NA>
## 87 Williams, Baylis Other 24 Male 0 1846-12-15
## 9 Breen, Peter Breen 3 Male 1 <NA>
## 30 Eddy, Margaret Eddy 1 Female 0 1847-02-04
## 28 Eddy, Eleanor Eddy 25 Female 0 1847-02-07
## 43 Graves, Jonathan Graves 7 Male 1 <NA>
## 32 Elliot, Milton Other 28 Male 0 1847-02-09
## 80 Salvador Other 23 Male 0 1847-01-18
## 12 Denton, John Other 28 Male 0 1847-02-26
## 55 Keseberg, Lewis Keseberg 32 Male 1 <NA>
## 85 Stanton, Charles Other 35 Male 0 1846-12-23
## 40 Graves, Elizabeth C. Graves 47 Female 0 1847-03-12
## 60 McCutchen, Almanda McCutchen 24 Female 1 <NA>
## 77 Reed, Thomas Keyes Reed 4 Male 1 <NA>
## 41 Graves, Franklin W Graves 57 Male 0 1846-12-27
## 54 Keseberg, Ada Keseberg 3 Female 0 1847-02-25
# Legend labels keyed by factor level. Together with drop = FALSE this keeps
# both statuses in the legend with the right label and a stable colour, even
# if a random sample happens to contain only survivors or only deaths
# (positional labels would be attached to the wrong level in that case).
status_labels <- c("0" = "Died", "1" = "Survived")
# Plot 1: horizontal bars, one per sampled person, ordered by age.
ggplot(GraphDonner, aes(x = age, y = reorder(name, -age), fill = survived)) +
  geom_col() +
  ylab("Name (Last, First)") +
  scale_fill_discrete(name = "Status", labels = status_labels, drop = FALSE)
# Plot 2: the same bars drawn vertically, with the names rotated to fit.
ggplot(GraphDonner, aes(x = reorder(name, -age), y = age, fill = survived)) +
  geom_col() +
  xlab("Name (Last, First)") +
  scale_fill_discrete(name = "Status", labels = status_labels, drop = FALSE) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# Plot 3: number of people at each age across the whole party, stacked by
# status.
ggplot(MyDonner, aes(age, fill = survived)) +
  geom_bar() +
  scale_fill_discrete(name = "Status", labels = status_labels, drop = FALSE)
#Question 7
#This question got me. I spent a long time (at least 2 hours) looking for a way to group the bars by survived and then arrange them by age, but I found nothing. A reverse image search got me the code for the example in the test question, but that threw an error whenever I ran it. It is the comment at the bottom of this question, so maybe you can tell me where I went wrong with it. The plots I have below were the closest I could come, but it is extremely junky doing it that way and does not keep the data ordered how I want it. I saw some people saying you should order it using dplyr first, but that didn't seem to work for me either.
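#Extra code that I tried but it failed (as pasted it used curly quotes, which R does not accept as string delimiters; ggbarplot also expects the categorical variable in x and the numeric one in y) #ggbarplot(GraphDonner, x = "age", y = "name", fill = "survived", color = "white", palette = "jco", sort.val = "asc", sort.by.groups = TRUE, x.text.angle = 90)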
# Same preparation as in question 6, plus an extra "alive" column: a second
# copy of the survival flag that stays numeric (survived itself becomes a
# factor below), so it can be used as a sort key.
MyDonner <- data.frame(
  name = rownames(Donner),
  Donner,
  alive = Donner$survived
)
rownames(MyDonner) <- NULL
MyDonner$survived <- as.factor(MyDonner$survived)
head(MyDonner, n = 20)
## name family age sex survived death alive
## 1 Antoine Other 23 Male 0 1846-12-29 0
## 2 Breen, Edward Breen 13 Male 1 <NA> 1
## 3 Breen, Margaret I. Breen 1 Female 1 <NA> 1
## 4 Breen, James Breen 5 Male 1 <NA> 1
## 5 Breen, John Breen 14 Male 1 <NA> 1
## 6 Breen, Mary Breen 40 Female 1 <NA> 1
## 7 Breen, Patrick Breen 51 Male 1 <NA> 1
## 8 Breen, Patrick Jr. Breen 9 Male 1 <NA> 1
## 9 Breen, Peter Breen 3 Male 1 <NA> 1
## 10 Breen, Simon Breen 8 Male 1 <NA> 1
## 11 Burger, Charles Other 30 Male 0 1846-12-27 0
## 12 Denton, John Other 28 Male 0 1847-02-26 0
## 13 Dolan, Patrick Other 40 Male 0 1846-12-27 0
## 14 Donner, Elitha Cumi Donner 13 Female 1 <NA> 1
## 15 Donner, Eliza Poor Donner 3 Female 1 <NA> 1
## 16 Donner, Elizabeth Donner 45 Female 0 1847-03-14 0
## 17 Donner, Francis E. Donner 6 Female 1 <NA> 1
## 18 Donner, George Donner 62 Male 0 1847-03-18 0
## 19 Donner, George Jr. Donner 9 Male 1 <NA> 1
## 20 Donner, Georgia Ann Donner 4 Female 1 <NA> 1
# Draw a fresh random sample of 20 people (no seed, so it changes per run)
# and print it.
sampled_rows <- sample(nrow(MyDonner), 20)
GraphDonner <- MyDonner[sampled_rows, ]
print(GraphDonner)
## name family age sex survived death alive
## 79 Reinhardt, Joseph Other 30 Male 0 1846-12-21 0
## 60 McCutchen, Almanda McCutchen 24 Female 1 <NA> 1
## 52 Hook, Solomon Other 14 Male 1 <NA> 1
## 48 Halloran, Luke Other 25 Male 0 1846-08-25 0
## 86 Trubode, Jean B. Other 23 Male 1 <NA> 1
## 85 Stanton, Charles Other 35 Male 0 1846-12-23 0
## 74 Reed, James Jr. Reed 6 Male 1 <NA> 1
## 63 Miller, Hiram O. Other 30 Male 1 <NA> 1
## 72 Pike, William MurFosPik 25 Male 0 1846-10-20 0
## 41 Graves, Franklin W Graves 57 Male 0 1846-12-27 0
## 19 Donner, George Jr. Donner 9 Male 1 <NA> 1
## 18 Donner, George Donner 62 Male 0 1847-03-18 0
## 16 Donner, Elizabeth Donner 45 Female 0 1847-03-14 0
## 26 Donner, Samuel Donner 4 Male 0 1846-12-21 0
## 20 Donner, Georgia Ann Donner 4 Female 1 <NA> 1
## 71 Pike, Naomi MurFosPik 2 Female 1 <NA> 1
## 1 Antoine Other 23 Male 0 1846-12-29 0
## 73 Reed, James Reed 46 Male 1 <NA> 1
## 22 Donner, Jacob Donner 65 Male 0 1846-12-21 0
## 40 Graves, Elizabeth C. Graves 47 Female 0 1847-03-12 0
# Legend labels keyed by factor level; drop = FALSE keeps both statuses (and
# their colours) in the legend even if the sample contains only one of them.
status_labels <- c("0" = "Died", "1" = "Survived")
# Fix the bar order explicitly: group by status (alive = 0 first), then sort
# by age inside each group. reorder(name, alive) on its own only separated
# the two groups and left the bars within a group in alphabetical order.
OrderedDonner <- GraphDonner
bar_order <- order(OrderedDonner$alive, OrderedDonner$age)
OrderedDonner$name <- factor(
  OrderedDonner$name,
  levels = as.character(OrderedDonner$name)[bar_order]
)
# Horizontal version.
ggplot(OrderedDonner, aes(x = age, y = name, fill = survived)) +
  geom_col() +
  ylab("Name (Last, First)") +
  scale_fill_discrete(name = "Status", labels = status_labels, drop = FALSE)
# Vertical version with rotated names.
ggplot(OrderedDonner, aes(x = name, y = age, fill = survived)) +
  geom_col() +
  xlab("Name (Last, First)") +
  scale_fill_discrete(name = "Status", labels = status_labels, drop = FALSE) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
#Question 8
#For this question I used much of the same code from question 2 to create the data set for the plot. Then I made the plot; I am not the biggest fan of the color palette, but it works and I am not 100% sure how to change it, so there's that. I figured the color should have a purpose, so I mapped it to the number of survivors per family, which meant I had to change that variable type to a factor instead of an integer because I did not want a color gradient.
# Survivors per family (a family with no survivors would be absent) and
# total members per family.
SD <- Donner %>% filter(survived == 1) %>% count(family)
TD <- Donner %>% count(family)
# Align the two tables by family name rather than by row position; a family
# missing from SD has 0 survivors.
survivors <- SD$n[match(TD$family, SD$family)]
survivors[is.na(survivors)] <- 0L
# Scale to percent before rounding: round(x, 1) * 100 only keeps the nearest
# 10%. NumberSurvived is a factor so the point colour is a discrete legend
# rather than a gradient.
TotMem <- data.frame(
  Family = TD$family,
  NumberSurvived = as.factor(survivors),
  PercentSurvived = round(100 * survivors / TD$n, 1)
)
# Lollipop chart: thin grey stem, coloured head, families ordered from the
# highest to the lowest survival percentage. The label inside each head is
# shown as a whole percent so it fits in the point.
ggplot(TotMem, aes(x = reorder(Family, -PercentSurvived), y = PercentSurvived)) +
  geom_col(width = 0.1, color = "grey", fill = "grey") +
  geom_point(size = 4, aes(color = NumberSurvived)) +
  geom_text(aes(label = round(PercentSurvived)), color = "white", size = 2) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  xlab("Family") +
  ylab("Survival %")
#Question 9
#For this question I again cannibalized some of the code from question 2 to help with the calculation of the z-score. I messed around with the plot for a while and I am quite pleased with how it turned out. As you can see from the z-scores there are not any extreme outliers, but there also are not that many values close to 0.
# Survivors per family (a family with no survivors would be absent) and
# total members per family.
SD <- Donner %>% filter(survived == 1) %>% count(family)
TD <- Donner %>% count(family)
# Align by family name rather than row position; missing means 0 survivors.
num <- SD$n[match(TD$family, SD$family)]
num[is.na(num)] <- 0L
# Z-score of each family's survivor COUNT relative to the other families,
# kept to 3 significant digits.
M <- mean(num)
D <- sd(num)
ZSc <- signif((num - M) / D, digits = 3)
# Percent is scaled before rounding: round(x, 1) * 100 only keeps the
# nearest 10%.
FamData <- data.frame(
  Family = TD$family,
  NumberSurvived = num,
  PercentSurvived = round(100 * num / TD$n, 1),
  ZScore = ZSc
)
# Lowest z-score first.
FamData <- FamData[order(FamData$ZScore), ]
FamData
## Family NumberSurvived PercentSurvived ZScore
## 3 Eddy 1 25.0 -1.370
## 4 FosdWolf 2 50.0 -1.010
## 6 Keseberg 2 50.0 -1.010
## 7 McCutchen 2 66.7 -1.010
## 8 MurFosPik 6 50.0 0.432
## 9 Other 6 26.1 0.432
## 10 Reed 6 85.7 0.432
## 2 Donner 7 50.0 0.791
## 5 Graves 7 70.0 0.791
## 1 Breen 9 100.0 1.510
# Lollipop chart of the z-scores: families ordered by z-score, heads coloured
# by whether the score is above zero, a reference line at 0, and the score
# printed on each head.
ggplot(
  FamData,
  aes(x = reorder(Family, ZScore), y = ZScore, color = ZScore > 0)
) +
  geom_col(width = 0.1, color = "grey", fill = "grey") +
  geom_point(size = 6, show.legend = FALSE) +
  geom_hline(yintercept = 0) +
  geom_text(aes(label = ZScore), color = "black", size = 2) +
  labs(x = "Family", y = "Z-Score") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
#Question 10
#I essentially copied and pasted the first 20 lines of code from the last question into this question. I do think that the way I colored the y-axis labels is unsustainable and there has to be an easier way to do it, but the way I have it works. Again, I am pleased with the plot, but there is a lot of blank space in the middle, so I like the plot from question 9 better.
# Survivors per family (a family with no survivors would be absent) and
# total members per family.
SD <- Donner %>% filter(survived == 1) %>% count(family)
TD <- Donner %>% count(family)
# Align by family name rather than row position; missing means 0 survivors.
num <- SD$n[match(TD$family, SD$family)]
num[is.na(num)] <- 0L
# Percent is scaled before rounding: round(x, 1) * 100 only keeps the
# nearest 10%.
Percent <- data.frame(family = TD$family, n = round(100 * num / TD$n, 1))
# Z-score of each family's survivor count, kept to 3 significant digits.
M <- mean(num)
D <- sd(num)
ZSc <- signif((num - M) / D, digits = 3)
FamData <- data.frame(
  Family = TD$family,
  NumberSurvived = num,
  PercentSurvived = Percent$n,
  ZScore = ZSc
)
FamData <- FamData[order(FamData$ZScore), ]
# Axis-label colours, one per family in the order the y axis draws them.
# The column is referenced by its full name (FamData$ZSc only worked through
# partial matching of `$`), and the colours are looked up by family so they
# cannot drift from the axis order.
axis_families <- levels(reorder(FamData$Family, FamData$ZScore))
a <- ifelse(
  FamData$ZScore[match(axis_families, FamData$Family)] < 0,
  "red",
  "lightblue"
)
# Dot plot of the z-scores with a reference line at 0 and the score printed
# on each dot.
# NOTE(review): passing a vector of colours to element_text() is not
# officially supported by ggplot2 and emits a warning; it is kept because the
# packages loaded here offer no supported alternative.
ggplot(FamData, aes(ZScore, reorder(Family, ZScore), color = ZScore > 0)) +
  geom_point(size = 6, show.legend = FALSE) +
  ylab("Family") +
  xlab("Z-Score") +
  geom_vline(xintercept = 0) +
  geom_text(aes(label = ZScore), color = "black", size = 2) +
  theme(axis.text.y = element_text(hjust = 1, colour = a))
## Warning: Vectorized input to `element_text()` is not officially supported.
## ℹ Results may be unexpected or may change in future versions of ggplot2.