# pacman::p_load(tidyverse, GGally, install = FALSE, update = FALSE)
library(tidyverse)
library(GGally)

Part 1

Question 5

Part (b)

\(\hat{y}=\hat{\beta}_0+\hat{\beta}_1\cdot\text{Time}+\hat{\beta}_2\cdot\text{SEX}\), where \(\hat{\beta}_1=2.7333\) and \(\hat{\beta}_2=6.6207\).

Part 2

Question 6

Part (a)

perm <- function(n, k) {
  # Number of ordered arrangements of n things chosen k at a time:
  # n! / (n - k)!
  numerator <- factorial(n)
  denominator <- factorial(n - k)
  numerator / denominator
}

# 10 things chosen 3 at a time: 10 * 9 * 8
perm(n = 10, k = 3)
## [1] 720

Part (b)

fib <- function(n) {
  # Return the n-th Fibonacci number, where fib(1) = fib(2) = 1.
  #
  # n: a single number >= 1 giving the position in the sequence.
  # Returns a numeric (double) scalar.
  # Stops with an error when n is not a single number >= 1.
  stopifnot(is.numeric(n), length(n) == 1, !is.na(n), n >= 1)

  previous <- 1
  current <- 1
  # Guard the loop: for n of 1 or 2, 3:n counts DOWN (3, 2, 1) rather than
  # being empty, so an unguarded loop would run with out-of-range indices.
  if (n > 2) {
    for (i in 3:n) {
      following <- previous + current
      previous <- current
      current <- following
    }
  }
  current
}

# fib(1)
# Sixth term of 1, 1, 2, 3, 5, 8
fib(n = 6)
## [1] 8

Part (c)

Reference for the PPV formula (derived from Bayes' theorem): https://epomedicine.com/medical-students/unforget-bayes-theorem-derived-ppv-and-npv/

ppv <- function(p, se, sp) {
  # Positive predictive value from Bayes' theorem.
  #   p  - prevalence
  #   se - sensitivity
  #   sp - specificity
  true_positive <- se * p
  false_positive <- (1 - sp) * (1 - p)
  true_positive / (true_positive + false_positive)
}

Part (d)

# Inputs for the worked example
p <- 0.01   # prevalence
se <- 0.98  # sensitivity
sp <- 0.95  # specificity

ppv(p = p, se = se, sp = sp)
## [1] 0.1652614

Question 7

# Load the Titanic data as a tibble.
# NOTE(review): absolute, machine-specific path; it will not resolve elsewhere.
titanic <- read_csv(file = "G:/My Drive/homework/Justin S/titanic.csv")

Part (a)

# Dimensions as c(rows, columns)
dim(titanic)
## [1] 2201    4
paste0(
  "The Titanic data set has ", nrow(titanic), " rows and ",
  ncol(titanic), " columns."
)
## [1] "The Titanic data set has 2201 rows and 4 columns."

Part (b)

# Head
# titanic[1:5, ]
# head(titanic, n = 5)
titanic %>% slice_head(n = 5)
## # A tibble: 5 x 4
##   Class Sex   Age   Survived
##   <chr> <chr> <chr> <chr>   
## 1 1st   Male  Child Yes     
## 2 1st   Male  Child Yes     
## 3 1st   Male  Child Yes     
## 4 1st   Male  Child Yes     
## 5 1st   Male  Child Yes
# Tail
# titanic[2197:2201, ]
# tail(titanic, n = 5)
titanic %>% slice_tail(n = 5)
## # A tibble: 5 x 4
##   Class Sex    Age   Survived
##   <chr> <chr>  <chr> <chr>   
## 1 Crew  Female Adult Yes     
## 2 Crew  Female Adult Yes     
## 3 Crew  Female Adult Yes     
## 4 Crew  Female Adult Yes     
## 5 Crew  Female Adult Yes
# Both
# titanic[c(1:5, 2197:2201), ]
# titanic %>% slice(1:5, 2197:2201)

Part (c)

# Number of survivors by sex
prop <- titanic %>%
  filter(Survived == "Yes") %>%
  count(Sex, name = "Survived")
prop
## # A tibble: 2 x 2
##   Sex    Survived
## * <chr>     <int>
## 1 Female      344
## 2 Male        367
# Male: share of all survivors who were male
# prop$Survived[2] / sum(prop$Survived)
with(titanic, sum(Sex == "Male" & Survived == "Yes") / sum(Survived == "Yes"))
## [1] 0.5161744
# Female: share of all survivors who were female
# prop$Survived[1] / sum(prop$Survived)
# 1 - prop$Survived[2] / sum(prop$Survived)
with(titanic, sum(Sex == "Female" & Survived == "Yes") / sum(Survived == "Yes"))
## [1] 0.4838256

Part (d)

# Convert every column to a factor so summary() reports level counts
titanic %>%
  mutate(across(.cols = everything(), .fns = as_factor)) %>%
  summary()
##   Class         Sex          Age       Survived  
##  1st :325   Male  :1731   Child: 109   Yes: 711  
##  2nd :285   Female: 470   Adult:2092   No :1490  
##  3rd :706                                        
##  Crew:885
# assocplot
# An association plot needs the full two-way contingency table of the raw
# observations. Counting survivors first and then calling table() on that
# summary cross-tabulates each level against its own count, which yields a
# 0/1 indicator table and a plot that shows no real association.
# col = c(positive residuals, negative residuals).

# Survival by passenger class
titanic %>%
  select(Survived, Class) %>%
  table() %>%
  assocplot(col = c("red", "green"))

# Survival by age group
titanic %>%
  select(Survived, Age) %>%
  table() %>%
  assocplot(col = c("red", "green"))

# mosaicplot
# Mosaic plots of the titanic data using the formula interface.

# Survived against Age
mosaicplot(Survived ~ Age, data = titanic)

# Survived against Class
mosaicplot(Survived ~ Class, data = titanic)

# Survived against Age and Class together
mosaicplot(Survived ~ Age + Class, data = titanic)

# ggpairs
# Pairwise plot matrix of Class, Age and Survived
ggpairs(data = select(titanic, Class, Age, Survived))

# facet_grid
# Bar charts of one variable, faceted by the other two (rows ~ columns).

# Survived, split by Age (rows) and Class (columns)
ggplot(titanic, aes(x = Survived)) + # fill = Sex
  geom_bar() +
  facet_grid(Age ~ Class, labeller = "label_both", scales = "free")

# Age, split by Survived (rows) and Class (columns)
ggplot(titanic, aes(x = Age)) +
  geom_bar() +
  facet_grid(Survived ~ Class, labeller = "label_both", scales = "free")

# Class, split by Survived (rows) and Age (columns)
ggplot(titanic, aes(x = Class)) +
  geom_bar() +
  facet_grid(Survived ~ Age, labeller = "label_both", scales = "free")

Part (e)

Survival rate of 3rd Class Males

# A survival RATE conditions on the group: survivors among 3rd-class males
# divided by the number of 3rd-class males. Dividing by nrow(titanic) instead
# gives the share of ALL passengers who were surviving 3rd-class males.
# NOTE(review): the output below assumes the standard 2201-row Titanic table
# (88 survivors of 510 third-class males); re-knit to confirm.
sum(titanic$Sex == "Male" & titanic$Class == "3rd" & titanic$Survived == "Yes") /
  sum(titanic$Sex == "Male" & titanic$Class == "3rd")
## [1] 0.172549
# titanic %>% filter(Sex == "Male", Class == "3rd") %>% summarise(rate = mean(Survived == "Yes"))