# pacman::p_load(tidyverse, GGally, install = FALSE, update = FALSE)
library(tidyverse)
library(GGally)
\(\hat{y}=\beta_0+\beta_1\cdot\text{Time}+\beta_2\cdot\text{SEX}\), where \(\beta_1=2.7333\) and \(\beta_2=6.6207\).
perm <- function(n, k) {
  # Number of ordered arrangements (permutations) of n things chosen k at a
  # time: n! / (n - k)! = n * (n - 1) * ... * (n - k + 1).
  #
  # n, k: non-negative whole numbers; vectors are recycled against each other.
  # Returns a numeric vector: 1 where k == 0 and 0 where k > n.
  stopifnot(
    is.numeric(n), is.numeric(k),
    all(n >= 0), all(k >= 0),
    all(n == round(n)), all(k == round(k))
  )
  # Multiply the k falling terms directly rather than dividing two factorials:
  # factorial() overflows to Inf for n > 170, which made the quotient NaN, and
  # factorial(n - k) is NaN whenever k > n.
  falling_product <- function(n_i, k_i) prod(n_i - k_i + seq_len(k_i))
  as.numeric(mapply(falling_product, n, k, USE.NAMES = FALSE))
}
# Example: ordered selections of 3 items out of 10.
perm(n = 10, k = 3)
## [1] 720
fib <- function(n) {
  # Return the n-th Fibonacci number, 1-indexed: 1, 1, 2, 3, 5, 8, ...
  #
  # n: a single whole number >= 1.
  # Returns a length-one numeric value.
  stopifnot(is.numeric(n), length(n) == 1, n >= 1, n == round(n))
  # The first two terms are the seeds. Returning early also keeps the loop
  # below from running for n < 3, where 3:n would count DOWN (3, 2, 1) and
  # fail on x[0] with "replacement has length zero".
  if (n <= 2) {
    return(1)
  }
  # Preallocate the whole sequence instead of growing it inside the loop.
  x <- numeric(n)
  x[1:2] <- 1
  for (i in seq.int(3, n)) {
    x[i] <- x[i - 2] + x[i - 1]
  }
  x[n]
}
fib(1)
## [1] 1
fib(6)
## [1] 8
Reference for deriving PPV and NPV from Bayes' theorem: https://epomedicine.com/medical-students/unforget-bayes-theorem-derived-ppv-and-npv/
ppv <- function(p, se, sp) {
  # Positive predictive value from Bayes' theorem:
  #   P(disease | positive test) = TP / (TP + FP)
  #
  # p:  prevalence
  # se: sensitivity
  # sp: specificity
  true_positive <- se * p
  false_positive <- (1 - sp) * (1 - p)
  true_positive / (true_positive + false_positive)
}
# Worked example: a rare condition screened with an accurate test.
p <- 0.01 # prevalence
se <- 0.98 # sensitivity
sp <- 0.95 # specificity
ppv(p = p, se = se, sp = sp)
## [1] 0.1652614
# Load the Titanic data. NOTE(review): the path is an absolute location on one
# machine's drive; the script will not run elsewhere without adjusting it.
titanic <- read_csv(file = "G:/My Drive/homework/Justin S/titanic.csv")
# Dimensions as c(rows, columns).
dim(titanic)
## [1] 2201 4
# The same dimensions reported as a sentence.
paste0(
  "The Titanic data set has ", nrow(titanic), " rows and ",
  ncol(titanic), " columns."
)
## [1] "The Titanic data set has 2201 rows and 4 columns."
# First five rows (equivalent alternatives kept for reference)
# titanic[1:5, ]
# head(titanic, n = 5)
titanic %>% slice_head(n = 5)
## # A tibble: 5 x 4
## Class Sex Age Survived
## <chr> <chr> <chr> <chr>
## 1 1st Male Child Yes
## 2 1st Male Child Yes
## 3 1st Male Child Yes
## 4 1st Male Child Yes
## 5 1st Male Child Yes
# Last five rows (equivalent alternatives kept for reference)
# titanic[2197:2201, ]
# tail(titanic, n = 5)
titanic %>% slice_tail(n = 5)
## # A tibble: 5 x 4
## Class Sex Age Survived
## <chr> <chr> <chr> <chr>
## 1 Crew Female Adult Yes
## 2 Crew Female Adult Yes
## 3 Crew Female Adult Yes
## 4 Crew Female Adult Yes
## 5 Crew Female Adult Yes
# First and last five rows together (not run)
# titanic[c(1:5, 2197:2201), ]
# titanic %>% slice(1:5, 2197:2201)
# Survivor counts by sex (one row per sex, count stored in `Survived`).
prop <- titanic %>%
  filter(Survived == "Yes") %>%
  count(Sex, name = "Survived")
prop
## # A tibble: 2 x 2
## Sex Survived
## * <chr> <int>
## 1 Female 344
## 2 Male 367
# Share of survivors who were male
# prop$Survived[2] / sum(prop$Survived)
with(titanic, sum(Sex == "Male" & Survived == "Yes") / sum(Survived == "Yes"))
## [1] 0.5161744
# Share of survivors who were female
# prop$Survived[1] / sum(prop$Survived)
# 1 - prop$Survived[2] / sum(prop$Survived)
with(titanic, sum(Sex == "Female" & Survived == "Yes") / sum(Survived == "Yes"))
## [1] 0.4838256
# Convert every column to a factor so summary() reports per-level counts.
summary(mutate(titanic, across(.cols = everything(), as_factor)))
## Class Sex Age Survived
## 1st :325 Male :1731 Child: 109 Yes: 711
## 2nd :285 Female: 470 Adult:2092 No :1490
## 3rd :706
## Crew:885
# assocplot
# An association plot needs the full two-way contingency table of counts.
# Calling table() on the summarised output of count() tabulated the summary
# rows themselves (every cell 0 or 1), so the plot carried no information
# about survival. Cross-tabulate the raw columns instead.
# col: first colour marks positive Pearson residuals, second marks negative.
# Survived x Class
titanic %>%
  select(Survived, Class) %>%
  table() %>%
  assocplot(col = c("red", "green"))
# Survived x Age
titanic %>%
  select(Survived, Age) %>%
  table() %>%
  assocplot(col = c("red", "green"))
# mosaicplot
# Draw one mosaic plot per formula: survival against age, against class, and
# against both together.
mosaic_formulas <- list(
  Survived ~ Age,
  Survived ~ Class,
  Survived ~ Age + Class
)
for (mosaic_formula in mosaic_formulas) {
  mosaicplot(mosaic_formula, data = titanic)
}
# ggpairs
# Pairwise plot matrix for the three categorical columns of interest.
ggpairs(select(titanic, Class, Age, Survived))
# facet_grid
# Three views of the same counts: each plot puts one variable on the x axis
# and facets by the other two (rows ~ cols), with free scales per panel.
# Survived counts, faceted by Age (rows) and Class (cols); optionally fill = Sex
ggplot(data = titanic, mapping = aes(x = Survived)) +
  geom_bar() +
  facet_grid(
    Age ~ Class,
    scales = "free", labeller = "label_both", margins = FALSE
  )
# Age counts, faceted by Survived (rows) and Class (cols)
ggplot(data = titanic, mapping = aes(x = Age)) +
  geom_bar() +
  facet_grid(
    Survived ~ Class,
    scales = "free", labeller = "label_both", margins = FALSE
  )
# Class counts, faceted by Survived (rows) and Age (cols)
ggplot(data = titanic, mapping = aes(x = Class)) +
  geom_bar() +
  facet_grid(
    Survived ~ Age,
    scales = "free", labeller = "label_both", margins = FALSE
  )
Survival rate of 3rd Class Males
# Survival rate AMONG 3rd-class males: survivors in that group divided by the
# size of the group. Dividing by nrow(titanic) instead gives the share of all
# people on board who were surviving 3rd-class males (88 / 2201 = 0.03998183),
# which is not a survival rate for the group.
with(
  titanic,
  sum(Sex == "Male" & Class == "3rd" & Survived == "Yes") /
    sum(Sex == "Male" & Class == "3rd")
)
## [1] 0.172549
# titanic %>% filter(Sex == "Male", Class == "3rd") %>% summarise(rate = mean(Survived == "Yes"))