# assignment-2-test.knit

#' ---
#' title: "R Notebook"
#' output:
#'   html_document:
#'     df_print: paged
#' ---

# Q1 ----

# Build a four-column subset of `titanic`, picking columns by position
# (3, 9, 6, 7) and labelling them Name, Fare, Sibsp, and Parch.
# NOTE(review): positional indexing assumes the column order of `titanic`
# never changes -- confirm against how the data set is loaded.
titanicSubset <- data.frame(
  Name = titanic[, 3],
  Fare = titanic[, 9],
  Sibsp = titanic[, 6],
  Parch = titanic[, 7]
)
head(titanicSubset)

# Count rows with age in [0, 17] and survived == 1. Comparisons that
# evaluate to NA (as can happen when age is missing) are ignored through
# na.rm = TRUE instead of turning the whole sum into NA.
sum(
  titanic$age >= 0 & titanic$age <= 17 & titanic$survived == 1,
  na.rm = TRUE
)

# Number of rows recorded as female and as male.
sum(titanic$sex == "female")
sum(titanic$sex == "male")

# Column 5 of `titanic` holds the values treated as ages here.
ages <- data.frame(titanic[, 5])

# Median of the non-missing ages; printed, then reused as the fill value
# so the imputation always matches the data instead of a typed-in literal.
median_age <- median(ages[, 1], na.rm = TRUE)
median_age

# Replace every missing age with the median computed above.
ages[is.na(ages)] <- median_age
head(ages)

# Pair column 1 of `titanic` (stored as `pclass`) with column 2 (stored as
# `survived`) so the survival flag can be summarised per class.
pclass_survival <- data.frame(
  pclass = titanic[, 1],
  survived = titanic[, 2]
)

# Mean of `survived` within each `pclass` group.
aggregate(
  x = pclass_survival$survived,
  by = list(pclass_survival$pclass),
  FUN = mean
)

# These data suggest that passengers in higher classes were more likely to survive: Class 1 had the highest survival rate (approx. 61.9%), while Class 3 had the lowest (approx. 25.5%).

# Pair column 4 of `titanic` (stored as `sex`) with column 9 (stored as
# `fare`) for the fare-by-gender comparison.
gender_pricing <- data.frame(sex = titanic[, 4], fare = titanic[, 9])

# One box per `sex` level showing the distribution of `fare`.
# String arguments use plain ASCII double quotes; typographic quotes are
# not valid string delimiters in R.
boxplot(
  fare ~ sex,
  data = gender_pricing,
  main = "fare by gender",
  xlab = "gender",
  ylab = "fare"
)

# This suggests that female passengers paid more for their tickets on average than male passengers.

# Mean fare within each `sex` group of `gender_pricing`; na.rm = TRUE is
# forwarded to mean() so missing fares do not turn a group mean into NA.
aggregate(
  x = gender_pricing$fare,
  by = list(gender_pricing$sex),
  FUN = mean,
  na.rm = TRUE
)

# These group means appear to be in line with the box plot.

# Q2 ----

# Binomial probability of exactly 100 successes in 100 trials when each
# trial succeeds with probability 0.97.
dbinom(100, 100, 0.97)

n <- 100
p_success <- 0.97
p_fail <- 1 - p_success

# With X ~ Binomial(n, p_fail): P(X <= 4) is "fewer than five" and its
# complement is "five or more".
p_lessthanfive <- pbinom(4, size = n, prob = p_fail)
p_fiveormore <- 1 - p_lessthanfive

# Per-component probabilities for a two-component (a / b) calculation.
# NOTE(review): these are assigned but not used anywhere in this section --
# confirm whether a follow-up computation is missing.
p_a <- 0.97
p_b <- 0.97
pb_fail <- 1 - p_b

# Q3 ----

# Poisson rate shared by every calculation in this section.
lam <- 3

# Poisson(lam) probability of each count from 2 through 9, tabulated.
emails <- 2:9
probs <- dpois(emails, lam)
email_probs <- data.frame(emails = emails, probability = probs)
email_probs

# P(X > 3), computed as 1 - [P(X = 0) + P(X = 1) + P(X = 2) + P(X = 3)].
emails2 <- 0:3
probs2 <- dpois(emails2, lam)
percent <- sum(probs2)
final_percent <- 1 - percent
final_percent

# 50000 draws from Binomial(n = 100, prob = 0.03).
# The scalar `prob` is passed here; passing the vector `probs` from above
# would recycle eight unrelated Poisson probabilities across the draws.
n <- 100
num_samples <- 50000
prob <- 0.03
samples <- rbinom(num_samples, size = n, prob = prob)

# 50000 draws from Poisson(lam), printed in full.
numb_samples <- 50000
email_samples <- rpois(numb_samples, lam)
email_samples

# Q4 ----

library(ggplot2)

# Two independent normal samples of 1000 draws each, concatenated into one
# vector of 2000 values and printed.
dist1 <- rnorm(1000, mean = 8, sd = 1.5)
dist2 <- rnorm(1000, mean = 5, sd = 2)
combined_dist <- c(dist1, dist2)
combined_dist

# Density of Normal(mean = 8, sd = 1.5) evaluated on a 0.1-spaced grid
# over [0, 15], stored as a data frame for plotting.
x_val <- seq(0, 15, by = 0.1)
pdf_val <- dnorm(x_val, mean = 8, sd = 1.5)
pdf_data <- data.frame(x = x_val, pdf = pdf_val)

# Plain density curve.
ggplot(pdf_data, aes(x = x, y = pdf)) +
  geom_line() +
  labs(title = "PDF", x = "Value", y = "Density") +
  theme_minimal()

# 95th percentile of Normal(8, 1.5): the x at which the CDF equals 0.95.
val_95 <- qnorm(0.95, mean = 8, sd = 1.5)
val_95

# Same curve with a dashed red marker at the 95th percentile; the label is
# nudged 0.5 to the right of the line and placed at 80% of the peak height
# so it does not sit on top of the curve's maximum.
ggplot(pdf_data, aes(x = x, y = pdf)) +
  geom_line() +
  geom_vline(xintercept = val_95, color = "red", linetype = "dashed") +
  annotate(
    "text",
    x = val_95 + 0.5,
    y = max(pdf_val) * 0.8,
    label = paste("CDF = 0.95 at x =", round(val_95, 2)),
    color = "red"
  ) +
  labs(
    title = "Probability Density Function with CDF = 0.95",
    x = "Value",
    y = "Density"
  ) +
  theme_minimal()

cat(val_95)