##Use the categorical variables Pclass and Embarked in your analysis.
##Upload your zipped Rmd + HTML file or just your Rmd along with a link from RPubs.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the Titanic training data and print its structure.
# The directory below only exists on the original author's machine, so switch
# to it only when it is present. Anywhere else, "titanic_train.csv" is read
# from the current working directory (e.g. the project / Rmd folder) instead
# of the script stopping with an error at setwd().
data_dir <- "C:/Users/StarKid/Desktop/Data_Science/Data_101/week_5/IC10"
if (dir.exists(data_dir)) {
  setwd(data_dir)
}
titanic <- read.csv("titanic_train.csv")
str(titanic)
## 'data.frame': 891 obs. of 12 variables:
## $ PassengerId: int 1 2 3 4 5 6 7 8 9 10 ...
## $ Survived : int 0 1 1 1 0 0 0 0 1 1 ...
## $ Pclass : int 3 1 3 1 3 3 1 3 3 2 ...
## $ Name : chr "Braund, Mr. Owen Harris" "Cumings, Mrs. John Bradley (Florence Briggs Thayer)" "Heikkinen, Miss. Laina" "Futrelle, Mrs. Jacques Heath (Lily May Peel)" ...
## $ Sex : chr "male" "female" "female" "female" ...
## $ Age : num 22 38 26 35 35 NA 54 2 27 14 ...
## $ SibSp : int 1 1 0 1 0 0 0 3 0 1 ...
## $ Parch : int 0 0 0 0 0 0 0 1 2 0 ...
## $ Ticket : chr "A/5 21171" "PC 17599" "STON/O2. 3101282" "113803" ...
## $ Fare : num 7.25 71.28 7.92 53.1 8.05 ...
## $ Cabin : chr "" "C85" "" "C123" ...
## $ Embarked : chr "S" "C" "S" "S" ...
# Welch two-sample t-test, one-sided: is mean Fare greater than mean Age?
# var.equal is left at its default, so the variances are not pooled.
# NOTE(review): Fare and Age are columns of the same passengers, so the two
# samples are not independent, and the assignment header asks for Pclass and
# Embarked -- confirm this is the intended comparison.
t.test(
  x = titanic$Fare,
  y = titanic$Age,
  alternative = "greater",
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: titanic$Fare and titanic$Age
## t = 1.4304, df = 1074.7, p-value = 0.07644
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## -0.37804 Inf
## sample estimates:
## mean of x mean of y
## 32.20421 29.69912
# Paired t-test (two-sided by default) on the per-passenger Fare/Age pairs.
t.test(
  x = titanic$Fare,
  y = titanic$Age,
  paired = TRUE
)
##
## Paired t-test
##
## data: titanic$Fare and titanic$Age
## t = 2.4943, df = 713, p-value = 0.01285
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 1.063490 8.927303
## sample estimates:
## mean difference
## 4.995396
# Normal Q-Q plot of the per-passenger differences (Fare - Age).
qqnorm(with(titanic, Fare - Age))