# Load the Titanic passenger data and display the first few rows to
# understand its structure.
titanic <- read.csv("Titanic Data.csv")
head(titanic)
# The spreadsheet-style viewer needs an interactive session, so only open it
# there; head() above already shows the structure when run as a script.
if (interactive()) {
  View(titanic)
}
## 1. Create a table showing the average age of the survivors and the
##    average age of the people who died.
# Recode the 0/1 Survived column as a labelled factor so the two groups are
# readable in every later output.
titanic[["Sur"]] <- factor(
  titanic$Survived,
  levels = c(0, 1),
  labels = c("Not Survived", "Survived")
)
# Mean of Age within each level of Sur.
mydata <- aggregate(Age ~ Sur, data = titanic, FUN = mean)
mydata
## Sur Age
## 1 Not Survived 30.41530
## 2 Survived 28.42382
# Tabulate the group means held in mydata. Age goes on the left-hand side of
# the formula so xtabs() fills each cell with the Age value for that level of
# Sur; a one-sided formula (~ Age + Sur) would instead count how often each
# (Age, Sur) pair occurs and print only 0/1 cells.
mytable <- xtabs(Age ~ Sur, data = mydata)
mytable
## Expected output, using the means reported in mydata:
## Sur
## Not Survived Survived
## 30.41530 28.42382
## 2. Use R to run a t-test to test the following hypothesis:
## H2: The Titanic survivors were younger than the passengers who died.
# H2 is directional, so a one-sided test is used. With the formula interface
# the difference is (first factor level) - (second factor level), i.e.
# mean(Not Survived) - mean(Survived); "greater" therefore corresponds to the
# survivors being younger on average.
t.test(Age ~ Sur, data = titanic, var.equal = TRUE, alternative = "greater")
## Reference values recorded from the earlier two-sided run of this test:
##   Two Sample t-test, data: Age by Sur
##   t = 2.2302, df = 887, two-sided p-value = 0.02599
##   two-sided 95 percent confidence interval: 0.238890 3.744064
##   mean in group Not Survived 30.41530, mean in group Survived 28.42382
## t, df and the group means do not depend on the alternative. Because t is
## positive (the direction stated in H2), the one-sided p-value is half the
## two-sided one, about 0.013; the one-sided interval has an upper bound of Inf.