Om Joy Halder
26-09-2017
# Load the Titanic data set from the working directory and report its row count.
Titanic <- read.csv("Titanic Data.csv")
nrow(Titanic)
[1] 889
# Keep only the rows whose Survived flag equals 1, then count them.
# which() drops NA comparisons, matching what subset() does.
Titanic_Survivour <- Titanic[which(Titanic$Survived == 1), , drop = FALSE]
nrow(Titanic_Survivour)
[1] 340
# Recode the 0/1 Survived indicator as a labelled factor and tabulate it.
Titanic$Survived.f <- factor(
  Titanic$Survived,
  levels = c(0, 1),
  labels = c("Perished", "Survived")
)
mytable <- table(Survived.f = Titanic$Survived.f)
mytable
Survived.f
Perished Survived
549 340
# Express each outcome count as a percentage of the table total.
100 * prop.table(mytable)
Survived.f
Perished Survived
61.75478 38.24522
# Cross-tabulate the labelled survival outcome against passenger class.
Titanic$Survived.f <- factor(
  Titanic$Survived,
  levels = c(0, 1),
  labels = c("Perished", "Survived")
)
mytable2 <- xtabs(~ Survived.f + Pclass, data = Titanic)
mytable2
Pclass
Survived.f 1 2 3
Perished 80 97 372
Survived 134 87 119
# Grouped bar chart of the outcome-by-class counts held in mytable2:
# one pair of bars per passenger class, grey for the first row, blue for the second.
barplot(
  mytable2,
  beside = TRUE,
  col = c("grey", "blue"),
  main = "Survival by Passenger Class",
  xlab = "Passenger Class",
  ylab = "Frequency"
)
legend("topleft", legend = c("Died", "Survived"), fill = c("grey", "blue"))

# Number of first-class passengers who survived (NA comparisons are not counted).
sum(Titanic$Survived == 1 & Titanic$Pclass == 1, na.rm = TRUE)
[1] 134
# Column percentages: within each passenger class, the share with Survived = 0 and 1.
mytable2 <- xtabs(~ Survived + Pclass, data = Titanic)
100 * prop.table(mytable2, margin = 2)
Pclass
Survived 1 2 3
0 37.38318 52.71739 75.76375
1 62.61682 47.28261 24.23625
# Three-way counts (class x sex x survival), printed as a flat table.
mytable3 <- xtabs(~ Pclass + Sex + Survived, data = Titanic)
ftable(mytable3)
Survived 0 1
Pclass Sex
1 female 3 89
male 77 45
2 female 6 70
male 91 17
3 female 72 72
male 300 47
# Same three-way table, with every cell as a percentage of the grand total
# (rounded to two decimals) and printed as a flat table.
mytable3 <- xtabs(~ Pclass + Sex + Survived, data = Titanic)
ftable(round(100 * prop.table(mytable3), digits = 2))
Survived 0 1
Pclass Sex
1 female 0.34 10.01
male 8.66 5.06
2 female 0.67 7.87
male 10.24 1.91
3 female 8.10 8.10
male 33.75 5.29
# Survival-by-class counts, computed separately for female and male passengers.
f <- xtabs(~ Survived + Pclass, data = subset(Titanic, Sex == "female"))
m <- xtabs(~ Survived + Pclass, data = subset(Titanic, Sex == "male"))

# Draw the two charts side by side. par() returns the previous settings, which
# are kept so the layout can be put back once both panels are drawn.
old_par <- par(mfrow = c(1, 2))

# Both panels share ylim = c(0, 400) so bar heights are directly comparable.
barplot(f,
        main = "Female", ylim = c(0, 400),
        xlab = "Passenger Class", ylab = "No of passengers",
        col = c("grey", "blue"), beside = TRUE)
legend("topleft", fill = c("grey", "blue"), legend = c("Died", "Survived"))

barplot(m,
        main = "Male", ylim = c(0, 400),
        xlab = "Passenger Class", ylab = "No of passengers",
        col = c("grey", "blue"), beside = TRUE)
legend("topleft", fill = c("grey", "blue"), legend = c("Died", "Survived"))

# Restore the earlier layout; otherwise every later plot in the session would
# be squeezed into one half of a two-panel device.
par(old_par)

# Number of first-class female passengers who survived.
nrow(subset(Titanic, Survived == 1 & Pclass == 1 & Sex == "female"))
[1] 89
# Row percentages: within each survival outcome, the female/male split.
mytable4 <- xtabs(~ Survived + Sex, data = Titanic)
100 * prop.table(mytable4, margin = 1)
Sex
Survived female male
0 14.75410 85.24590
1 67.94118 32.05882
# Pie chart of survivors split by sex, each slice labelled with its percentage.
survived <- xtabs(~ Sex, data = subset(Titanic, Survived == 1))
pct <- c(round(100 * prop.table(survived), 2))
# Labels are written in the order of the Sex levels in the table (female, male).
lbls <- paste0(c("Female", "Male"), " ", pct, "%")
pie(survived, labels = lbls, col = c("blue", "grey"), main = "Who survived?")

# Column percentages: within each sex, the share with Survived = 0 and 1.
mytable4 <- xtabs(~ Survived + Sex, data = Titanic)
100 * prop.table(mytable4, margin = 2)
Sex
Survived female male
0 25.96154 81.10919
1 74.03846 18.89081
Challenge Question C3
# Pie chart of the survival outcome among female passengers only.
female <- xtabs(~ Survived, data = subset(Titanic, Sex == "female"))
pct <- c(round(100 * prop.table(female), 2))
# Labels are written in the order of the Survived values in the table (0, 1).
lbls <- paste0(c("Died", "Survived"), " ", pct, "%")
pie(
  female,
  labels = lbls,
  col = c("red", "blue"),
  main = "Percentage of females on board the Titanic who survived"
)

# Chi-squared test of independence between survival and sex.
mytable4 <- xtabs(~ Survived + Sex, data = Titanic)
chisq.test(mytable4)
Pearson's Chi-squared test with Yates' continuity correction
data: mytable4
X-squared = 258.43, df = 1, p-value < 2.2e-16
# Mosaic plot of the class x sex x survival table (mosaic() comes from vcd).
library(vcd)
mytable3 <- xtabs(~ Pclass + Sex + Survived, data = Titanic)
mosaic(
  ~ Pclass + Sex + Survived,
  data = mytable3,
  shade = TRUE,
  legend = TRUE,
  main = "Mosaic Plot"
)

# Mean age within each survival group.
mytable5 <- aggregate(Titanic$Age ~ Titanic$Survived, FUN = mean)
mytable5
Titanic$Survived Titanic$Age
1 0 30.41530
2 1 28.42382
# Box plot of Age for each value of Survived.
boxplot(
  Age ~ Survived,
  data = Titanic,
  col = "lightblue",
  main = "Distribution of the age of the survivors ",
  xlab = "Survived?",
  ylab = "Age"
)
# independent 2-group t-test comparing Age between the two Survived groups
t.test(Titanic$Age ~ Titanic$Survived)
Welch Two Sample t-test
data: Titanic$Age by Titanic$Survived
t = 2.1816, df = 667.56, p-value = 0.02949
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
0.1990628 3.7838912
sample estimates:
mean in group 0 mean in group 1
30.41530 28.42382
# where Age is numeric and Survived is a binary factor