This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Load the Titanic data from "Titanic Data.csv" in the working directory.
# stringsAsFactors = TRUE keeps the text columns (Sex, Embarked) as factors on
# R >= 4.0, where the default became FALSE, so summary() reports per-level
# counts for them instead of just "character".
titanic.df <- read.csv("Titanic Data.csv", stringsAsFactors = TRUE)
# View() opens a data-viewer window; only call it in an interactive session so
# that knitting or running the script with Rscript does not fail.
if (interactive()) {
  View(titanic.df)
}
# Per-column summary statistics of the loaded data.
summary(titanic.df)
## Survived Pclass Sex Age
## Min. :0.0000 Min. :1.000 female:312 Min. : 0.40
## 1st Qu.:0.0000 1st Qu.:2.000 male :577 1st Qu.:22.00
## Median :0.0000 Median :3.000 Median :29.70
## Mean :0.3825 Mean :2.312 Mean :29.65
## 3rd Qu.:1.0000 3rd Qu.:3.000 3rd Qu.:35.00
## Max. :1.0000 Max. :3.000 Max. :80.00
## SibSp Parch Fare Embarked
## Min. :0.0000 Min. :0.0000 Min. : 0.000 C:168
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.: 7.896 Q: 77
## Median :0.0000 Median :0.0000 Median : 14.454 S:644
## Mean :0.5242 Mean :0.3825 Mean : 32.097
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.: 31.000
## Max. :8.0000 Max. :6.0000 Max. :512.329
# Total no. of passengers in the loaded data frame (presumably one row per
# passenger). sum(Titanic) is not used here: it totals the built-in `Titanic`
# contingency table (2201 people, crew included), which is a different data
# set from titanic.df.
nrow(titanic.df)
## [1] 889
# Distribution of the 0/1 Survived column (min, quartiles, mean, max).
summary(titanic.df[["Survived"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.3825 1.0000 1.0000
# Sum of the indicator, i.e. the number of rows with Survived == 1.
sum(titanic.df[["Survived"]])
## [1] 340
# One-way frequency table of the Survived column, labelled "Survived".
mytable <- table(Survived = titanic.df$Survived)
# Share of each outcome as a proportion of all rows.
mytable / sum(mytable)
## Survived
## 0 1
## 0.6175478 0.3824522
# The same shares expressed as percentages.
100 * (mytable / sum(mytable))
## Survived
## 0 1
## 61.75478 38.24522
library(vcd)
## Loading required package: grid
# Inspect the built-in `Titanic` table in the data viewer. View() needs an
# interactive session, so it is skipped when the document is knitted.
if (interactive()) {
  View(Titanic)
}
# Three-way table of Survived x Sex x Class from the built-in `Titanic` data.
# `Titanic` is already a table of counts; in data-frame form it has one row
# per cell with the count stored in `Freq`. A one-sided formula would only
# count those rows (2 per Survived/Sex/Class combination), so the counts are
# put on the left-hand side of the formula to be summed instead.
mytable <- xtabs(Freq ~ Survived + Sex + Class, data = as.data.frame(Titanic))
# Flattened view: Survived and Sex as rows, Class as columns.
ftable(mytable)
## Class 1st 2nd 3rd Crew
## Survived Sex
## No Male 118 154 422 670
## Female 4 13 106 3
## Yes Male 62 25 88 192
## Female 141 93 90 20
# Survived x Class totals (summed over Sex).
margin.table(mytable, c(1, 3))
## Class
## Survived 1st 2nd 3rd Crew
## No 122 167 528 673
## Yes 203 118 178 212
# Totals by Survived.
margin.table(mytable, 1)
## Survived
## No Yes
## 1490 711
# Totals by Sex.
margin.table(mytable, 2)
## Sex
## Male Female
## 1731 470
# Full table with Sum margins added on every dimension.
addmargins(mytable)
## , , Class = 1st
##
## Sex
## Survived Male Female Sum
## No 118 4 122
## Yes 62 141 203
## Sum 180 145 325
##
## , , Class = 2nd
##
## Sex
## Survived Male Female Sum
## No 154 13 167
## Yes 25 93 118
## Sum 179 106 285
##
## , , Class = 3rd
##
## Sex
## Survived Male Female Sum
## No 422 106 528
## Yes 88 90 178
## Sum 510 196 706
##
## , , Class = Crew
##
## Sex
## Survived Male Female Sum
## No 670 3 673
## Yes 192 20 212
## Sum 862 23 885
##
## , , Class = Sum
##
## Sex
## Survived Male Female Sum
## No 1364 126 1490
## Yes 367 344 711
## Sum 1731 470 2201
# Chi-squared test of independence between Sex and Survived.
# The contingency table and the test are each computed once and reused for
# both the printed report and the p-value extraction.
mytable7 <- xtabs(~ Sex + Survived, data = titanic.df)
chisq_sex_survived <- chisq.test(mytable7)
chisq_sex_survived
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: mytable7
## X-squared = 258.43, df = 1, p-value < 2.2e-16
# The report above only shows "< 2.2e-16"; pull out the actual p-value.
chisq_sex_survived$p.value
## [1] 3.77991e-58
# Mean of the 0/1 Survived column for each distinct Age value.
# Bare column names with `data =` give result columns named Age and Survived
# instead of `titanic.df$Age` / `titanic.df$Survived`.
mxtable <- aggregate(Survived ~ Age, data = titanic.df, FUN = mean)
mxtable
## Age Survived
## 1 0.4 1.0000000
## 2 0.7 1.0000000
## 3 0.8 1.0000000
## 4 0.9 1.0000000
## 5 1.0 0.7142857
## 6 2.0 0.3000000
## 7 3.0 0.8333333
## 8 4.0 0.7000000
## 9 5.0 1.0000000
## 10 6.0 0.6666667
## 11 7.0 0.3333333
## 12 8.0 0.5000000
## 13 9.0 0.2500000
## 14 10.0 0.0000000
## 15 11.0 0.2500000
## 16 12.0 1.0000000
## 17 13.0 1.0000000
## 18 14.0 0.5000000
## 19 14.5 0.0000000
## 20 15.0 0.8000000
## 21 16.0 0.3529412
## 22 17.0 0.4615385
## 23 18.0 0.3461538
## 24 19.0 0.3600000
## 25 20.0 0.2000000
## 26 20.5 0.0000000
## 27 21.0 0.2083333
## 28 22.0 0.4074074
## 29 23.0 0.3333333
## 30 23.5 0.0000000
## 31 24.0 0.5000000
## 32 24.5 0.0000000
## 33 25.0 0.2608696
## 34 26.0 0.3333333
## 35 27.0 0.6111111
## 36 28.0 0.2800000
## 37 28.5 0.0000000
## 38 29.0 0.4000000
## 39 29.7 0.2937853
## 40 30.0 0.4000000
## 41 30.5 0.0000000
## 42 31.0 0.4705882
## 43 32.0 0.5000000
## 44 32.5 0.5000000
## 45 33.0 0.4000000
## 46 34.0 0.4000000
## 47 34.5 0.0000000
## 48 35.0 0.6111111
## 49 36.0 0.5000000
## 50 36.5 0.0000000
## 51 37.0 0.1666667
## 52 38.0 0.4000000
## 53 39.0 0.3571429
## 54 40.0 0.4615385
## 55 40.5 0.0000000
## 56 41.0 0.3333333
## 57 42.0 0.4615385
## 58 43.0 0.2000000
## 59 44.0 0.3333333
## 60 45.0 0.4166667
## 61 45.5 0.0000000
## 62 46.0 0.0000000
## 63 47.0 0.1111111
## 64 48.0 0.6666667
## 65 49.0 0.6666667
## 66 50.0 0.5000000
## 67 51.0 0.2857143
## 68 52.0 0.5000000
## 69 53.0 1.0000000
## 70 54.0 0.3750000
## 71 55.0 0.5000000
## 72 55.5 0.0000000
## 73 56.0 0.5000000
## 74 57.0 0.0000000
## 75 58.0 0.6000000
## 76 59.0 0.0000000
## 77 60.0 0.5000000
## 78 61.0 0.0000000
## 79 62.0 0.3333333
## 80 63.0 1.0000000
## 81 64.0 0.0000000
## 82 65.0 0.0000000
## 83 66.0 0.0000000
## 84 70.0 0.0000000
## 85 70.5 0.0000000
## 86 71.0 0.0000000
## 87 74.0 0.0000000
## 88 80.0 1.0000000
# Welch two-sample t-test of Age between the two Survived groups (0 and 1).
# The formula uses bare column names with `data =` rather than `titanic.df$`
# prefixes, which also gives a cleaner "data:" line in the report.
t.test(Age ~ Survived, data = titanic.df)
##
## Welch Two Sample t-test
##
## data: Age by Survived
## t = 2.1816, df = 667.56, p-value = 0.02949
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1990628 3.7838912
## sample estimates:
## mean in group 0 mean in group 1
## 30.41530 28.42382