###################################
# ### DESCRIBING DATA ###
############
# Packages used by this script; loaded up front so a missing package fails fast
library(psych) # provides describe()
#1) Setting working directory
# NOTE(review): machine-specific absolute path -- the script only runs as-is on
# a machine with this directory; consider running from the project folder instead.
setwd("C:/Users/HP/Downloads/Intern/Titanic")
#2) Reading the data using read.csv and creation of dataframe
# stringsAsFactors = TRUE keeps Sex and Embarked as factors on R >= 4.0 (where
# the default became FALSE), which is what the describe()/summary() output
# recorded below relies on (level counts such as female:312).
titanic.df <- read.csv("Titanic Data.csv", stringsAsFactors = TRUE)
#3) Viewing the data frame in R
# The data viewer is only opened in an interactive session.
if (interactive()) {
  View(titanic.df)
}
# Descriptive statistics (n, mean, sd, median, skew, ...) for every column
describe(titanic.df)
## vars n mean sd median trimmed mad min max range
## Survived 1 889 0.38 0.49 0.00 0.35 0.00 0.0 1.00 1.00
## Pclass 2 889 2.31 0.83 3.00 2.39 0.00 1.0 3.00 2.00
## Sex* 3 889 1.65 0.48 2.00 1.69 0.00 1.0 2.00 1.00
## Age 4 889 29.65 12.97 29.70 29.22 9.34 0.4 80.00 79.60
## SibSp 5 889 0.52 1.10 0.00 0.27 0.00 0.0 8.00 8.00
## Parch 6 889 0.38 0.81 0.00 0.19 0.00 0.0 6.00 6.00
## Fare 7 889 32.10 49.70 14.45 21.28 10.24 0.0 512.33 512.33
## Embarked* 8 889 2.54 0.79 3.00 2.67 0.00 1.0 3.00 2.00
## skew kurtosis se
## Survived 0.48 -1.77 0.02
## Pclass -0.63 -1.27 0.03
## Sex* -0.62 -1.61 0.02
## Age 0.43 0.96 0.43
## SibSp 3.68 17.69 0.04
## Parch 2.74 9.66 0.03
## Fare 4.79 33.23 1.67
## Embarked* -1.26 -0.23 0.03
# Per-column summary; the recorded output below shows quartiles and the mean
# for the numeric columns and level counts for Sex and Embarked.
summary(titanic.df)
## Survived Pclass Sex Age
## Min. :0.0000 Min. :1.000 female:312 Min. : 0.40
## 1st Qu.:0.0000 1st Qu.:2.000 male :577 1st Qu.:22.00
## Median :0.0000 Median :3.000 Median :29.70
## Mean :0.3825 Mean :2.312 Mean :29.65
## 3rd Qu.:1.0000 3rd Qu.:3.000 3rd Qu.:35.00
## Max. :1.0000 Max. :3.000 Max. :80.00
## SibSp Parch Fare Embarked
## Min. :0.0000 Min. :0.0000 Min. : 0.000 C:168
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.: 7.896 Q: 77
## Median :0.0000 Median :0.0000 Median : 14.454 S:644
## Mean :0.5242 Mean :0.3825 Mean : 32.097
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.: 31.000
## Max. :8.0000 Max. :6.0000 Max. :512.329
#4) Number of Passengers onboard the Titanic
# One row per passenger record, so the row count is the number of passengers.
nrow(titanic.df)
## [1] 889
#5) Number of Passengers who survived the sinking of Titanic
# One-way frequency table of the Survived column (0 and 1 are the two levels
# shown in the output); the count under "1" answers the question.
survival_counts <- xtabs(~ Survived, data = titanic.df)
survival_counts
## Survived
## 0 1
## 549 340
#6) Percentage of passengers who survived the sinking of Titanic
# Convert the counts to shares of all passengers, expressed in percent.
100 * prop.table(survival_counts)
## Survived
## 0 1
## 61.75478 38.24522
#7) No. of first class passengers who survived the sinking of titanic
# Survival (rows) cross-tabulated by passenger class (columns); the answer is
# the cell at Survived = 1, Pclass = 1.
mytable <- xtabs(~ Survived + Pclass, data = titanic.df)
mytable
## Pclass
## Survived 1 2 3
## 0 80 97 372
## 1 134 87 119
#8) percentage of first-class passengers who survived the sinking of the Titanic
# margin = 2 makes each Pclass column sum to 100%, so the Survived = 1 row is
# the survival rate *within* each class. Without a margin, prop.table() divides
# by the grand total and reports first-class survivors as a share of all
# passengers (15.07%), which is not what the question asks.
prop.table(mytable, margin = 2) * 100
## Pclass
## Survived 1 2 3
## 0 37.38318 52.71739 75.76375
## 1 62.61682 47.28261 24.23625
#9) number of females from First-Class who survived the sinking of the Titanic
# Pclass has to be part of the cross-tabulation to isolate first class; the
# requested count is the cell Survived = 1, Sex = female, Pclass = 1.
class_sex_table <- xtabs(~ Survived + Sex + Pclass, data = titanic.df)
class_sex_table["1", "female", "1"]
#10) percentage of survivors who were female
# Two-way table reused by the next steps and by the chi-squared test.
mytable <- xtabs(~ Survived + Sex, data = titanic.df)
mytable
## Sex
## Survived female male
## 0 81 468
## 1 231 109
# margin = 1 makes each Survived row sum to 100%, so the Survived = 1 row gives
# the sex split among survivors (female: 231 / 340).
prop.table(mytable, margin = 1) * 100
## Sex
## Survived female male
## 0 14.75410 85.24590
## 1 67.94118 32.05882
#11) percentage of females on board the Titanic who survived
# margin = 2 makes each Sex column sum to 100%, so the Survived = 1 row gives
# the survival rate within each sex (female: 231 / 312).
prop.table(mytable, margin = 2) * 100
## Sex
## Survived female male
## 0 25.96154 81.10919
## 1 74.03846 18.89081
#12) Pearson's Chi-squared test to test the following hypothesis:
#Hypothesis: The proportion of females onboard who survived the sinking of the Titanic was higher than the proportion of males onboard who survived the sinking of the Titanic.
# mytable is the 2 x 2 Survived x Sex table built above. The chi-squared test
# checks whether survival and sex are associated; it is not directional, so the
# "higher for females" part is read from the table counts (231 of 312 females
# vs 109 of 577 males survived). The recorded output shows Yates' continuity
# correction being applied.
chisq.test(mytable)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: mytable
## X-squared = 258.43, df = 1, p-value < 2.2e-16
# You can also embed plots, for example:
# Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.