# Read entire CSV file into a data frame
train <- read.csv("train.csv")

str(train)
## 'data.frame':    891 obs. of  12 variables:
##  $ PassengerId: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Survived   : int  0 1 1 1 0 0 0 0 1 1 ...
##  $ Pclass     : int  3 1 3 1 3 3 1 3 3 2 ...
##  $ Name       : chr  "Braund, Mr. Owen Harris" "Cumings, Mrs. John Bradley (Florence Briggs Thayer)" "Heikkinen, Miss. Laina" "Futrelle, Mrs. Jacques Heath (Lily May Peel)" ...
##  $ Sex        : chr  "male" "female" "female" "female" ...
##  $ Age        : num  22 38 26 35 35 NA 54 2 27 14 ...
##  $ SibSp      : int  1 1 0 1 0 0 0 3 0 1 ...
##  $ Parch      : int  0 0 0 0 0 0 0 1 2 0 ...
##  $ Ticket     : chr  "A/5 21171" "PC 17599" "STON/O2. 3101282" "113803" ...
##  $ Fare       : num  7.25 71.28 7.92 53.1 8.05 ...
##  $ Cabin      : chr  "" "C85" "" "C123" ...
##  $ Embarked   : chr  "S" "C" "S" "S" ...
head(train)
##   PassengerId Survived Pclass
## 1           1        0      3
## 2           2        1      1
## 3           3        1      3
## 4           4        1      1
## 5           5        0      3
## 6           6        0      3
##                                                  Name    Sex Age SibSp Parch
## 1                             Braund, Mr. Owen Harris   male  22     1     0
## 2 Cumings, Mrs. John Bradley (Florence Briggs Thayer) female  38     1     0
## 3                              Heikkinen, Miss. Laina female  26     0     0
## 4        Futrelle, Mrs. Jacques Heath (Lily May Peel) female  35     1     0
## 5                            Allen, Mr. William Henry   male  35     0     0
## 6                                    Moran, Mr. James   male  NA     0     0
##             Ticket    Fare Cabin Embarked
## 1        A/5 21171  7.2500              S
## 2         PC 17599 71.2833   C85        C
## 3 STON/O2. 3101282  7.9250              S
## 4           113803 53.1000  C123        S
## 5           373450  8.0500              S
## 6           330877  8.4583              Q
tail(train)
##     PassengerId Survived Pclass                                     Name    Sex
## 886         886        0      3     Rice, Mrs. William (Margaret Norton) female
## 887         887        0      2                    Montvila, Rev. Juozas   male
## 888         888        1      1             Graham, Miss. Margaret Edith female
## 889         889        0      3 Johnston, Miss. Catherine Helen "Carrie" female
## 890         890        1      1                    Behr, Mr. Karl Howell   male
## 891         891        0      3                      Dooley, Mr. Patrick   male
##     Age SibSp Parch     Ticket   Fare Cabin Embarked
## 886  39     0     5     382652 29.125              Q
## 887  27     0     0     211536 13.000              S
## 888  19     0     0     112053 30.000   B42        S
## 889  NA     1     2 W./C. 6607 23.450              S
## 890  26     0     0     111369 30.000  C148        C
## 891  32     0     0     370376  7.750              Q
table(train$Survived)
## 
##   0   1 
## 549 342
table(train$Sex)
## 
## female   male 
##    314    577
survived = table(train$Survived,train$Sex)
rownames(survived) = c("Died", "Survived")
colnames(survived) = c("Female", "Male")
survived
##           
##            Female Male
##   Died         81  468
##   Survived    233  109
prop.test(233,314)
## 
##  1-sample proportions test with continuity correction
## 
## data:  233 out of 314, null probability 0.5
## X-squared = 72.615, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
##  0.6892571 0.7887777
## sample estimates:
##         p 
## 0.7420382
prop.test(109,577)
## 
##  1-sample proportions test with continuity correction
## 
## data:  109 out of 577, null probability 0.5
## X-squared = 222.12, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
##  0.1582608 0.2237814
## sample estimates:
##         p 
## 0.1889081
prop.test(342,891)
## 
##  1-sample proportions test with continuity correction
## 
## data:  342 out of 891, null probability 0.5
## X-squared = 47.627, df = 1, p-value = 5.154e-12
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
##  0.3519194 0.4167722
## sample estimates:
##         p 
## 0.3838384
prop.test(c(233,342),c(314,891))
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  c(233, 342) out of c(314, 891)
## X-squared = 117.98, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  0.2980682 0.4183315
## sample estimates:
##    prop 1    prop 2 
## 0.7420382 0.3838384

Clearly we reject the null and can conclude that this difference in survival rate was intentional.