Importing the Titanic data set from a CSV file into a data frame object

# Load the passenger records from the CSV file, then preview the first ten rows.
df <- read.csv(file = "titanic_data.csv", sep = ",")
head(x = df, n = 10)
##    PassengerId Survived Pclass
## 1            1        0      3
## 2            2        1      1
## 3            3        1      3
## 4            4        1      1
## 5            5        0      3
## 6            6        0      3
## 7            7        0      1
## 8            8        0      3
## 9            9        1      3
## 10          10        1      2
##                                                   Name    Sex Age SibSp
## 1                              Braund, Mr. Owen Harris   male  22     1
## 2  Cumings, Mrs. John Bradley (Florence Briggs Thayer) female  38     1
## 3                               Heikkinen, Miss. Laina female  26     0
## 4         Futrelle, Mrs. Jacques Heath (Lily May Peel) female  35     1
## 5                             Allen, Mr. William Henry   male  35     0
## 6                                     Moran, Mr. James   male  NA     0
## 7                              McCarthy, Mr. Timothy J   male  54     0
## 8                       Palsson, Master. Gosta Leonard   male   2     3
## 9    Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female  27     0
## 10                 Nasser, Mrs. Nicholas (Adele Achem) female  14     1
##    Parch           Ticket    Fare Cabin Embarked
## 1      0        A/5 21171  7.2500              S
## 2      0         PC 17599 71.2833   C85        C
## 3      0 STON/O2. 3101282  7.9250              S
## 4      0           113803 53.1000  C123        S
## 5      0           373450  8.0500              S
## 6      0           330877  8.4583              Q
## 7      0            17463 51.8625   E46        S
## 8      1           349909 21.0750              S
## 9      2           347742 11.1333              S
## 10     0           237736 30.0708              C
# Report the class of the object returned by read.csv().
class(df)
## [1] "data.frame"
# Show the compact structure of df: dimensions plus each column's type and
# first few values.
str(df)
## 'data.frame':    891 obs. of  12 variables:
##  $ PassengerId: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Survived   : int  0 1 1 1 0 0 0 0 1 1 ...
##  $ Pclass     : int  3 1 3 1 3 3 1 3 3 2 ...
##  $ Name       : Factor w/ 891 levels "Abbing, Mr. Anthony",..: 109 191 358 277 16 559 520 629 417 581 ...
##  $ Sex        : Factor w/ 2 levels "female","male": 2 1 1 1 2 2 2 2 1 1 ...
##  $ Age        : num  22 38 26 35 35 NA 54 2 27 14 ...
##  $ SibSp      : int  1 1 0 1 0 0 0 3 0 1 ...
##  $ Parch      : int  0 0 0 0 0 0 0 1 2 0 ...
##  $ Ticket     : Factor w/ 681 levels "110152","110413",..: 524 597 670 50 473 276 86 396 345 133 ...
##  $ Fare       : num  7.25 71.28 7.92 53.1 8.05 ...
##  $ Cabin      : Factor w/ 148 levels "","A10","A14",..: 1 83 1 57 1 1 131 1 1 1 ...
##  $ Embarked   : Factor w/ 4 levels "","C","Q","S": 4 2 4 4 4 3 4 4 4 2 ...

Number of Unique Passengers

# Count the distinct passenger identifiers.
length(unique(df[["PassengerId"]]))
## [1] 891
# Recode the 0/1 survival indicator as a labelled factor.
# The code -> label mapping is stated explicitly rather than relabelling
# whatever levels as.factor() happens to produce: positional relabelling would
# silently call the only level "Died" if the data contained a single outcome
# code (for example, all 1s).
# Fail loudly on any code other than 0, 1 or NA instead of turning it into NA.
stopifnot(all(df$Survived %in% c(0, 1, NA)))
df$Survived <- factor(
  df$Survived,
  levels = c(0, 1),
  labels = c("Died", "Survivor")
)
library(data.table)
# Convert df to a data.table so the grouped queries can use `[i, j, by]`.
df <- data.table(df)
# Number of passengers in each Sex / Survived combination.
df[, .N, by = c("Sex", "Survived")]
##       Sex Survived   N
## 1:   male     Died 468
## 2: female Survivor 233
## 3: female     Died  81
## 4:   male Survivor 109
# Mean age per Sex / Survived group, using only rows where Age is recorded.
df[!is.na(Age), mean(Age), by = .(Sex, Survived)]
##       Sex Survived       V1
## 1:   male     Died 31.61806
## 2: female Survivor 28.84772
## 3: female     Died 25.04688
## 4:   male Survivor 27.27602