1 Libraries

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.1     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(psych)
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(RWeka) 

The sinking of the RMS Titanic is one of the most infamous shipwrecks in history. On April 15, 1912, during her maiden voyage, the Titanic sank after colliding with an iceberg, killing 1502 out of 2224 passengers and crew. This sensational tragedy shocked the international community and led to better safety regulations for ships. One of the reasons that the shipwreck led to such loss of life was that there were not enough lifeboats for the passengers and crew. Although there was some element of luck involved in surviving the sinking, some groups of people such as women, children, and the upper-class were more likely to survive than others.

VARIABLE DESCRIPTIONS:

PassengerID Unique passenger identifier Survived Survival (0 = No; 1 = Yes) Pclass Passenger Class(1 = 1st; 2 = 2nd; 3 = 3rd) (Pclass is a proxy for socio-economic status (SES) 1st ~ Upper; 2nd ~ Middle; 3rd ~ Lower) Name Name Sex Sex Age Age (Age is in Years; Fractional if Age less than One (1) If the Age is Estimated, it is in the form xx.5) Sibsp Number of Siblings/Spouses Aboard Parch Number of Parents/Children Aboard Ticket Ticket Number Fare Passenger Fare Cabin Cabin Embarked Port of Embarkation (C = Cherbourg; Q = Queenstown; S = Southampton)


2 Import a csv file

cloud_wd <- getwd()
setwd(cloud_wd)

titanic <- read.csv(file = "titanic.train.csv", stringsAsFactors = FALSE)

3 Examine the overall data frame

str() shows the number of observations, and the number, names, types and some values of columns

titanic %>% str()
## 'data.frame':    891 obs. of  12 variables:
##  $ PassengerId: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Survived   : int  0 1 1 1 0 0 0 0 1 1 ...
##  $ Pclass     : int  3 1 3 1 3 3 1 3 3 2 ...
##  $ Name       : chr  "Braund, Mr. Owen Harris" "Cumings, Mrs. John Bradley (Florence Briggs Thayer)" "Heikkinen, Miss. Laina" "Futrelle, Mrs. Jacques Heath (Lily May Peel)" ...
##  $ Sex        : chr  "male" "female" "female" "female" ...
##  $ Age        : num  22 38 26 35 35 NA 54 2 27 14 ...
##  $ SibSp      : int  1 1 0 1 0 0 0 3 0 1 ...
##  $ Parch      : int  0 0 0 0 0 0 0 1 2 0 ...
##  $ Ticket     : chr  "A/5 21171" "PC 17599" "STON/O2. 3101282" "113803" ...
##  $ Fare       : num  7.25 71.28 7.92 53.1 8.05 ...
##  $ Cabin      : chr  "" "C85" "" "C123" ...
##  $ Embarked   : chr  "S" "C" "S" "S" ...

3.1 You can retrieve and save the number of rows and number of coloumns of a data frame

# create local variables for row and column numbers

row <- nrow(titanic)
row
## [1] 891
#should be 891
col <- ncol(titanic)
col
## [1] 12
#should be 12

3.2 Show the head and tail rows of a data frame

titanic %>% head()
##   PassengerId Survived Pclass
## 1           1        0      3
## 2           2        1      1
## 3           3        1      3
## 4           4        1      1
## 5           5        0      3
## 6           6        0      3
##                                                  Name    Sex Age SibSp Parch
## 1                             Braund, Mr. Owen Harris   male  22     1     0
## 2 Cumings, Mrs. John Bradley (Florence Briggs Thayer) female  38     1     0
## 3                              Heikkinen, Miss. Laina female  26     0     0
## 4        Futrelle, Mrs. Jacques Heath (Lily May Peel) female  35     1     0
## 5                            Allen, Mr. William Henry   male  35     0     0
## 6                                    Moran, Mr. James   male  NA     0     0
##             Ticket    Fare Cabin Embarked
## 1        A/5 21171  7.2500              S
## 2         PC 17599 71.2833   C85        C
## 3 STON/O2. 3101282  7.9250              S
## 4           113803 53.1000  C123        S
## 5           373450  8.0500              S
## 6           330877  8.4583              Q
titanic %>% tail()
##     PassengerId Survived Pclass                                     Name    Sex
## 886         886        0      3     Rice, Mrs. William (Margaret Norton) female
## 887         887        0      2                    Montvila, Rev. Juozas   male
## 888         888        1      1             Graham, Miss. Margaret Edith female
## 889         889        0      3 Johnston, Miss. Catherine Helen "Carrie" female
## 890         890        1      1                    Behr, Mr. Karl Howell   male
## 891         891        0      3                      Dooley, Mr. Patrick   male
##     Age SibSp Parch     Ticket   Fare Cabin Embarked
## 886  39     0     5     382652 29.125              Q
## 887  27     0     0     211536 13.000              S
## 888  19     0     0     112053 30.000   B42        S
## 889  NA     1     2 W./C. 6607 23.450              S
## 890  26     0     0     111369 30.000  C148        C
## 891  32     0     0     370376  7.750              Q
titanic %>% head(10)
##    PassengerId Survived Pclass
## 1            1        0      3
## 2            2        1      1
## 3            3        1      3
## 4            4        1      1
## 5            5        0      3
## 6            6        0      3
## 7            7        0      1
## 8            8        0      3
## 9            9        1      3
## 10          10        1      2
##                                                   Name    Sex Age SibSp Parch
## 1                              Braund, Mr. Owen Harris   male  22     1     0
## 2  Cumings, Mrs. John Bradley (Florence Briggs Thayer) female  38     1     0
## 3                               Heikkinen, Miss. Laina female  26     0     0
## 4         Futrelle, Mrs. Jacques Heath (Lily May Peel) female  35     1     0
## 5                             Allen, Mr. William Henry   male  35     0     0
## 6                                     Moran, Mr. James   male  NA     0     0
## 7                              McCarthy, Mr. Timothy J   male  54     0     0
## 8                       Palsson, Master. Gosta Leonard   male   2     3     1
## 9    Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female  27     0     2
## 10                 Nasser, Mrs. Nicholas (Adele Achem) female  14     1     0
##              Ticket    Fare Cabin Embarked
## 1         A/5 21171  7.2500              S
## 2          PC 17599 71.2833   C85        C
## 3  STON/O2. 3101282  7.9250              S
## 4            113803 53.1000  C123        S
## 5            373450  8.0500              S
## 6            330877  8.4583              Q
## 7             17463 51.8625   E46        S
## 8            349909 21.0750              S
## 9            347742 11.1333              S
## 10           237736 30.0708              C
titanic %>% tail(7)
##     PassengerId Survived Pclass                                     Name    Sex
## 885         885        0      3                   Sutehall, Mr. Henry Jr   male
## 886         886        0      3     Rice, Mrs. William (Margaret Norton) female
## 887         887        0      2                    Montvila, Rev. Juozas   male
## 888         888        1      1             Graham, Miss. Margaret Edith female
## 889         889        0      3 Johnston, Miss. Catherine Helen "Carrie" female
## 890         890        1      1                    Behr, Mr. Karl Howell   male
## 891         891        0      3                      Dooley, Mr. Patrick   male
##     Age SibSp Parch          Ticket   Fare Cabin Embarked
## 885  25     0     0 SOTON/OQ 392076  7.050              S
## 886  39     0     5          382652 29.125              Q
## 887  27     0     0          211536 13.000              S
## 888  19     0     0          112053 30.000   B42        S
## 889  NA     1     2      W./C. 6607 23.450              S
## 890  26     0     0          111369 30.000  C148        C
## 891  32     0     0          370376  7.750              Q

3.3 summary()

shows the mean and the five-number statistics indicating the spread of each column’s values

summary()

titanic %>% summary()
##   PassengerId       Survived          Pclass          Name          
##  Min.   :  1.0   Min.   :0.0000   Min.   :1.000   Length:891        
##  1st Qu.:223.5   1st Qu.:0.0000   1st Qu.:2.000   Class :character  
##  Median :446.0   Median :0.0000   Median :3.000   Mode  :character  
##  Mean   :446.0   Mean   :0.3838   Mean   :2.309                     
##  3rd Qu.:668.5   3rd Qu.:1.0000   3rd Qu.:3.000                     
##  Max.   :891.0   Max.   :1.0000   Max.   :3.000                     
##                                                                     
##      Sex                 Age            SibSp           Parch       
##  Length:891         Min.   : 0.42   Min.   :0.000   Min.   :0.0000  
##  Class :character   1st Qu.:20.12   1st Qu.:0.000   1st Qu.:0.0000  
##  Mode  :character   Median :28.00   Median :0.000   Median :0.0000  
##                     Mean   :29.70   Mean   :0.523   Mean   :0.3816  
##                     3rd Qu.:38.00   3rd Qu.:1.000   3rd Qu.:0.0000  
##                     Max.   :80.00   Max.   :8.000   Max.   :6.0000  
##                     NA's   :177                                     
##     Ticket               Fare           Cabin             Embarked        
##  Length:891         Min.   :  0.00   Length:891         Length:891        
##  Class :character   1st Qu.:  7.91   Class :character   Class :character  
##  Mode  :character   Median : 14.45   Mode  :character   Mode  :character  
##                     Mean   : 32.20                                        
##                     3rd Qu.: 31.00                                        
##                     Max.   :512.33                                        
## 

4 Data transformation

Remove unique identifiers from further analysis as they are not interesting without additional feature extractions ### selecting columns

titanic %>% select(Sex,Age) %>% head() # use head or tail to make sure we don't print the entire dataframe
##      Sex Age
## 1   male  22
## 2 female  38
## 3 female  26
## 4 female  35
## 5   male  35
## 6   male  NA

4.0.1 Remove columns

titanic <- titanic %>% select(-PassengerId,-Name,-Ticket)

4.0.2 Factor categorical columns

Change Survived and other nominal variables to factors Use structure to see data before and after the transformation

titanic %>% str()
## 'data.frame':    891 obs. of  9 variables:
##  $ Survived: int  0 1 1 1 0 0 0 0 1 1 ...
##  $ Pclass  : int  3 1 3 1 3 3 1 3 3 2 ...
##  $ Sex     : chr  "male" "female" "female" "female" ...
##  $ Age     : num  22 38 26 35 35 NA 54 2 27 14 ...
##  $ SibSp   : int  1 1 0 1 0 0 0 3 0 1 ...
##  $ Parch   : int  0 0 0 0 0 0 0 1 2 0 ...
##  $ Fare    : num  7.25 71.28 7.92 53.1 8.05 ...
##  $ Cabin   : chr  "" "C85" "" "C123" ...
##  $ Embarked: chr  "S" "C" "S" "S" ...
titanic$Survived <- factor(titanic$Survived)
titanic$Sex <- factor(titanic$Sex)
titanic$Pclass <- factor(titanic$Pclass)
titanic$Cabin <- factor(titanic$Cabin)
titanic$Embarked <- factor(titanic$Embarked) # now you do the same for Embarked

#tidyverse syntax
titanic <- titanic %>% mutate(Cabin = factor(Cabin))

#Check your work to make sure factorization was successful

titanic %>% str()
## 'data.frame':    891 obs. of  9 variables:
##  $ Survived: Factor w/ 2 levels "0","1": 1 2 2 2 1 1 1 1 2 2 ...
##  $ Pclass  : Factor w/ 3 levels "1","2","3": 3 1 3 1 3 3 1 3 3 2 ...
##  $ Sex     : Factor w/ 2 levels "female","male": 2 1 1 1 2 2 2 2 1 1 ...
##  $ Age     : num  22 38 26 35 35 NA 54 2 27 14 ...
##  $ SibSp   : int  1 1 0 1 0 0 0 3 0 1 ...
##  $ Parch   : int  0 0 0 0 0 0 0 1 2 0 ...
##  $ Fare    : num  7.25 71.28 7.92 53.1 8.05 ...
##  $ Cabin   : Factor w/ 148 levels "","A10","A14",..: 1 83 1 57 1 1 131 1 1 1 ...
##  $ Embarked: Factor w/ 4 levels "","C","Q","S": 4 2 4 4 4 3 4 4 4 2 ...

Remove observations with missing Age values.

This missing data handling approach has the obvious disadvantages of the applicability of the model to data with missing age.

To keep observations with missing Age values require careful imputation of Age missingness.

The various missing data imputation methods are beyond the knowledge required for this tutorial.

titanic %>% summarize(across(everything(), ~ sum(is.na(.))))
##   Survived Pclass Sex Age SibSp Parch Fare Cabin Embarked
## 1        0      0   0 177     0     0    0     0        0

4.0.3 Drop columns

dropping columns from the dataframe

titanic %>% drop_na()
##     Survived Pclass    Sex   Age SibSp Parch     Fare           Cabin Embarked
## 1          0      3   male 22.00     1     0   7.2500                        S
## 2          1      1 female 38.00     1     0  71.2833             C85        C
## 3          1      3 female 26.00     0     0   7.9250                        S
## 4          1      1 female 35.00     1     0  53.1000            C123        S
## 5          0      3   male 35.00     0     0   8.0500                        S
## 6          0      1   male 54.00     0     0  51.8625             E46        S
## 7          0      3   male  2.00     3     1  21.0750                        S
## 8          1      3 female 27.00     0     2  11.1333                        S
## 9          1      2 female 14.00     1     0  30.0708                        C
## 10         1      3 female  4.00     1     1  16.7000              G6        S
## 11         1      1 female 58.00     0     0  26.5500            C103        S
## 12         0      3   male 20.00     0     0   8.0500                        S
## 13         0      3   male 39.00     1     5  31.2750                        S
## 14         0      3 female 14.00     0     0   7.8542                        S
## 15         1      2 female 55.00     0     0  16.0000                        S
## 16         0      3   male  2.00     4     1  29.1250                        Q
## 17         0      3 female 31.00     1     0  18.0000                        S
## 18         0      2   male 35.00     0     0  26.0000                        S
## 19         1      2   male 34.00     0     0  13.0000             D56        S
## 20         1      3 female 15.00     0     0   8.0292                        Q
## 21         1      1   male 28.00     0     0  35.5000              A6        S
## 22         0      3 female  8.00     3     1  21.0750                        S
## 23         1      3 female 38.00     1     5  31.3875                        S
## 24         0      1   male 19.00     3     2 263.0000     C23 C25 C27        S
## 25         0      1   male 40.00     0     0  27.7208                        C
## 26         0      2   male 66.00     0     0  10.5000                        S
## 27         0      1   male 28.00     1     0  82.1708                        C
## 28         0      1   male 42.00     1     0  52.0000                        S
## 29         0      3   male 21.00     0     0   8.0500                        S
## 30         0      3 female 18.00     2     0  18.0000                        S
## 31         1      3 female 14.00     1     0  11.2417                        C
## 32         0      3 female 40.00     1     0   9.4750                        S
## 33         0      2 female 27.00     1     0  21.0000                        S
## 34         1      2 female  3.00     1     2  41.5792                        C
## 35         1      3 female 19.00     0     0   7.8792                        Q
## 36         0      3 female 18.00     1     0  17.8000                        S
## 37         0      3   male  7.00     4     1  39.6875                        S
## 38         0      3   male 21.00     0     0   7.8000                        S
## 39         1      1 female 49.00     1     0  76.7292             D33        C
## 40         1      2 female 29.00     1     0  26.0000                        S
## 41         0      1   male 65.00     0     1  61.9792             B30        C
## 42         1      2 female 21.00     0     0  10.5000                        S
## 43         0      3   male 28.50     0     0   7.2292                        C
## 44         1      2 female  5.00     1     2  27.7500                        S
## 45         0      3   male 11.00     5     2  46.9000                        S
## 46         0      3   male 22.00     0     0   7.2292                        C
## 47         1      1 female 38.00     0     0  80.0000             B28         
## 48         0      1   male 45.00     1     0  83.4750             C83        S
## 49         0      3   male  4.00     3     2  27.9000                        S
## 50         1      2 female 29.00     0     0  10.5000             F33        S
## 51         0      3   male 19.00     0     0   8.1583                        S
## 52         1      3 female 17.00     4     2   7.9250                        S
## 53         0      3   male 26.00     2     0   8.6625                        S
## 54         0      2   male 32.00     0     0  10.5000                        S
## 55         0      3 female 16.00     5     2  46.9000                        S
## 56         0      2   male 21.00     0     0  73.5000                        S
## 57         0      3   male 26.00     1     0  14.4542                        C
## 58         1      3   male 32.00     0     0  56.4958                        S
## 59         0      3   male 25.00     0     0   7.6500           F G73        S
## 60         1      2   male  0.83     0     2  29.0000                        S
## 61         1      3 female 30.00     0     0  12.4750                        S
## 62         0      3   male 22.00     0     0   9.0000                        S
## 63         1      3   male 29.00     0     0   9.5000                        S
## 64         0      1   male 28.00     0     0  47.1000                        S
## 65         1      2 female 17.00     0     0  10.5000                        S
## 66         1      3 female 33.00     3     0  15.8500                        S
## 67         0      3   male 16.00     1     3  34.3750                        S
## 68         1      1 female 23.00     3     2 263.0000     C23 C25 C27        S
## 69         0      3   male 24.00     0     0   8.0500                        S
## 70         0      3   male 29.00     0     0   8.0500                        S
## 71         0      3   male 20.00     0     0   7.8542                        S
## 72         0      1   male 46.00     1     0  61.1750             E31        S
## 73         0      3   male 26.00     1     2  20.5750                        S
## 74         0      3   male 59.00     0     0   7.2500                        S
## 75         0      1   male 71.00     0     0  34.6542              A5        C
## 76         1      1   male 23.00     0     1  63.3583         D10 D12        C
## 77         1      2 female 34.00     0     1  23.0000                        S
## 78         0      2   male 34.00     1     0  26.0000                        S
## 79         0      3 female 28.00     0     0   7.8958                        S
## 80         0      1   male 21.00     0     1  77.2875             D26        S
## 81         0      3   male 33.00     0     0   8.6542                        S
## 82         0      3   male 37.00     2     0   7.9250                        S
## 83         0      3   male 28.00     0     0   7.8958                        S
## 84         1      3 female 21.00     0     0   7.6500                        S
## 85         0      3   male 38.00     0     0   7.8958                        S
## 86         0      1   male 47.00     0     0  52.0000            C110        S
## 87         0      3 female 14.50     1     0  14.4542                        C
## 88         0      3   male 22.00     0     0   8.0500                        S
## 89         0      3 female 20.00     1     0   9.8250                        S
## 90         0      3 female 17.00     0     0  14.4583                        C
## 91         0      3   male 21.00     0     0   7.9250                        S
## 92         0      3   male 70.50     0     0   7.7500                        Q
## 93         0      2   male 29.00     1     0  21.0000                        S
## 94         0      1   male 24.00     0     1 247.5208         B58 B60        C
## 95         0      3 female  2.00     4     2  31.2750                        S
## 96         0      2   male 21.00     2     0  73.5000                        S
## 97         0      2   male 32.50     1     0  30.0708                        C
## 98         1      2 female 32.50     0     0  13.0000            E101        S
## 99         0      1   male 54.00     0     1  77.2875             D26        S
## 100        1      3   male 12.00     1     0  11.2417                        C
## 101        1      3   male 24.00     0     0   7.1417                        S
## 102        0      3   male 45.00     0     0   6.9750                        S
## 103        0      3   male 33.00     0     0   7.8958                        C
## 104        0      3   male 20.00     0     0   7.0500                        S
## 105        0      3 female 47.00     1     0  14.5000                        S
## 106        1      2 female 29.00     1     0  26.0000                        S
## 107        0      2   male 25.00     0     0  13.0000                        S
## 108        0      2   male 23.00     0     0  15.0458                        C
## 109        1      1 female 19.00     0     2  26.2833             D47        S
## 110        0      1   male 37.00     1     0  53.1000            C123        S
## 111        0      3   male 16.00     0     0   9.2167                        S
## 112        0      1   male 24.00     0     0  79.2000             B86        C
## 113        1      3 female 22.00     0     0   7.7500                        S
## 114        1      3 female 24.00     1     0  15.8500                        S
## 115        0      3   male 19.00     0     0   6.7500                        Q
## 116        0      2   male 18.00     0     0  11.5000                        S
## 117        0      2   male 19.00     1     1  36.7500                        S
## 118        1      3   male 27.00     0     0   7.7958                        S
## 119        0      3 female  9.00     2     2  34.3750                        S
## 120        0      2   male 36.50     0     2  26.0000              F2        S
## 121        0      2   male 42.00     0     0  13.0000                        S
## 122        0      2   male 51.00     0     0  12.5250                        S
## 123        1      1 female 22.00     1     0  66.6000              C2        S
## 124        0      3   male 55.50     0     0   8.0500                        S
## 125        0      3   male 40.50     0     2  14.5000                        S
## 126        0      1   male 51.00     0     1  61.3792                        C
## 127        1      3 female 16.00     0     0   7.7333                        Q
## 128        0      3   male 30.00     0     0   8.0500                        S
## 129        0      3   male 44.00     0     1  16.1000                        S
## 130        1      2 female 40.00     0     0  15.7500                        S
## 131        0      3   male 26.00     0     0   7.7750                        S
## 132        0      3   male 17.00     0     0   8.6625                        S
## 133        0      3   male  1.00     4     1  39.6875                        S
## 134        1      3   male  9.00     0     2  20.5250                        S
## 135        0      3 female 45.00     1     4  27.9000                        S
## 136        0      3   male 28.00     0     0  56.4958                        S
## 137        0      1   male 61.00     0     0  33.5000             B19        S
## 138        0      3   male  4.00     4     1  29.1250                        Q
## 139        1      3 female  1.00     1     1  11.1333                        S
## 140        0      3   male 21.00     0     0   7.9250                        S
## 141        0      1   male 56.00     0     0  30.6958              A7        C
## 142        0      3   male 18.00     1     1   7.8542                        S
## 143        0      1 female 50.00     0     0  28.7125             C49        C
## 144        0      2   male 30.00     0     0  13.0000                        S
## 145        0      3   male 36.00     0     0   0.0000                        S
## 146        0      3   male  9.00     4     2  31.3875                        S
## 147        1      2   male  1.00     2     1  39.0000              F4        S
## 148        1      3 female  4.00     0     2  22.0250                        S
## 149        1      1   male 45.00     0     0  26.5500                        S
## 150        0      3   male 40.00     1     1  15.5000                        Q
## 151        0      3   male 36.00     0     0   7.8958                        S
## 152        1      2 female 32.00     0     0  13.0000                        S
## 153        0      2   male 19.00     0     0  13.0000                        S
## 154        1      3 female 19.00     1     0   7.8542                        S
## 155        1      2   male  3.00     1     1  26.0000              F2        S
## 156        1      1 female 44.00     0     0  27.7208              B4        C
## 157        1      1 female 58.00     0     0 146.5208             B80        C
## 158        0      3   male 42.00     0     1   8.4042                        S
## 159        0      2 female 24.00     0     0  13.0000                        S
## 160        0      3   male 28.00     0     0   9.5000                        S
## 161        0      3   male 34.00     0     0   6.4958                        S
## 162        0      3   male 45.50     0     0   7.2250                        C
## 163        1      3   male 18.00     0     0   8.0500                        S
## 164        0      3 female  2.00     0     1  10.4625              G6        S
## 165        0      3   male 32.00     1     0  15.8500                        S
## 166        1      3   male 26.00     0     0  18.7875                        C
## 167        1      3 female 16.00     0     0   7.7500                        Q
## 168        1      1   male 40.00     0     0  31.0000             A31        C
## 169        0      3   male 24.00     0     0   7.0500                        S
## 170        1      2 female 35.00     0     0  21.0000                        S
## 171        0      3   male 22.00     0     0   7.2500                        S
## 172        0      2   male 30.00     0     0  13.0000                        S
## 173        1      1 female 31.00     1     0 113.2750             D36        C
## 174        1      3 female 27.00     0     0   7.9250                        S
## 175        0      2   male 42.00     1     0  27.0000                        S
## 176        1      1 female 32.00     0     0  76.2917             D15        C
## 177        0      2   male 30.00     0     0  10.5000                        S
## 178        1      3   male 16.00     0     0   8.0500                        S
## 179        0      2   male 27.00     0     0  13.0000                        S
## 180        0      3   male 51.00     0     0   8.0500                        S
## 181        1      1   male 38.00     1     0  90.0000             C93        S
## 182        0      3   male 22.00     0     0   9.3500                        S
## 183        1      2   male 19.00     0     0  10.5000                        S
## 184        0      3   male 20.50     0     0   7.2500                        S
## 185        0      2   male 18.00     0     0  13.0000                        S
## 186        1      1 female 35.00     1     0  83.4750             C83        S
## 187        0      3   male 29.00     0     0   7.7750                        S
## 188        0      2   male 59.00     0     0  13.5000                        S
## 189        1      3 female  5.00     4     2  31.3875                        S
## 190        0      2   male 24.00     0     0  10.5000                        S
## 191        0      2   male 44.00     1     0  26.0000                        S
## 192        1      2 female  8.00     0     2  26.2500                        S
## 193        0      2   male 19.00     0     0  10.5000                        S
## 194        0      2   male 33.00     0     0  12.2750                        S
## 195        0      2   male 29.00     0     0  10.5000                        S
## 196        0      3   male 22.00     0     0   7.1250                        S
## 197        0      3   male 30.00     0     0   7.2250                        C
## 198        0      1   male 44.00     2     0  90.0000             C78        Q
## 199        0      3 female 25.00     0     0   7.7750                        S
## 200        1      2 female 24.00     0     2  14.5000                        S
## 201        1      1   male 37.00     1     1  52.5542             D35        S
## 202        0      2   male 54.00     1     0  26.0000                        S
## 203        0      3 female 29.00     1     1  10.4625              G6        S
## 204        0      1   male 62.00     0     0  26.5500             C87        S
## 205        0      3   male 30.00     1     0  16.1000                        S
## 206        0      3 female 41.00     0     2  20.2125                        S
## 207        1      3 female 29.00     0     2  15.2458                        C
## 208        1      1 female 30.00     0     0  86.5000             B77        S
## 209        1      1 female 35.00     0     0 512.3292                        C
## 210        1      2 female 50.00     0     1  26.0000                        S
## 211        1      3   male  3.00     4     2  31.3875                        S
## 212        0      1   male 52.00     1     1  79.6500             E67        S
## 213        0      1   male 40.00     0     0   0.0000             B94        S
## 214        0      2   male 36.00     0     0  10.5000                        S
## 215        0      3   male 16.00     4     1  39.6875                        S
## 216        1      3   male 25.00     1     0   7.7750                        S
## 217        1      1 female 58.00     0     1 153.4625            C125        S
## 218        1      1 female 35.00     0     0 135.6333             C99        S
## 219        1      3   male 25.00     0     0   0.0000                        S
## 220        1      2 female 41.00     0     1  19.5000                        S
## 221        0      1   male 37.00     0     1  29.7000            C118        C
## 222        1      1 female 63.00     1     0  77.9583              D7        S
## 223        0      3 female 45.00     0     0   7.7500                        S
## 224        0      3   male  7.00     4     1  29.1250                        Q
## 225        1      3 female 35.00     1     1  20.2500                        S
## 226        0      3   male 65.00     0     0   7.7500                        Q
## 227        0      3   male 28.00     0     0   7.8542                        S
## 228        0      3   male 16.00     0     0   9.5000                        S
## 229        1      3   male 19.00     0     0   8.0500                        S
## 230        0      3   male 33.00     0     0   8.6625                        C
## 231        1      3   male 30.00     0     0   9.5000                        S
## 232        0      3   male 22.00     0     0   7.8958                        S
## 233        1      2   male 42.00     0     0  13.0000                        S
## 234        1      3 female 22.00     0     0   7.7500                        Q
## 235        1      1 female 26.00     0     0  78.8500                        S
## 236        1      1 female 19.00     1     0  91.0792             B49        C
## 237        0      2   male 36.00     0     0  12.8750               D        C
## 238        0      3 female 24.00     0     0   8.8500                        S
## 239        0      3   male 24.00     0     0   7.8958                        S
## 240        0      3   male 23.50     0     0   7.2292                        C
## 241        0      1 female  2.00     1     2 151.5500         C22 C26        S
## 242        1      1 female 50.00     0     1 247.5208         B58 B60        C
## 243        0      3   male 19.00     0     0   0.0000                        S
## 244        1      1   male  0.92     1     2 151.5500         C22 C26        S
## 245        1      1 female 17.00     1     0 108.9000             C65        C
## 246        0      2   male 30.00     1     0  24.0000                        C
## 247        1      1 female 30.00     0     0  56.9292             E36        C
## 248        1      1 female 24.00     0     0  83.1583             C54        C
## 249        1      1 female 18.00     2     2 262.3750 B57 B59 B63 B66        C
## 250        0      2 female 26.00     1     1  26.0000                        S
## 251        0      3   male 28.00     0     0   7.8958                        S
## 252        0      2   male 43.00     1     1  26.2500                        S
## 253        1      3 female 26.00     0     0   7.8542                        S
## 254        1      2 female 24.00     1     0  26.0000                        S
## 255        0      2   male 54.00     0     0  14.0000                        S
## 256        1      1 female 31.00     0     2 164.8667              C7        S
## 257        1      1 female 40.00     1     1 134.5000             E34        C
## 258        0      3   male 22.00     0     0   7.2500                        S
## 259        0      3   male 27.00     0     0   7.8958                        S
## 260        1      2 female 30.00     0     0  12.3500                        Q
## 261        1      2 female 22.00     1     1  29.0000                        S
## 262        1      1 female 36.00     0     0 135.6333             C32        C
## 263        0      3   male 61.00     0     0   6.2375                        S
## 264        1      2 female 36.00     0     0  13.0000               D        S
## 265        1      3 female 31.00     1     1  20.5250                        S
## 266        1      1 female 16.00     0     1  57.9792             B18        C
## 267        0      1   male 45.50     0     0  28.5000            C124        S
## 268        0      1   male 38.00     0     1 153.4625             C91        S
## 269        0      3   male 16.00     2     0  18.0000                        S
## 270        0      1   male 29.00     1     0  66.6000              C2        S
## 271        1      1 female 41.00     0     0 134.5000             E40        C
## 272        1      3   male 45.00     0     0   8.0500                        S
## 273        0      1   male 45.00     0     0  35.5000               T        S
## 274        1      2   male  2.00     1     1  26.0000              F2        S
## 275        1      1 female 24.00     3     2 263.0000     C23 C25 C27        S
## 276        0      2   male 28.00     0     0  13.0000                        S
## 277        0      2   male 25.00     0     0  13.0000                        S
## 278        0      2   male 36.00     0     0  13.0000                        S
## 279        1      2 female 24.00     0     0  13.0000             F33        S
## 280        1      2 female 40.00     0     0  13.0000                        S
## 281        1      3   male  3.00     1     1  15.9000                        S
## 282        0      3   male 42.00     0     0   8.6625                        S
## 283        0      3   male 23.00     0     0   9.2250                        S
## 284        0      3   male 15.00     1     1   7.2292                        C
## 285        0      3   male 25.00     1     0  17.8000                        S
## 286        0      3   male 28.00     0     0   9.5000                        S
## 287        1      1 female 22.00     0     1  55.0000             E33        S
## 288        0      2 female 38.00     0     0  13.0000                        S
## 289        0      3   male 40.00     1     4  27.9000                        S
## 290        0      2   male 29.00     1     0  27.7208                        C
## 291        0      3 female 45.00     0     1  14.4542                        C
## 292        0      3   male 35.00     0     0   7.0500                        S
## 293        0      3   male 30.00     0     0   7.2500                        S
## 294        1      1 female 60.00     1     0  75.2500             D37        C
## 295        1      1 female 24.00     0     0  69.3000             B35        C
## 296        1      1   male 25.00     1     0  55.4417             E50        C
## 297        0      3   male 18.00     1     0   6.4958                        S
## 298        0      3   male 19.00     0     0   8.0500                        S
## 299        0      1   male 22.00     0     0 135.6333                        C
## 300        0      3 female  3.00     3     1  21.0750                        S
## 301        1      3 female 22.00     0     0   7.2500                        S
## 302        0      1   male 27.00     0     2 211.5000             C82        C
## 303        0      3   male 20.00     0     0   4.0125                        C
## 304        0      3   male 19.00     0     0   7.7750                        S
## 305        1      1 female 42.00     0     0 227.5250                        C
## 306        1      3 female  1.00     0     2  15.7417                        C
## 307        0      3   male 32.00     0     0   7.9250                        S
## 308        1      1 female 35.00     1     0  52.0000                        S
## 309        0      2   male 18.00     0     0  73.5000                        S
## 310        0      3   male  1.00     5     2  46.9000                        S
## 311        1      2 female 36.00     0     0  13.0000                        S
## 312        1      2 female 17.00     0     0  12.0000                        C
## 313        1      1   male 36.00     1     2 120.0000         B96 B98        S
## 314        1      3   male 21.00     0     0   7.7958                        S
## 315        0      3   male 28.00     2     0   7.9250                        S
## 316        1      1 female 23.00     1     0 113.2750             D36        C
## 317        1      3 female 24.00     0     2  16.7000              G6        S
## 318        0      3   male 22.00     0     0   7.7958                        S
## 319        0      3 female 31.00     0     0   7.8542                        S
## 320        0      2   male 46.00     0     0  26.0000                        S
## 321        0      2   male 23.00     0     0  10.5000                        S
## 322        1      2 female 28.00     0     0  12.6500                        S
## 323        1      3   male 39.00     0     0   7.9250                        S
## 324        0      3   male 26.00     0     0   8.0500                        S
## 325        0      3 female 21.00     1     0   9.8250                        S
## 326        0      3   male 28.00     1     0  15.8500                        S
## 327        0      3 female 20.00     0     0   8.6625                        S
## 328        0      2   male 34.00     1     0  21.0000                        S
## 329        0      3   male 51.00     0     0   7.7500                        S
## 330        1      2   male  3.00     1     1  18.7500                        S
## 331        0      3   male 21.00     0     0   7.7750                        S
## 332        1      1 female 33.00     1     0  90.0000             C78        Q
## 333        1      3   male 44.00     0     0   7.9250                        S
## 334        1      2 female 34.00     1     1  32.5000                        S
## 335        1      2 female 18.00     0     2  13.0000                        S
## 336        0      2   male 30.00     0     0  13.0000                        S
## 337        0      3 female 10.00     0     2  24.1500                        S
## 338        0      3   male 21.00     0     0   7.7333                        Q
## 339        0      3   male 29.00     0     0   7.8750                        S
## 340        0      3 female 28.00     1     1  14.4000                        S
## 341        0      3   male 18.00     1     1  20.2125                        S
## 342        1      2 female 28.00     1     0  26.0000                        S
## 343        1      2 female 19.00     0     0  26.0000                        S
## 344        1      3   male 32.00     0     0   8.0500             E10        S
## 345        1      1   male 28.00     0     0  26.5500             C52        S
## 346        1      2 female 42.00     1     0  26.0000                        S
## 347        0      3   male 17.00     0     0   7.1250                        S
## 348        0      1   male 50.00     1     0  55.9000             E44        S
## 349        1      1 female 14.00     1     2 120.0000         B96 B98        S
## 350        0      3 female 21.00     2     2  34.3750                        S
## 351        1      2 female 24.00     2     3  18.7500                        S
## 352        0      1   male 64.00     1     4 263.0000     C23 C25 C27        S
## 353        0      2   male 31.00     0     0  10.5000                        S
## 354        1      2 female 45.00     1     1  26.2500                        S
## 355        0      3   male 20.00     0     0   9.5000                        S
## 356        0      3   male 25.00     1     0   7.7750                        S
## 357        1      2 female 28.00     0     0  13.0000                        S
## 358        1      1   male  4.00     0     2  81.8583             A34        S
## 359        1      2 female 13.00     0     1  19.5000                        S
## 360        1      1   male 34.00     0     0  26.5500                        S
## 361        1      3 female  5.00     2     1  19.2583                        C
## 362        1      1   male 52.00     0     0  30.5000            C104        S
## 363        0      2   male 36.00     1     2  27.7500                        S
## 364        0      1   male 30.00     0     0  27.7500            C111        C
## 365        1      1   male 49.00     1     0  89.1042             C92        C
## 366        1      3   male 29.00     0     0   7.8958                        C
## 367        0      1   male 65.00     0     0  26.5500             E38        S
## 368        1      2 female 50.00     0     0  10.5000                        S
## 369        1      1   male 48.00     0     0  26.5500             E12        S
## 370        0      3   male 34.00     0     0   8.0500                        S
## 371        0      1   male 47.00     0     0  38.5000             E63        S
## 372        0      2   male 48.00     0     0  13.0000                        S
## 373        0      3   male 38.00     0     0   7.0500                        S
## 374        0      1   male 56.00     0     0  26.5500                        S
## 375        1      3 female  0.75     2     1  19.2583                        C
## 376        0      3   male 38.00     0     0   8.6625                        S
## 377        1      2 female 33.00     1     2  27.7500                        S
## 378        1      2 female 23.00     0     0  13.7917               D        C
## 379        0      3 female 22.00     0     0   9.8375                        S
## 380        0      2   male 34.00     1     0  21.0000                        S
## 381        0      3   male 29.00     1     0   7.0458                        S
## 382        0      3   male 22.00     0     0   7.5208                        S
## 383        1      3 female  2.00     0     1  12.2875                        S
## 384        0      3   male  9.00     5     2  46.9000                        S
## 385        0      3   male 50.00     0     0   8.0500                        S
## 386        1      3 female 63.00     0     0   9.5875                        S
## 387        1      1   male 25.00     1     0  91.0792             B49        C
## 388        1      1 female 35.00     1     0  90.0000             C93        S
## 389        0      1   male 58.00     0     0  29.7000             B37        C
## 390        0      3   male 30.00     0     0   8.0500                        S
## 391        1      3   male  9.00     1     1  15.9000                        S
## 392        0      3   male 21.00     0     0   7.2500                        S
## 393        0      1   male 55.00     0     0  30.5000             C30        S
## 394        0      1   male 71.00     0     0  49.5042                        C
## 395        0      3   male 21.00     0     0   8.0500                        S
## 396        1      1 female 54.00     1     0  78.2667             D20        C
## 397        0      1 female 25.00     1     2 151.5500         C22 C26        S
## 398        0      3   male 24.00     0     0   7.7958                        S
## 399        0      3   male 17.00     0     0   8.6625                        S
## 400        0      3 female 21.00     0     0   7.7500                        Q
## 401        0      3 female 37.00     0     0   9.5875                        S
## 402        1      1 female 16.00     0     0  86.5000             B79        S
## 403        0      1   male 18.00     1     0 108.9000             C65        C
## 404        1      2 female 33.00     0     2  26.0000                        S
## 405        0      3   male 28.00     0     0  22.5250                        S
## 406        1      3   male 26.00     0     0  56.4958                        S
## 407        1      3   male 29.00     0     0   7.7500                        Q
## 408        1      1   male 36.00     0     0  26.2875             E25        S
## 409        1      1 female 54.00     1     0  59.4000                        C
## 410        0      3   male 24.00     0     0   7.4958                        S
## 411        0      1   male 47.00     0     0  34.0208             D46        S
## 412        1      2 female 34.00     0     0  10.5000             F33        S
## 413        1      2 female 36.00     1     0  26.0000                        S
## 414        0      3   male 32.00     0     0   7.8958                        S
## 415        1      1 female 30.00     0     0  93.5000             B73        S
## 416        0      3   male 22.00     0     0   7.8958                        S
## 417        1      1 female 44.00     0     1  57.9792             B18        C
## 418        0      3   male 40.50     0     0   7.7500                        Q
## 419        1      2 female 50.00     0     0  10.5000                        S
## 420        0      3   male 39.00     0     0   7.9250                        S
## 421        0      2   male 23.00     2     1  11.5000                        S
## 422        1      2 female  2.00     1     1  26.0000                        S
## 423        0      3   male 17.00     1     1   7.2292                        C
## 424        0      3 female 30.00     0     0   8.6625                        S
## 425        1      2 female  7.00     0     2  26.2500                        S
## 426        0      1   male 45.00     0     0  26.5500             B38        S
## 427        1      1 female 30.00     0     0 106.4250                        C
## 428        1      1 female 22.00     0     2  49.5000             B39        C
## 429        1      1 female 36.00     0     2  71.0000             B22        S
## 430        0      3 female  9.00     4     2  31.2750                        S
## 431        0      3 female 11.00     4     2  31.2750                        S
## 432        1      2   male 32.00     1     0  26.0000                        S
## 433        0      1   male 50.00     1     0 106.4250             C86        C
## 434        0      1   male 64.00     0     0  26.0000                        S
## 435        1      2 female 19.00     1     0  26.0000                        S
## 436        0      3   male 33.00     1     1  20.5250                        S
## 437        1      2   male  8.00     1     1  36.7500                        S
## 438        1      1   male 17.00     0     2 110.8833             C70        C
## 439        0      2   male 27.00     0     0  26.0000                        S
## 440        1      3   male 22.00     0     0   7.2250                        C
## 441        1      3 female 22.00     0     0   7.7750                        S
## 442        0      1   male 62.00     0     0  26.5500                        S
## 443        1      1 female 48.00     1     0  39.6000             A16        C
## 444        1      1 female 39.00     1     1  79.6500             E67        S
## 445        1      3 female 36.00     1     0  17.4000                        S
## 446        0      3   male 40.00     0     0   7.8958                        S
## 447        0      2   male 28.00     0     0  13.5000                        S
## 448        0      3   male 24.00     2     0  24.1500                        S
## 449        0      3   male 19.00     0     0   7.8958                        S
## 450        0      3 female 29.00     0     4  21.0750                        S
## 451        1      3   male 32.00     0     0   7.8542                        S
## 452        1      2   male 62.00     0     0  10.5000                        S
## 453        1      1 female 53.00     2     0  51.4792            C101        S
## 454        1      1   male 36.00     0     0  26.3875             E25        S
## 455        0      3   male 16.00     0     0   8.0500                        S
## 456        0      3   male 19.00     0     0  14.5000                        S
## 457        1      2 female 34.00     0     0  13.0000                        S
## 458        1      1 female 39.00     1     0  55.9000             E44        S
## 459        1      3   male 32.00     0     0   7.9250                        S
## 460        1      2 female 25.00     1     1  30.0000                        S
## 461        1      1 female 39.00     1     1 110.8833             C68        C
## 462        0      2   male 54.00     0     0  26.0000                        S
## 463        0      1   male 36.00     0     0  40.1250             A10        C
## 464        1      1 female 18.00     0     2  79.6500             E68        S
## 465        0      2   male 47.00     0     0  15.0000                        S
## 466        1      1   male 60.00     1     1  79.2000             B41        C
## 467        0      3   male 22.00     0     0   8.0500                        S
## 468        0      3   male 35.00     0     0   7.1250                        S
## 469        1      1 female 52.00     1     0  78.2667             D20        C
## 470        0      3   male 47.00     0     0   7.2500                        S
## 471        0      2   male 37.00     1     0  26.0000                        S
## 472        0      3   male 36.00     1     1  24.1500                        S
## 473        0      3   male 49.00     0     0   0.0000                        S
## 474        1      1   male 49.00     1     0  56.9292             A20        C
## 475        1      2 female 24.00     2     1  27.0000                        S
## 476        0      3   male 44.00     0     0   8.0500                        S
## 477        1      1   male 35.00     0     0  26.5500                        C
## 478        0      3   male 36.00     1     0  15.5500                        S
## 479        0      3   male 30.00     0     0   7.8958                        S
## 480        1      1   male 27.00     0     0  30.5000                        S
## 481        1      2 female 22.00     1     2  41.5792                        C
## 482        1      1 female 40.00     0     0 153.4625            C125        S
## 483        0      3 female 39.00     1     5  31.2750                        S
## 484        0      3   male 35.00     0     0   8.0500                        S
## 485        1      2 female 24.00     1     2  65.0000                        S
## 486        0      3   male 34.00     1     1  14.4000                        S
## 487        0      3 female 26.00     1     0  16.1000                        S
## 488        1      2 female  4.00     2     1  39.0000              F4        S
## 489        0      2   male 26.00     0     0  10.5000                        S
## 490        0      3   male 27.00     1     0  14.4542                        C
## 491        1      1   male 42.00     1     0  52.5542             D19        S
## 492        1      3   male 20.00     1     1  15.7417                        C
## 493        0      3   male 21.00     0     0   7.8542                        S
## 494        0      3   male 21.00     0     0  16.1000                        S
## 495        0      1   male 61.00     0     0  32.3208             D50        S
## 496        0      2   male 57.00     0     0  12.3500                        Q
## 497        1      1 female 21.00     0     0  77.9583              D9        S
## 498        0      3   male 26.00     0     0   7.8958                        S
## 499        1      1   male 80.00     0     0  30.0000             A23        S
## 500        0      3   male 51.00     0     0   7.0542                        S
## 501        1      1   male 32.00     0     0  30.5000             B50        C
## 502        0      3 female  9.00     3     2  27.9000                        S
## 503        1      2 female 28.00     0     0  13.0000                        S
## 504        0      3   male 32.00     0     0   7.9250                        S
## 505        0      2   male 31.00     1     1  26.2500                        S
## 506        0      3 female 41.00     0     5  39.6875                        S
## 507        0      3   male 20.00     0     0   7.8542                        S
## 508        1      1 female 24.00     0     0  69.3000             B35        C
## 509        0      3 female  2.00     3     2  27.9000                        S
## 510        1      3 female  0.75     2     1  19.2583                        C
## 511        1      1   male 48.00     1     0  76.7292             D33        C
## 512        0      3   male 19.00     0     0   7.8958                        S
## 513        1      1   male 56.00     0     0  35.5000             A26        C
## 514        1      3 female 23.00     0     0   7.5500                        S
## 515        1      2 female 18.00     0     1  23.0000                        S
## 516        0      3   male 21.00     0     0   8.4333                        S
## 517        0      3 female 18.00     0     0   6.7500                        Q
## 518        0      2   male 24.00     2     0  73.5000                        S
## 519        0      3 female 32.00     1     1  15.5000                        Q
## 520        0      2   male 23.00     0     0  13.0000                        S
## 521        0      1   male 58.00     0     2 113.2750             D48        C
## 522        1      1   male 50.00     2     0 133.6500                        S
## 523        0      3   male 40.00     0     0   7.2250                        C
## 524        0      1   male 47.00     0     0  25.5875             E58        S
## 525        0      3   male 36.00     0     0   7.4958                        S
## 526        1      3   male 20.00     1     0   7.9250                        S
## 527        0      2   male 32.00     2     0  73.5000                        S
## 528        0      2   male 25.00     0     0  13.0000                        S
## 529        0      3   male 43.00     0     0   8.0500                        S
## 530        1      2 female 40.00     1     1  39.0000                        S
## 531        0      1   male 31.00     1     0  52.0000             B71        S
## 532        0      2   male 70.00     0     0  10.5000                        S
## 533        1      2   male 31.00     0     0  13.0000                        S
## 534        0      3   male 18.00     0     0   7.7750                        S
## 535        0      3   male 24.50     0     0   8.0500                        S
## 536        1      3 female 18.00     0     0   9.8417                        S
## 537        0      3 female 43.00     1     6  46.9000                        S
## 538        1      1   male 36.00     0     1 512.3292     B51 B53 B55        C
## 539        1      1   male 27.00     0     0  76.7292             D49        C
## 540        0      3   male 20.00     0     0   9.2250                        S
## 541        0      3   male 14.00     5     2  46.9000                        S
## 542        0      2   male 60.00     1     1  39.0000                        S
## 543        0      2   male 25.00     1     2  41.5792                        C
## 544        0      3   male 14.00     4     1  39.6875                        S
## 545        0      3   male 19.00     0     0  10.1708                        S
## 546        0      3   male 18.00     0     0   7.7958                        S
## 547        1      1 female 15.00     0     1 211.3375              B5        S
## 548        1      1   male 31.00     1     0  57.0000             B20        S
## 549        1      3 female  4.00     0     1  13.4167                        C
## 550        0      3   male 25.00     0     0   7.2250                        C
## 551        0      1   male 60.00     0     0  26.5500                        S
## 552        0      2   male 52.00     0     0  13.5000                        S
## 553        0      3   male 44.00     0     0   8.0500                        S
## 554        0      1   male 49.00     1     1 110.8833             C68        C
## 555        0      3   male 42.00     0     0   7.6500           F G63        S
## 556        1      1 female 18.00     1     0 227.5250         C62 C64        C
## 557        1      1   male 35.00     0     0  26.2875             E24        S
## 558        0      3 female 18.00     0     1  14.4542                        C
## 559        0      3   male 25.00     0     0   7.7417                        Q
## 560        0      3   male 26.00     1     0   7.8542                        S
## 561        0      2   male 39.00     0     0  26.0000                        S
## 562        1      2 female 45.00     0     0  13.5000                        S
## 563        1      1   male 42.00     0     0  26.2875             E24        S
## 564        1      1 female 22.00     0     0 151.5500                        S
## 565        1      1 female 24.00     0     0  49.5042             C90        C
## 566        1      1   male 48.00     1     0  52.0000            C126        S
## 567        0      3   male 29.00     0     0   9.4833                        S
## 568        0      2   male 52.00     0     0  13.0000                        S
## 569        0      3   male 19.00     0     0   7.6500           F G73        S
## 570        1      1 female 38.00     0     0 227.5250             C45        C
## 571        1      2 female 27.00     0     0  10.5000            E101        S
## 572        0      3   male 33.00     0     0   7.7750                        S
## 573        1      2 female  6.00     0     1  33.0000                        S
## 574        0      3   male 17.00     1     0   7.0542                        S
## 575        0      2   male 34.00     0     0  13.0000                        S
## 576        0      2   male 50.00     0     0  13.0000                        S
## 577        1      1   male 27.00     1     0  53.1000              E8        S
## 578        0      3   male 20.00     0     0   8.6625                        S
## 579        1      2 female 30.00     3     0  21.0000                        S
## 580        0      2   male 25.00     1     0  26.0000                        S
## 581        0      3 female 25.00     1     0   7.9250                        S
## 582        1      1 female 29.00     0     0 211.3375              B5        S
## 583        0      3   male 11.00     0     0  18.7875                        C
## 584        0      2   male 23.00     0     0  13.0000                        S
## 585        0      2   male 23.00     0     0  13.0000                        S
## 586        0      3   male 28.50     0     0  16.1000                        S
## 587        0      3 female 48.00     1     3  34.3750                        S
## 588        1      1   male 35.00     0     0 512.3292            B101        C
## 589        0      1   male 36.00     1     0  78.8500             C46        S
## 590        1      1 female 21.00     2     2 262.3750 B57 B59 B63 B66        C
## 591        0      3   male 24.00     1     0  16.1000                        S
## 592        1      3   male 31.00     0     0   7.9250                        S
## 593        0      1   male 70.00     1     1  71.0000             B22        S
## 594        0      3   male 16.00     1     1  20.2500                        S
## 595        1      2 female 30.00     0     0  13.0000                        S
## 596        0      1   male 19.00     1     0  53.1000             D30        S
## 597        0      3   male 31.00     0     0   7.7500                        Q
## 598        1      2 female  4.00     1     1  23.0000                        S
## 599        1      3   male  6.00     0     1  12.4750            E121        S
## 600        0      3   male 33.00     0     0   9.5000                        S
## 601        0      3   male 23.00     0     0   7.8958                        S
## 602        1      2 female 48.00     1     2  65.0000                        S
## 603        1      2   male  0.67     1     1  14.5000                        S
## 604        0      3   male 28.00     0     0   7.7958                        S
## 605        0      2   male 18.00     0     0  11.5000                        S
## 606        0      3   male 34.00     0     0   8.0500                        S
## 607        1      1 female 33.00     0     0  86.5000             B77        S
## 608        0      3   male 41.00     0     0   7.1250                        S
## 609        1      3   male 20.00     0     0   7.2292                        C
## 610        1      1 female 36.00     1     2 120.0000         B96 B98        S
## 611        0      3   male 16.00     0     0   7.7750                        S
## 612        1      1 female 51.00     1     0  77.9583             D11        S
## 613        0      3 female 30.50     0     0   7.7500                        Q
## 614        0      3   male 32.00     0     0   8.3625                        S
## 615        0      3   male 24.00     0     0   9.5000                        S
## 616        0      3   male 48.00     0     0   7.8542                        S
## 617        0      2 female 57.00     0     0  10.5000             E77        S
## 618        1      2 female 54.00     1     3  23.0000                        S
## 619        0      3   male 18.00     0     0   7.7500                        S
## 620        1      3 female  5.00     0     0  12.4750                        S
## 621        1      1 female 43.00     0     1 211.3375              B3        S
## 622        1      3 female 13.00     0     0   7.2292                        C
## 623        1      1 female 17.00     1     0  57.0000             B20        S
## 624        0      1   male 29.00     0     0  30.0000              D6        S
## 625        0      3   male 25.00     0     0   7.0500                        S
## 626        0      3   male 25.00     0     0   7.2500                        S
## 627        1      3 female 18.00     0     0   7.4958                        S
## 628        0      3   male  8.00     4     1  29.1250                        Q
## 629        1      3   male  1.00     1     2  20.5750                        S
## 630        0      1   male 46.00     0     0  79.2000         B82 B84        C
## 631        0      2   male 16.00     0     0  26.0000                        S
## 632        0      3   male 25.00     0     0   7.8958                        S
## 633        0      2   male 39.00     0     0  13.0000                        S
## 634        1      1 female 49.00     0     0  25.9292             D17        S
## 635        1      3 female 31.00     0     0   8.6833                        S
## 636        0      3   male 30.00     0     0   7.2292                        C
## 637        0      3 female 30.00     1     1  24.1500                        S
## 638        0      2   male 34.00     0     0  13.0000                        S
## 639        1      2 female 31.00     1     1  26.2500                        S
## 640        1      1   male 11.00     1     2 120.0000         B96 B98        S
## 641        1      3   male  0.42     0     1   8.5167                        C
## 642        1      3   male 27.00     0     0   6.9750                        S
## 643        0      3   male 31.00     0     0   7.7750                        S
## 644        0      1   male 39.00     0     0   0.0000             A36        S
## 645        0      3 female 18.00     0     0   7.7750                        S
## 646        0      2   male 39.00     0     0  13.0000                        S
## 647        1      1 female 33.00     1     0  53.1000              E8        S
## 648        0      3   male 26.00     0     0   7.8875                        S
## 649        0      3   male 39.00     0     0  24.1500                        S
## 650        0      2   male 35.00     0     0  10.5000                        S
## 651        0      3 female  6.00     4     2  31.2750                        S
## 652        0      3   male 30.50     0     0   8.0500                        S
## 653        0      3 female 23.00     0     0   7.9250                        S
## 654        0      2   male 31.00     1     1  37.0042                        C
## 655        0      3   male 43.00     0     0   6.4500                        S
## 656        0      3   male 10.00     3     2  27.9000                        S
## 657        1      1 female 52.00     1     1  93.5000             B69        S
## 658        1      3   male 27.00     0     0   8.6625                        S
## 659        0      1   male 38.00     0     0   0.0000                        S
## 660        1      3 female 27.00     0     1  12.4750            E121        S
## 661        0      3   male  2.00     4     1  39.6875                        S
## 662        1      2   male  1.00     0     2  37.0042                        C
## 663        1      1 female 62.00     0     0  80.0000             B28         
## 664        1      3 female 15.00     1     0  14.4542                        C
## 665        1      2   male  0.83     1     1  18.7500                        S
## 666        0      3   male 23.00     0     0   7.8542                        S
## 667        0      3   male 18.00     0     0   8.3000                        S
## 668        1      1 female 39.00     1     1  83.1583             E49        C
## 669        0      3   male 21.00     0     0   8.6625                        S
## 670        1      3   male 32.00     0     0  56.4958                        S
## 671        0      3   male 20.00     0     0   7.9250                        S
## 672        0      2   male 16.00     0     0  10.5000                        S
## 673        1      1 female 30.00     0     0  31.0000                        C
## 674        0      3   male 34.50     0     0   6.4375                        C
## 675        0      3   male 17.00     0     0   8.6625                        S
## 676        0      3   male 42.00     0     0   7.5500                        S
## 677        0      3   male 35.00     0     0   7.8958                        C
## 678        0      2   male 28.00     0     1  33.0000                        S
## 679        0      3   male  4.00     4     2  31.2750                        S
## 680        0      3   male 74.00     0     0   7.7750                        S
## 681        0      3 female  9.00     1     1  15.2458                        C
## 682        1      1 female 16.00     0     1  39.4000             D28        S
## 683        0      2 female 44.00     1     0  26.0000                        S
## 684        1      3 female 18.00     0     1   9.3500                        S
## 685        1      1 female 45.00     1     1 164.8667                        S
## 686        1      1   male 51.00     0     0  26.5500             E17        S
## 687        1      3 female 24.00     0     3  19.2583                        C
## 688        0      3   male 41.00     2     0  14.1083                        S
## 689        0      2   male 21.00     1     0  11.5000                        S
## 690        1      1 female 48.00     0     0  25.9292             D17        S
## 691        0      2   male 24.00     0     0  13.0000                        S
## 692        1      2 female 42.00     0     0  13.0000                        S
## 693        1      2 female 27.00     1     0  13.8583                        C
## 694        0      1   male 31.00     0     0  50.4958             A24        S
## 695        1      3   male  4.00     1     1  11.1333                        S
## 696        0      3   male 26.00     0     0   7.8958                        S
## 697        1      1 female 47.00     1     1  52.5542             D35        S
## 698        0      1   male 33.00     0     0   5.0000     B51 B53 B55        S
## 699        0      3   male 47.00     0     0   9.0000                        S
## 700        1      2 female 28.00     1     0  24.0000                        C
## 701        1      3 female 15.00     0     0   7.2250                        C
## 702        0      3   male 20.00     0     0   9.8458                        S
## 703        0      3   male 19.00     0     0   7.8958                        S
## 704        1      1 female 56.00     0     1  83.1583             C50        C
## 705        1      2 female 25.00     0     1  26.0000                        S
## 706        0      3   male 33.00     0     0   7.8958                        S
## 707        0      3 female 22.00     0     0  10.5167                        S
## 708        0      2   male 28.00     0     0  10.5000                        S
## 709        0      3   male 25.00     0     0   7.0500                        S
## 710        0      3 female 39.00     0     5  29.1250                        Q
## 711        0      2   male 27.00     0     0  13.0000                        S
## 712        1      1 female 19.00     0     0  30.0000             B42        S
## 713        1      1   male 26.00     0     0  30.0000            C148        C
## 714        0      3   male 32.00     0     0   7.7500                        Q

On the other hand, keeping missing factor levels might be able to lead to meaningful models Empty level names of the Cabin and Embarked factors will cause problems in some analysis. Other missing factor value imputation remains a good option beyond the scope of this tutorial.

4.0.4 Counts of empty strings in columns

# Count empty strings in all columns
# filter to count all no zero columns
titanic %>%
  summarize(across(everything(), ~ sum(. == ""))) %>% 
  t() %>%
  as.data.frame() %>%
  filter(V1>0)
##           V1
## Cabin    687
## Embarked   2
titanic <- titanic %>%
  mutate(Cabin = if_else(Cabin == "","Missing",Cabin),
         Embarked = if_else(Embarked == "","Missing",Embarked))

titanic %>%
  summarize(across(everything(), ~ sum(. == "")))
##   Survived Pclass Sex Age SibSp Parch Fare Cabin Embarked
## 1        0      0   0  NA     0     0    0     0        0

5 Understanding numeric variables

# base R version
summary(titanic[c("Sex", "Age")])
##      Sex           Age       
##  female:314   Min.   : 0.42  
##  male  :577   1st Qu.:20.12  
##               Median :28.00  
##               Mean   :29.70  
##               3rd Qu.:38.00  
##               Max.   :80.00  
##               NA's   :177
#tidyverse version
titanic %>% select(Sex,Age) %>% summary()
##      Sex           Age       
##  female:314   Min.   : 0.42  
##  male  :577   1st Qu.:20.12  
##               Median :28.00  
##               Mean   :29.70  
##               3rd Qu.:38.00  
##               Max.   :80.00  
##               NA's   :177
# YOU DO.
titanic %>% select(Sex,Age,Fare) %>% summary()
##      Sex           Age             Fare       
##  female:314   Min.   : 0.42   Min.   :  0.00  
##  male  :577   1st Qu.:20.12   1st Qu.:  7.91  
##               Median :28.00   Median : 14.45  
##               Mean   :29.70   Mean   : 32.20  
##               3rd Qu.:38.00   3rd Qu.: 31.00  
##               Max.   :80.00   Max.   :512.33  
##               NA's   :177

5.1 Quantile function

Show summary of one or more columns

# quantiles and deciles in Base R

quantile(titanic$Fare, seq(from = 0, to = 1, by = 0.20), na.rm=TRUE)
##       0%      20%      40%      60%      80%     100% 
##   0.0000   7.8542  10.5000  21.6792  39.6875 512.3292
quantile(titanic$Fare, seq(from = 0, to = 1, by = 0.10), na.rm=TRUE)
##       0%      10%      20%      30%      40%      50%      60%      70% 
##   0.0000   7.5500   7.8542   8.0500  10.5000  14.4542  21.6792  27.0000 
##      80%      90%     100% 
##  39.6875  77.9583 512.3292
# same in tidyverse
titanic %>% pull(Fare) %>% quantile(., seq(from = 0, to = 1, by = 0.20), na.rm=TRUE)
##       0%      20%      40%      60%      80%     100% 
##   0.0000   7.8542  10.5000  21.6792  39.6875 512.3292
titanic %>% pull(Fare) %>% quantile(., seq(from = 0, to = 1, by = 0.10), na.rm=TRUE)
##       0%      10%      20%      30%      40%      50%      60%      70% 
##   0.0000   7.5500   7.8542   8.0500  10.5000  14.4542  21.6792  27.0000 
##      80%      90%     100% 
##  39.6875  77.9583 512.3292
# now you do the same for Age
titanic %>% pull(Age) %>% quantile(., seq(from = 0, to = 1, by = 0.20), na.rm=TRUE)
##    0%   20%   40%   60%   80%  100% 
##  0.42 19.00 25.00 31.80 41.00 80.00
titanic %>% pull(Age) %>% quantile(., seq(from = 0, to = 1, by = 0.10), na.rm=TRUE)
##    0%   10%   20%   30%   40%   50%   60%   70%   80%   90%  100% 
##  0.42 14.00 19.00 22.00 25.00 28.00 31.80 36.00 41.00 50.00 80.00

5.2 Boxplots

Useful numeric visualization.

# Base R
boxplot(titanic$Fare, main="Boxplot of Age in the titanic data set",
        ylab="Age")

# ggplot
# replace the following code with code to display Fare.
# change the title to reflect the new data. 

titanic %>% 
  ggplot(aes(x=Fare)) + 
  geom_boxplot() +
  ggtitle('boxplot of Fare ($)')

# histograms of a numeric variable

hist(titanic$Fare, main = "Histogram of Fare in the titanic data set",
     xlab = "Fare")

titanic %>% ggplot() +
  geom_histogram(aes(x=Fare),binwidth = 20) +
  ggtitle('Histogram of Fare in the titanic data set')

# create a new histogram of Age using the following template

titanic %>% ggplot() +
  geom_histogram(aes(x=Age),binwidth = 20) +
  ggtitle('Histogram of Age in the titanic data set')
## Warning: Removed 177 rows containing non-finite values (`stat_bin()`).

#base R
var(titanic$Fare)
## [1] 2469.437
sd(titanic$Fare)
## [1] 49.69343
#tidyverse
titanic %>% pull(Fare) %>% var()
## [1] 2469.437
titanic %>% pull(Fare) %>% sd()
## [1] 49.69343
# now you do the same for Age 
var(titanic$Age)
## [1] NA
sd(titanic$Age)
## [1] NA

5.3 Rounding

It’s nice to be able to make numeric variables more readable. Consider rounding to improve readability.

titanic %>% select(Fare) %>% head()
##      Fare
## 1  7.2500
## 2 71.2833
## 3  7.9250
## 4 53.1000
## 5  8.0500
## 6  8.4583
# rounded
titanic %>% select(Fare) %>% round() %>% head()
##   Fare
## 1    7
## 2   71
## 3    8
## 4   53
## 5    8
## 6    8
# now you try for Age
titanic %>% select(Age) %>% head()
##   Age
## 1  22
## 2  38
## 3  26
## 4  35
## 5  35
## 6  NA

5.4 Understand relationship of multiple variables

Generate correlation coefficients of two numeric variables in a 2x2 matrix cor(X,Y) lies between -1 and 1. zero means no correlation. 1 or -1 indicates full correlation positive value means positive correlation and negative values mean negative relationships Examine the components in the formulation for correlation coefficients cor(X,Y) = cov(X,Y)/(sd(X)sd(Y)) cov(X,Y) = E[X-E(X)]E[Y-E(Y)]

# cor,  boxplot, 2D scatter plot - plot, 3D scatter plot

# scatter plot: two numeric variables
# base R
plot(titanic$Age, titanic$Fare)

# ggplot
titanic %>% ggplot() +
  geom_point(aes(x=Age,y=Fare))
## Warning: Removed 177 rows containing missing values (`geom_point()`).

cov(titanic[,c("Fare","Age")]) # this will display incorrect results if missing values are not removed
##          Fare Age
## Fare 2469.437  NA
## Age        NA  NA
var(titanic[,c("Fare","Age")])
##          Fare Age
## Fare 2469.437  NA
## Age        NA  NA

5.5 pairs.panels

# Generate 2D scatter plots and correlation coefficients
# tidyverse 
# select only numeric variables for pairs panels. 
titanic %>% select(where(is.numeric)) %>% pairs.panels()

6 Exploring factor variables

6.1 nlevels(), is.factor()

# A factor's distinct values

# base R
is.factor(titanic$Survived)
## [1] TRUE
nlevels(titanic$Survived)
## [1] 2
# tidy syntax
titanic %>% pull(Survived) %>% nlevels()
## [1] 2
# now you do the same for Pclass
is.factor(titanic$Pclass)
## [1] TRUE
nlevels(titanic$Pclass)
## [1] 3

6.2 Barplot

plot(titanic$Pclass,main="Barplot of Pclass")

#tidyveryse
titanic %>% ggplot() +
  geom_bar(aes(x=Pclass)) +
  ggtitle("Barplot of Pclass")

# now you create a barplot for Sex
titanic %>% ggplot() +
  geom_bar(aes(x=Sex)) +
  ggtitle("Barplot of Sex")

# Exploring Categorical grouped by Categorical (Factor by Factor)

# base R
table(titanic$Survived,titanic$Pclass) # shows the raw counts
##    
##       1   2   3
##   0  80  97 372
##   1 136  87 119
prop.table(table(titanic$Survived,titanic$Pclass)) # shows the proportions
##    
##              1          2          3
##   0 0.08978676 0.10886644 0.41750842
##   1 0.15263749 0.09764310 0.13355780
prop.table(table(titanic$Survived,titanic$Pclass))*100 # shows the percentages
##    
##             1         2         3
##   0  8.978676 10.886644 41.750842
##   1 15.263749  9.764310 13.355780
# sort of tidyverse (ish)
titanic %>% select(Survived,Pclass) %>% table()
##         Pclass
## Survived   1   2   3
##        0  80  97 372
##        1 136  87 119
titanic %>% select(Survived,Pclass) %>% table() %>% prop.table() %>% round(2)
##         Pclass
## Survived    1    2    3
##        0 0.09 0.11 0.42
##        1 0.15 0.10 0.13
titanic %>% select(Survived,Pclass) %>% table() %>% prop.table() %>% round(2) * 100
##         Pclass
## Survived  1  2  3
##        0  9 11 42
##        1 15 10 13
# now you create a barplot for Sex grouped by Survived
titanic %>% ggplot() +
  geom_bar(aes(x=Sex, fill=Survived)) +
  ggtitle("Barplot of Sex by Survived")

#tidyveryse
titanic %>% ggplot() +
  geom_bar(aes(x=Pclass,fill=Survived),position="dodge") +
  ggtitle("Barplot of Pclass by Survived")

# now you create a barplot for Sex grouped by Survived
titanic %>% ggplot() +
  geom_bar(aes(x=Sex,fill=Survived),position="dodge") +
  ggtitle("Barplot of Sex by Survived")

Wow! A lot more passengers in Class 3 didn’t survive than those who did. And conversely in Class 1 more passengers survived than did not. In Class 2 it was a bit of a mixed bag.

Often simply looking at a single column is insufficient for the needs of the analysis. Being able to ask the question: When I group the data by a column, how do other columns behave? Is a more interesting and useful EDA task. For example: In the titanic how do the groups of passengers who survived differ from those who did not? Group our data by survived to explore this question.

7 Exploring numeric variables by factors

7.1 Boxplot

This groups values of a numeric variable based on the values of a factor

# base R
boxplot(Age~Survived, data = titanic)

# Below change the graph to display the difference between Survived by Fare

titanic %>% 
  ggplot() +
  geom_boxplot(aes(x=Fare,y=Survived,color=Fare)) +
  ggtitle('Survived by Fare')
## Warning: The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

## The aggregate function

# We can use the aggregate command to aggregate a numeric feature by a categorical one.

# The aggregate function has three parameters

# 1. The numeric value, e.g. sales, to be aggregated to find out, e.g., total of sales,
#   average of sales, number of sales (i.e. orders).

# 2. The set of categories, product_category and sales_region, on which you wish
#   to aggregate

# 3.The aggregation function (e.g., sum, mean, length) that you wish to use


# this will not show in output until Knit. 
aggregate(Fare~Survived, summary, data = titanic)
##   Survived Fare.Min. Fare.1st Qu. Fare.Median Fare.Mean Fare.3rd Qu. Fare.Max.
## 1        0   0.00000      7.85420    10.50000  22.11789     26.00000 263.00000
## 2        1   0.00000     12.47500    26.00000  48.39541     57.00000 512.32920
#tidyverse

titanic %>%
  group_by(Survived) %>%
  summarize(
    min = min(Fare),
    q1 = quantile(Fare, 0.25),
    median = median(Fare),
    mean = mean(Fare), # adding in mean as well
    q3 = quantile(Fare, 0.75),
    max = max(Fare)
  )
## # A tibble: 2 × 7
##   Survived   min    q1 median  mean    q3   max
##   <fct>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 0            0  7.85   10.5  22.1    26  263 
## 2 1            0 12.5    26    48.4    57  512.

7.2 Scatter plot of numeric values and factor values

plot(titanic$Age,titanic$Fare, col=titanic$Survived, pch = as.numeric((titanic$Survived)))

titanic %>% ggplot() + geom_point(aes(x=Age,y=Fare,color=Survived))
## Warning: Removed 177 rows containing missing values (`geom_point()`).