library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.1 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(psych)
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(RWeka)
The sinking of the RMS Titanic is one of the most infamous shipwrecks in history. On April 15, 1912, during her maiden voyage, the Titanic sank after colliding with an iceberg, killing 1502 out of 2224 passengers and crew. This sensational tragedy shocked the international community and led to better safety regulations for ships. One of the reasons that the shipwreck led to such loss of life was that there were not enough lifeboats for the passengers and crew. Although there was some element of luck involved in surviving the sinking, some groups of people such as women, children, and the upper-class were more likely to survive than others.
VARIABLE DESCRIPTIONS:
- PassengerId: Unique passenger identifier
- Survived: Survival (0 = No; 1 = Yes)
- Pclass: Passenger class (1 = 1st; 2 = 2nd; 3 = 3rd). Pclass is a proxy for socio-economic status (SES): 1st ~ Upper; 2nd ~ Middle; 3rd ~ Lower.
- Name: Name
- Sex: Sex
- Age: Age in years; fractional if Age is less than one (1). If the Age is estimated, it is in the form xx.5.
- SibSp: Number of siblings/spouses aboard
- Parch: Number of parents/children aboard
- Ticket: Ticket number
- Fare: Passenger fare
- Cabin: Cabin
- Embarked: Port of embarkation (C = Cherbourg; Q = Queenstown; S = Southampton)
# Read the Titanic training data from the current working directory.
# `cloud_wd` records that directory and the file path is built from it
# explicitly, so no setwd() call is needed (setwd(getwd()) changes nothing).
cloud_wd <- getwd()
titanic <- read.csv(
  file = file.path(cloud_wd, "titanic.train.csv"),
  stringsAsFactors = FALSE # keep text columns as character vectors
)
str() shows the number of observations, and the number, names, types and some values of columns
# Inspect the structure of the freshly loaded data frame
str(titanic)
## 'data.frame': 891 obs. of 12 variables:
## $ PassengerId: int 1 2 3 4 5 6 7 8 9 10 ...
## $ Survived : int 0 1 1 1 0 0 0 0 1 1 ...
## $ Pclass : int 3 1 3 1 3 3 1 3 3 2 ...
## $ Name : chr "Braund, Mr. Owen Harris" "Cumings, Mrs. John Bradley (Florence Briggs Thayer)" "Heikkinen, Miss. Laina" "Futrelle, Mrs. Jacques Heath (Lily May Peel)" ...
## $ Sex : chr "male" "female" "female" "female" ...
## $ Age : num 22 38 26 35 35 NA 54 2 27 14 ...
## $ SibSp : int 1 1 0 1 0 0 0 3 0 1 ...
## $ Parch : int 0 0 0 0 0 0 0 1 2 0 ...
## $ Ticket : chr "A/5 21171" "PC 17599" "STON/O2. 3101282" "113803" ...
## $ Fare : num 7.25 71.28 7.92 53.1 8.05 ...
## $ Cabin : chr "" "C85" "" "C123" ...
## $ Embarked : chr "S" "C" "S" "S" ...
# Store the number of rows and columns of the data frame in local variables.
# Note: the variables `row` and `col` share their names with base functions;
# calls such as row(x) still resolve to the functions.
row <- dim(titanic)[1]
print(row)
## [1] 891
# expected: 891
col <- dim(titanic)[2]
print(col)
## [1] 12
# expected: 12
# Preview the first rows (head() shows six by default)
head(titanic)
## PassengerId Survived Pclass
## 1 1 0 3
## 2 2 1 1
## 3 3 1 3
## 4 4 1 1
## 5 5 0 3
## 6 6 0 3
## Name Sex Age SibSp Parch
## 1 Braund, Mr. Owen Harris male 22 1 0
## 2 Cumings, Mrs. John Bradley (Florence Briggs Thayer) female 38 1 0
## 3 Heikkinen, Miss. Laina female 26 0 0
## 4 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35 1 0
## 5 Allen, Mr. William Henry male 35 0 0
## 6 Moran, Mr. James male NA 0 0
## Ticket Fare Cabin Embarked
## 1 A/5 21171 7.2500 S
## 2 PC 17599 71.2833 C85 C
## 3 STON/O2. 3101282 7.9250 S
## 4 113803 53.1000 C123 S
## 5 373450 8.0500 S
## 6 330877 8.4583 Q
# Preview the last rows (tail() shows six by default)
tail(titanic)
## PassengerId Survived Pclass Name Sex
## 886 886 0 3 Rice, Mrs. William (Margaret Norton) female
## 887 887 0 2 Montvila, Rev. Juozas male
## 888 888 1 1 Graham, Miss. Margaret Edith female
## 889 889 0 3 Johnston, Miss. Catherine Helen "Carrie" female
## 890 890 1 1 Behr, Mr. Karl Howell male
## 891 891 0 3 Dooley, Mr. Patrick male
## Age SibSp Parch Ticket Fare Cabin Embarked
## 886 39 0 5 382652 29.125 Q
## 887 27 0 0 211536 13.000 S
## 888 19 0 0 112053 30.000 B42 S
## 889 NA 1 2 W./C. 6607 23.450 S
## 890 26 0 0 111369 30.000 C148 C
## 891 32 0 0 370376 7.750 Q
# Preview the first ten rows
head(titanic, n = 10)
## PassengerId Survived Pclass
## 1 1 0 3
## 2 2 1 1
## 3 3 1 3
## 4 4 1 1
## 5 5 0 3
## 6 6 0 3
## 7 7 0 1
## 8 8 0 3
## 9 9 1 3
## 10 10 1 2
## Name Sex Age SibSp Parch
## 1 Braund, Mr. Owen Harris male 22 1 0
## 2 Cumings, Mrs. John Bradley (Florence Briggs Thayer) female 38 1 0
## 3 Heikkinen, Miss. Laina female 26 0 0
## 4 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35 1 0
## 5 Allen, Mr. William Henry male 35 0 0
## 6 Moran, Mr. James male NA 0 0
## 7 McCarthy, Mr. Timothy J male 54 0 0
## 8 Palsson, Master. Gosta Leonard male 2 3 1
## 9 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27 0 2
## 10 Nasser, Mrs. Nicholas (Adele Achem) female 14 1 0
## Ticket Fare Cabin Embarked
## 1 A/5 21171 7.2500 S
## 2 PC 17599 71.2833 C85 C
## 3 STON/O2. 3101282 7.9250 S
## 4 113803 53.1000 C123 S
## 5 373450 8.0500 S
## 6 330877 8.4583 Q
## 7 17463 51.8625 E46 S
## 8 349909 21.0750 S
## 9 347742 11.1333 S
## 10 237736 30.0708 C
# Preview the last seven rows
tail(titanic, n = 7)
## PassengerId Survived Pclass Name Sex
## 885 885 0 3 Sutehall, Mr. Henry Jr male
## 886 886 0 3 Rice, Mrs. William (Margaret Norton) female
## 887 887 0 2 Montvila, Rev. Juozas male
## 888 888 1 1 Graham, Miss. Margaret Edith female
## 889 889 0 3 Johnston, Miss. Catherine Helen "Carrie" female
## 890 890 1 1 Behr, Mr. Karl Howell male
## 891 891 0 3 Dooley, Mr. Patrick male
## Age SibSp Parch Ticket Fare Cabin Embarked
## 885 25 0 0 SOTON/OQ 392076 7.050 S
## 886 39 0 5 382652 29.125 Q
## 887 27 0 0 211536 13.000 S
## 888 19 0 0 112053 30.000 B42 S
## 889 NA 1 2 W./C. 6607 23.450 S
## 890 26 0 0 111369 30.000 C148 C
## 891 32 0 0 370376 7.750 Q
summary() shows the mean and the five-number summary statistics indicating the spread of each column’s values
# Per-column summary: quartiles and mean for numeric columns, length/class/mode
# for character columns, plus an NA count where values are missing
summary(titanic)
## PassengerId Survived Pclass Name
## Min. : 1.0 Min. :0.0000 Min. :1.000 Length:891
## 1st Qu.:223.5 1st Qu.:0.0000 1st Qu.:2.000 Class :character
## Median :446.0 Median :0.0000 Median :3.000 Mode :character
## Mean :446.0 Mean :0.3838 Mean :2.309
## 3rd Qu.:668.5 3rd Qu.:1.0000 3rd Qu.:3.000
## Max. :891.0 Max. :1.0000 Max. :3.000
##
## Sex Age SibSp Parch
## Length:891 Min. : 0.42 Min. :0.000 Min. :0.0000
## Class :character 1st Qu.:20.12 1st Qu.:0.000 1st Qu.:0.0000
## Mode :character Median :28.00 Median :0.000 Median :0.0000
## Mean :29.70 Mean :0.523 Mean :0.3816
## 3rd Qu.:38.00 3rd Qu.:1.000 3rd Qu.:0.0000
## Max. :80.00 Max. :8.000 Max. :6.0000
## NA's :177
## Ticket Fare Cabin Embarked
## Length:891 Min. : 0.00 Length:891 Length:891
## Class :character 1st Qu.: 7.91 Class :character Class :character
## Mode :character Median : 14.45 Mode :character Mode :character
## Mean : 32.20
## 3rd Qu.: 31.00
## Max. :512.33
##
Remove unique identifiers from further analysis, as they are not interesting without additional feature extraction.
### Selecting columns
# Preview only the Sex and Age columns; head() limits the printout so the
# entire data frame is not printed
titanic %>%
  select(c(Sex, Age)) %>%
  head()
## Sex Age
## 1 male 22
## 2 female 38
## 3 female 26
## 4 female 35
## 5 male 35
## 6 male NA
# Drop the identifier-like columns so they are excluded from later analysis
titanic <- titanic %>%
  select(-c(PassengerId, Name, Ticket))
Change Survived and the other nominal variables to factors. Use str() to see the data before and after the transformation.
# Structure before the factor conversion
str(titanic)
## 'data.frame': 891 obs. of 9 variables:
## $ Survived: int 0 1 1 1 0 0 0 0 1 1 ...
## $ Pclass : int 3 1 3 1 3 3 1 3 3 2 ...
## $ Sex : chr "male" "female" "female" "female" ...
## $ Age : num 22 38 26 35 35 NA 54 2 27 14 ...
## $ SibSp : int 1 1 0 1 0 0 0 3 0 1 ...
## $ Parch : int 0 0 0 0 0 0 0 1 2 0 ...
## $ Fare : num 7.25 71.28 7.92 53.1 8.05 ...
## $ Cabin : chr "" "C85" "" "C123" ...
## $ Embarked: chr "S" "C" "S" "S" ...
# Convert every nominal variable to a factor in one tidyverse step:
# across() applies factor() to each listed column in place
titanic <- titanic %>%
  mutate(across(c(Survived, Sex, Pclass, Cabin, Embarked), factor))
# Verify the conversion: the nominal columns should now be reported as Factor
str(titanic)
## 'data.frame': 891 obs. of 9 variables:
## $ Survived: Factor w/ 2 levels "0","1": 1 2 2 2 1 1 1 1 2 2 ...
## $ Pclass : Factor w/ 3 levels "1","2","3": 3 1 3 1 3 3 1 3 3 2 ...
## $ Sex : Factor w/ 2 levels "female","male": 2 1 1 1 2 2 2 2 1 1 ...
## $ Age : num 22 38 26 35 35 NA 54 2 27 14 ...
## $ SibSp : int 1 1 0 1 0 0 0 3 0 1 ...
## $ Parch : int 0 0 0 0 0 0 0 1 2 0 ...
## $ Fare : num 7.25 71.28 7.92 53.1 8.05 ...
## $ Cabin : Factor w/ 148 levels "","A10","A14",..: 1 83 1 57 1 1 131 1 1 1 ...
## $ Embarked: Factor w/ 4 levels "","C","Q","S": 4 2 4 4 4 3 4 4 4 2 ...
Remove observations with missing Age values.
This approach to handling missing data has an obvious disadvantage: it limits the applicability of the model to data with missing Age values.
Keeping observations with missing Age values requires careful imputation of the missing ages.
The various missing data imputation methods are beyond the knowledge required for this tutorial.
# Count the missing (NA) values in every column
titanic %>%
  summarise(across(everything(), function(column) sum(is.na(column))))
## Survived Pclass Sex Age SibSp Parch Fare Cabin Embarked
## 1 0 0 0 177 0 0 0 0 0
Dropping rows with missing values from the data frame
# Print the rows that have no missing value in any column. Only Age contains
# NAs in this data, so this drops the observations with a missing Age.
# NOTE(review): the result is only printed, not assigned back to `titanic` --
# confirm that a later step stores it if the filtered data is needed.
drop_na(titanic)
## Survived Pclass Sex Age SibSp Parch Fare Cabin Embarked
## 1 0 3 male 22.00 1 0 7.2500 S
## 2 1 1 female 38.00 1 0 71.2833 C85 C
## 3 1 3 female 26.00 0 0 7.9250 S
## 4 1 1 female 35.00 1 0 53.1000 C123 S
## 5 0 3 male 35.00 0 0 8.0500 S
## 6 0 1 male 54.00 0 0 51.8625 E46 S
## 7 0 3 male 2.00 3 1 21.0750 S
## 8 1 3 female 27.00 0 2 11.1333 S
## 9 1 2 female 14.00 1 0 30.0708 C
## 10 1 3 female 4.00 1 1 16.7000 G6 S
## 11 1 1 female 58.00 0 0 26.5500 C103 S
## 12 0 3 male 20.00 0 0 8.0500 S
## 13 0 3 male 39.00 1 5 31.2750 S
## 14 0 3 female 14.00 0 0 7.8542 S
## 15 1 2 female 55.00 0 0 16.0000 S
## 16 0 3 male 2.00 4 1 29.1250 Q
## 17 0 3 female 31.00 1 0 18.0000 S
## 18 0 2 male 35.00 0 0 26.0000 S
## 19 1 2 male 34.00 0 0 13.0000 D56 S
## 20 1 3 female 15.00 0 0 8.0292 Q
## 21 1 1 male 28.00 0 0 35.5000 A6 S
## 22 0 3 female 8.00 3 1 21.0750 S
## 23 1 3 female 38.00 1 5 31.3875 S
## 24 0 1 male 19.00 3 2 263.0000 C23 C25 C27 S
## 25 0 1 male 40.00 0 0 27.7208 C
## 26 0 2 male 66.00 0 0 10.5000 S
## 27 0 1 male 28.00 1 0 82.1708 C
## 28 0 1 male 42.00 1 0 52.0000 S
## 29 0 3 male 21.00 0 0 8.0500 S
## 30 0 3 female 18.00 2 0 18.0000 S
## 31 1 3 female 14.00 1 0 11.2417 C
## 32 0 3 female 40.00 1 0 9.4750 S
## 33 0 2 female 27.00 1 0 21.0000 S
## 34 1 2 female 3.00 1 2 41.5792 C
## 35 1 3 female 19.00 0 0 7.8792 Q
## 36 0 3 female 18.00 1 0 17.8000 S
## 37 0 3 male 7.00 4 1 39.6875 S
## 38 0 3 male 21.00 0 0 7.8000 S
## 39 1 1 female 49.00 1 0 76.7292 D33 C
## 40 1 2 female 29.00 1 0 26.0000 S
## 41 0 1 male 65.00 0 1 61.9792 B30 C
## 42 1 2 female 21.00 0 0 10.5000 S
## 43 0 3 male 28.50 0 0 7.2292 C
## 44 1 2 female 5.00 1 2 27.7500 S
## 45 0 3 male 11.00 5 2 46.9000 S
## 46 0 3 male 22.00 0 0 7.2292 C
## 47 1 1 female 38.00 0 0 80.0000 B28
## 48 0 1 male 45.00 1 0 83.4750 C83 S
## 49 0 3 male 4.00 3 2 27.9000 S
## 50 1 2 female 29.00 0 0 10.5000 F33 S
## 51 0 3 male 19.00 0 0 8.1583 S
## 52 1 3 female 17.00 4 2 7.9250 S
## 53 0 3 male 26.00 2 0 8.6625 S
## 54 0 2 male 32.00 0 0 10.5000 S
## 55 0 3 female 16.00 5 2 46.9000 S
## 56 0 2 male 21.00 0 0 73.5000 S
## 57 0 3 male 26.00 1 0 14.4542 C
## 58 1 3 male 32.00 0 0 56.4958 S
## 59 0 3 male 25.00 0 0 7.6500 F G73 S
## 60 1 2 male 0.83 0 2 29.0000 S
## 61 1 3 female 30.00 0 0 12.4750 S
## 62 0 3 male 22.00 0 0 9.0000 S
## 63 1 3 male 29.00 0 0 9.5000 S
## 64 0 1 male 28.00 0 0 47.1000 S
## 65 1 2 female 17.00 0 0 10.5000 S
## 66 1 3 female 33.00 3 0 15.8500 S
## 67 0 3 male 16.00 1 3 34.3750 S
## 68 1 1 female 23.00 3 2 263.0000 C23 C25 C27 S
## 69 0 3 male 24.00 0 0 8.0500 S
## 70 0 3 male 29.00 0 0 8.0500 S
## 71 0 3 male 20.00 0 0 7.8542 S
## 72 0 1 male 46.00 1 0 61.1750 E31 S
## 73 0 3 male 26.00 1 2 20.5750 S
## 74 0 3 male 59.00 0 0 7.2500 S
## 75 0 1 male 71.00 0 0 34.6542 A5 C
## 76 1 1 male 23.00 0 1 63.3583 D10 D12 C
## 77 1 2 female 34.00 0 1 23.0000 S
## 78 0 2 male 34.00 1 0 26.0000 S
## 79 0 3 female 28.00 0 0 7.8958 S
## 80 0 1 male 21.00 0 1 77.2875 D26 S
## 81 0 3 male 33.00 0 0 8.6542 S
## 82 0 3 male 37.00 2 0 7.9250 S
## 83 0 3 male 28.00 0 0 7.8958 S
## 84 1 3 female 21.00 0 0 7.6500 S
## 85 0 3 male 38.00 0 0 7.8958 S
## 86 0 1 male 47.00 0 0 52.0000 C110 S
## 87 0 3 female 14.50 1 0 14.4542 C
## 88 0 3 male 22.00 0 0 8.0500 S
## 89 0 3 female 20.00 1 0 9.8250 S
## 90 0 3 female 17.00 0 0 14.4583 C
## 91 0 3 male 21.00 0 0 7.9250 S
## 92 0 3 male 70.50 0 0 7.7500 Q
## 93 0 2 male 29.00 1 0 21.0000 S
## 94 0 1 male 24.00 0 1 247.5208 B58 B60 C
## 95 0 3 female 2.00 4 2 31.2750 S
## 96 0 2 male 21.00 2 0 73.5000 S
## 97 0 2 male 32.50 1 0 30.0708 C
## 98 1 2 female 32.50 0 0 13.0000 E101 S
## 99 0 1 male 54.00 0 1 77.2875 D26 S
## 100 1 3 male 12.00 1 0 11.2417 C
## 101 1 3 male 24.00 0 0 7.1417 S
## 102 0 3 male 45.00 0 0 6.9750 S
## 103 0 3 male 33.00 0 0 7.8958 C
## 104 0 3 male 20.00 0 0 7.0500 S
## 105 0 3 female 47.00 1 0 14.5000 S
## 106 1 2 female 29.00 1 0 26.0000 S
## 107 0 2 male 25.00 0 0 13.0000 S
## 108 0 2 male 23.00 0 0 15.0458 C
## 109 1 1 female 19.00 0 2 26.2833 D47 S
## 110 0 1 male 37.00 1 0 53.1000 C123 S
## 111 0 3 male 16.00 0 0 9.2167 S
## 112 0 1 male 24.00 0 0 79.2000 B86 C
## 113 1 3 female 22.00 0 0 7.7500 S
## 114 1 3 female 24.00 1 0 15.8500 S
## 115 0 3 male 19.00 0 0 6.7500 Q
## 116 0 2 male 18.00 0 0 11.5000 S
## 117 0 2 male 19.00 1 1 36.7500 S
## 118 1 3 male 27.00 0 0 7.7958 S
## 119 0 3 female 9.00 2 2 34.3750 S
## 120 0 2 male 36.50 0 2 26.0000 F2 S
## 121 0 2 male 42.00 0 0 13.0000 S
## 122 0 2 male 51.00 0 0 12.5250 S
## 123 1 1 female 22.00 1 0 66.6000 C2 S
## 124 0 3 male 55.50 0 0 8.0500 S
## 125 0 3 male 40.50 0 2 14.5000 S
## 126 0 1 male 51.00 0 1 61.3792 C
## 127 1 3 female 16.00 0 0 7.7333 Q
## 128 0 3 male 30.00 0 0 8.0500 S
## 129 0 3 male 44.00 0 1 16.1000 S
## 130 1 2 female 40.00 0 0 15.7500 S
## 131 0 3 male 26.00 0 0 7.7750 S
## 132 0 3 male 17.00 0 0 8.6625 S
## 133 0 3 male 1.00 4 1 39.6875 S
## 134 1 3 male 9.00 0 2 20.5250 S
## 135 0 3 female 45.00 1 4 27.9000 S
## 136 0 3 male 28.00 0 0 56.4958 S
## 137 0 1 male 61.00 0 0 33.5000 B19 S
## 138 0 3 male 4.00 4 1 29.1250 Q
## 139 1 3 female 1.00 1 1 11.1333 S
## 140 0 3 male 21.00 0 0 7.9250 S
## 141 0 1 male 56.00 0 0 30.6958 A7 C
## 142 0 3 male 18.00 1 1 7.8542 S
## 143 0 1 female 50.00 0 0 28.7125 C49 C
## 144 0 2 male 30.00 0 0 13.0000 S
## 145 0 3 male 36.00 0 0 0.0000 S
## 146 0 3 male 9.00 4 2 31.3875 S
## 147 1 2 male 1.00 2 1 39.0000 F4 S
## 148 1 3 female 4.00 0 2 22.0250 S
## 149 1 1 male 45.00 0 0 26.5500 S
## 150 0 3 male 40.00 1 1 15.5000 Q
## 151 0 3 male 36.00 0 0 7.8958 S
## 152 1 2 female 32.00 0 0 13.0000 S
## 153 0 2 male 19.00 0 0 13.0000 S
## 154 1 3 female 19.00 1 0 7.8542 S
## 155 1 2 male 3.00 1 1 26.0000 F2 S
## 156 1 1 female 44.00 0 0 27.7208 B4 C
## 157 1 1 female 58.00 0 0 146.5208 B80 C
## 158 0 3 male 42.00 0 1 8.4042 S
## 159 0 2 female 24.00 0 0 13.0000 S
## 160 0 3 male 28.00 0 0 9.5000 S
## 161 0 3 male 34.00 0 0 6.4958 S
## 162 0 3 male 45.50 0 0 7.2250 C
## 163 1 3 male 18.00 0 0 8.0500 S
## 164 0 3 female 2.00 0 1 10.4625 G6 S
## 165 0 3 male 32.00 1 0 15.8500 S
## 166 1 3 male 26.00 0 0 18.7875 C
## 167 1 3 female 16.00 0 0 7.7500 Q
## 168 1 1 male 40.00 0 0 31.0000 A31 C
## 169 0 3 male 24.00 0 0 7.0500 S
## 170 1 2 female 35.00 0 0 21.0000 S
## 171 0 3 male 22.00 0 0 7.2500 S
## 172 0 2 male 30.00 0 0 13.0000 S
## 173 1 1 female 31.00 1 0 113.2750 D36 C
## 174 1 3 female 27.00 0 0 7.9250 S
## 175 0 2 male 42.00 1 0 27.0000 S
## 176 1 1 female 32.00 0 0 76.2917 D15 C
## 177 0 2 male 30.00 0 0 10.5000 S
## 178 1 3 male 16.00 0 0 8.0500 S
## 179 0 2 male 27.00 0 0 13.0000 S
## 180 0 3 male 51.00 0 0 8.0500 S
## 181 1 1 male 38.00 1 0 90.0000 C93 S
## 182 0 3 male 22.00 0 0 9.3500 S
## 183 1 2 male 19.00 0 0 10.5000 S
## 184 0 3 male 20.50 0 0 7.2500 S
## 185 0 2 male 18.00 0 0 13.0000 S
## 186 1 1 female 35.00 1 0 83.4750 C83 S
## 187 0 3 male 29.00 0 0 7.7750 S
## 188 0 2 male 59.00 0 0 13.5000 S
## 189 1 3 female 5.00 4 2 31.3875 S
## 190 0 2 male 24.00 0 0 10.5000 S
## 191 0 2 male 44.00 1 0 26.0000 S
## 192 1 2 female 8.00 0 2 26.2500 S
## 193 0 2 male 19.00 0 0 10.5000 S
## 194 0 2 male 33.00 0 0 12.2750 S
## 195 0 2 male 29.00 0 0 10.5000 S
## 196 0 3 male 22.00 0 0 7.1250 S
## 197 0 3 male 30.00 0 0 7.2250 C
## 198 0 1 male 44.00 2 0 90.0000 C78 Q
## 199 0 3 female 25.00 0 0 7.7750 S
## 200 1 2 female 24.00 0 2 14.5000 S
## 201 1 1 male 37.00 1 1 52.5542 D35 S
## 202 0 2 male 54.00 1 0 26.0000 S
## 203 0 3 female 29.00 1 1 10.4625 G6 S
## 204 0 1 male 62.00 0 0 26.5500 C87 S
## 205 0 3 male 30.00 1 0 16.1000 S
## 206 0 3 female 41.00 0 2 20.2125 S
## 207 1 3 female 29.00 0 2 15.2458 C
## 208 1 1 female 30.00 0 0 86.5000 B77 S
## 209 1 1 female 35.00 0 0 512.3292 C
## 210 1 2 female 50.00 0 1 26.0000 S
## 211 1 3 male 3.00 4 2 31.3875 S
## 212 0 1 male 52.00 1 1 79.6500 E67 S
## 213 0 1 male 40.00 0 0 0.0000 B94 S
## 214 0 2 male 36.00 0 0 10.5000 S
## 215 0 3 male 16.00 4 1 39.6875 S
## 216 1 3 male 25.00 1 0 7.7750 S
## 217 1 1 female 58.00 0 1 153.4625 C125 S
## 218 1 1 female 35.00 0 0 135.6333 C99 S
## 219 1 3 male 25.00 0 0 0.0000 S
## 220 1 2 female 41.00 0 1 19.5000 S
## 221 0 1 male 37.00 0 1 29.7000 C118 C
## 222 1 1 female 63.00 1 0 77.9583 D7 S
## 223 0 3 female 45.00 0 0 7.7500 S
## 224 0 3 male 7.00 4 1 29.1250 Q
## 225 1 3 female 35.00 1 1 20.2500 S
## 226 0 3 male 65.00 0 0 7.7500 Q
## 227 0 3 male 28.00 0 0 7.8542 S
## 228 0 3 male 16.00 0 0 9.5000 S
## 229 1 3 male 19.00 0 0 8.0500 S
## 230 0 3 male 33.00 0 0 8.6625 C
## 231 1 3 male 30.00 0 0 9.5000 S
## 232 0 3 male 22.00 0 0 7.8958 S
## 233 1 2 male 42.00 0 0 13.0000 S
## 234 1 3 female 22.00 0 0 7.7500 Q
## 235 1 1 female 26.00 0 0 78.8500 S
## 236 1 1 female 19.00 1 0 91.0792 B49 C
## 237 0 2 male 36.00 0 0 12.8750 D C
## 238 0 3 female 24.00 0 0 8.8500 S
## 239 0 3 male 24.00 0 0 7.8958 S
## 240 0 3 male 23.50 0 0 7.2292 C
## 241 0 1 female 2.00 1 2 151.5500 C22 C26 S
## 242 1 1 female 50.00 0 1 247.5208 B58 B60 C
## 243 0 3 male 19.00 0 0 0.0000 S
## 244 1 1 male 0.92 1 2 151.5500 C22 C26 S
## 245 1 1 female 17.00 1 0 108.9000 C65 C
## 246 0 2 male 30.00 1 0 24.0000 C
## 247 1 1 female 30.00 0 0 56.9292 E36 C
## 248 1 1 female 24.00 0 0 83.1583 C54 C
## 249 1 1 female 18.00 2 2 262.3750 B57 B59 B63 B66 C
## 250 0 2 female 26.00 1 1 26.0000 S
## 251 0 3 male 28.00 0 0 7.8958 S
## 252 0 2 male 43.00 1 1 26.2500 S
## 253 1 3 female 26.00 0 0 7.8542 S
## 254 1 2 female 24.00 1 0 26.0000 S
## 255 0 2 male 54.00 0 0 14.0000 S
## 256 1 1 female 31.00 0 2 164.8667 C7 S
## 257 1 1 female 40.00 1 1 134.5000 E34 C
## 258 0 3 male 22.00 0 0 7.2500 S
## 259 0 3 male 27.00 0 0 7.8958 S
## 260 1 2 female 30.00 0 0 12.3500 Q
## 261 1 2 female 22.00 1 1 29.0000 S
## 262 1 1 female 36.00 0 0 135.6333 C32 C
## 263 0 3 male 61.00 0 0 6.2375 S
## 264 1 2 female 36.00 0 0 13.0000 D S
## 265 1 3 female 31.00 1 1 20.5250 S
## 266 1 1 female 16.00 0 1 57.9792 B18 C
## 267 0 1 male 45.50 0 0 28.5000 C124 S
## 268 0 1 male 38.00 0 1 153.4625 C91 S
## 269 0 3 male 16.00 2 0 18.0000 S
## 270 0 1 male 29.00 1 0 66.6000 C2 S
## 271 1 1 female 41.00 0 0 134.5000 E40 C
## 272 1 3 male 45.00 0 0 8.0500 S
## 273 0 1 male 45.00 0 0 35.5000 T S
## 274 1 2 male 2.00 1 1 26.0000 F2 S
## 275 1 1 female 24.00 3 2 263.0000 C23 C25 C27 S
## 276 0 2 male 28.00 0 0 13.0000 S
## 277 0 2 male 25.00 0 0 13.0000 S
## 278 0 2 male 36.00 0 0 13.0000 S
## 279 1 2 female 24.00 0 0 13.0000 F33 S
## 280 1 2 female 40.00 0 0 13.0000 S
## 281 1 3 male 3.00 1 1 15.9000 S
## 282 0 3 male 42.00 0 0 8.6625 S
## 283 0 3 male 23.00 0 0 9.2250 S
## 284 0 3 male 15.00 1 1 7.2292 C
## 285 0 3 male 25.00 1 0 17.8000 S
## 286 0 3 male 28.00 0 0 9.5000 S
## 287 1 1 female 22.00 0 1 55.0000 E33 S
## 288 0 2 female 38.00 0 0 13.0000 S
## 289 0 3 male 40.00 1 4 27.9000 S
## 290 0 2 male 29.00 1 0 27.7208 C
## 291 0 3 female 45.00 0 1 14.4542 C
## 292 0 3 male 35.00 0 0 7.0500 S
## 293 0 3 male 30.00 0 0 7.2500 S
## 294 1 1 female 60.00 1 0 75.2500 D37 C
## 295 1 1 female 24.00 0 0 69.3000 B35 C
## 296 1 1 male 25.00 1 0 55.4417 E50 C
## 297 0 3 male 18.00 1 0 6.4958 S
## 298 0 3 male 19.00 0 0 8.0500 S
## 299 0 1 male 22.00 0 0 135.6333 C
## 300 0 3 female 3.00 3 1 21.0750 S
## 301 1 3 female 22.00 0 0 7.2500 S
## 302 0 1 male 27.00 0 2 211.5000 C82 C
## 303 0 3 male 20.00 0 0 4.0125 C
## 304 0 3 male 19.00 0 0 7.7750 S
## 305 1 1 female 42.00 0 0 227.5250 C
## 306 1 3 female 1.00 0 2 15.7417 C
## 307 0 3 male 32.00 0 0 7.9250 S
## 308 1 1 female 35.00 1 0 52.0000 S
## 309 0 2 male 18.00 0 0 73.5000 S
## 310 0 3 male 1.00 5 2 46.9000 S
## 311 1 2 female 36.00 0 0 13.0000 S
## 312 1 2 female 17.00 0 0 12.0000 C
## 313 1 1 male 36.00 1 2 120.0000 B96 B98 S
## 314 1 3 male 21.00 0 0 7.7958 S
## 315 0 3 male 28.00 2 0 7.9250 S
## 316 1 1 female 23.00 1 0 113.2750 D36 C
## 317 1 3 female 24.00 0 2 16.7000 G6 S
## 318 0 3 male 22.00 0 0 7.7958 S
## 319 0 3 female 31.00 0 0 7.8542 S
## 320 0 2 male 46.00 0 0 26.0000 S
## 321 0 2 male 23.00 0 0 10.5000 S
## 322 1 2 female 28.00 0 0 12.6500 S
## 323 1 3 male 39.00 0 0 7.9250 S
## 324 0 3 male 26.00 0 0 8.0500 S
## 325 0 3 female 21.00 1 0 9.8250 S
## 326 0 3 male 28.00 1 0 15.8500 S
## 327 0 3 female 20.00 0 0 8.6625 S
## 328 0 2 male 34.00 1 0 21.0000 S
## 329 0 3 male 51.00 0 0 7.7500 S
## 330 1 2 male 3.00 1 1 18.7500 S
## 331 0 3 male 21.00 0 0 7.7750 S
## 332 1 1 female 33.00 1 0 90.0000 C78 Q
## 333 1 3 male 44.00 0 0 7.9250 S
## 334 1 2 female 34.00 1 1 32.5000 S
## 335 1 2 female 18.00 0 2 13.0000 S
## 336 0 2 male 30.00 0 0 13.0000 S
## 337 0 3 female 10.00 0 2 24.1500 S
## 338 0 3 male 21.00 0 0 7.7333 Q
## 339 0 3 male 29.00 0 0 7.8750 S
## 340 0 3 female 28.00 1 1 14.4000 S
## 341 0 3 male 18.00 1 1 20.2125 S
## 342 1 2 female 28.00 1 0 26.0000 S
## 343 1 2 female 19.00 0 0 26.0000 S
## 344 1 3 male 32.00 0 0 8.0500 E10 S
## 345 1 1 male 28.00 0 0 26.5500 C52 S
## 346 1 2 female 42.00 1 0 26.0000 S
## 347 0 3 male 17.00 0 0 7.1250 S
## 348 0 1 male 50.00 1 0 55.9000 E44 S
## 349 1 1 female 14.00 1 2 120.0000 B96 B98 S
## 350 0 3 female 21.00 2 2 34.3750 S
## 351 1 2 female 24.00 2 3 18.7500 S
## 352 0 1 male 64.00 1 4 263.0000 C23 C25 C27 S
## 353 0 2 male 31.00 0 0 10.5000 S
## 354 1 2 female 45.00 1 1 26.2500 S
## 355 0 3 male 20.00 0 0 9.5000 S
## 356 0 3 male 25.00 1 0 7.7750 S
## 357 1 2 female 28.00 0 0 13.0000 S
## 358 1 1 male 4.00 0 2 81.8583 A34 S
## 359 1 2 female 13.00 0 1 19.5000 S
## 360 1 1 male 34.00 0 0 26.5500 S
## 361 1 3 female 5.00 2 1 19.2583 C
## 362 1 1 male 52.00 0 0 30.5000 C104 S
## 363 0 2 male 36.00 1 2 27.7500 S
## 364 0 1 male 30.00 0 0 27.7500 C111 C
## 365 1 1 male 49.00 1 0 89.1042 C92 C
## 366 1 3 male 29.00 0 0 7.8958 C
## 367 0 1 male 65.00 0 0 26.5500 E38 S
## 368 1 2 female 50.00 0 0 10.5000 S
## 369 1 1 male 48.00 0 0 26.5500 E12 S
## 370 0 3 male 34.00 0 0 8.0500 S
## 371 0 1 male 47.00 0 0 38.5000 E63 S
## 372 0 2 male 48.00 0 0 13.0000 S
## 373 0 3 male 38.00 0 0 7.0500 S
## 374 0 1 male 56.00 0 0 26.5500 S
## 375 1 3 female 0.75 2 1 19.2583 C
## 376 0 3 male 38.00 0 0 8.6625 S
## 377 1 2 female 33.00 1 2 27.7500 S
## 378 1 2 female 23.00 0 0 13.7917 D C
## 379 0 3 female 22.00 0 0 9.8375 S
## 380 0 2 male 34.00 1 0 21.0000 S
## 381 0 3 male 29.00 1 0 7.0458 S
## 382 0 3 male 22.00 0 0 7.5208 S
## 383 1 3 female 2.00 0 1 12.2875 S
## 384 0 3 male 9.00 5 2 46.9000 S
## 385 0 3 male 50.00 0 0 8.0500 S
## 386 1 3 female 63.00 0 0 9.5875 S
## 387 1 1 male 25.00 1 0 91.0792 B49 C
## 388 1 1 female 35.00 1 0 90.0000 C93 S
## 389 0 1 male 58.00 0 0 29.7000 B37 C
## 390 0 3 male 30.00 0 0 8.0500 S
## 391 1 3 male 9.00 1 1 15.9000 S
## 392 0 3 male 21.00 0 0 7.2500 S
## 393 0 1 male 55.00 0 0 30.5000 C30 S
## 394 0 1 male 71.00 0 0 49.5042 C
## 395 0 3 male 21.00 0 0 8.0500 S
## 396 1 1 female 54.00 1 0 78.2667 D20 C
## 397 0 1 female 25.00 1 2 151.5500 C22 C26 S
## 398 0 3 male 24.00 0 0 7.7958 S
## 399 0 3 male 17.00 0 0 8.6625 S
## 400 0 3 female 21.00 0 0 7.7500 Q
## 401 0 3 female 37.00 0 0 9.5875 S
## 402 1 1 female 16.00 0 0 86.5000 B79 S
## 403 0 1 male 18.00 1 0 108.9000 C65 C
## 404 1 2 female 33.00 0 2 26.0000 S
## 405 0 3 male 28.00 0 0 22.5250 S
## 406 1 3 male 26.00 0 0 56.4958 S
## 407 1 3 male 29.00 0 0 7.7500 Q
## 408 1 1 male 36.00 0 0 26.2875 E25 S
## 409 1 1 female 54.00 1 0 59.4000 C
## 410 0 3 male 24.00 0 0 7.4958 S
## 411 0 1 male 47.00 0 0 34.0208 D46 S
## 412 1 2 female 34.00 0 0 10.5000 F33 S
## 413 1 2 female 36.00 1 0 26.0000 S
## 414 0 3 male 32.00 0 0 7.8958 S
## 415 1 1 female 30.00 0 0 93.5000 B73 S
## 416 0 3 male 22.00 0 0 7.8958 S
## 417 1 1 female 44.00 0 1 57.9792 B18 C
## 418 0 3 male 40.50 0 0 7.7500 Q
## 419 1 2 female 50.00 0 0 10.5000 S
## 420 0 3 male 39.00 0 0 7.9250 S
## 421 0 2 male 23.00 2 1 11.5000 S
## 422 1 2 female 2.00 1 1 26.0000 S
## 423 0 3 male 17.00 1 1 7.2292 C
## 424 0 3 female 30.00 0 0 8.6625 S
## 425 1 2 female 7.00 0 2 26.2500 S
## 426 0 1 male 45.00 0 0 26.5500 B38 S
## 427 1 1 female 30.00 0 0 106.4250 C
## 428 1 1 female 22.00 0 2 49.5000 B39 C
## 429 1 1 female 36.00 0 2 71.0000 B22 S
## 430 0 3 female 9.00 4 2 31.2750 S
## 431 0 3 female 11.00 4 2 31.2750 S
## 432 1 2 male 32.00 1 0 26.0000 S
## 433 0 1 male 50.00 1 0 106.4250 C86 C
## 434 0 1 male 64.00 0 0 26.0000 S
## 435 1 2 female 19.00 1 0 26.0000 S
## 436 0 3 male 33.00 1 1 20.5250 S
## 437 1 2 male 8.00 1 1 36.7500 S
## 438 1 1 male 17.00 0 2 110.8833 C70 C
## 439 0 2 male 27.00 0 0 26.0000 S
## 440 1 3 male 22.00 0 0 7.2250 C
## 441 1 3 female 22.00 0 0 7.7750 S
## 442 0 1 male 62.00 0 0 26.5500 S
## 443 1 1 female 48.00 1 0 39.6000 A16 C
## 444 1 1 female 39.00 1 1 79.6500 E67 S
## 445 1 3 female 36.00 1 0 17.4000 S
## 446 0 3 male 40.00 0 0 7.8958 S
## 447 0 2 male 28.00 0 0 13.5000 S
## 448 0 3 male 24.00 2 0 24.1500 S
## 449 0 3 male 19.00 0 0 7.8958 S
## 450 0 3 female 29.00 0 4 21.0750 S
## 451 1 3 male 32.00 0 0 7.8542 S
## 452 1 2 male 62.00 0 0 10.5000 S
## 453 1 1 female 53.00 2 0 51.4792 C101 S
## 454 1 1 male 36.00 0 0 26.3875 E25 S
## 455 0 3 male 16.00 0 0 8.0500 S
## 456 0 3 male 19.00 0 0 14.5000 S
## 457 1 2 female 34.00 0 0 13.0000 S
## 458 1 1 female 39.00 1 0 55.9000 E44 S
## 459 1 3 male 32.00 0 0 7.9250 S
## 460 1 2 female 25.00 1 1 30.0000 S
## 461 1 1 female 39.00 1 1 110.8833 C68 C
## 462 0 2 male 54.00 0 0 26.0000 S
## 463 0 1 male 36.00 0 0 40.1250 A10 C
## 464 1 1 female 18.00 0 2 79.6500 E68 S
## 465 0 2 male 47.00 0 0 15.0000 S
## 466 1 1 male 60.00 1 1 79.2000 B41 C
## 467 0 3 male 22.00 0 0 8.0500 S
## 468 0 3 male 35.00 0 0 7.1250 S
## 469 1 1 female 52.00 1 0 78.2667 D20 C
## 470 0 3 male 47.00 0 0 7.2500 S
## 471 0 2 male 37.00 1 0 26.0000 S
## 472 0 3 male 36.00 1 1 24.1500 S
## 473 0 3 male 49.00 0 0 0.0000 S
## 474 1 1 male 49.00 1 0 56.9292 A20 C
## 475 1 2 female 24.00 2 1 27.0000 S
## 476 0 3 male 44.00 0 0 8.0500 S
## 477 1 1 male 35.00 0 0 26.5500 C
## 478 0 3 male 36.00 1 0 15.5500 S
## 479 0 3 male 30.00 0 0 7.8958 S
## 480 1 1 male 27.00 0 0 30.5000 S
## 481 1 2 female 22.00 1 2 41.5792 C
## 482 1 1 female 40.00 0 0 153.4625 C125 S
## 483 0 3 female 39.00 1 5 31.2750 S
## 484 0 3 male 35.00 0 0 8.0500 S
## 485 1 2 female 24.00 1 2 65.0000 S
## 486 0 3 male 34.00 1 1 14.4000 S
## 487 0 3 female 26.00 1 0 16.1000 S
## 488 1 2 female 4.00 2 1 39.0000 F4 S
## 489 0 2 male 26.00 0 0 10.5000 S
## 490 0 3 male 27.00 1 0 14.4542 C
## 491 1 1 male 42.00 1 0 52.5542 D19 S
## 492 1 3 male 20.00 1 1 15.7417 C
## 493 0 3 male 21.00 0 0 7.8542 S
## 494 0 3 male 21.00 0 0 16.1000 S
## 495 0 1 male 61.00 0 0 32.3208 D50 S
## 496 0 2 male 57.00 0 0 12.3500 Q
## 497 1 1 female 21.00 0 0 77.9583 D9 S
## 498 0 3 male 26.00 0 0 7.8958 S
## 499 1 1 male 80.00 0 0 30.0000 A23 S
## 500 0 3 male 51.00 0 0 7.0542 S
## 501 1 1 male 32.00 0 0 30.5000 B50 C
## 502 0 3 female 9.00 3 2 27.9000 S
## 503 1 2 female 28.00 0 0 13.0000 S
## 504 0 3 male 32.00 0 0 7.9250 S
## 505 0 2 male 31.00 1 1 26.2500 S
## 506 0 3 female 41.00 0 5 39.6875 S
## 507 0 3 male 20.00 0 0 7.8542 S
## 508 1 1 female 24.00 0 0 69.3000 B35 C
## 509 0 3 female 2.00 3 2 27.9000 S
## 510 1 3 female 0.75 2 1 19.2583 C
## 511 1 1 male 48.00 1 0 76.7292 D33 C
## 512 0 3 male 19.00 0 0 7.8958 S
## 513 1 1 male 56.00 0 0 35.5000 A26 C
## 514 1 3 female 23.00 0 0 7.5500 S
## 515 1 2 female 18.00 0 1 23.0000 S
## 516 0 3 male 21.00 0 0 8.4333 S
## 517 0 3 female 18.00 0 0 6.7500 Q
## 518 0 2 male 24.00 2 0 73.5000 S
## 519 0 3 female 32.00 1 1 15.5000 Q
## 520 0 2 male 23.00 0 0 13.0000 S
## 521 0 1 male 58.00 0 2 113.2750 D48 C
## 522 1 1 male 50.00 2 0 133.6500 S
## 523 0 3 male 40.00 0 0 7.2250 C
## 524 0 1 male 47.00 0 0 25.5875 E58 S
## 525 0 3 male 36.00 0 0 7.4958 S
## 526 1 3 male 20.00 1 0 7.9250 S
## 527 0 2 male 32.00 2 0 73.5000 S
## 528 0 2 male 25.00 0 0 13.0000 S
## 529 0 3 male 43.00 0 0 8.0500 S
## 530 1 2 female 40.00 1 1 39.0000 S
## 531 0 1 male 31.00 1 0 52.0000 B71 S
## 532 0 2 male 70.00 0 0 10.5000 S
## 533 1 2 male 31.00 0 0 13.0000 S
## 534 0 3 male 18.00 0 0 7.7750 S
## 535 0 3 male 24.50 0 0 8.0500 S
## 536 1 3 female 18.00 0 0 9.8417 S
## 537 0 3 female 43.00 1 6 46.9000 S
## 538 1 1 male 36.00 0 1 512.3292 B51 B53 B55 C
## 539 1 1 male 27.00 0 0 76.7292 D49 C
## 540 0 3 male 20.00 0 0 9.2250 S
## 541 0 3 male 14.00 5 2 46.9000 S
## 542 0 2 male 60.00 1 1 39.0000 S
## 543 0 2 male 25.00 1 2 41.5792 C
## 544 0 3 male 14.00 4 1 39.6875 S
## 545 0 3 male 19.00 0 0 10.1708 S
## 546 0 3 male 18.00 0 0 7.7958 S
## 547 1 1 female 15.00 0 1 211.3375 B5 S
## 548 1 1 male 31.00 1 0 57.0000 B20 S
## 549 1 3 female 4.00 0 1 13.4167 C
## 550 0 3 male 25.00 0 0 7.2250 C
## 551 0 1 male 60.00 0 0 26.5500 S
## 552 0 2 male 52.00 0 0 13.5000 S
## 553 0 3 male 44.00 0 0 8.0500 S
## 554 0 1 male 49.00 1 1 110.8833 C68 C
## 555 0 3 male 42.00 0 0 7.6500 F G63 S
## 556 1 1 female 18.00 1 0 227.5250 C62 C64 C
## 557 1 1 male 35.00 0 0 26.2875 E24 S
## 558 0 3 female 18.00 0 1 14.4542 C
## 559 0 3 male 25.00 0 0 7.7417 Q
## 560 0 3 male 26.00 1 0 7.8542 S
## 561 0 2 male 39.00 0 0 26.0000 S
## 562 1 2 female 45.00 0 0 13.5000 S
## 563 1 1 male 42.00 0 0 26.2875 E24 S
## 564 1 1 female 22.00 0 0 151.5500 S
## 565 1 1 female 24.00 0 0 49.5042 C90 C
## 566 1 1 male 48.00 1 0 52.0000 C126 S
## 567 0 3 male 29.00 0 0 9.4833 S
## 568 0 2 male 52.00 0 0 13.0000 S
## 569 0 3 male 19.00 0 0 7.6500 F G73 S
## 570 1 1 female 38.00 0 0 227.5250 C45 C
## 571 1 2 female 27.00 0 0 10.5000 E101 S
## 572 0 3 male 33.00 0 0 7.7750 S
## 573 1 2 female 6.00 0 1 33.0000 S
## 574 0 3 male 17.00 1 0 7.0542 S
## 575 0 2 male 34.00 0 0 13.0000 S
## 576 0 2 male 50.00 0 0 13.0000 S
## 577 1 1 male 27.00 1 0 53.1000 E8 S
## 578 0 3 male 20.00 0 0 8.6625 S
## 579 1 2 female 30.00 3 0 21.0000 S
## 580 0 2 male 25.00 1 0 26.0000 S
## 581 0 3 female 25.00 1 0 7.9250 S
## 582 1 1 female 29.00 0 0 211.3375 B5 S
## 583 0 3 male 11.00 0 0 18.7875 C
## 584 0 2 male 23.00 0 0 13.0000 S
## 585 0 2 male 23.00 0 0 13.0000 S
## 586 0 3 male 28.50 0 0 16.1000 S
## 587 0 3 female 48.00 1 3 34.3750 S
## 588 1 1 male 35.00 0 0 512.3292 B101 C
## 589 0 1 male 36.00 1 0 78.8500 C46 S
## 590 1 1 female 21.00 2 2 262.3750 B57 B59 B63 B66 C
## 591 0 3 male 24.00 1 0 16.1000 S
## 592 1 3 male 31.00 0 0 7.9250 S
## 593 0 1 male 70.00 1 1 71.0000 B22 S
## 594 0 3 male 16.00 1 1 20.2500 S
## 595 1 2 female 30.00 0 0 13.0000 S
## 596 0 1 male 19.00 1 0 53.1000 D30 S
## 597 0 3 male 31.00 0 0 7.7500 Q
## 598 1 2 female 4.00 1 1 23.0000 S
## 599 1 3 male 6.00 0 1 12.4750 E121 S
## 600 0 3 male 33.00 0 0 9.5000 S
## 601 0 3 male 23.00 0 0 7.8958 S
## 602 1 2 female 48.00 1 2 65.0000 S
## 603 1 2 male 0.67 1 1 14.5000 S
## 604 0 3 male 28.00 0 0 7.7958 S
## 605 0 2 male 18.00 0 0 11.5000 S
## 606 0 3 male 34.00 0 0 8.0500 S
## 607 1 1 female 33.00 0 0 86.5000 B77 S
## 608 0 3 male 41.00 0 0 7.1250 S
## 609 1 3 male 20.00 0 0 7.2292 C
## 610 1 1 female 36.00 1 2 120.0000 B96 B98 S
## 611 0 3 male 16.00 0 0 7.7750 S
## 612 1 1 female 51.00 1 0 77.9583 D11 S
## 613 0 3 female 30.50 0 0 7.7500 Q
## 614 0 3 male 32.00 0 0 8.3625 S
## 615 0 3 male 24.00 0 0 9.5000 S
## 616 0 3 male 48.00 0 0 7.8542 S
## 617 0 2 female 57.00 0 0 10.5000 E77 S
## 618 1 2 female 54.00 1 3 23.0000 S
## 619 0 3 male 18.00 0 0 7.7500 S
## 620 1 3 female 5.00 0 0 12.4750 S
## 621 1 1 female 43.00 0 1 211.3375 B3 S
## 622 1 3 female 13.00 0 0 7.2292 C
## 623 1 1 female 17.00 1 0 57.0000 B20 S
## 624 0 1 male 29.00 0 0 30.0000 D6 S
## 625 0 3 male 25.00 0 0 7.0500 S
## 626 0 3 male 25.00 0 0 7.2500 S
## 627 1 3 female 18.00 0 0 7.4958 S
## 628 0 3 male 8.00 4 1 29.1250 Q
## 629 1 3 male 1.00 1 2 20.5750 S
## 630 0 1 male 46.00 0 0 79.2000 B82 B84 C
## 631 0 2 male 16.00 0 0 26.0000 S
## 632 0 3 male 25.00 0 0 7.8958 S
## 633 0 2 male 39.00 0 0 13.0000 S
## 634 1 1 female 49.00 0 0 25.9292 D17 S
## 635 1 3 female 31.00 0 0 8.6833 S
## 636 0 3 male 30.00 0 0 7.2292 C
## 637 0 3 female 30.00 1 1 24.1500 S
## 638 0 2 male 34.00 0 0 13.0000 S
## 639 1 2 female 31.00 1 1 26.2500 S
## 640 1 1 male 11.00 1 2 120.0000 B96 B98 S
## 641 1 3 male 0.42 0 1 8.5167 C
## 642 1 3 male 27.00 0 0 6.9750 S
## 643 0 3 male 31.00 0 0 7.7750 S
## 644 0 1 male 39.00 0 0 0.0000 A36 S
## 645 0 3 female 18.00 0 0 7.7750 S
## 646 0 2 male 39.00 0 0 13.0000 S
## 647 1 1 female 33.00 1 0 53.1000 E8 S
## 648 0 3 male 26.00 0 0 7.8875 S
## 649 0 3 male 39.00 0 0 24.1500 S
## 650 0 2 male 35.00 0 0 10.5000 S
## 651 0 3 female 6.00 4 2 31.2750 S
## 652 0 3 male 30.50 0 0 8.0500 S
## 653 0 3 female 23.00 0 0 7.9250 S
## 654 0 2 male 31.00 1 1 37.0042 C
## 655 0 3 male 43.00 0 0 6.4500 S
## 656 0 3 male 10.00 3 2 27.9000 S
## 657 1 1 female 52.00 1 1 93.5000 B69 S
## 658 1 3 male 27.00 0 0 8.6625 S
## 659 0 1 male 38.00 0 0 0.0000 S
## 660 1 3 female 27.00 0 1 12.4750 E121 S
## 661 0 3 male 2.00 4 1 39.6875 S
## 662 1 2 male 1.00 0 2 37.0042 C
## 663 1 1 female 62.00 0 0 80.0000 B28
## 664 1 3 female 15.00 1 0 14.4542 C
## 665 1 2 male 0.83 1 1 18.7500 S
## 666 0 3 male 23.00 0 0 7.8542 S
## 667 0 3 male 18.00 0 0 8.3000 S
## 668 1 1 female 39.00 1 1 83.1583 E49 C
## 669 0 3 male 21.00 0 0 8.6625 S
## 670 1 3 male 32.00 0 0 56.4958 S
## 671 0 3 male 20.00 0 0 7.9250 S
## 672 0 2 male 16.00 0 0 10.5000 S
## 673 1 1 female 30.00 0 0 31.0000 C
## 674 0 3 male 34.50 0 0 6.4375 C
## 675 0 3 male 17.00 0 0 8.6625 S
## 676 0 3 male 42.00 0 0 7.5500 S
## 677 0 3 male 35.00 0 0 7.8958 C
## 678 0 2 male 28.00 0 1 33.0000 S
## 679 0 3 male 4.00 4 2 31.2750 S
## 680 0 3 male 74.00 0 0 7.7750 S
## 681 0 3 female 9.00 1 1 15.2458 C
## 682 1 1 female 16.00 0 1 39.4000 D28 S
## 683 0 2 female 44.00 1 0 26.0000 S
## 684 1 3 female 18.00 0 1 9.3500 S
## 685 1 1 female 45.00 1 1 164.8667 S
## 686 1 1 male 51.00 0 0 26.5500 E17 S
## 687 1 3 female 24.00 0 3 19.2583 C
## 688 0 3 male 41.00 2 0 14.1083 S
## 689 0 2 male 21.00 1 0 11.5000 S
## 690 1 1 female 48.00 0 0 25.9292 D17 S
## 691 0 2 male 24.00 0 0 13.0000 S
## 692 1 2 female 42.00 0 0 13.0000 S
## 693 1 2 female 27.00 1 0 13.8583 C
## 694 0 1 male 31.00 0 0 50.4958 A24 S
## 695 1 3 male 4.00 1 1 11.1333 S
## 696 0 3 male 26.00 0 0 7.8958 S
## 697 1 1 female 47.00 1 1 52.5542 D35 S
## 698 0 1 male 33.00 0 0 5.0000 B51 B53 B55 S
## 699 0 3 male 47.00 0 0 9.0000 S
## 700 1 2 female 28.00 1 0 24.0000 C
## 701 1 3 female 15.00 0 0 7.2250 C
## 702 0 3 male 20.00 0 0 9.8458 S
## 703 0 3 male 19.00 0 0 7.8958 S
## 704 1 1 female 56.00 0 1 83.1583 C50 C
## 705 1 2 female 25.00 0 1 26.0000 S
## 706 0 3 male 33.00 0 0 7.8958 S
## 707 0 3 female 22.00 0 0 10.5167 S
## 708 0 2 male 28.00 0 0 10.5000 S
## 709 0 3 male 25.00 0 0 7.0500 S
## 710 0 3 female 39.00 0 5 29.1250 Q
## 711 0 2 male 27.00 0 0 13.0000 S
## 712 1 1 female 19.00 0 0 30.0000 B42 S
## 713 1 1 male 26.00 0 0 30.0000 C148 C
## 714 0 3 male 32.00 0 0 7.7500 Q
On the other hand, keeping missing factor levels might lead to meaningful models. Empty level names of the Cabin and Embarked factors will cause problems in some analyses. Other approaches to imputing missing factor values remain a good option, but they are beyond the scope of this tutorial.
# Count empty strings in every column, then keep only the columns that
# actually contain some.
# na.rm = TRUE makes a column that holds NA values report its real
# empty-string count; without it such a column yields NA and only disappears
# because filter() drops rows whose condition evaluates to NA.
titanic %>%
  summarize(across(everything(), ~ sum(. == "", na.rm = TRUE))) %>%
  t() %>% # transpose: one row per column name, counts in a single column (V1)
  as.data.frame() %>%
  filter(V1 > 0)
## V1
## Cabin 687
## Embarked 2
# Relabel empty strings in Cabin and Embarked as an explicit "Missing" value.
titanic <- titanic %>%
  mutate(
    Cabin = if_else(Cabin == "", "Missing", Cabin),
    Embarked = if_else(Embarked == "", "Missing", Embarked)
  )
# Verify that no empty strings remain. na.rm = TRUE stops the NA values in Age
# from turning its count into NA, so every column reports an actual number.
titanic %>%
  summarize(across(everything(), ~ sum(. == "", na.rm = TRUE)))
## Survived Pclass Sex Age SibSp Parch Fare Cabin Embarked
## 1 0 0 0 0 0 0 0 0 0
# Base R: pick the two columns by name, then summarise them
summary(titanic[, c("Sex", "Age")])
## Sex Age
## female:314 Min. : 0.42
## male :577 1st Qu.:20.12
## Median :28.00
## Mean :29.70
## 3rd Qu.:38.00
## Max. :80.00
## NA's :177
# tidyverse: choose the columns with select(), then summarise them
titanic %>%
  select(Sex, Age) %>%
  summary()
## Sex Age
## female:314 Min. : 0.42
## male :577 1st Qu.:20.12
## Median :28.00
## Mean :29.70
## 3rd Qu.:38.00
## Max. :80.00
## NA's :177
# Exercise: the same summary with Fare added
titanic %>%
  select(Sex, Age, Fare) %>%
  summary()
## Sex Age Fare
## female:314 Min. : 0.42 Min. : 0.00
## male :577 1st Qu.:20.12 1st Qu.: 7.91
## Median :28.00 Median : 14.45
## Mean :29.70 Mean : 32.20
## 3rd Qu.:38.00 3rd Qu.: 31.00
## Max. :80.00 Max. :512.33
## NA's :177
Show summary of one or more columns
# Quintiles (steps of 0.20) and deciles (steps of 0.10) of Fare in base R
quantile(titanic$Fare, probs = seq(0, 1, by = 0.20), na.rm = TRUE)
## 0% 20% 40% 60% 80% 100%
## 0.0000 7.8542 10.5000 21.6792 39.6875 512.3292
quantile(titanic$Fare, probs = seq(0, 1, by = 0.10), na.rm = TRUE)
## 0% 10% 20% 30% 40% 50% 60% 70%
## 0.0000 7.5500 7.8542 8.0500 10.5000 14.4542 21.6792 27.0000
## 80% 90% 100%
## 39.6875 77.9583 512.3292
# The same quantiles in tidyverse style: pull() extracts Fare as a vector,
# which the pipe passes to quantile() as its first argument
titanic %>%
  pull(Fare) %>%
  quantile(probs = seq(0, 1, by = 0.20), na.rm = TRUE)
## 0% 20% 40% 60% 80% 100%
## 0.0000 7.8542 10.5000 21.6792 39.6875 512.3292
titanic %>%
  pull(Fare) %>%
  quantile(probs = seq(0, 1, by = 0.10), na.rm = TRUE)
## 0% 10% 20% 30% 40% 50% 60% 70%
## 0.0000 7.5500 7.8542 8.0500 10.5000 14.4542 21.6792 27.0000
## 80% 90% 100%
## 39.6875 77.9583 512.3292
# Exercise: the same quantiles for Age (na.rm = TRUE skips the missing ages)
titanic %>%
  pull(Age) %>%
  quantile(probs = seq(0, 1, by = 0.20), na.rm = TRUE)
## 0% 20% 40% 60% 80% 100%
## 0.42 19.00 25.00 31.80 41.00 80.00
titanic %>%
  pull(Age) %>%
  quantile(probs = seq(0, 1, by = 0.10), na.rm = TRUE)
## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
## 0.42 14.00 19.00 22.00 25.00 28.00 31.80 36.00 41.00 50.00 80.00
Useful numeric visualization.
# Base R boxplot of Fare. The title and axis label now name Fare, the variable
# that is actually plotted (they previously said "Age").
boxplot(titanic$Fare,
        main = "Boxplot of Fare in the titanic data set",
        ylab = "Fare")
# ggplot version of the Fare boxplot; the data frame is passed directly to
# ggplot() rather than piped in
ggplot(titanic, aes(x = Fare)) +
  geom_boxplot() +
  ggtitle("boxplot of Fare ($)")
# Histograms of a numeric variable (Fare)
# base R
hist(
  titanic$Fare,
  xlab = "Fare",
  main = "Histogram of Fare in the titanic data set"
)
# ggplot, with bins 20 fare units wide
ggplot(titanic, aes(x = Fare)) +
  geom_histogram(binwidth = 20) +
  ggtitle("Histogram of Fare in the titanic data set")
# Histogram of Age built from the Fare template above.
# The template's 20-unit bin width is too coarse for ages, which only span
# about 0 to 80 and would collapse into a handful of bars; 5-year bins show
# the shape of the distribution.
titanic %>% ggplot() +
  geom_histogram(aes(x = Age), binwidth = 5) +
  ggtitle("Histogram of Age in the titanic data set")
## Warning: Removed 177 rows containing non-finite values (`stat_bin()`).
# Base R: variance and standard deviation of Fare
var(titanic[["Fare"]])
## [1] 2469.437
sd(titanic[["Fare"]])
## [1] 49.69343
# tidyverse: pull the column out as a vector first
titanic %>%
  pull(Fare) %>%
  var()
## [1] 2469.437
titanic %>%
  pull(Fare) %>%
  sd()
## [1] 49.69343
# Exercise: variance and standard deviation of Age.
# Age contains missing values, so var() and sd() return NA unless the NAs are
# dropped with na.rm = TRUE.
var(titanic$Age, na.rm = TRUE)
## [1] 211.0191
sd(titanic$Age, na.rm = TRUE)
## [1] 14.5265
It’s nice to be able to make numeric variables more readable. Consider rounding to improve readability.
# First rows of Fare, unrounded
head(select(titanic, Fare))
## Fare
## 1 7.2500
## 2 71.2833
## 3 7.9250
## 4 53.1000
## 5 8.0500
## 6 8.4583
# The same rows with Fare rounded to whole numbers
titanic %>%
  select(Fare) %>%
  round() %>%
  head()
## Fare
## 1 7
## 2 71
## 3 8
## 4 53
## 5 8
## 6 8
# Exercise: first rows of Age, rounded like Fare above.
# The round() step was missing, so the values were shown unrounded.
titanic %>%
  select(Age) %>%
  round() %>%
  head()
## Age
## 1 22
## 2 38
## 3 26
## 4 35
## 5 35
## 6 NA
Generate correlation coefficients of two numeric variables in a 2x2 matrix. cor(X,Y) lies between -1 and 1: zero means no linear correlation, while 1 or -1 indicates perfect correlation. Positive values mean a positive relationship and negative values mean a negative relationship. Examine the components in the formula for the correlation coefficient: $\operatorname{cor}(X,Y) = \operatorname{cov}(X,Y) / (\operatorname{sd}(X)\,\operatorname{sd}(Y))$, where $\operatorname{cov}(X,Y) = E[(X - E[X])(Y - E[Y])]$.
# cor, boxplot, 2D scatter plot - plot, 3D scatter plot
# Scatter plot of two numeric variables: Age on x, Fare on y
# base R
plot(x = titanic$Age, y = titanic$Fare)
# ggplot
ggplot(titanic, aes(x = Age, y = Fare)) +
  geom_point()
## Warning: Removed 177 rows containing missing values (`geom_point()`).
# Covariance matrix of Fare and Age. Age has missing values, so every entry
# involving Age is NA: the result is not usable until the NAs are removed.
cov(titanic[c("Fare", "Age")])
## Fare Age
## Fare 2469.437 NA
## Age NA NA
# var() on a data frame returns the same covariance matrix
var(titanic[c("Fare", "Age")])
## Fare Age
## Fare 2469.437 NA
## Age NA NA
# 2D scatter plots and correlation coefficients for every pair of columns.
# Only the numeric columns are handed to pairs.panels().
pairs.panels(select(titanic, where(is.numeric)))
# Distinct values (levels) of a factor
# base R
is.factor(titanic[["Survived"]])
## [1] TRUE
nlevels(titanic[["Survived"]])
## [1] 2
# tidyverse syntax
titanic %>%
  pull(Survived) %>%
  nlevels()
## [1] 2
# exercise: the same checks for Pclass
is.factor(titanic[["Pclass"]])
## [1] TRUE
nlevels(titanic[["Pclass"]])
## [1] 3
# base R: plotting a factor draws a bar for each level's count
plot(titanic$Pclass, main = "Barplot of Pclass")
# tidyverse
ggplot(titanic, aes(x = Pclass)) +
  geom_bar() +
  ggtitle("Barplot of Pclass")
# exercise: barplot for Sex
ggplot(titanic, aes(x = Sex)) +
  geom_bar() +
  ggtitle("Barplot of Sex")
# Exploring one categorical variable grouped by another (factor by factor)
# base R: rows are Survived, columns are Pclass
table(titanic$Survived, titanic$Pclass) # raw counts
##
## 1 2 3
## 0 80 97 372
## 1 136 87 119
prop.table(table(titanic$Survived, titanic$Pclass)) # proportions of all rows
##
## 1 2 3
## 0 0.08978676 0.10886644 0.41750842
## 1 0.15263749 0.09764310 0.13355780
100 * prop.table(table(titanic$Survived, titanic$Pclass)) # percentages
##
## 1 2 3
## 0 8.978676 10.886644 41.750842
## 1 15.263749 9.764310 13.355780
# sort of tidyverse (ish): select the two factors, then tabulate them
titanic %>%
  select(Survived, Pclass) %>%
  table()
## Pclass
## Survived 1 2 3
## 0 80 97 372
## 1 136 87 119
# proportions, rounded to two decimals
titanic %>%
  select(Survived, Pclass) %>%
  table() %>%
  prop.table() %>%
  round(2)
## Pclass
## Survived 1 2 3
## 0 0.09 0.11 0.42
## 1 0.15 0.10 0.13
# percentages: the whole pipeline result is multiplied by 100
(titanic %>%
  select(Survived, Pclass) %>%
  table() %>%
  prop.table() %>%
  round(2)) * 100
## Pclass
## Survived 1 2 3
## 0 9 11 42
## 1 15 10 13
# exercise: barplot of Sex with each bar filled by Survived
ggplot(titanic, aes(x = Sex, fill = Survived)) +
  geom_bar() +
  ggtitle("Barplot of Sex by Survived")
# tidyverse: Pclass by Survived with the bars placed side by side ("dodge")
ggplot(titanic, aes(x = Pclass, fill = Survived)) +
  geom_bar(position = "dodge") +
  ggtitle("Barplot of Pclass by Survived")
# exercise: side-by-side barplot of Sex grouped by Survived
ggplot(titanic, aes(x = Sex, fill = Survived)) +
  geom_bar(position = "dodge") +
  ggtitle("Barplot of Sex by Survived")
Wow! A lot more passengers in Class 3 didn’t survive than those who did.
And conversely in Class 1 more passengers survived than did not. In
Class 2 it was a bit of a mixed bag.
Often, simply looking at a single column is insufficient for the needs of the analysis. Asking the question "When I group the data by a column, how do the other columns behave?" is a more interesting and useful EDA task. For example, in the titanic data, how do the passengers who survived differ from those who did not? Group the data by Survived to explore this question.
This groups values of a numeric variable based on the values of a factor
# base R: one Age boxplot per level of Survived, via the formula interface
boxplot(formula = Age ~ Survived, data = titanic)
# Boxplots of Fare for each Survived group.
# Colour is mapped to Survived, the grouping factor. Mapping it to the
# continuous Fare made ggplot drop the colour aesthetic and emit a
# "aesthetics were dropped during statistical transformation" warning.
titanic %>%
  ggplot() +
  geom_boxplot(aes(x = Fare, y = Survived, color = Survived)) +
  ggtitle("Survived by Fare")
## The aggregate function
# aggregate() summarises a numeric feature within the groups of a categorical one.
# It needs three things:
# 1. The numeric value to aggregate, e.g. sales, to find out e.g. total sales,
#    average sales or number of sales (i.e. orders).
# 2. The set of categories to aggregate on, e.g. product_category and
#    sales_region.
# 3. The aggregation function to apply (e.g. sum, mean, length).
# Note: this will not show in output until Knit.
aggregate(Fare ~ Survived, data = titanic, FUN = summary)
## Survived Fare.Min. Fare.1st Qu. Fare.Median Fare.Mean Fare.3rd Qu. Fare.Max.
## 1 0 0.00000 7.85420 10.50000 22.11789 26.00000 263.00000
## 2 1 0.00000 12.47500 26.00000 48.39541 57.00000 512.32920
# tidyverse: the same per-group summary of Fare, one statistic per column
titanic %>%
  group_by(Survived) %>%
  summarise(
    min = min(Fare),
    q1 = quantile(Fare, probs = 0.25),
    median = median(Fare),
    mean = mean(Fare), # the mean is included as well
    q3 = quantile(Fare, probs = 0.75),
    max = max(Fare)
  )
## # A tibble: 2 × 7
## Survived min q1 median mean q3 max
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 0 7.85 10.5 22.1 26 263
## 2 1 0 12.5 26 48.4 57 512.
# base R scatter plot of Age vs Fare; point colour and symbol both follow
# the integer codes of the Survived factor
plot(
  titanic$Age, titanic$Fare,
  col = titanic$Survived,
  pch = as.integer(titanic$Survived)
)
# ggplot version, coloured by Survived
ggplot(titanic, aes(x = Age, y = Fare, color = Survived)) +
  geom_point()
## Warning: Removed 177 rows containing missing values (`geom_point()`).