# Load the Titanic passenger data. Empty strings and the literal "NA" are
# read as missing values, and text columns are converted to factors.
df <- read.csv(
  file = "titanic_full.csv",
  stringsAsFactors = TRUE,
  na.strings = c("", "NA")
)
# Show the structure (column names, types and first values) of the data frame.
str(object = df)
## 'data.frame':    1309 obs. of  21 variables:
##  $ PassengerId: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Survived   : int  0 1 1 1 0 0 0 0 1 1 ...
##  $ Pclass     : int  3 1 3 1 3 3 1 3 3 2 ...
##  $ Name       : Factor w/ 1307 levels "Abbing, Mr. Anthony",..: 156 287 531 430 23 826 775 922 613 855 ...
##  $ Sex        : Factor w/ 2 levels "female","male": 2 1 1 1 2 2 2 2 1 1 ...
##  $ Age        : num  22 38 26 35 35 NA 54 2 27 14 ...
##  $ SibSp      : int  1 1 0 1 0 0 0 3 0 1 ...
##  $ Parch      : int  0 0 0 0 0 0 0 1 2 0 ...
##  $ Ticket     : Factor w/ 929 levels "110152","110413",..: 721 817 915 66 650 374 110 542 478 175 ...
##  $ Fare       : num  7.25 71.28 7.92 53.1 8.05 ...
##  $ Cabin      : Factor w/ 186 levels "A10","A11","A14",..: NA 107 NA 71 NA NA 164 NA NA NA ...
##  $ Embarked   : Factor w/ 3 levels "C","Q","S": 3 1 3 3 3 2 3 3 3 1 ...
##  $ WikiId     : int  691 90 865 127 627 785 200 1108 902 520 ...
##  $ Name_wiki  : Factor w/ 1303 levels "Abbing, Mr. Anthony",..: 183 319 545 450 24 371 786 932 628 867 ...
##  $ Age_wiki   : num  22 35 26 35 35 22 54 2 26 14 ...
##  $ Hometown   : Factor w/ 566 levels "Abbeyleix, Laois, Ireland[note 1]",..: 71 359 261 439 55 122 143 60 464 562 ...
##  $ Boarded    : Factor w/ 4 levels "Belfast","Cherbourg",..: 4 2 4 4 4 3 4 4 4 2 ...
##  $ Destination: Factor w/ 291 levels "Aberdeen, South Dakota, US",..: 216 186 185 241 185 185 72 56 249 59 ...
##  $ Lifeboat   : Factor w/ 24 levels "?","1","10","11",..: NA 14 8 24 NA NA NA NA 9 1 ...
##  $ Body       : Factor w/ 135 levels "?","??","[66][67]",..: NA 6 6 6 6 6 42 2 2 2 ...
##  $ Class      : int  3 1 3 1 3 3 1 3 3 2 ...

TABLE FUNCTIONS

Table function analysis - 1

# Count the number of female and male passengers.
table(df[["Sex"]])
## 
## female   male 
##    466    843

Table function analysis - 2

# Tabulate passengers by sex and store the counts as a data frame
# (one row per level, with columns Var1 and Freq).
df1 <- as.data.frame(table(df[["Sex"]]))
# Inspect the structure of the resulting frequency data frame.
str(object = df1)
## 'data.frame':    2 obs. of  2 variables:
##  $ Var1: Factor w/ 2 levels "female","male": 1 2
##  $ Freq: int  466 843

Table function analysis - 3

# Two-way count of passengers by sex (rows) and port of boarding
# (columns: Belfast, Cherbourg, Queenstown, Southampton).
table(df[["Sex"]], df[["Boarded"]])
##         
##          Belfast Cherbourg Queenstown Southampton
##   female       0       114         58         291
##   male        10       145         61         625

Table function analysis - 4

# Count passengers per class, split by whether their age is above 25
# (TRUE row) or not (FALSE row). Passengers with a missing value are not
# counted, because table() drops NA by default.
table(df[["Age"]] > 25, df[["Class"]])
##        
##           1   2   3
##   FALSE  60 101 280
##   TRUE  227 156 219

Table function analysis - 5

# Count the missing values (NA) in every column of the data frame.
vapply(df, function(column) sum(is.na(column)), integer(1))
## PassengerId    Survived      Pclass        Name         Sex         Age 
##           0         418           0           0           0         263 
##       SibSp       Parch      Ticket        Fare       Cabin    Embarked 
##           0           0           0           1        1014           2 
##      WikiId   Name_wiki    Age_wiki    Hometown     Boarded Destination 
##           5           5           7           5           5           5 
##    Lifeboat        Body       Class 
##         807        1137           5
# The values above are the number of missing (NA) entries in each column of the dataset.

CROSS TABLE FUNCTIONS
Cross Table function analysis - 1

# Cross-tabulate passenger counts by sex (rows) and class 1, 2, 3 (columns).
xtabs(formula = ~ Sex + Class, data = df)
##         Class
## Sex        1   2   3
##   female 144 105 214
##   male   182 167 492

Cross Table function analysis - 2

# Cross-tabulate the Survived flag (rows: 0 and 1) against the port where
# each passenger boarded (columns).
xtabs(formula = ~ Survived + Boarded, data = df)
##         Boarded
## Survived Belfast Cherbourg Queenstown Southampton
##        0       9        73         47         419
##        1       0        93         29         219

Cross Table function analysis - 3

# Keep the Sex-by-Class contingency table so it can be reused below.
crosstab <- xtabs(formula = ~ Sex + Class, data = df)
# Print the counts as a flat contingency table.
ftable(crosstab)
##        Class   1   2   3
## Sex                     
## female       144 105 214
## male         182 167 492
# Row percentages: within each sex, the share of passengers in each class
# (every row sums to 100).
prop.table(crosstab, margin = 1) * 100
##         Class
## Sex             1        2        3
##   female 31.10151 22.67819 46.22030
##   male   21.64090 19.85731 58.50178
# Print the Sex-by-Class counts again as a flat contingency table, for
# reference next to the column percentages that follow.
ftable(crosstab)
##        Class   1   2   3
## Sex                     
## female       144 105 214
## male         182 167 492
# Column percentages: within each class, the share of female and male
# passengers (every column sums to 100).
prop.table(crosstab, margin = 2) * 100
##         Class
## Sex             1        2        3
##   female 44.17178 38.60294 30.31161
##   male   55.82822 61.39706 69.68839

Cross Table function analysis - 4

# gmodels provides CrossTable() for detailed cross-tabulation reports.
library(gmodels)
# Detailed Sex-by-Class cross-tabulation. With the default settings each cell
# reports the count, its chi-square contribution, and its proportion of the
# row total, column total and table total.
CrossTable(
  x = df$Sex,
  y = df$Class
)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  1304 
## 
##  
##              | df$Class 
##       df$Sex |         1 |         2 |         3 | Row Total | 
## -------------|-----------|-----------|-----------|-----------|
##       female |       144 |       105 |       214 |       463 | 
##              |     6.895 |     0.735 |     5.365 |           | 
##              |     0.311 |     0.227 |     0.462 |     0.355 | 
##              |     0.442 |     0.386 |     0.303 |           | 
##              |     0.110 |     0.081 |     0.164 |           | 
## -------------|-----------|-----------|-----------|-----------|
##         male |       182 |       167 |       492 |       841 | 
##              |     3.796 |     0.404 |     2.954 |           | 
##              |     0.216 |     0.199 |     0.585 |     0.645 | 
##              |     0.558 |     0.614 |     0.697 |           | 
##              |     0.140 |     0.128 |     0.377 |           | 
## -------------|-----------|-----------|-----------|-----------|
## Column Total |       326 |       272 |       706 |      1304 | 
##              |     0.250 |     0.209 |     0.541 |           | 
## -------------|-----------|-----------|-----------|-----------|
## 
## 

ⒸRkvision2022