This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

# Per-column summary of the titanic data: quartiles, mean and NA counts for
# numeric columns; length/class/mode for character columns.
summary(titanic)
  PassengerId       Survived          Pclass     
 Min.   :  1.0   Min.   :0.0000   Min.   :1.000  
 1st Qu.:223.5   1st Qu.:0.0000   1st Qu.:2.000  
 Median :446.0   Median :0.0000   Median :3.000  
 Mean   :446.0   Mean   :0.3838   Mean   :2.309  
 3rd Qu.:668.5   3rd Qu.:1.0000   3rd Qu.:3.000  
 Max.   :891.0   Max.   :1.0000   Max.   :3.000  
                                                 
     Name               Sex                 Age       
 Length:891         Length:891         Min.   : 0.42  
 Class :character   Class :character   1st Qu.:20.12  
 Mode  :character   Mode  :character   Median :28.00  
                                       Mean   :29.70  
                                       3rd Qu.:38.00  
                                       Max.   :80.00  
                                       NA's   :177    
     SibSp           Parch           Ticket         
 Min.   :0.000   Min.   :0.0000   Length:891        
 1st Qu.:0.000   1st Qu.:0.0000   Class :character  
 Median :0.000   Median :0.0000   Mode  :character  
 Mean   :0.523   Mean   :0.3816                     
 3rd Qu.:1.000   3rd Qu.:0.0000                     
 Max.   :8.000   Max.   :6.0000                     
                                                    
      Fare           Cabin             Embarked        
 Min.   :  0.00   Length:891         Length:891        
 1st Qu.:  7.91   Class :character   Class :character  
 Median : 14.45   Mode  :character   Mode  :character  
 Mean   : 32.20                                        
 3rd Qu.: 31.00                                        
 Max.   :512.33                                        
                                                       
# Compact structure listing: dimensions, each column's type and first values,
# plus the column specification attribute attached to the tibble.
str(titanic)
tibble [891 × 12] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ PassengerId: num [1:891] 1 2 3 4 5 6 7 8 9 10 ...
 $ Survived   : num [1:891] 0 1 1 1 0 0 0 0 1 1 ...
 $ Pclass     : num [1:891] 3 1 3 1 3 3 1 3 3 2 ...
 $ Name       : chr [1:891] "Braund, Mr. Owen Harris" "Cumings, Mrs. John Bradley (Florence Briggs Thayer)" "Heikkinen, Miss. Laina" "Futrelle, Mrs. Jacques Heath (Lily May Peel)" ...
 $ Sex        : chr [1:891] "male" "female" "female" "female" ...
 $ Age        : num [1:891] 22 38 26 35 35 NA 54 2 27 14 ...
 $ SibSp      : num [1:891] 1 1 0 1 0 0 0 3 0 1 ...
 $ Parch      : num [1:891] 0 0 0 0 0 0 0 1 2 0 ...
 $ Ticket     : chr [1:891] "A/5 21171" "PC 17599" "STON/O2. 3101282" "113803" ...
 $ Fare       : num [1:891] 7.25 71.28 7.92 53.1 8.05 ...
 $ Cabin      : chr [1:891] NA "C85" NA "C123" ...
 $ Embarked   : chr [1:891] "S" "C" "S" "S" ...
 - attr(*, "spec")=
  .. cols(
  ..   PassengerId = col_double(),
  ..   Survived = col_double(),
  ..   Pclass = col_double(),
  ..   Name = col_character(),
  ..   Sex = col_character(),
  ..   Age = col_double(),
  ..   SibSp = col_double(),
  ..   Parch = col_double(),
  ..   Ticket = col_character(),
  ..   Fare = col_double(),
  ..   Cabin = col_character(),
  ..   Embarked = col_character()
  .. )

Preview the first rows of the data, then select a single column

# Preview the first rows of the full table.
head(titanic)
# Pull one column out with `$` and show only its first five values.
head(titanic$PassengerId, n = 5)
[1] 1 2 3 4 5
# Show the last rows of the table.
tail(titanic)

Select a single row in `titanic`

# Row 1, all columns (the empty column index keeps every column).
titanic[1, ]

The `table()` command tabulates one variable, or cross-tabulates one variable against another. Here we get the number of people who survived and the number of people who did not survive.

# Count how many rows fall under each value of Survived (0 and 1).
table(titanic$Survived)

  0   1 
549 342 
# Store the Survived counts, then convert them to proportions of the total.
# NOTE(review): the name `t` masks base R's transpose function t() for the
# rest of the session; it is kept because later chunks reuse `t`.
t <- table(titanic$Survived)
prop.table(t)

        0         1 
0.6161616 0.3838384 
# Cross-tabulate Sex (rows) against Survived (columns); `t` is reused by the
# following column-wise call, so the name is kept.
t <- table(titanic$Sex, titanic$Survived)
# margin = 1: proportions within each row, i.e. survival split per sex.
prop.table(t, margin = 1)
        
                 0         1
  female 0.2579618 0.7420382
  male   0.8110919 0.1889081
# margin = 2: proportions within each column, i.e. sex split per survival outcome.
prop.table(t,margin = 2)
        
                 0         1
  female 0.1475410 0.6812865
  male   0.8524590 0.3187135

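Data manipulation using dplyr. Load the package with `library(dplyr)` (or `require(dplyr)`). `%>%` is the piping operator: it passes the result on its left as the first argument of the function on its right.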

# Average fare paid within each passenger class.
titanic %>%
  group_by(Pclass) %>%
  summarise(avg_price = mean(Fare))

# Names and ages of the ten oldest passengers, sorted by descending age.
titanic %>%
  select(Name, Age) %>%
  arrange(desc(Age)) %>%
  head(10)

`mutate()` adds a new column computed from existing ones; here it operates on the `count` column that `summarise()` has just created.

# Number of distinct passenger names for every Sex/Pclass combination,
# followed by a derived column holding half of that count.
titanic %>%
  group_by(Sex, Pclass) %>%
  summarise(count = n_distinct(Name)) %>%
  mutate(count_2 = count / 2)

Use the `spread()` function to spread the `Sex` variable into one column per value (reshaping from long to wide).

# Count distinct passenger names per Sex/Pclass pair, then widen the result
# so each Sex value becomes its own column (one row per Pclass).
# pivot_wider() replaces tidyr's superseded spread(). `.groups = "drop"`
# returns an ungrouped summary, so the reshape does not run on a grouped
# tibble and summarise() prints no regrouping message.
titanic %>%
  group_by(Sex, Pclass) %>%
  summarise(count = n_distinct(Name), .groups = "drop") %>%
  pivot_wider(names_from = Sex, values_from = count)
LS0tCnRpdGxlOiAiRGF0YSBNYW5pcHVsYXRpb24gd2l0aCBSIFRpdGFuaWMgZGF0YXNldCIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKVGhpcyBpcyBhbiBbUiBNYXJrZG93bl0oaHR0cDovL3JtYXJrZG93bi5yc3R1ZGlvLmNvbSkgTm90ZWJvb2suIFdoZW4geW91IGV4ZWN1dGUgY29kZSB3aXRoaW4gdGhlIG5vdGVib29rLCB0aGUgcmVzdWx0cyBhcHBlYXIgYmVuZWF0aCB0aGUgY29kZS4gCgpUcnkgZXhlY3V0aW5nIHRoaXMgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpSdW4qIGJ1dHRvbiB3aXRoaW4gdGhlIGNodW5rIG9yIGJ5IHBsYWNpbmcgeW91ciBjdXJzb3IgaW5zaWRlIGl0IGFuZCBwcmVzc2luZyAqQ21kK1NoaWZ0K0VudGVyKi4gCgpgYGB7cn0Kc3VtbWFyeSh0aXRhbmljKQpgYGAKYGBge3J9CnN0cih0aXRhbmljKQpgYGAKClNlbGVjdCBhIGNvbHVtbiAKYGBge3J9CmhlYWQodGl0YW5pYykKYGBgCgoKYGBge3J9CmhlYWQodGl0YW5pYyRQYXNzZW5nZXJJZCw1KQpgYGAKCmBgYHtyfQp0YWlsKHRpdGFuaWMpCmBgYAoKc2VsZWN0IGEgc2luZ2xlIHJvdyBpbiB0aXRhbmljCmBgYHtyfQp0aXRhbmljWzEsXQpgYGAKCnRhYmxlIGNvbW1hbmQgLSB0YWJ1bGF0ZXMgb25lIHZhcmlhYmxlIHZzIG90aGVyIHZhcmlhYmxlCmdldCBubyBvZiBwZW9wbGUgd2hvIHN1cnZpdmVkIGFuZCBubyBvZiBwZW9wbGUgd2hvIGRpZCBub3Qgc3Vydml2ZQpgYGB7cn0KdGFibGUodGl0YW5pYyRTdXJ2aXZlZCkKYGBgCgpgYGB7cn0KdCA8LSB0YWJsZSh0aXRhbmljJFN1cnZpdmVkKQpwcm9wLnRhYmxlKHQpCmBgYAoKYGBge3J9CnQgPC0gdGFibGUodGl0YW5pYyRTZXgsdGl0YW5pYyRTdXJ2aXZlZCkKCnByb3AudGFibGUodCxtYXJnaW4gPSAxKSAjIHBlcmNlbnRhZ2UgYnkgcm93cwpwcm9wLnRhYmxlKHQsbWFyZ2luID0gMikgIyBwZXJjZW50YWdlIGJ5IGNvbHVtbnMgCgpgYGAKCkRhdGEgbWFuaXB1bGF0aW9uIHVzaW5nIGRwbHlyCmxpYnJhcnkoZHBseXIpCnJlcXVpcmUoZHBseXIpCiU+JSAtIHRoaXMgaXMgYSBwaXBpbmcgb3BlcmF0b3IgLSAKYGBge3J9CnRpdGFuaWMgJT4lIGdyb3VwX2J5KFBjbGFzcykgJT4lIAogIHN1bW1hcmlzZShhdmdfcHJpY2UgPSBtZWFuKEZhcmUpKQpgYGAKCmBgYHtyfQp0aXRhbmljICU+JSBzZWxlY3QoTmFtZSxBZ2UpICU+JSBhcnJhbmdlKGRlc2MoQWdlKSkgJT4lCiAgaGVhZCgxMCkKYGBgCgptdXRhdGUgLSB0aGlzIGZ1bmN0aW9uIG9wZXJhdGVzIG9uIGFuIGFscmVhZHkgY3JlYXRlZCBjb2x1bW4KYGBge3J9CnRpdGFuaWMgJT4lIGdyb3VwX2J5KFNleCxQY2xhc3MpICU+JSAKICBzdW1tYXJpc2UoY291bnQgPSBuX2Rpc3RpbmN0KE5hbWUpKSAlPiUKICBtdXRhdGUoY291bnRfMiA9IGNvdW50LzIpCmBgYAoKU3ByZWFkIGZ1bmN0aW9uIHRvIHNwcmVhZCB0aGUgU2V4IHZhcmlhYmxlIApgYGB7cn0KdGl0YW5pYyAlPiUgZ3JvdXBfYnkoU2V4LFBjbGFzcykgJT4lIAogIHN1bW1hcmlzZShjb3VudCA9IG5f
ZGlzdGluY3QoTmFtZSkpICU+JSAKICBzcHJlYWQoU2V4LGNvdW50KQpgYGAKCg==