This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

# Read the training CSV into the data frame `r`.
r <- read.csv(file = "titanic_train.csv")

# Preview the first rows, then show each column's type and sample values.
head(x = r)
str(object = r)
'data.frame':   850 obs. of  15 variables:
 $ passenger_id: int  1216 699 1267 449 576 1083 898 560 1079 908 ...
 $ pclass      : int  3 3 3 2 2 3 3 2 3 3 ...
 $ name        : chr  "Smyth, Miss. Julia" "Cacic, Mr. Luka" "Van Impe, Mrs. Jean Baptiste (Rosalie Paula Govaert)" "Hocking, Mrs. Elizabeth (Eliza Needs)" ...
 $ sex         : chr  "female" "male" "female" "female" ...
 $ age         : num  NA 38 30 54 40 28 19 30 22 21 ...
 $ sibsp       : int  0 0 1 1 0 0 0 0 0 1 ...
 $ parch       : int  0 0 1 3 0 0 0 0 0 0 ...
 $ ticket      : chr  "335432" "315089" "345773" "29105" ...
 $ fare        : num  7.73 8.66 24.15 23 13 ...
 $ cabin       : chr  "" "" "" "" ...
 $ embarked    : chr  "Q" "S" "S" "S" ...
 $ boat        : chr  "13" "" "" "4" ...
 $ body        : int  NA NA NA NA NA 173 NA NA NA NA ...
 $ home.dest   : chr  "" "Croatia" "" "Cornwall / Akron, OH" ...
 $ survived    : int  1 0 0 1 0 0 0 1 1 0 ...
# Number of NA entries in each column of `r`.
na_mask <- is.na(r)
colSums(na_mask)
passenger_id       pclass         name 
           0            0            0 
         sex          age        sibsp 
           0          174            0 
       parch       ticket         fare 
           0            0            1 
       cabin     embarked         boat 
           0            0            0 
        body    home.dest     survived 
         777            0            0 
# Number of empty-string entries in each column of `r`.
# Columns that contain NA report NA here, because comparing NA with ""
# yields NA and colSums() does not drop it by default.
blank_mask <- r == ""
colSums(blank_mask)
passenger_id       pclass         name 
           0            0            0 
         sex          age        sibsp 
           0           NA            0 
       parch       ticket         fare 
           0            0           NA 
       cabin     embarked         boat 
         659            1          542 
        body    home.dest     survived 
          NA          386            0 
# Replace blank `embarked` entries with "C".
# NOTE(review): the replacement value "C" is hard-coded; confirm it is the
# intended imputation for the blank entry.
r$embarked[r$embarked == ""] <- "C"

# Count the distinct values (NA included) in every column.
# vapply() visits each column with its own type; apply() would first coerce
# the whole data frame to a character matrix, formatting numeric columns as
# strings before they are compared.
vapply(r, function(a) length(unique(a)), integer(1))
passenger_id       pclass         name 
         850            3          849 
         sex          age        sibsp 
           2           89            7 
       parch       ticket         fare 
           8          660          237 
       cabin     embarked         boat 
         136            3           27 
        body    home.dest     survived 
          74          273            2 
# Columns to be treated as categorical variables.
co <- c("survived", "pclass", "sex", "embarked")

# Convert all of them to factors in one step, then re-inspect the structure.
r[co] <- lapply(r[co], as.factor)
str(r)
'data.frame':   850 obs. of  15 variables:
 $ passenger_id: int  1216 699 1267 449 576 1083 898 560 1079 908 ...
 $ pclass      : Factor w/ 3 levels "1","2","3": 3 3 3 2 2 3 3 2 3 3 ...
 $ name        : chr  "Smyth, Miss. Julia" "Cacic, Mr. Luka" "Van Impe, Mrs. Jean Baptiste (Rosalie Paula Govaert)" "Hocking, Mrs. Elizabeth (Eliza Needs)" ...
 $ sex         : Factor w/ 2 levels "female","male": 1 2 1 1 2 2 2 1 1 1 ...
 $ age         : num  NA 38 30 54 40 28 19 30 22 21 ...
 $ sibsp       : int  0 0 1 1 0 0 0 0 0 1 ...
 $ parch       : int  0 0 1 3 0 0 0 0 0 0 ...
 $ ticket      : chr  "335432" "315089" "345773" "29105" ...
 $ fare        : num  7.73 8.66 24.15 23 13 ...
 $ cabin       : chr  "" "" "" "" ...
 $ embarked    : Factor w/ 3 levels "C","Q","S": 2 3 3 3 3 3 3 3 3 3 ...
 $ boat        : chr  "13" "" "" "4" ...
 $ body        : int  NA NA NA NA NA 173 NA NA NA NA ...
 $ home.dest   : chr  "" "Croatia" "" "Cornwall / Akron, OH" ...
 $ survived    : Factor w/ 2 levels "0","1": 2 1 1 2 1 1 1 2 2 1 ...
library(ggplot2)
package ‘ggplot2’ was built under R version 4.0.5
# Stacked counts of `survived` within each level of `sex`.
ggplot(data = r, mapping = aes(x = sex, fill = survived)) +
  geom_bar()

# Share of `survived` within each level of `embarked`
# (position = "fill" scales every bar to the same height).
ggplot(data = r, mapping = aes(x = embarked, fill = survived)) +
  geom_bar(position = "fill") +
  ylab("Frequency")

# Cross-tabulate `embarked` (rows) against `survived` (columns) and express
# each row as percentages, so every row sums to 100.
# prop.table(margin = 1) divides each row by its own total; it replaces the
# hand-written `1:dim(t)[1]` loop, which would iterate over c(1, 0) on an
# empty table.
# NOTE(review): the name `t` masks base::t() as a variable; kept unchanged so
# any later use of `t` still works.
t <- prop.table(table(r$embarked, r$survived), margin = 1) * 100
print(t)
   
           0        1
  C 48.02260 51.97740
  Q 65.47619 34.52381
  S 67.40238 32.59762
# Share of `survived` within each `pclass` level (bars scaled to full height).
ggplot(data = r, mapping = aes(x = pclass, fill = survived)) +
  geom_bar(position = "fill") +
  ylab("Frequency")

# Stacked counts of `survived` for each value of `sibsp`.
ggplot(data = r, mapping = aes(x = sibsp, fill = survived)) +
  geom_bar()

# Stacked counts of `survived` for each value of `parch`.
ggplot(data = r, mapping = aes(x = parch, fill = survived)) +
  geom_bar()

Analysis

1. It looks like you have a better chance of surviving if you embarked at ‘C’ (about 52%, compared with about 35% for ‘Q’ and 33% for ‘S’).

2. It looks like you have a better chance of surviving if you are in a lower-numbered ticket class (e.g., 1st rather than 3rd).

3. This suggests that passengers with a family size of at least 2 but less than 6 have a more than 50% chance of surviving, in contrast to passengers travelling alone or in families of more than 5 members.

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpUaGlzIGlzIGFuIFtSIE1hcmtkb3duXShodHRwOi8vcm1hcmtkb3duLnJzdHVkaW8uY29tKSBOb3RlYm9vay4gV2hlbiB5b3UgZXhlY3V0ZSBjb2RlIHdpdGhpbiB0aGUgbm90ZWJvb2ssIHRoZSByZXN1bHRzIGFwcGVhciBiZW5lYXRoIHRoZSBjb2RlLiANCg0KVHJ5IGV4ZWN1dGluZyB0aGlzIGNodW5rIGJ5IGNsaWNraW5nIHRoZSAqUnVuKiBidXR0b24gd2l0aGluIHRoZSBjaHVuayBvciBieSBwbGFjaW5nIHlvdXIgY3Vyc29yIGluc2lkZSBpdCBhbmQgcHJlc3NpbmcgKkN0cmwrU2hpZnQrRW50ZXIqLiANCg0KYGBge3J9DQpyPC1yZWFkLmNzdigidGl0YW5pY190cmFpbi5jc3YiKQ0KaGVhZChyKQ0KYGBgDQoNCmBgYHtyfQ0Kc3RyKHIpDQpgYGANCg0KYGBge3J9DQpjb2xTdW1zKGlzLm5hKHIpKQ0KYGBgDQoNCmBgYHtyfQ0KY29sU3VtcyhyPT0iIikNCmBgYA0KDQpgYGB7cn0NCnIkZW1iYXJrZWRbciRlbWJhcmtlZD09IiJdPSJDIg0KYXBwbHkociwyLCBmdW5jdGlvbihhKSBsZW5ndGgodW5pcXVlKGEpKSkNCmBgYA0KDQpgYGB7cn0NCmNvPC1jKCJzdXJ2aXZlZCIsInBjbGFzcyIsInNleCIsImVtYmFya2VkIikNCmZvciAoaSBpbiBjbyl7DQpyWyxpXSA8LSBhcy5mYWN0b3IoclssaV0pDQp9DQpzdHIocikNCmBgYA0KDQpgYGB7cn0NCmxpYnJhcnkoZ2dwbG90MikNCmdncGxvdChyLGFlcyh4PXNleCxmaWxsPXN1cnZpdmVkKSkrZ2VvbV9iYXIoKQ0KYGBgDQoNCmBgYHtyfQ0KZ2dwbG90KHIsYWVzKHg9ZW1iYXJrZWQsZmlsbD1zdXJ2aXZlZCkpK2dlb21fYmFyKHBvc2l0aW9uPSJmaWxsIikreWxhYigiRnJlcXVlbmN5IikNCmBgYA0KDQpgYGB7cn0NCnQ8LXRhYmxlKHIkZW1iYXJrZWQsciRzdXJ2aXZlZCkNCmZvciAoaSBpbiAxOmRpbSh0KVsxXSl7DQp0W2ksXTwtdFtpLF0vc3VtKHRbaSxdKSoxMDANCn0NCnByaW50KHQpDQpgYGANCg0KYGBge3J9DQpnZ3Bsb3QoZGF0YSA9IHIsYWVzKHg9cGNsYXNzLGZpbGw9c3Vydml2ZWQpKStnZW9tX2Jhcihwb3NpdGlvbj0iZmlsbCIpK3lsYWIoIkZyZXF1ZW5jeSIpDQpgYGANCg0KYGBge3J9DQpnZ3Bsb3QocixhZXMoeD1zaWJzcCxmaWxsPXN1cnZpdmVkKSkrZ2VvbV9iYXIoKQ0KYGBgDQoNCmBgYHtyfQ0KZ2dwbG90KHIsYWVzKHg9cGFyY2gsZmlsbD1zdXJ2aXZlZCkpK2dlb21fYmFyKCkNCmBgYA0KDQojIEFuYWx5c2lzDQojIyMgMS4gSXQgbG9va3MgdGhhdCB5b3UgaGF2ZSBhIGJldHRlciBjaGFuY2UgdG8gc3Vydml2ZSBpZiB5b3UNCiNFbWJhcmtlZCBpbiAnQycgKDU1JSBjb21wYXJlZCB0byAzMyUgYW5kIDM4JSkuDQojIyMgMi4gSXQgbG9va3MgbGlrZSB5b3UgaGF2ZSBhIGJldHRlciBjaGFuY2UgdG8gc3Vydml2ZSBpZiB5b3UgaW4gbG93ZXIgdGlja2V0IGNsYXNzLg0KIyMjIDMuVGhhdCBzaG93cyB0aGF0IGZhbWlsaWVzIHdpdGggYSBmYW1pbHkgc2l6ZSBiaWdnZXIgb3Ig
ZXF1YWwgdG8gMiBidXQgbGVzcyB0aGFuIDYgaGF2ZSBhIG1vcmUgdGhhbiA1MCUgdG8gc3Vydml2ZSwgaW4gY29udHJhc3QgdG8gZmFtaWxpZXMgd2l0aCAxIG1lbWJlciBvciBtb3JlIHRoYW4gNSBtZW1iZXJzLg0KDQo=