## Including Plots

You can also embed plots, for example:


```r
library(readr)
library(rpart)
library(rpart.plot)
library(rattle)
## Loading required package: tibble
## Loading required package: bitops
## Rattle: A free graphical interface for data science with R.
## Version 5.5.1 Copyright (c) 2006-2021 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
library(RColorBrewer)
library(e1071)
#1,2
# Read the raw passenger data into a tibble; the path is relative to the
# working directory the document is run from.
t3 <- read_csv(file = "Downloads/titanic3.csv")
## Rows: 1309 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): pclass, name, sex, ticket, cabin, embarked, boat, home.dest
## dbl (6): survived, age, sibsp, parch, fare, body
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens a GUI data viewer, which is only meaningful (and only reliably
# available) in an interactive session, so skip it when the document is
# rendered non-interactively.
if (interactive()) {
  View(t3)
}

#3
# Keep the outcome (survived) plus the six attributes used for modelling.
titanic <- t3[, c(
  "survived", "embarked", "sex", "age", "sibsp", "parch", "fare"
)]
titanic
## # A tibble: 1,309 × 7
##    survived embarked sex      age sibsp parch  fare
##       <dbl> <chr>    <chr>  <dbl> <dbl> <dbl> <dbl>
##  1        1 S        female 29        0     0 211. 
##  2        1 S        male    0.92     1     2 152. 
##  3        0 S        female  2        1     2 152. 
##  4        0 S        male   30        1     2 152. 
##  5        0 S        female 25        1     2 152. 
##  6        1 S        male   48        0     0  26.6
##  7        1 S        female 63        1     0  78.0
##  8        0 S        male   39        0     0   0  
##  9        1 S        female 53        2     0  51.5
## 10        0 C        male   71        0     0  49.5
## # ℹ 1,299 more rows
#4
# Per-column summary of the selected variables, taken before any rows are
# dropped (missing values are only removed in step #6).
summary(titanic)
##     survived       embarked             sex                 age       
##  Min.   :0.000   Length:1309        Length:1309        Min.   : 0.17  
##  1st Qu.:0.000   Class :character   Class :character   1st Qu.:21.00  
##  Median :0.000   Mode  :character   Mode  :character   Median :28.00  
##  Mean   :0.382                                         Mean   :29.88  
##  3rd Qu.:1.000                                         3rd Qu.:39.00  
##  Max.   :1.000                                         Max.   :80.00  
##                                                        NA's   :263    
##      sibsp            parch            fare        
##  Min.   :0.0000   Min.   :0.000   Min.   :  0.000  
##  1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:  7.896  
##  Median :0.0000   Median :0.000   Median : 14.454  
##  Mean   :0.4989   Mean   :0.385   Mean   : 33.295  
##  3rd Qu.:1.0000   3rd Qu.:0.000   3rd Qu.: 31.275  
##  Max.   :8.0000   Max.   :9.000   Max.   :512.329  
##                                   NA's   :1
# Show each column's storage type together with its first few values.
str(titanic)
## tibble [1,309 × 7] (S3: tbl_df/tbl/data.frame)
##  $ survived: num [1:1309] 1 1 0 0 0 1 1 0 1 0 ...
##  $ embarked: chr [1:1309] "S" "S" "S" "S" ...
##  $ sex     : chr [1:1309] "female" "male" "female" "male" ...
##  $ age     : num [1:1309] 29 0.92 2 30 25 48 63 39 53 71 ...
##  $ sibsp   : num [1:1309] 0 1 1 1 1 0 1 0 2 0 ...
##  $ parch   : num [1:1309] 0 2 2 2 2 0 0 0 0 0 ...
##  $ fare    : num [1:1309] 211 152 152 152 152 ...
#6
# Drop every row that has a missing value in any of the selected columns,
# then summarise again to check that no NA counts remain.
titanic <- na.omit(titanic)
summary(titanic)
##     survived        embarked             sex                 age       
##  Min.   :0.0000   Length:1043        Length:1043        Min.   : 0.17  
##  1st Qu.:0.0000   Class :character   Class :character   1st Qu.:21.00  
##  Median :0.0000   Mode  :character   Mode  :character   Median :28.00  
##  Mean   :0.4075                                         Mean   :29.81  
##  3rd Qu.:1.0000                                         3rd Qu.:39.00  
##  Max.   :1.0000                                         Max.   :80.00  
##      sibsp            parch             fare       
##  Min.   :0.0000   Min.   :0.0000   Min.   :  0.00  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:  8.05  
##  Median :0.0000   Median :0.0000   Median : 15.75  
##  Mean   :0.5043   Mean   :0.4219   Mean   : 36.60  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.: 35.08  
##  Max.   :8.0000   Max.   :6.0000   Max.   :512.33
#5
# Share of passengers in each outcome class (0 and 1) among the complete rows.
prop.table(table(titanic[["survived"]]))
## 
##         0         1 
## 0.5925216 0.4074784
#4,8
# Numeric variables used for the correlation matrix and the skewness check.
titanic_correlation <- t3[, c("age", "sibsp", "parch", "fare", "survived")]
# age and fare contain missing values in t3 (see the NA counts in the first
# summary), so restrict cor() to complete rows; with the default
# use = "everything" every coefficient involving those columns would be NA.
tcor <- cor(titanic_correlation, use = "complete.obs")


# calculate standard deviation for the numeric attributes only; sd() is not
# defined for the character columns (embarked, sex) and would just emit
# coercion warnings and NA for them
vapply(Filter(is.numeric, titanic), sd, numeric(1))
##   survived        age      sibsp      parch       fare 
##  0.4916009 14.3662545  0.9130797  0.8406546 55.7536477
# Skewness of each numeric variable; values further from 0 indicate a more
# asymmetric distribution.
# NOTE(review): age and fare are reported as NA in the recorded output,
# presumably because those t3 columns still contain missing values and
# skewness() is called without na.rm = TRUE -- confirm before interpreting.
skew <- vapply(titanic_correlation, skewness, numeric(1))
print(skew)
##      age    sibsp    parch     fare survived 
##       NA 3.835415 3.660674       NA 0.485290
#7
# Store the outcome and the two categorical attributes as factors so the
# tree treats them as classes rather than numbers or free text.
titanic[["survived"]] <- factor(titanic[["survived"]])
titanic[["embarked"]] <- factor(titanic[["embarked"]])
titanic[["sex"]] <- factor(titanic[["sex"]])


# load package
library(corrplot)
## corrplot 0.92 loaded
#9 Draw the correlation matrix, using circles to represent the coefficients.
corrplot(tcor, method="circle")

# NOTE(review): tcor is a matrix, so this presumably draws a scatter plot of
# its first two columns rather than anything about the raw data -- confirm
# that this figure is actually wanted.
plot(tcor)

# Scatter-plot matrix for every pair of columns in the cleaned data set.
pairs(titanic)

#10,11
# Fix the RNG seed so the random split below is reproducible.
set.seed(1000)
# Training-set size: 80% of the cleaned rows, rounded down (834 of 1043).
n_train <- floor(0.8 * nrow(titanic))
train_index <- sample(seq_len(nrow(titanic)), n_train)

# Random 80/20 split.
# NOTE(review): nothing in the visible part of the file uses `train`/`test`;
# the model below is fitted on the sequential split -- confirm which one is
# intended.
train <- titanic[train_index, ]
test <- titanic[-train_index, ]

#11) Build your training (till index 1046) and test (till index 1308) datasets
# After na.omit() the data has 1043 rows, so the split point is derived from
# nrow() instead of being hard-coded: the first n_train rows are used for
# training and all remaining rows for testing.
titanic_train <- titanic[seq_len(n_train), ]
titanic_test <- titanic[seq_len(nrow(titanic)) > n_train, ]


#13
# Classification tree (method = "class") predicting survival from the six
# passenger attributes, fitted on the training rows only.
fit <- rpart(
  formula = survived ~ sex + age + sibsp + parch + fare + embarked,
  data = titanic_train,
  method = "class"
)
# Re-inspect the column types of the cleaned data after the factor conversion.
str(titanic)
## tibble [1,043 × 7] (S3: tbl_df/tbl/data.frame)
##  $ survived: Factor w/ 2 levels "0","1": 2 2 1 1 1 2 2 1 2 1 ...
##  $ embarked: Factor w/ 3 levels "C","Q","S": 3 3 3 3 3 3 3 3 3 1 ...
##  $ sex     : Factor w/ 2 levels "female","male": 1 2 1 2 1 2 1 2 1 2 ...
##  $ age     : num [1:1043] 29 0.92 2 30 25 48 63 39 53 71 ...
##  $ sibsp   : num [1:1043] 0 1 1 1 1 0 1 0 2 0 ...
##  $ parch   : num [1:1043] 0 2 2 2 2 0 0 0 0 0 ...
##  $ fare    : num [1:1043] 211 152 152 152 152 ...
##  - attr(*, "na.action")= 'omit' Named int [1:266] 16 38 41 47 60 70 71 75 81 107 ...
##   ..- attr(*, "names")= chr [1:266] "16" "38" "41" "47" ...
#14,16 plot your regression tree and save plot into an image file
# Print the fitted tree as text first. The model was fitted with
# method = "class", so this is a classification tree.

fit
## n= 834 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
##  1) root 834 373 0 (0.5527578 0.4472422)  
##    2) sex=male 511 110 0 (0.7847358 0.2152642)  
##      4) age>=10 483  89 0 (0.8157350 0.1842650) *
##      5) age< 10 28   7 1 (0.2500000 0.7500000) *
##    3) sex=female 323  60 1 (0.1857585 0.8142415)  
##      6) fare< 10.1625 42  19 0 (0.5476190 0.4523810)  
##       12) age>=19.5 28   9 0 (0.6785714 0.3214286) *
##       13) age< 19.5 14   4 1 (0.2857143 0.7142857) *
##      7) fare>=10.1625 281  37 1 (0.1316726 0.8683274)  
##       14) sibsp>=3.5 7   1 0 (0.8571429 0.1428571) *
##       15) sibsp< 3.5 274  31 1 (0.1131387 0.8868613) *
# Draw the fitted tree.
fancyRpartPlot(fit)

#17 Examine the tree obtained: which feature does the tree split on first,
#   i.e. which is the most "important"?
#   Answer: sex -- it is the split at the root node.

#18 Use predict() with the fitted model to classify every row of the test
#   set, keeping the predicted class labels in a variable.
predicts <- predict(object = fit, newdata = titanic_test, type = "class")

predicts
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20 
##   0   0   0   0   0   0   0   0   0   1   0   0   0   1   0   0   0   0   0   0 
##  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40 
##   0   0   0   0   0   1   0   0   1   0   0   0   0   0   0   1   1   0   0   0 
##  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60 
##   0   1   1   0   1   0   0   1   0   0   1   0   0   0   0   0   0   0   0   0 
##  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80 
##   0   1   0   0   0   0   0   0   0   0   0   0   0   1   1   1   1   1   1   1 
##  81  82  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100 
##   1   0   0   1   0   0   0   1   1   1   0   0   0   0   0   0   0   0   1   0 
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 
##   0   0   0   0   0   1   1   1   1   1   1   0   0   1   1   0   1   0   0   0 
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 
##   0   0   0   0   0   1   1   1   0   0   0   0   0   0   0   1   1   0   1   1 
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 
##   0   1   0   0   0   0   0   0   0   0   0   0   1   1   0   0   0   0   0   0 
## 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 
##   0   0   1   1   0   0   0   0   1   1   1   0   1   0   0   0   1   0   1   0 
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 
##   0   0   0   1   0   0   1   0   0   1   0   0   0   0   0   0   0   0   0   0 
## 201 202 203 204 205 206 207 208 209 
##   0   0   0   1   0   1   0   0   0 
## Levels: 0 1
#19 Collect the results in a two-column data frame: the sex of each test
#   passenger next to the class the tree predicted for them.
Result <- data.frame(
  passengersex = titanic_test[["sex"]],
  survivals = predicts
)
Result
##     passengersex survivals
## 1           male         0
## 2           male         0
## 3           male         0
## 4           male         0
## 5           male         0
## 6           male         0
## 7         female         0
## 8         female         0
## 9           male         0
## 10        female         1
## 11          male         0
## 12          male         0
## 13          male         0
## 14        female         1
## 15          male         0
## 16          male         0
## 17          male         0
## 18        female         0
## 19          male         0
## 20          male         0
## 21          male         0
## 22          male         0
## 23        female         0
## 24          male         0
## 25          male         0
## 26        female         1
## 27        female         0
## 28          male         0
## 29        female         1
## 30          male         0
## 31          male         0
## 32          male         0
## 33          male         0
## 34          male         0
## 35          male         0
## 36          male         1
## 37        female         1
## 38          male         0
## 39        female         0
## 40          male         0
## 41          male         0
## 42        female         1
## 43        female         1
## 44          male         0
## 45        female         1
## 46          male         0
## 47          male         0
## 48        female         1
## 49        female         0
## 50          male         0
## 51        female         1
## 52        female         0
## 53          male         0
## 54          male         0
## 55          male         0
## 56          male         0
## 57          male         0
## 58        female         0
## 59          male         0
## 60          male         0
## 61        female         0
## 62          male         1
## 63          male         0
## 64          male         0
## 65        female         0
## 66          male         0
## 67          male         0
## 68          male         0
## 69        female         0
## 70        female         0
## 71          male         0
## 72          male         0
## 73        female         0
## 74          male         1
## 75          male         1
## 76        female         1
## 77        female         1
## 78        female         1
## 79          male         1
## 80          male         1
## 81          male         1
## 82          male         0
## 83          male         0
## 84        female         1
## 85          male         0
## 86          male         0
## 87          male         0
## 88          male         1
## 89        female         1
## 90        female         1
## 91          male         0
## 92          male         0
## 93          male         0
## 94          male         0
## 95          male         0
## 96        female         0
## 97          male         0
## 98          male         0
## 99        female         1
## 100         male         0
## 101         male         0
## 102         male         0
## 103         male         0
## 104         male         0
## 105         male         0
## 106         male         1
## 107         male         1
## 108         male         1
## 109         male         1
## 110       female         1
## 111       female         1
## 112         male         0
## 113         male         0
## 114       female         1
## 115       female         1
## 116         male         0
## 117       female         1
## 118         male         0
## 119         male         0
## 120         male         0
## 121         male         0
## 122         male         0
## 123         male         0
## 124       female         0
## 125         male         0
## 126       female         1
## 127       female         1
## 128       female         1
## 129         male         0
## 130         male         0
## 131         male         0
## 132         male         0
## 133         male         0
## 134         male         0
## 135         male         0
## 136       female         1
## 137         male         1
## 138         male         0
## 139       female         1
## 140       female         1
## 141         male         0
## 142       female         1
## 143         male         0
## 144         male         0
## 145         male         0
## 146         male         0
## 147       female         0
## 148         male         0
## 149         male         0
## 150       female         0
## 151         male         0
## 152         male         0
## 153       female         1
## 154       female         1
## 155         male         0
## 156         male         0
## 157         male         0
## 158         male         0
## 159         male         0
## 160         male         0
## 161         male         0
## 162         male         0
## 163         male         1
## 164       female         1
## 165         male         0
## 166         male         0
## 167         male         0
## 168         male         0
## 169         male         1
## 170       female         1
## 171       female         1
## 172         male         0
## 173       female         1
## 174       female         0
## 175         male         0
## 176         male         0
## 177       female         1
## 178         male         0
## 179       female         1
## 180         male         0
## 181         male         0
## 182         male         0
## 183         male         0
## 184       female         1
## 185         male         0
## 186         male         0
## 187       female         1
## 188         male         0
## 189         male         0
## 190       female         1
## 191         male         0
## 192         male         0
## 193         male         0
## 194       female         0
## 195         male         0
## 196         male         0
## 197         male         0
## 198       female         0
## 199         male         0
## 200         male         0
## 201         male         0
## 202         male         0
## 203         male         0
## 204       female         1
## 205         male         0
## 206       female         1
## 207         male         0
## 208         male         0
## 209         male         0
#20 Write the result table to a CSV file in the working directory, leaving
#   out the row-name column.
write.csv(x = Result, file = "TitanicTree.csv", row.names = FALSE)

```

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.