Import csv file from url

library(readr)
T3 <- read_csv("https://goo.gl/At238b")
## Rows: 1309 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): pclass, name, sex, ticket, cabin, embarked, boat, home.dest
## dbl (6): survived, age, sibsp, parch, fare, body
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(T3)

A new dataset called titanic is built.

It contains the following features: survived, sex, age, sibsp, parch, fare and embarked. All other columns are dropped.

# Columns that are not used in the rest of the analysis.
T3_drop <- c(
  "pclass", "name", "ticket", "cabin", "boat", "body", "home.dest"
)
# Keep every column of T3 whose name is not listed in T3_drop.
titanic <- T3[, setdiff(names(T3), T3_drop)]
View(titanic)
any(is.na(titanic))
## [1] TRUE
# Drop every row that has at least one missing value.
titanic_1 <- na.omit(titanic)
# Reconfirm that no missing values remain.
anyNA(titanic_1)
## [1] FALSE

Statistical Analysis

summary(titanic_1)
##     survived          sex                 age            sibsp       
##  Min.   :0.0000   Length:1043        Min.   : 0.17   Min.   :0.0000  
##  1st Qu.:0.0000   Class :character   1st Qu.:21.00   1st Qu.:0.0000  
##  Median :0.0000   Mode  :character   Median :28.00   Median :0.0000  
##  Mean   :0.4075                      Mean   :29.81   Mean   :0.5043  
##  3rd Qu.:1.0000                      3rd Qu.:39.00   3rd Qu.:1.0000  
##  Max.   :1.0000                      Max.   :80.00   Max.   :8.0000  
##      parch             fare          embarked        
##  Min.   :0.0000   Min.   :  0.00   Length:1043       
##  1st Qu.:0.0000   1st Qu.:  8.05   Class :character  
##  Median :0.0000   Median : 15.75   Mode  :character  
##  Mean   :0.4219   Mean   : 36.60                     
##  3rd Qu.:1.0000   3rd Qu.: 35.08                     
##  Max.   :6.0000   Max.   :512.33
str(titanic_1)
## tibble [1,043 × 7] (S3: tbl_df/tbl/data.frame)
##  $ survived: num [1:1043] 1 1 0 0 0 1 1 0 1 0 ...
##  $ sex     : chr [1:1043] "female" "male" "female" "male" ...
##  $ age     : num [1:1043] 29 0.92 2 30 25 48 63 39 53 71 ...
##  $ sibsp   : num [1:1043] 0 1 1 1 1 0 1 0 2 0 ...
##  $ parch   : num [1:1043] 0 2 2 2 2 0 0 0 0 0 ...
##  $ fare    : num [1:1043] 211 152 152 152 152 ...
##  $ embarked: chr [1:1043] "S" "S" "S" "S" ...
##  - attr(*, "na.action")= 'omit' Named int [1:266] 16 38 41 47 60 70 71 75 81 107 ...
##   ..- attr(*, "names")= chr [1:266] "16" "38" "41" "47" ...
Dimension of dataset and data type of each attribute
dim(titanic_1)
## [1] 1043    7
sapply(titanic_1, class)
##    survived         sex         age       sibsp       parch        fare 
##   "numeric" "character"   "numeric"   "numeric"   "numeric"   "numeric" 
##    embarked 
## "character"
Standard Deviation
# Standard deviation is only meaningful for numeric columns, so restrict the
# computation to them; applying sd() to the character columns (sex, embarked)
# only yields coercion warnings and NA results.
numeric_cols <- vapply(titanic_1, is.numeric, logical(1))
sapply(titanic_1[numeric_cols], sd)
##   survived        age      sibsp      parch       fare 
##  0.4916009 14.3662545  0.9130797  0.8406546 55.7536477
Skew
library(mlbench)
library(e1071)
# Collect the numeric columns whose skewness is of interest.
skew_titanic <- data.frame(
  titanic_1$survived,
  titanic_1$age,
  titanic_1$sibsp,
  titanic_1$parch,
  titanic_1$fare
)
# Compute the skewness of each column, one value per column.
skew <- vapply(skew_titanic, skewness, numeric(1))
print(skew)
## titanic_1.survived      titanic_1.age    titanic_1.sibsp    titanic_1.parch 
##          0.3760484          0.4057134          2.7984033          2.6527564 
##     titanic_1.fare 
##          4.1106587
plot(skew, type="b", col='black')

Correlation Matrix
cor_titanic<-data.frame(titanic_1$survived, titanic_1$age, titanic_1$sibsp, titanic_1$parch, titanic_1$fare)
cor(cor_titanic)
##                    titanic_1.survived titanic_1.age titanic_1.sibsp
## titanic_1.survived         1.00000000   -0.05741486     -0.01140343
## titanic_1.age             -0.05741486    1.00000000     -0.24234489
## titanic_1.sibsp           -0.01140343   -0.24234489      1.00000000
## titanic_1.parch            0.11543601   -0.14931063      0.37395967
## titanic_1.fare             0.24785762    0.17720569      0.14213054
##                    titanic_1.parch titanic_1.fare
## titanic_1.survived       0.1154360      0.2478576
## titanic_1.age           -0.1493106      0.1772057
## titanic_1.sibsp          0.3739597      0.1421305
## titanic_1.parch          1.0000000      0.2176495
## titanic_1.fare           0.2176495      1.0000000
Column Attributes
  • Survived: Of the 1043 passengers in the cleaned dataset, 425 survived and 618 did not survive.
table(titanic_1$survived)
## 
##   0   1 
## 618 425
  • Sex: There were 386 females and 657 males onboard.
table(titanic_1$sex)
## 
## female   male 
##    386    657
  • Embarked: There were 3 different ports( C, Q, S).

    • 212(20.3%) passengers came in through port C(Cherbourg).

    • 50(4.8%) passengers came in through port Q(Queenstown).

    • 781(74.9%) passengers came in through port S(Southampton).

table(titanic_1$embarked)
## 
##   C   Q   S 
## 212  50 781
  • Sibsp:

    • There were 682 passengers with no siblings or spouses onboard.

    • 280 passengers had 1 sibling or spouse onboard.

    • 36 passengers had 2 siblings or spouses onboard.

    • 16 passengers had 3 siblings or spouses onboard.

    • 22 passengers had 4 siblings or spouses onboard.

    • 6 passengers had 5 siblings or spouses onboard.

    • 1 passenger had 8 siblings or spouses onboard.

table(titanic_1$sibsp)
## 
##   0   1   2   3   4   5   8 
## 682 280  36  16  22   6   1
  • Parch:

    • There were 765 passengers with no parents or children onboard.

    • 160 passengers had 1 parent or child onboard.

    • 97 passengers had 2 parents or children onboard.

    • 8 passengers had 3 parents or children onboard.

    • 5 passengers had 4 parents or children onboard.

    • 6 passengers had 5 parents or children onboard.

    • 2 passengers had 6 parents or children onboard.

table(titanic_1$parch)
## 
##   0   1   2   3   4   5   6 
## 765 160  97   8   5   6   2
  • Fare
table(titanic_1$fare)
## 
##        0   3.1708   4.0125        5   6.2375   6.4375     6.45   6.4958 
##        8        1        1        1        1        1        1        3 
##     6.75     6.95    6.975        7   7.0458     7.05   7.0542    7.125 
##        2        1        2        1        1        7        2        4 
##   7.1417    7.225   7.2292     7.25   7.2833   7.4958   7.5208     7.55 
##        1       13       13       14        1        3        1        3 
##   7.5792   7.6292     7.65    7.725   7.7333   7.7417     7.75    7.775 
##        1        1        6        1        4        1       20       23 
##   7.7958      7.8   7.8208   7.8292     7.85   7.8542    7.875   7.8792 
##       10        1        1        1        1       21        1        4 
##   7.8875   7.8958    7.925   8.0292     8.05   8.1583      8.3   8.3625 
##        1       30       23        1       38        1        1        1 
##   8.4042   8.4333   8.5167   8.6542   8.6625   8.6833     8.85   8.9625 
##        1        1        2        1       20        1        1        1 
##        9   9.2167    9.225    9.325     9.35    9.475   9.4833      9.5 
##        2        1        3        1        3        1        1       11 
##   9.5875   9.6875    9.825   9.8375   9.8417   9.8458  10.1708  10.4625 
##        2        1        2        1        1        1        1        2 
##     10.5  10.5167  11.1333  11.2417     11.5       12  12.1833   12.275 
##       35        1        3        2        6        1        2        1 
##  12.2875    12.35   12.475   12.525    12.65  12.7375   12.875       13 
##        2        4        4        1        1        1        1       58 
##  13.4167     13.5   13.775  13.7917  13.8583  13.8625     13.9       14 
##        2        7        3        1        3        1        2        1 
##  14.1083     14.4  14.4542  14.4583     14.5       15  15.0333  15.0458 
##        2        3        7        1        7        1        1        2 
##     15.1  15.2458     15.5    15.55  15.7417    15.75    15.85     15.9 
##        1        5        2        2        3        2        4        3 
##       16     16.1     16.7     17.4     17.8       18    18.75  18.7875 
##        2        8        3        2        2        4        3        2 
##  19.2583     19.5  20.2125    20.25   20.525   20.575       21   21.075 
##        4        2        3        3        3        4       13        5 
##   22.025   22.525       23       24    24.15  25.5875     25.7  25.9292 
##        3        3        7        2        6        1        1        2 
##       26    26.25  26.2833  26.2875  26.3875    26.55       27  27.4458 
##       48        6        1        3        1       18        2        1 
##  27.7208    27.75     27.9     28.5  28.5375  28.7125       29   29.125 
##        8        5        6        2        1        1        3        6 
##     29.7       30  30.0708     30.5  30.6958       31   31.275  31.3875 
##        4        6        2        5        1        2        7        7 
##     31.5  31.6792  32.3208     32.5       33     33.5  34.0208   34.375 
##        3        1        1        3        2        1        1        5 
##  34.6542     35.5    36.75  37.0042     38.5       39     39.4     39.6 
##        1        3        4        3        1        7        2        1 
##  39.6875   40.125  41.5792     42.4     42.5     45.5     46.9     47.1 
##        7        1        4        1        1        1        8        2 
##     49.5  49.5042       50  50.4958  51.4792  51.8625       52  52.5542 
##        1        2        1        2        2        2        6        4 
##     53.1       55  55.4417     55.9  56.4958  56.9292       57    57.75 
##        6        1        4        2        4        2        2        2 
##  57.9792     59.4       60   61.175  61.3792  61.9792  63.3583       65 
##        2        4        2        2        2        2        2        5 
##     66.6     69.3    69.55       71  71.2833     73.5  75.2417    75.25 
##        2        2        1        2        2        7        2        2 
##  76.2917  76.7292  77.2875  77.9583  78.2667    78.85     79.2    79.65 
##        2        3        2        3        2        3        5        3 
##  81.8583  82.1708  82.2667  83.1583   83.475     86.5  89.1042       90 
##        3        1        2        6        2        3        1        5 
##  91.0792     93.5  106.425    108.9 110.8833  113.275      120   133.65 
##        2        4        3        3        3        3        4        1 
##    134.5 135.6333 136.7792 146.5208   151.55 153.4625 164.8667 211.3375 
##        5        4        2        2        6        3        4        4 
##    211.5 221.7792  227.525 247.5208  262.375      263 512.3292 
##        5        3        4        3        7        6        4
plot(titanic_1$fare, type='l')

summary(titanic_1$fare)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    8.05   15.75   36.60   35.08  512.33
  • Age:

    • The oldest passenger onboard was 80 years old.

    • The youngest passenger onboard was about 2 months old (0.17 years).

    • The average age for the passengers onboard is approximately 30 years.

table(titanic_1$age)
## 
## 0.17 0.33 0.42 0.67 0.75 0.83 0.92    1    2    3    4    5    6    7    8    9 
##    1    1    1    1    3    3    2   10   12    7   10    5    6    4    6   10 
##   10   11 11.5   12   13   14 14.5   15   16   17   18 18.5   19   20 20.5   21 
##    4    4    1    3    5    8    2    6   19   20   39    3   29   23    1   41 
##   22 22.5   23 23.5   24 24.5   25   26 26.5   27   28 28.5   29   30 30.5   31 
##   43    1   26    1   47    1   34   30    1   30   32    3   30   40    2   23 
##   32 32.5   33   34 34.5   35   36 36.5   37   38 38.5   39   40 40.5   41   42 
##   24    4   21   16    2   23   31    2    9   13    1   20   18    3   11   18 
##   43   44   45 45.5   46   47   48   49   50   51   52   53   54   55 55.5   56 
##    9   10   21    2    6   14   14    9   15    8    6    4   10    8    1    4 
##   57   58   59   60   61   62   63   64   65   66   67   70 70.5   71   74   76 
##    5    6    3    7    5    4    4    5    3    1    1    2    1    2    1    1 
##   80 
##    1
summary(titanic_1$age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.17   21.00   28.00   29.81   39.00   80.00
  • Overall Analysis & Plot
suppressPackageStartupMessages(library(dplyr))
library(explore)
titanic_1 %>% explore_all()

Factors

str(titanic_1$survived)
##  num [1:1043] 1 1 0 0 0 1 1 0 1 0 ...
# Convert survived to a two-level factor; the labels default to the levels.
titanic_1$survived <- factor(titanic_1$survived, levels = c("0", "1"))
str(titanic_1$survived)
##  Factor w/ 2 levels "0","1": 2 2 1 1 1 2 2 1 2 1 ...
str(titanic_1$embarked)
##  chr [1:1043] "S" "S" "S" "S" "S" "S" "S" "S" "S" "C" "C" "C" "C" "S" "S" ...
# Convert embarked to a factor over the three ports; labels default to levels.
titanic_1$embarked <- factor(titanic_1$embarked, levels = c("C", "Q", "S"))
str(titanic_1$embarked)
##  Factor w/ 3 levels "C","Q","S": 3 3 3 3 3 3 3 3 3 1 ...
str(titanic_1$sex)
##  chr [1:1043] "female" "male" "female" "male" "female" "male" "female" ...
# Convert sex to a two-level factor; the labels default to the levels.
titanic_1$sex <- factor(titanic_1$sex, levels = c("female", "male"))
str(titanic_1$sex)
##  Factor w/ 2 levels "female","male": 1 2 1 2 1 2 1 2 1 2 ...
head(titanic_1)
## # A tibble: 6 × 7
##   survived sex      age sibsp parch  fare embarked
##   <fct>    <fct>  <dbl> <dbl> <dbl> <dbl> <fct>   
## 1 1        female 29        0     0 211.  S       
## 2 1        male    0.92     1     2 152.  S       
## 3 0        female  2        1     2 152.  S       
## 4 0        male   30        1     2 152.  S       
## 5 0        female 25        1     2 152.  S       
## 6 1        male   48        0     0  26.6 S

Dependent Variable

table(titanic_1$survived)
## 
##   0   1 
## 618 425
prop.table(table(titanic_1$survived))
## 
##         0         1 
## 0.5925216 0.4074784
Correlation

Plot Survival with other features to see if any correlation exists.

  • Sex:

    • 79.5% of the males did not survive while 20.5% survived.

    • 24.9% of the females did not survive while 75.1% survived.

    • The female passengers had a higher survival rate in the titanic than the male passengers.

library(explore)
titanic_1 %>% explore(survived, target=sex)

  • Embarked:

    • 64.1% of the passengers that came in through S didn’t survive while 35.9% survived.

    • 74% of the passengers that came in through Q didn’t survive while 26% survived.

    • 37.7% of the passengers that came in through C didn’t survive while 62.3% survived.

    • Port C has the highest survival rate out of the 3 ports.

titanic_1 %>% explore(survived, target=embarked)

  • Sibsp:

    • 62.9% of passengers without siblings or spouses onboard didn’t survive while 37.1% survived.

    • 47.5% of passengers with 1 sibling or spouse onboard didn’t survive while 52.5% survived…etc.

titanic_1 %>% explore(survived, target=sibsp)

  • The code below shows the full table of the proportion of those that survived and those that didn’t based on Sibsp.
# Share of survivors and non-survivors for each sibsp value.
# survived is compared against its level labels "1" and "0".
Prop_Sibsp <- titanic_1 %>%
  group_by(sibsp) %>%
  summarise(
    Survived = mean(survived == "1"),
    DidnotSurvive = mean(survived == "0")
  )
Prop_Sibsp
## # A tibble: 7 × 3
##   sibsp Survived DidnotSurvive
##   <dbl>    <dbl>         <dbl>
## 1     0    0.371         0.629
## 2     1    0.525         0.475
## 3     2    0.444         0.556
## 4     3    0.375         0.625
## 5     4    0.136         0.864
## 6     5    0             1    
## 7     8    0             1
  • Parch:

    • 65% of passengers with no parents or children onboard did not survive while 35% survived.

    • 40.6% of passengers with 1 parent or child aboard did not survive while 59.4% survived…etc.

titanic_1 %>% explore(survived, target=parch)

  • Full table of survival proportion based on Parch.
# Share of survivors and non-survivors for each parch value.
# survived is compared against its level labels "1" and "0".
Prop_Parch <- titanic_1 %>%
  group_by(parch) %>%
  summarise(
    Survived = mean(survived == "1"),
    DidnotSurvive = mean(survived == "0")
  )
Prop_Parch
## # A tibble: 7 × 3
##   parch Survived DidnotSurvive
##   <dbl>    <dbl>         <dbl>
## 1     0    0.350         0.650
## 2     1    0.594         0.406
## 3     2    0.567         0.433
## 4     3    0.625         0.375
## 5     4    0.2           0.8  
## 6     5    0.167         0.833
## 7     6    0             1
  • Fare
# Use the cleaned data (titanic_1) like the other survival plots; the raw
# `titanic` table still contains rows with missing values.
titanic_1 %>% explore(survived, target=fare)

  • Partial table of survival proportion based on Fare.
# Share of survivors and non-survivors for each distinct fare.
# survived is compared against its level labels "1" and "0".
Prop_fare <- titanic_1 %>%
  group_by(fare) %>%
  summarise(
    Survived = mean(survived == "1"),
    DidnotSurvive = mean(survived == "0")
  )
head(Prop_fare)
## # A tibble: 6 × 3
##    fare Survived DidnotSurvive
##   <dbl>    <dbl>         <dbl>
## 1  0        0.25          0.75
## 2  3.17     1             0   
## 3  4.01     0             1   
## 4  5        0             1   
## 5  6.24     0             1   
## 6  6.44     0             1
  • Age:

    • 64.1% of passengers aged 18 did not survive while 35.9% survived.

    • 73.2% of passengers aged 21 did not survive while 26.8% survived…etc.

    • Both the youngest passenger (0.17 years, about 2 months) and the oldest passenger (80 years) survived.

titanic_1%>%explore_bar(survived, target = age)

  • Partial table of survival proportion based on Age.
# Share of survivors and non-survivors for each distinct age.
# survived is compared against its level labels "1" and "0".
Prop_age <- titanic_1 %>%
  group_by(age) %>%
  summarise(
    Survived = mean(survived == "1"),
    DidnotSurvive = mean(survived == "0")
  )
head(Prop_age)
## # A tibble: 6 × 3
##     age Survived DidnotSurvive
##   <dbl>    <dbl>         <dbl>
## 1  0.17    1             0    
## 2  0.33    0             1    
## 3  0.42    1             0    
## 4  0.67    1             0    
## 5  0.75    0.667         0.333
## 6  0.83    1             0

Data Preparation

set.seed(1000)
titanic_rows <- nrow(titanic_1)
# Size of the training split: the first 80% of the rows, rounded down.
# Rounding first keeps both index vectors on whole row numbers; the previous
# fractional start (rows * 0.8 + 1) made the `:` sequence stop one short, so
# the last row was silently left out of the test index.
titanic_train_size <- as.integer(floor(titanic_rows * 0.8))
titanic_train_index <- seq_len(titanic_train_size)
titanic_test_index <- seq.int(titanic_train_size + 1L, titanic_rows)
Build Test and Training Dataset
titanic_train<-titanic_1[titanic_train_index, ]
titanic_test<-titanic_1[titanic_test_index, ]
  • The training dataset covers rows 1 to 834 and the test dataset covers rows 835 to 1043.
# Derive the 80/20 boundary from the data instead of hard-coding row numbers,
# so the split stays correct if the number of cleaned rows changes.
train_rows <- as.integer(floor(nrow(titanic_1) * 0.8))
# The first 80% of the rows form the training set.
train_titanic <- titanic_1[seq_len(train_rows), ]
# The remaining rows, through the last one, form the test set.
test_titanic <- titanic_1[seq.int(train_rows + 1L, nrow(titanic_1)), ]

Training

# Fit a classification tree (method = "class") that predicts `survived`
# from sex, age, sibsp, parch, fare and embarked, using the training rows only.
library(rpart)
fit<-rpart(survived~sex+age+sibsp+parch+fare+embarked,
           data=train_titanic,
           method="class")
# Print the fitted tree's node/split listing.
fit
## n= 834 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
##  1) root 834 373 0 (0.5527578 0.4472422)  
##    2) sex=male 511 110 0 (0.7847358 0.2152642)  
##      4) age>=10 483  89 0 (0.8157350 0.1842650) *
##      5) age< 10 28   7 1 (0.2500000 0.7500000) *
##    3) sex=female 323  60 1 (0.1857585 0.8142415)  
##      6) fare< 10.1625 42  19 0 (0.5476190 0.4523810)  
##       12) age>=19.5 28   9 0 (0.6785714 0.3214286) *
##       13) age< 19.5 14   4 1 (0.2857143 0.7142857) *
##      7) fare>=10.1625 281  37 1 (0.1316726 0.8683274)  
##       14) sibsp>=3.5 7   1 0 (0.8571429 0.1428571) *
##       15) sibsp< 3.5 274  31 1 (0.1131387 0.8868613) *
plot(fit)

suppressPackageStartupMessages(library(rattle))
library(rattle)
library(rpart.plot)
library(RColorBrewer)
fancyRpartPlot(fit)

Prediction

# Predict a survival class for each row of the test set with the fitted tree.
Prediction<-predict(fit, test_titanic, type = "class")
# Pair each predicted class with the passenger's sex for inspection and export.
Prediction_df<-data.frame(PassengerSex=test_titanic$sex, Survived=Prediction)
# Print the full prediction table.
Prediction_df
##     PassengerSex Survived
## 1           male        0
## 2           male        0
## 3           male        0
## 4           male        0
## 5           male        0
## 6           male        0
## 7         female        0
## 8         female        0
## 9           male        0
## 10        female        1
## 11          male        0
## 12          male        0
## 13          male        0
## 14        female        1
## 15          male        0
## 16          male        0
## 17          male        0
## 18        female        0
## 19          male        0
## 20          male        0
## 21          male        0
## 22          male        0
## 23        female        0
## 24          male        0
## 25          male        0
## 26        female        1
## 27        female        0
## 28          male        0
## 29        female        1
## 30          male        0
## 31          male        0
## 32          male        0
## 33          male        0
## 34          male        0
## 35          male        0
## 36          male        1
## 37        female        1
## 38          male        0
## 39        female        0
## 40          male        0
## 41          male        0
## 42        female        1
## 43        female        1
## 44          male        0
## 45        female        1
## 46          male        0
## 47          male        0
## 48        female        1
## 49        female        0
## 50          male        0
## 51        female        1
## 52        female        0
## 53          male        0
## 54          male        0
## 55          male        0
## 56          male        0
## 57          male        0
## 58        female        0
## 59          male        0
## 60          male        0
## 61        female        0
## 62          male        1
## 63          male        0
## 64          male        0
## 65        female        0
## 66          male        0
## 67          male        0
## 68          male        0
## 69        female        0
## 70        female        0
## 71          male        0
## 72          male        0
## 73        female        0
## 74          male        1
## 75          male        1
## 76        female        1
## 77        female        1
## 78        female        1
## 79          male        1
## 80          male        1
## 81          male        1
## 82          male        0
## 83          male        0
## 84        female        1
## 85          male        0
## 86          male        0
## 87          male        0
## 88          male        1
## 89        female        1
## 90        female        1
## 91          male        0
## 92          male        0
## 93          male        0
## 94          male        0
## 95          male        0
## 96        female        0
## 97          male        0
## 98          male        0
## 99        female        1
## 100         male        0
## 101         male        0
## 102         male        0
## 103         male        0
## 104         male        0
## 105         male        0
## 106         male        1
## 107         male        1
## 108         male        1
## 109         male        1
## 110       female        1
## 111       female        1
## 112         male        0
## 113         male        0
## 114       female        1
## 115       female        1
## 116         male        0
## 117       female        1
## 118         male        0
## 119         male        0
## 120         male        0
## 121         male        0
## 122         male        0
## 123         male        0
## 124       female        0
## 125         male        0
## 126       female        1
## 127       female        1
## 128       female        1
## 129         male        0
## 130         male        0
## 131         male        0
## 132         male        0
## 133         male        0
## 134         male        0
## 135         male        0
## 136       female        1
## 137         male        1
## 138         male        0
## 139       female        1
## 140       female        1
## 141         male        0
## 142       female        1
## 143         male        0
## 144         male        0
## 145         male        0
## 146         male        0
## 147       female        0
## 148         male        0
## 149         male        0
## 150       female        0
## 151         male        0
## 152         male        0
## 153       female        1
## 154       female        1
## 155         male        0
## 156         male        0
## 157         male        0
## 158         male        0
## 159         male        0
## 160         male        0
## 161         male        0
## 162         male        0
## 163         male        1
## 164       female        1
## 165         male        0
## 166         male        0
## 167         male        0
## 168         male        0
## 169         male        1
## 170       female        1
## 171       female        1
## 172         male        0
## 173       female        1
## 174       female        0
## 175         male        0
## 176         male        0
## 177       female        1
## 178         male        0
## 179       female        1
## 180         male        0
## 181         male        0
## 182         male        0
## 183         male        0
## 184       female        1
## 185         male        0
## 186         male        0
## 187       female        1
## 188         male        0
## 189         male        0
## 190       female        1
## 191         male        0
## 192         male        0
## 193         male        0
## 194       female        0
## 195         male        0
## 196         male        0
## 197         male        0
## 198       female        0
## 199         male        0
## 200         male        0
## 201         male        0
## 202         male        0
## 203         male        0
## 204       female        1
## 205         male        0
## 206       female        1
## 207         male        0
## 208         male        0
## 209         male        0
write.csv(Prediction_df, file="Titanicdtree.csv", row.names = FALSE)