## Warning: package 'tidyverse' was built under R version 3.5.1
## -- Attaching packages -------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1     v purrr   0.2.4
## v tibble  1.4.2     v dplyr   0.7.6
## v tidyr   0.8.0     v stringr 1.3.0
## v readr   1.1.1     v forcats 0.3.0
## Warning: package 'dplyr' was built under R version 3.5.1
## -- Conflicts ----------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Dados

Importa dados do Kaggle Titanic.

## Parsed with column specification:
## cols(
##   PassengerId = col_integer(),
##   Survived = col_integer(),
##   Pclass = col_integer(),
##   Name = col_character(),
##   Sex = col_character(),
##   Age = col_double(),
##   SibSp = col_integer(),
##   Parch = col_integer(),
##   Ticket = col_character(),
##   Fare = col_double(),
##   Cabin = col_character(),
##   Embarked = col_character()
## )
## Parsed with column specification:
## cols(
##   PassengerId = col_integer(),
##   Pclass = col_integer(),
##   Name = col_character(),
##   Sex = col_character(),
##   Age = col_double(),
##   SibSp = col_integer(),
##   Parch = col_integer(),
##   Ticket = col_character(),
##   Fare = col_double(),
##   Cabin = col_character(),
##   Embarked = col_character()
## )
# Add Portuguese-labelled analysis columns to the training data. The raw
# columns are kept as they are; each new column is derived from one of them.
train <- train %>% mutate(
  # Survived is an integer coded 0/1.
  Sobreviveu = factor(Survived, levels = c(0, 1), labels = c("Não", "Sim")),
  # Pclass is an integer coded 1/2/3.
  Classe = factor(
    Pclass,
    levels = c(1, 2, 3),
    labels = c("1ª Classe", "2ª Classe", "3ª Classe")
  ),
  Embarque = factor(Embarked),
  # Sex is a character column holding "female"/"male", so the levels must be
  # those strings. Numeric levels match no value and would turn every entry
  # into NA.
  Sexo = factor(
    Sex,
    levels = c("female", "male"),
    labels = c("Feminino", "Masculino")
  ),
  Idade = Age,
  Tarifa = Fare
)

# Inspect the storage type of the raw Survived, Pclass and Sex columns;
# the lines starting with "##" are the echoed output of each call.
str(train$Survived)
##  int [1:891] 0 1 1 1 0 0 0 0 1 1 ...
str(train$Pclass)
##  int [1:891] 3 1 3 1 3 3 1 3 3 2 ...
str(train$Sex)
##  chr [1:891] "male" "female" "female" "female" "male" "male" "male" ...

Pessoas mais velhas que pagaram menos sobreviveram?

## Warning: Removed 177 rows containing non-finite values (stat_smooth).
## Warning: Removed 177 rows containing missing values (geom_point).

O preço da passagem variava de acordo com a classe?

## Warning: Removed 177 rows containing missing values (geom_point).