library(tidyverse)## -- Attaching packages ------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0 v purrr 0.2.5
## v tibble 1.4.2 v dplyr 0.7.6
## v tidyr 0.8.2 v stringr 1.3.1
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts ---------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(titanic)
library(ggplot2)
library(dplyr)

# Stack the two data sets shipped with the titanic package into one tibble:
# the rows of titanic_test first, followed by the rows of titanic_train.
# NOTE(review): presumably titanic_test has no Survived column, in which case
# bind_rows() fills Survived with NA for those rows -- TODO confirm, since
# every survival count and plot built from `df` depends on it.
df <- dplyr::bind_rows(
  titanic::titanic_test,
  titanic::titanic_train
)
df <- as_tibble(df)
# Survivors: keep the rows of `df` whose Survived value equals 1, store them
# and show them. Rows where the comparison is NA are not kept by filter().
# Recorded output at authoring time: a 342 x 12 tibble.
total_vivos <- dplyr::filter(df, Survived == 1)
total_vivos
# Children: keep the rows of `df` with Age below 15, store them and show them.
# Rows with a missing Age are not kept by filter().
# Recorded output at authoring time: a 109 x 12 tibble.
niñes_total <- dplyr::filter(df, Age < 15)
niñes_total
# Women: keep the rows of `df` whose Sex is "female", store them and show them.
# Recorded output at authoring time: a 466 x 12 tibble.
mujeres_total <- dplyr::filter(df, Sex == "female")
mujeres_total

# Stray narrative text that had been fused onto the assignment above and made
# the line unparseable; the sentence is cut off in the source:
#   "En el barco iban" (English: "On board the ship there were ...")
# Plot 1: number of passengers for each combination of sex and class.
# geom_count() tallies the observations at each (x, y) position; the tally
# (`n`) is also mapped to colour as a discrete factor.
ggplot(df) +
  geom_count(
    mapping = aes(x = Sex, y = factor(Pclass), color = factor(..n..))
  ) +
  ggtitle("Total de pasajeros por sexo y clase")

# Plot 2: ticket fare against sex, jittered, coloured by survival status.
# Recorded warning at authoring time: 1 row with missing values was removed.
# NOTE(review): rows whose Survived is NA (if any) get their own colour --
# confirm that this is intended.
ggplot(df) +
  geom_jitter(
    mapping = aes(x = Fare, y = Sex, color = factor(Survived))
  ) +
  ggtitle("Muertos y sobrevivientes según precio del ticket y sexo")
# Plot 3: sex against class, jittered, coloured by survival status.
# Pclass is used as a plain (non-factor) y value here, as in the original.
ggplot(df) +
  geom_jitter(
    mapping = aes(x = Sex, y = Pclass, color = factor(Survived))
  ) +
  ggtitle("Sobrevivientes y muertos por clase y sexo")

# Plot 4: women only -- count of passengers for each combination of survival
# status and class; the count (`n`) is also mapped to colour.
ggplot(mujeres_total) +
  geom_count(
    mapping = aes(
      x = factor(Survived),
      y = factor(Pclass),
      color = factor(..n..)
    )
  ) +
  ggtitle("Mujeres vivas y muertas por clase")

# Plot 5: passengers younger than 15 -- bar per survival status, with the
# bars filled by class.
ggplot(niñes_total) +
  geom_bar(mapping = aes(x = factor(Survived), fill = factor(Pclass))) +
  ggtitle("Sobrevivientes y muertos menores de 15 años por clase")

# Plot 6: passenger counts per class, filled by sex, with one panel per
# value of Survived.
# NOTE(review): rows whose Survived is NA (if any) end up in their own panel
# -- confirm that this is intended.
ggplot(data = df, mapping = aes(x = Pclass, fill = factor(Sex))) +
  geom_bar() +
  facet_wrap(~ Survived) +
  ggtitle("Cantidad de muertos y sobrevivientes según sexo y clase")