library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.0 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.1 ✔ tibble 3.1.8
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the raw passenger records; the relative path is resolved against the
# current working directory.
titanic_data <- read.csv("titanic_train.csv")

# Keep only the five columns used in the analysis below, then print
# per-column summary statistics.
newdf <- select(titanic_data, Survived, Pclass, Sex, Age, Fare)
summary(newdf)
## Survived Pclass Sex Age
## Min. :0.0000 Min. :1.000 Length:891 Min. : 0.42
## 1st Qu.:0.0000 1st Qu.:2.000 Class :character 1st Qu.:20.12
## Median :0.0000 Median :3.000 Mode :character Median :28.00
## Mean :0.3838 Mean :2.309 Mean :29.70
## 3rd Qu.:1.0000 3rd Qu.:3.000 3rd Qu.:38.00
## Max. :1.0000 Max. :3.000 Max. :80.00
## NA's :177
## Fare
## Min. : 0.00
## 1st Qu.: 7.91
## Median : 14.45
## Mean : 32.20
## 3rd Qu.: 31.00
## Max. :512.33
##
# Total number of passengers in the trimmed data set.
nrow(newdf)
## [1] 891
# Survivors (Survived == 1) older than 50. filter() keeps only rows where
# every condition is TRUE, so passengers with a missing Age are excluded too.
over50alive <- newdf %>%
  filter(Survived == 1, Age > 50)
# Count the men among those survivors. The comparison is case-insensitive on
# purpose: the earlier test against "Male" reported 0 rows, most likely because
# the Sex column stores the value in lowercase ("male") -- TODO confirm with
# unique(newdf$Sex).
nrow(over50alive %>% filter(tolower(Sex) == "male"))
## (previous output "[1] 0" came from the case-sensitive comparison; re-run to refresh)