Titanic Dplyr

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.0     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.1.8
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# Load the training data from the working directory.
titanic_data <- read.csv(file = "titanic_train.csv")

# Keep only the columns used in the analysis below.
newdf <- select(titanic_data, Survived, Pclass, Sex, Age, Fare)

# Per-column summary statistics for the reduced data frame.
summary(object = newdf)

##     Survived          Pclass          Sex                 Age       
##  Min.   :0.0000   Min.   :1.000   Length:891         Min.   : 0.42  
##  1st Qu.:0.0000   1st Qu.:2.000   Class :character   1st Qu.:20.12  
##  Median :0.0000   Median :3.000   Mode  :character   Median :28.00  
##  Mean   :0.3838   Mean   :2.309                      Mean   :29.70  
##  3rd Qu.:1.0000   3rd Qu.:3.000                      3rd Qu.:38.00  
##  Max.   :1.0000   Max.   :3.000                      Max.   :80.00  
##                                                      NA's   :177    
##       Fare       
##  Min.   :  0.00  
##  1st Qu.:  7.91  
##  Median : 14.45  
##  Mean   : 32.20  
##  3rd Qu.: 31.00  
##  Max.   :512.33  
##

# Number of rows in the reduced data frame.
newdf %>% nrow()

## [1] 891

# Survivors older than 50. filter() keeps only rows where every condition is
# TRUE, so passengers with a missing Age are excluded.
over50alive <- newdf %>%
  filter(Survived == 1, Age > 50)

# Count the men among those survivors. Sex is a character column and the
# comparison is case-sensitive; the Kaggle Titanic file codes it in lowercase
# ("male"/"female"), so an exact match on "Male" returns zero rows. Lower-case
# the column before comparing so the count is correct regardless of casing.
nrow(over50alive %>% filter(tolower(Sex) == "male"))

## [1] 0

Titanic Dplyr

Stina Drill

2023-03-07