221024_Quiz2_jerome

# Show the current working directory; "train.csv" is read relative to it.
getwd()

## [1] "C:/Users/Jerome/Documents/0000_Work_Files/0000_Montgomery_College/Data_Science_101/Data_101_Fall_2022/221024_Quiz2"

### Initialize
library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(dplyr)
library(readr)

### Read the dataset
# Load the CSV from the working directory into a tibble; readr guesses the
# column types and reports them in a message.
train <- readr::read_csv(file = "train.csv")

## Rows: 1310 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): name, sex, ticket, cabin, embarked, boat, home.dest
## dbl (7): pclass, survived, age, sibsp, parch, fare, body
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

### Question 1
# Keep only the outcome, class, sex, age, and fare columns.
newdf <- train %>%
  select(survived, pclass, sex, age, fare)

### Question 2: Nine male passengers older than 50 survived.

# Survivors (survived == 1) older than 50. filter() keeps only rows where
# every condition is TRUE, so rows with a missing age are dropped.
old_survive <- newdf %>%
  filter(survived == 1, age > 50)

# Count the matching passengers by sex.
table(old_survive[["sex"]])

## 
## female   male 
##     29      9

### Question 3: The mean age of those who lived was 28.9 years; the mean age of those who died was 30.5 years.
### The mean fare of those who lived was 49.36 (pounds?); the mean fare of those who died was 23.35 (pounds?).
### Survival appears to be related to class: survivors paid, on average, about twice the fare of those who died.
# Mean age and mean fare for each value of `survived`.
#
# summarise() needs an aggregation function: a bare column name is returned
# row by row instead of being collapsed to one value per group.
# na.rm = TRUE is required because `age` contains missing values, and mean()
# returns NA when any input is NA.
#
# Returns one row per distinct value of `survived`.
newdf %>%
  group_by(survived) %>%
  summarise(
    mean_age = mean(age, na.rm = TRUE),
    mean_fare = mean(fare, na.rm = TRUE)
  )

# Split the passengers by outcome so each group's averages can be computed
# separately. filter() keeps only rows where the condition is TRUE.
lived <- newdf %>% filter(survived == 1)
died <- newdf %>% filter(survived == 0)

# Without na.rm = TRUE the mean is NA, because `age` has missing values.
mean(lived[["age"]])

## [1] NA

# Mean age of survivors, ignoring missing ages.
mean(lived[["age"]], na.rm = TRUE)

## [1] 28.91823

# Mean age of those who died, ignoring missing ages.
mean(died[["age"]], na.rm = TRUE)

## [1] 30.54537

# Mean fare paid by survivors.
mean(lived[["fare"]], na.rm = TRUE)

## [1] 49.36118

# Mean fare paid by those who died.
mean(died[["fare"]], na.rm = TRUE)

## [1] 23.35383

### Question 4
# Add a Fare_Today column equal to fare * 28.98.
# NOTE(review): 28.98 is presumably a 1912-to-present price conversion
# factor -- confirm against the quiz prompt.
# The result is only printed; it is not assigned, so `newdf` is unchanged.
mutate(newdf, Fare_Today = fare * 28.98)

## # A tibble: 1,310 × 6
##    survived pclass sex       age  fare Fare_Today
##       <dbl>  <dbl> <chr>   <dbl> <dbl>      <dbl>
##  1        1      1 female 29     211.       6125.
##  2        1      1 male    0.917 152.       4392.
##  3        0      1 female  2     152.       4392.
##  4        0      1 male   30     152.       4392.
##  5        0      1 female 25     152.       4392.
##  6        1      1 male   48      26.6       769.
##  7        1      1 female 63      78.0      2259.
##  8        0      1 male   39       0           0 
##  9        1      1 female 53      51.5      1492.
## 10        0      1 male   71      49.5      1435.
## # … with 1,300 more rows

### Question 5  I'm lost on this one. 

## newdf <- subset(newdf, select = -c(fare))  # `select` must be passed with `=`; dplyr equivalent: select(newdf, -fare)

221024_Quiz2_jerome_anderson

Jerome

2022-10-24