R Markdown

LIBRARIES

library(readr)
library(readxl)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(purrr)

ASSIGNMENT 1: IMPORT DATA

# Load the Titanic training set (CSV) and the supplementary passenger sheet
# (Excel). Both locations are absolute, machine-specific paths; they are kept
# in one place here so they are easy to change when running elsewhere.
titanic_path <- "C:/Users/hp/Downloads/train.csv"
extra_path <- "C:/Users/hp/OneDrive/Desktop/passenger_extra.xlsx"

titanic <- read_csv(titanic_path)
## Rows: 891 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Name, Sex, Ticket, Cabin, Embarked
## dbl (7): PassengerId, Survived, Pclass, Age, SibSp, Parch, Fare
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
extra <- read_excel(extra_path)

# View() opens a spreadsheet-style viewer window, so it is only called when a
# person is at the console (not when the file is knitted or run in batch).
if (interactive()) {
  View(titanic)
  View(extra)
}

ASSIGNMENT 2: MERGE DATA

# Give the key column in `extra` the same name it has in `titanic`
# ("Passenger Id" -> "PassengerId") so the two tables can be joined on it.
names(extra)[names(extra) == "Passenger Id"] <- "PassengerId"

# merge() defaults to an inner join: only PassengerId values present in both
# tables are kept.
merged_data <- merge(titanic, extra, by = "PassengerId")

# View() opens a viewer window, so only call it in an interactive session.
if (interactive()) {
  View(merged_data)
}

ASSIGNMENT 3: GROUP BY + GRAPHS

# Passenger count for each value of Survived.
survival_summary <- summarise(
  group_by(merged_data, Survived),
  count = n()
)

# One bar per Survived value; bar height is the precomputed count.
ggplot(
  data = survival_summary,
  mapping = aes(factor(Survived), count)
) +
  geom_col()

# Passenger count for each Sex x Survived combination, returned ungrouped.
gender_survival <- summarise(
  group_by(merged_data, Sex, Survived),
  count = n(),
  .groups = "drop"
)

# Side-by-side bars per Sex, coloured by Survived.
ggplot(
  data = gender_survival,
  mapping = aes(Sex, count, fill = factor(Survived))
) +
  geom_col(position = "dodge")

ASSIGNMENT 4: TRACE & RECOVER

# Attach a tracer to mean() that prints a message every time it is called.
# print = FALSE suppresses trace()'s own descriptive line on each call, so
# only the cat() message below appears.
trace(mean, tracer = quote(cat("TRACE: mean() is running\n")), print = FALSE)
## Tracing function "mean" in package "base"
## [1] "mean"
# This call triggers the tracer before the result is returned.
mean(c(10, 20, 30))
## TRACE: mean() is running
## [1] 20
# Remove the tracer so later calls to mean() run untraced.
untrace(mean)
## Untracing function "mean" in package "base"
# Demonstrate the `recover` error handler: with options(error = recover), an
# uncaught error at the console opens a frame browser instead of just printing
# the message. The previous options are saved so they can be restored exactly
# afterwards, rather than being overwritten with NULL.
old_options <- options(error = recover)

x <- c(1, 2, 3)
# A non-numeric `trim` makes mean() stop with an error. Note that
# `na.rm = "TRUE"` would NOT fail here: mean() returns 2 for it.
if (interactive()) {
  mean(x, trim = "a")   # intentional error
} else {
  # In batch/knit runs just show the error message and carry on; the error is
  # caught by try(), so the `recover` handler is not invoked.
  try(mean(x, trim = "a"))   # intentional error
}
# Restored in its own top-level statement so it still runs after the error
# above has aborted the previous one.
options(old_options)

ASSIGNMENT 5: GEOMS

# Bar chart: number of passengers for each value of Survived.
ggplot(data = titanic, mapping = aes(factor(Survived))) +
  geom_bar()

# Bar chart: number of passengers for each Sex.
ggplot(data = titanic, mapping = aes(Sex)) +
  geom_bar()

# Stacked bar chart: passengers per Sex, split by Survived.
ggplot(data = titanic, mapping = aes(Sex, fill = factor(Survived))) +
  geom_bar()

# Histogram of Age in 5-unit bins.
ggplot(data = titanic, mapping = aes(Age)) +
  geom_histogram(binwidth = 5)
## Warning: Removed 177 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Box plot of Age for each value of Survived.
ggplot(data = titanic, mapping = aes(factor(Survived), Age)) +
  geom_boxplot()
## Warning: Removed 177 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

ASSIGNMENT 6: APPLY FUNCTIONS

# sapply(): mean of Age and Fare with NAs dropped, simplified to a named vector.
sapply(X = titanic[c("Age", "Fare")], FUN = mean, na.rm = TRUE)
##      Age     Fare 
## 29.69912 32.20421

# lapply(): summary() of each column, returned as a list.
lapply(X = titanic[c("Age", "Fare")], FUN = summary)
## $Age
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.     NAs 
##    0.42   20.12   28.00   29.70   38.00   80.00     177 
## 
## $Fare
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    7.91   14.45   32.20   31.00  512.33

# purrr::map(): mean of Age and Fare with NAs dropped, returned as a list.
map(select(titanic, Age, Fare), mean, na.rm = TRUE)
## $Age
## [1] 29.69912
## 
## $Fare
## [1] 32.20421



# vapply(): like sapply(), but each result must be a single numeric value.
vapply(
  X = titanic[c("Age", "Fare")],
  FUN = mean,
  FUN.VALUE = numeric(1),
  na.rm = TRUE
)

# split(): list of Fare values, one element per value of Survived.
split(x = titanic[["Fare"]], f = titanic[["Survived"]])

# tapply(): mean Fare within each value of Survived, NAs dropped.
tapply(
  X = titanic[["Fare"]],
  INDEX = titanic[["Survived"]],
  FUN = mean,
  na.rm = TRUE
)
##        0        1 
## 22.11789 48.39541