Charlie Stevens
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stringr)
Question 4:
# Load the bike data and recode the numeric weather codes 1-4 as a labelled
# factor, then inspect the structure of the recoded column.
bike_complete <- read.csv("train.csv") %>%
  mutate(
    weather = factor(
      weather,
      levels = 1:4,
      labels = c("Clear", "Mist", "Light Snow/Rain", "Heavy Snow/Rain")
    )
  )
str(bike_complete[["weather"]])
## Factor w/ 4 levels "Clear","Mist",..: 1 1 1 1 1 2 1 1 1 1 ...
Question 7:
# Read the raw cast listing and show its column names, types and first values.
cast <- read.csv("raw_cast.csv")
str(cast)
## 'data.frame': 1298 obs. of 3 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Name.1: chr "Angela Bassett" "Peter Krause" "Oliver Stark" "Aisha Hinds" ...
## $ Name.2: chr "Athena Grant\n 87 episodes, 2018-2022" "Bobby Nash\n 87 episodes, 2018-2022" "Evan 'Buck' Buckley\n 87 episodes, 2018-2022" "Henrietta 'Hen' Wilson\n 87 episodes, 2018-2022" ...
# Per-column summary: quantiles for the numeric index, length/class for text.
summary(cast)
## X Name.1 Name.2
## Min. : 1.0 Length:1298 Length:1298
## 1st Qu.: 325.2 Class :character Class :character
## Median : 649.5 Mode :character Mode :character
## Mean : 649.5
## 3rd Qu.: 973.8
## Max. :1298.0
# Count missing (TRUE) versus present (FALSE) cells across the whole data frame.
table(is.na(cast))
##
## FALSE
## 3894
Answer: FALSE. There are no missing values; all 3894 cells (1298 rows × 3 columns) are non-NA.
Question 8:
# Split the raw "character\n episodes" text in Name.2 into two clean columns
# (Name2 = character name, Episode = episode count and years), then drop the
# raw column and preview the result.
cast$Name.2 <- trimws(cast$Name.2)
split_data <- strsplit(cast$Name.2, "\n")
# vapply() guarantees a character vector with one value per row; sapply()
# would silently return a list when there are zero rows. Entries without a
# newline yield NA for Episode.
cast$Name2 <- vapply(split_data, function(x) trimws(x[1]), character(1))
cast$Episode <- vapply(split_data, function(x) trimws(x[2]), character(1))
cast$Name.2 <- NULL
head(cast[, c("Name2", "Episode")])
Question 9:
# Split the character/episode text into Name2 and Episode using stringr.
# The text to split lives in the raw Name.2 column, not Name.1: Name.1 holds
# only actor names with no newline, so splitting it leaves Episode empty.
# Name.2 has already been removed from `cast`, so recover it from the source
# file; the row order is unchanged, so the values line up with `cast`.
role_text <- read.csv("raw_cast.csv")$Name.2
# Split once at the first newline: column 1 is the character name, column 2
# is the remainder (episode count and years), or "" when there is no newline.
role_parts <- str_split_fixed(trimws(role_text), "\n", 2)
cast <- cast %>%
  mutate(
    Name2 = trimws(role_parts[, 1]),
    Episode = trimws(role_parts[, 2])
  )
head(cast[, c("Name2", "Episode")])
Question 10:
# Strip leading and trailing whitespace from the episode text (str_trim()
# trims both sides by default) and preview the first values as a vector.
cast[["Episode"]] <- str_trim(cast[["Episode"]])
head(cast[["Episode"]])
## [1] "" "" "" "" "" ""