Charlie Stevens

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stringr)

Question 4:

# Load train.csv and recode the numeric weather codes 1-4 as a labelled factor.
bike_complete <- read.csv(file = "train.csv")

bike_complete[["weather"]] <- factor(
  bike_complete[["weather"]],
  levels = 1:4,
  labels = c("Clear", "Mist", "Light Snow/Rain", "Heavy Snow/Rain")
)

# Confirm the column is now a 4-level factor.
str(bike_complete[["weather"]])
##  Factor w/ 4 levels "Clear","Mist",..: 1 1 1 1 1 2 1 1 1 1 ...

Question 7:

# Load the raw cast table and inspect its column types.
cast <- read.csv(file = "raw_cast.csv")

str(object = cast)
## 'data.frame':    1298 obs. of  3 variables:
##  $ X     : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Name.1: chr  "Angela Bassett" "Peter Krause" "Oliver Stark" "Aisha Hinds" ...
##  $ Name.2: chr  "Athena Grant\n                  87 episodes, 2018-2022" "Bobby Nash\n                  87 episodes, 2018-2022" "Evan 'Buck' Buckley\n                  87 episodes, 2018-2022" "Henrietta 'Hen' Wilson\n                  87 episodes, 2018-2022" ...
# Per-column summary: quantiles for the numeric index, length/class for text.
summary(object = cast)
##        X             Name.1             Name.2         
##  Min.   :   1.0   Length:1298        Length:1298       
##  1st Qu.: 325.2   Class :character   Class :character  
##  Median : 649.5   Mode  :character   Mode  :character  
##  Mean   : 649.5                                        
##  3rd Qu.: 973.8                                        
##  Max.   :1298.0
# Tabulate the missing-value indicator over every cell of `cast`;
# a lone FALSE bucket means no cell is NA.
table(is.na(cast), useNA = "no")
## 
## FALSE 
##  3894

Answer: FALSE — `is.na(cast)` is FALSE for all 3894 cells, so `cast` contains no missing values.

Question 8:

# Split the raw Name.2 text into the character name (Name2) and the episode
# summary (Episode). Per str(cast), each value looks like
# "<character>\n   <n> episodes, <years>".
cast$Name.2 <- trimws(cast$Name.2)

# The separator is a literal newline, so no regex matching is needed.
split_data <- strsplit(cast$Name.2, "\n", fixed = TRUE)

# vapply() guarantees exactly one string per row (NA when a piece is missing),
# whereas sapply() can silently return a list for empty or ragged input.
cast$Name2 <- vapply(split_data, function(x) trimws(x[1]), character(1))
cast$Episode <- vapply(split_data, function(x) trimws(x[2]), character(1))

# Drop the raw column now that both pieces have been extracted.
cast$Name.2 <- NULL
head(cast[, c("Name2", "Episode")])

Question 9:

# Split the raw "<character>\n   <episodes>" text into Name2 and Episode using
# stringr.
#
# The text to split is the raw Name.2 column. Name.1 holds the actor name
# (e.g. "Angela Bassett") and has no newline in the rows shown, so splitting
# it overwrites Name2 with actor names and leaves Episode as "".
# Name.2 may already have been removed from `cast`; in that case recover it
# from the source file, whose rows are in the same order as `cast`.
raw_role <- if ("Name.2" %in% names(cast)) {
  cast$Name.2
} else {
  read.csv("raw_cast.csv")$Name.2
}

# Two columns: text before the first newline, and everything after it.
role_parts <- str_split_fixed(str_trim(raw_role), "\n", 2)

cast <- cast %>%
  mutate(
    Name2 = str_trim(role_parts[, 1]),
    Episode = str_trim(role_parts[, 2])
  )

head(cast[, c("Name2", "Episode")])

Question 10:

# Strip leading and trailing whitespace from the episode text
# (str_trim() trims both sides by default), then preview the first values.
cast[["Episode"]] <- str_trim(cast[["Episode"]])

head(cast[, "Episode"])
## [1] "" "" "" "" "" ""