Library

library(stringr)
# Read the bike-sharing CSV from the working directory into a data frame.
bike <- read.csv(file = "bike_sharing_data.csv")

Question 1

# is.finite()
# is.na() yes
# is.nan()
# is.null()

Question 2

# na.omit(df)
# which(is.na(df$var)==TRUE)
# str_detect(df,"NA")
# table(is.na(df))
# complete.cases(df)

Question 3

# bad_data <- str_subset(bike$humidity, "[a-z, A-Z]") 
# can return all the values that contain a character.

# bike$humidity <- str_replace_all(bike$humidity,bad_data,"61") 
# can search the variable humidity for all the bad_data values and replace them all with 61.

Question 4

# bike <- transform(bike, weather= as.integer(weather))
# bike$weather <- factor(bike$weather, levels = c(1,2,3,4), labels = c("spring","summer","fall","winter"))
# bike$weather <- factor(bike$weather, levels = c(0,1), labels = c("no","yes"))
# bike$weather <- factor(bike$weather, levels = c(1,2,3,4), labels = c("Clear","Mist","Light Snow/Rain", "Heavy Snow/Rain"))

Question 7

# Import the raw cast table, then print a compact summary of its columns.
cast <- read.csv(file = "raw_cast.csv")
str(object = cast)
## 'data.frame':    1298 obs. of  3 variables:
##  $ X     : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Name.1: chr  "Angela Bassett" "Peter Krause" "Oliver Stark" "Aisha Hinds" ...
##  $ Name.2: chr  "Athena Grant\n                  87 episodes, 2018-2022" "Bobby Nash\n                  87 episodes, 2018-2022" "Evan 'Buck' Buckley\n                  87 episodes, 2018-2022" "Henrietta 'Hen' Wilson\n                  87 episodes, 2018-2022" ...
# TRUE if at least one cell anywhere in the data frame is missing.
anyNA(cast)
## [1] FALSE

Question 8

# Does any Name.2 entry contain a literal newline character?
has_newline <- grepl("\n", cast[["Name.2"]], fixed = TRUE)
any(has_newline)
## [1] TRUE

Question 9

library(tidyr)
library(stringr)
library(tidyr)
# Split Name.2 at the first newline into the character name and the episode
# details; extra = "merge" keeps any further newlines inside the second piece.
cast <- separate(
  data = cast,
  col = "Name.2",
  into = c("Character Name", "Episode Info"),
  sep = "\n",
  extra = "merge"
)
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 118 rows [1181, 1182,
## 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195,
## 1196, 1197, 1198, 1199, 1200, ...].

Question 10

# Strip leading and trailing whitespace from the episode details
# (str_trim() trims both sides by default).
cast[["Episode Info"]] <- str_trim(cast[["Episode Info"]])