# Library ----
library(stringr)
# Load the bike-sharing dataset (path is relative to the working directory).
bike <- utils::read.csv(file = "bike_sharing_data.csv")
# Question 1 ----
# is.finite()
# is.na() yes
# is.nan()
# is.null()
# Question 2 ----
# na.omit(df)
# which(is.na(df$var)==TRUE)
# str_detect(df,"NA")
# table(is.na(df))
# complete.cases(df)
# Question 3 ----
# bad_data <- str_subset(bike$humidity, "[a-z, A-Z]")
# returns all the humidity values that contain a letter (note: the class `[a-z, A-Z]` also matches a comma or a space).
# bike$humidity <- str_replace_all(bike$humidity,bad_data,"61")
# searches the humidity variable for every value in bad_data and replaces each match with 61.
# Question 4 ----
# bike <- transform(bike, weather= as.integer(weather))
# bike$weather <- factor(bike$weather, levels = c(1,2,3,4), labels = c("spring","summer","fall","winter"))
# bike$weather <- factor(bike$weather, levels = c(0,1), labels = c("no","yes"))
# bike$weather <- factor(bike$weather, levels = c(1,2,3,4), labels = c("Clear","Mist","Light Snow/Rain", "Heavy Snow/Rain"))
# Question 7 ----
# Read the raw cast listing, then print a compact summary of its structure
# (dimensions, column names, column types, first few values).
cast <- utils::read.csv(file = "raw_cast.csv")
utils::str(cast)
## 'data.frame': 1298 obs. of 3 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Name.1: chr "Angela Bassett" "Peter Krause" "Oliver Stark" "Aisha Hinds" ...
## $ Name.2: chr "Athena Grant\n 87 episodes, 2018-2022" "Bobby Nash\n 87 episodes, 2018-2022" "Evan 'Buck' Buckley\n 87 episodes, 2018-2022" "Henrietta 'Hen' Wilson\n 87 episodes, 2018-2022" ...
# TRUE if at least one cell of `cast` is missing. anyNA() gives the same answer
# as any(is.na(cast)) without first building the full logical matrix.
anyNA(cast)
## [1] FALSE
# Question 8 ----
# Does any entry of the Name.2 column contain an embedded line break?
any(grepl(pattern = "\n", x = cast[["Name.2"]]))
## [1] TRUE
# Question 9 ----
library(tidyr)
library(stringr)
# Split Name.2 at the first "\n" into the character's name and the trailing
# episode information. `extra = "merge"` keeps any further line breaks inside
# "Episode Info" instead of dropping the surplus pieces.
# `fill = "right"` makes the NA padding explicit: with the default
# (fill = "warn") separate() produced the same result but warned
# "Expected 2 pieces. Missing pieces filled with `NA` in 118 rows"
# (the listed rows start at 1181), i.e. those rows have no "\n" and therefore
# get NA in "Episode Info".
cast <- separate(
  cast,
  "Name.2",
  into = c("Character Name", "Episode Info"),
  sep = "\n",
  extra = "merge",
  fill = "right"
)
# Question 10 ----
# Strip leading and trailing whitespace from the episode information
# (str_trim() trims both sides by default).
cast[["Episode Info"]] <- str_trim(cast[["Episode Info"]])