library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the raw cast table; text columns stay character vectors, not factors.
cast <- read.csv(file = "raw_cast.csv", stringsAsFactors = FALSE)

Question 7

# Basic integrity checks and a structural overview of `cast`.

# Total number of NA cells across every column of the data frame.
sum(is.na(cast))  
## [1] 0
# TRUE when de-duplicating whole rows (all columns compared) removes nothing.
# NOTE(review): X looks like a unique row index (1..1298 per the summary
# below), which would make this comparison trivially TRUE -- consider checking
# only Name.1/Name.2 for duplicates; confirm against the data.
nrow(cast) == nrow(unique(cast))  
## [1] TRUE
# Column types plus a preview of the leading values of each column.
str(cast) 
## 'data.frame':    1298 obs. of  3 variables:
##  $ X     : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Name.1: chr  "Angela Bassett" "Peter Krause" "Oliver Stark" "Aisha Hinds" ...
##  $ Name.2: chr  "Athena Grant\n                  87 episodes, 2018-2022" "Bobby Nash\n                  87 episodes, 2018-2022" "Evan 'Buck' Buckley\n                  87 episodes, 2018-2022" "Henrietta 'Hen' Wilson\n                  87 episodes, 2018-2022" ...
# Per-column summary: quantiles for the integer column X, length/class/mode
# for the two character columns.
summary(cast)  
##        X             Name.1             Name.2         
##  Min.   :   1.0   Length:1298        Length:1298       
##  1st Qu.: 325.2   Class :character   Class :character  
##  Median : 649.5   Mode  :character   Mode  :character  
##  Mean   : 649.5                                        
##  3rd Qu.: 973.8                                        
##  Max.   :1298.0
# Frequency of each actor name, most frequent first. The previous call used the
# placeholder `cast$column_name`; no such column exists in `cast`, so `table()`
# received NULL and printed "< table of extent 0 >".
head(sort(table(cast$Name.1), decreasing = TRUE))
## (output not captured here -- re-run to regenerate)
# First few actor names (Name.1 is the second column of `cast`).
head(cast$Name.1)
## [1] "Angela Bassett"  "Peter Krause"    "Oliver Stark"    "Aisha Hinds"    
## [5] "Kenneth Choi"    "Corinne Massiah"

Question 8

# Illustration: break every value of the second column (the actor names) into
# pieces at each single space; the result is a list with one character vector
# per row.
split_values <- strsplit(cast[[2]], split = " ")
head(split_values)
## [[1]]
## [1] "Angela"  "Bassett"
## 
## [[2]]
## [1] "Peter"  "Krause"
## 
## [[3]]
## [1] "Oliver" "Stark" 
## 
## [[4]]
## [1] "Aisha" "Hinds"
## 
## [[5]]
## [1] "Kenneth" "Choi"   
## 
## [[6]]
## [1] "Corinne" "Massiah"
library(stringr)

Question 9

# Split the combined role string in Name.2 into the character name (Name2) and
# the episode summary (Episode). Name.2 has the form
# "<character>\n<padding><n> episodes, <years>", so the newline is the
# delimiter. The previous version split Name.1 (the actor name) on the first
# space, which stored the actor's first and last name in Name2/Episode.
cast[, c("Name2", "Episode")] <- str_split_fixed(cast$Name.2, "\n", 2)
# Show the two new columns. The previous version re-printed `split_values`
# from Question 8, which this step never modifies.
head(cast[, c("Name2", "Episode")])
## (output not captured here -- re-run to regenerate; row 1 is expected to show
##  Name2 = "Athena Grant" with Episode still carrying its leading padding)

Question 10

# Remove the padding that followed the newline so Episode starts at the
# episode count.
cast$Episode <- str_trim(cast$Episode, side = "both")
head(cast)
## (output not captured here -- re-run to regenerate; row 1 is expected to show
##  Name2 = "Athena Grant" and Episode = "87 episodes, 2018-2022" -- confirm)