Inspiration

1- Which streaming platform(s) can a given TV show be found on?
2- What is the IMDb rating of a given TV show?
3- How do the target age groups of TV shows compare across the streaming platforms they can be found on?
4- In which year was a TV show produced, and which streaming platform(s) can it be found on?

# Reading the dataset
tvshows <- read_csv("C:/Users/hukha/Desktop/MS - Data Science/Python Datacamp/tv_shows.csv", col_names = TRUE) %>% select(-c(type))

# Changing improper name
colnames(tvshows) <- str_replace(colnames(tvshows), "X1", "ID")

# Head
head(tvshows) %>% DT::datatable()
# Cleaning data

## Extracting % from Rotten tomatoes
tvshows$`Rotten Tomatoes` <- str_extract(tvshows$`Rotten Tomatoes`, pattern = "\\d+") %>%  str_replace_na(replacement = 0) %>% as.numeric()
tvshows$IMDb <- str_replace_na(tvshows$IMDb, replacement = 0) %>% as.numeric()

## Factors
tvshows$Age <- as.factor(tvshows$Age)

# Trimming
tvshows$Title <- str_trim(tvshows$Title, side=c("both", "left", "right"))
# Visualizing frequency of TV shows

tvshows %>% select(Netflix, `Prime Video`, Hulu, `Disney+`) %>% gather(key="Media_Service_Provider", value="TV shows") %>% group_by(`Media_Service_Provider`) %>% summarise(`Freq` = sum(`TV shows`)) %>% arrange(Freq) %>% ggplot()+geom_bar(aes(reorder(x=Media_Service_Provider, Freq), y=Freq), stat="identity",fill="steelblue") + labs(title ="Number of TV Shows by Media Service Providers", x="Number of TV Shows", y="Media Service Providers") + geom_text(aes(`Media_Service_Provider`,Freq, label=Freq),vjust=-0.3) + theme_update(plot.title= element_text(hjust=0.5)) + theme_classic()