Check for any missing (NA) values.
library(DBI)
library(rvest)
library(tidyverse)
# Read the survey responses and the show metadata straight from the course
# repository on GitHub.
data607film <- read.csv(
  file = "https://raw.githubusercontent.com/TheWerefriend/data607/master/homework2/data607film.csv"
)
filmMeta <- read.csv(
  file = "https://raw.githubusercontent.com/TheWerefriend/data607/master/homework2/filmMeta.csv"
)

# Short identifiers for the ten shows; they become survey columns 2-11 below.
alias <- c(
  "theQueensGambit", "emilyInParis", "lucifer", "theUmbrellaAcademy",
  "moneyHeist", "darkDesire", "friends", "theCrown", "ratched", "dark"
)

# Replace the survey headers with compact names: timestamp, one rating column
# per show, then the four general questions.
names(data607film) <- c(
  "timestamp", alias,
  "genreFavorites", "genreDislike", "weeklyNetflixHours", "recommendation"
)

# Prepend the alias as the first metadata column.
# NOTE(review): assumes filmMeta has one row per show in the same order as
# alias -- confirm against the CSV.
filmMeta <- cbind(alias, filmMeta)
# Missing-value report before cleaning; the recorded console output follows
# each call.
anyNA(x = data607film)
## [1] FALSE
anyNA(x = filmMeta)
## [1] TRUE

# Drop every row that contains at least one NA, in both tables.
filmMeta <- na.omit(object = filmMeta)
data607film <- na.omit(object = data607film)

# Same report after cleaning: neither table should contain NA any more.
anyNA(x = data607film)
## [1] FALSE
anyNA(x = filmMeta)
## [1] FALSE
# Round-trip both tables through a throwaway SQLite database.
# ":memory:" (with the trailing colon) is SQLite's special name for a private
# in-memory database. Without the trailing colon the name is treated as an
# ordinary file path and a database file is created in the working directory.
con <- dbConnect(RSQLite::SQLite(), ":memory:")

# overwrite = TRUE replaces a table of the same name if one already exists.
dbWriteTable(con, "film", data607film, overwrite = TRUE)
dbWriteTable(con, "meta", filmMeta, overwrite = TRUE)

# Read the tables back as plain data frames for the rest of the analysis.
film_table <- data.frame(dbGetQuery(con, "SELECT * FROM film"))
meta_table <- data.frame(dbGetQuery(con, "SELECT * FROM meta"))

# Release the connection; the in-memory database is discarded with it.
dbDisconnect(con)
# Respondents whose weekly Netflix hours are at or above the median, sorted
# from heaviest to lightest, with the moneyHeist and darkDesire rating columns
# removed.
heavy_watchers <- film_table %>%
  filter(weeklyNetflixHours >= median(weeklyNetflixHours)) %>%
  arrange(desc(weeklyNetflixHours)) %>%
  select(-moneyHeist, -darkDesire)
# Survey answer labels in ascending order of approval.
opinions <- c(
  "No opinion - I haven't seen it",
  "Poor", "Fair", "Average", "Good", "Excellent"
)

# Lookup from answer label to integer score: "No opinion ..." is 0 and
# "Excellent" is 5.
values <- setNames(0:5, opinions)
# Rating columns only: columns 2-9 of heavy_watchers (column 1 is the
# timestamp), one column per remaining show.
heavy_opinions <- heavy_watchers[, 2:9]

not_seen <- "No opinion - I haven't seen it"

# Per show: number of heavy watchers who gave an actual rating.
responses <- vapply(
  heavy_opinions,
  function(ratings) as.numeric(sum(ratings != not_seen)),
  numeric(1),
  USE.NAMES = FALSE
)

# Per show: mean score among those who rated it. The "no opinion" answer maps
# to 0, so it adds nothing to the numerator and is left out of the denominator.
average <- vapply(
  heavy_opinions,
  function(ratings) sum(as.numeric(values[ratings])),
  numeric(1),
  USE.NAMES = FALSE
) / responses

# The same mean doubled, i.e. moved from the 0-5 survey scale to 0-10.
out_of_ten <- average * 2
# Transpose the metadata so each show is a column, then stack the three
# class-level summary vectors underneath as extra rows named after the
# variables.
data <- rbind(t(meta_table), responses, average, out_of_ten)

# The first row comes from the first metadata column: promote it to column
# names, then drop that row together with column 6.
colnames(data) <- data[1, ]
data <- data[-1, -6]

# Drop the same column position from the ratings so both objects stay aligned.
heavy_opinions <- heavy_opinions[, -6]

# Row labels without the 1st and 6th entries (not referenced again in the
# visible part of the script).
rows <- rownames(data)[-c(1, 6)]
# Correlate the class's per-show average rating with two external scores.
# The matrix holds text, so each row is converted back to numbers first.
class_average <- as.numeric(data["average", ])

# Class average vs. IMDB rating.
cor(class_average, as.numeric(data["imdbRating", ]))
## [1] 0.5553707

# Class average vs. Metacritic score.
cor(class_average, as.numeric(data["metacriticScore", ]))
## [1] -0.1232937
# Per show: mean absolute gap between the "metacriticRating" row of data and
# each heavy watcher's own rating doubled onto a 0-10 scale. Despite its name,
# meta_corr holds average differences, not correlation coefficients.
#
# Respondents who answered "no opinion" are skipped. A show that nobody rated
# yields NaN.
#
# NOTE(review): this reads the row "metacriticRating", whereas the correlation
# step reads "metacriticScore" -- confirm both rows exist in data and that
# "metacriticRating" is on a 0-10 scale.
not_seen <- "No opinion - I haven't seen it"

meta_corr <- vapply(
  seq_along(heavy_opinions),
  function(show) {
    ratings <- heavy_opinions[[show]]
    rated <- ratings[ratings != not_seen]
    # Column `show` of data describes the same show as column `show` of
    # heavy_opinions, so the position doubles as the lookup index.
    critic <- as.numeric(data["metacriticRating", show])
    mean(abs(critic - as.numeric(values[rated]) * 2))
  },
  numeric(1)
)
meta_corr
## [1] 3.750000 1.433333 1.700000 0.700000 3.000000 1.400000 2.400000
Everybody likes Friends. Our class “I’ve seen it” levels are somewhat correlated with the total number of votes submitted to IMDB. IMDB ratings are indicative of IMDB popularity. Our class agrees a bit with IMDB ratings, and both disagree a bit with Metacritic. Class opinions were most divergent from Metacritic on Friends and The Queen’s Gambit, and least divergent on The Umbrella Academy.