To avoid any issues with the database connector, the data was exported to a CSV file so that we can work with it independently of the database.
# Load the exported ratings CSV from GitHub (column-type message silenced),
# then pass the result to view() for inspection.
df_movies_rate <- read_csv(
  file = "https://raw.githubusercontent.com/alinsimon/data607/main/DF_MovieRating.csv",
  show_col_types = FALSE
)
view(df_movies_rate)
To start working with the data, we will reshape it into a new data frame with one row per critic and one column per movie.
# Unique movie titles; spaces are stripped so each title can double as a
# compact column name later on.
Movies_name <- mutate(
  distinct(df_movies_rate, movie_title),
  movie_title = str_remove_all(movie_title, " ")
)

# Unique critic names, exposed under the column name `Critic`.
critics_name <- df_movies_rate |>
  select(Critic = name) |>
  distinct()

# The global-baseline table starts out as the list of critics; rating columns
# are appended to it afterwards.
global_baseline <- critics_name
# Look up the rating a critic gave to a movie.
#
# Reads the global `df_movies_rate` table (columns `movie_title`, `name`,
# `rating`).
#
# x: movie title with all spaces removed (the form stored in `Movies_name`).
# y: critic name, compared against the `name` column.
#
# Returns a single numeric rating, or NA_real_ when the critic has no row for
# the movie, when the stored rating is missing, or when it is 0 (0 is treated
# as "not rated"). Always returning a length-1 numeric keeps the result safe
# to assign into a single cell.
get_movierate <- function(x, y) {
  title_keys <- gsub(" ", "", df_movies_rate[["movie_title"]], fixed = TRUE)

  # which() drops NA comparisons, so rows with a missing title or name never
  # match.
  hits <- which(title_keys == x & df_movies_rate[["name"]] == y)

  if (length(hits) == 0L) {
    return(NA_real_)
  }
  if (length(hits) > 1L) {
    warning(
      "Multiple ratings found for critic '", y, "' and movie '", x,
      "'; using the first one.",
      call. = FALSE
    )
  }

  rating <- as.numeric(df_movies_rate[["rating"]][hits[1L]])
  if (is.na(rating) || rating == 0) {
    NA_real_
  } else {
    rating
  }
}
# Add one rating column per movie to `global_baseline`.
# Columns are written by name rather than by position, so the result does not
# depend on how many columns `global_baseline` already has. Iterating over the
# title and critic vectors directly is also safe when either is empty.
for (movie in Movies_name[[1]]) {
  global_baseline[[movie]] <- vapply(
    critics_name[[1]],
    function(critic) {
      # as.numeric() flattens whatever container the lookup returns; anything
      # that is not exactly one value is recorded as "not rated".
      rate <- as.numeric(get_movierate(movie, critic))
      if (length(rate) == 1L) rate else NA_real_
    },
    numeric(1),
    USE.NAMES = FALSE
  )
}
# Add `user_avg`: each critic's mean rating over the movies they rated.
# Rating columns are picked explicitly, excluding the critic name and the
# derived columns. This avoids coercing `Critic` to numeric and the
# `2:ncol(x)-1` precedence trap, which evaluates as `(2:ncol(x)) - 1`.
rating_cols <- setdiff(
  names(global_baseline),
  c("Critic", "user_avg", "user_avg_less_meanmovie")
)
rating_matrix <- as.matrix(global_baseline[rating_cols])
storage.mode(rating_matrix) <- "double"

global_baseline[["user_avg"]] <- rowMeans(rating_matrix, na.rm = TRUE)

# All individual ratings, row by row (NA = not rated); used below for the
# overall mean rating.
numeric_values_all <- as.vector(t(rating_matrix))
# Add `user_avg_less_meanmovie`: each critic's average minus the overall mean
# of every individual rating. The subtraction is vectorized over the whole
# column, which also behaves correctly when there are no critics.
mean_value_general <- mean(numeric_values_all, na.rm = TRUE)
global_baseline[["user_avg_less_meanmovie"]] <-
  global_baseline[["user_avg"]] - mean_value_general
global_baseline
## # A tibble: 5 × 9
## Critic `Dune:PartTwo` KillersoftheFlowerMoon Mission:Impossible-D…¹ PoorThings
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Chris 1 5 3 NA
## 2 David 5 4 3 4
## 3 Juan 1 5 4 4
## 4 Micha… 1 3 2 4
## 5 Sam 2 4 5 5
## # ℹ abbreviated name: ¹`Mission:Impossible-DeadReckoningPartTwo`
## # ℹ 4 more variables: TheLastVoyageoftheDemeter <dbl>, TheMarvels <dbl>,
## # user_avg <dbl>, user_avg_less_meanmovie <dbl>
# Mean rating per movie: every numeric column except the two per-critic
# summary columns. The function is passed as an explicit lambda because
# forwarding `na.rm` through across()'s `...` is deprecated in dplyr >= 1.1.0.
movie_avg <- global_baseline |>
  summarize(across(
    .cols = where(is.numeric) & !c("user_avg", "user_avg_less_meanmovie"),
    .fns = \(ratings) mean(ratings, na.rm = TRUE)
  ))

# Each movie's average expressed as an offset from the overall mean rating.
movie_avg_less_mean <- movie_avg |>
  mutate(across(where(is.numeric), \(avg) avg - mean_value_general))
Global Baseline
# Render the global baseline as a styled table inside a scroll box.
scroll_box(
  kable_styling(
    kable(global_baseline),
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ),
  width = "100%",
  height = "100%"
)
Critic | Dune:PartTwo | KillersoftheFlowerMoon | Mission:Impossible-DeadReckoningPartTwo | PoorThings | TheLastVoyageoftheDemeter | TheMarvels | user_avg | user_avg_less_meanmovie |
---|---|---|---|---|---|---|---|---|
Chris | 1 | 5 | 3 | NA | NA | NA | 3.000000 | -0.5000000 |
David | 5 | 4 | 3 | 4 | 5 | 3 | 4.000000 | 0.5000000 |
Juan | 1 | 5 | 4 | 4 | 3 | 4 | 3.500000 | 0.0000000 |
Michael | 1 | 3 | 2 | 4 | 5 | 1 | 2.666667 | -0.8333333 |
Sam | 2 | 4 | 5 | 5 | NA | 5 | 4.200000 | 0.7000000 |
Movie Average
# Render the per-movie averages as a styled table inside a scroll box.
scroll_box(
  kable_styling(
    kable(movie_avg),
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ),
  width = "100%",
  height = "100%"
)
Dune:PartTwo | KillersoftheFlowerMoon | Mission:Impossible-DeadReckoningPartTwo | PoorThings | TheLastVoyageoftheDemeter | TheMarvels |
---|---|---|---|---|---|
2 | 4.2 | 3.4 | 4.25 | 4.333333 | 3.25 |
Movie Average minus Overall Mean Rating
# Render the per-movie offsets from the overall mean as a styled table inside
# a scroll box.
scroll_box(
  kable_styling(
    kable(movie_avg_less_mean),
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ),
  width = "100%",
  height = "100%"
)
Dune:PartTwo | KillersoftheFlowerMoon | Mission:Impossible-DeadReckoningPartTwo | PoorThings | TheLastVoyageoftheDemeter | TheMarvels |
---|---|---|---|---|---|
-1.5 | 0.7 | -0.1 | 0.75 | 0.8333333 | -0.25 |