Loading data from assignment Week2

To avoid any issues with the database connector, the data was exported to a CSV file so that we can work with it independently.

# Read the exported ratings CSV from GitHub; the column-type message is silenced.
df_movies_rate <- read_csv(
  file = "https://raw.githubusercontent.com/alinsimon/data607/main/DF_MovieRating.csv",
  show_col_types = FALSE
)

# Open the loaded ratings for a quick visual inspection.
view(df_movies_rate)

Data format

Before working with the data, we will reshape it into a new data frame with one row per critic and one column per movie.

# Unique movie titles, with every space stripped from each title
Movies_name <- distinct(df_movies_rate, movie_title) |>
  mutate(movie_title = str_replace_all(movie_title, " ", ""))

# Unique critics, kept in a single column named Critic
critics_name <- df_movies_rate |>
  select(Critic = name) |>
  distinct()

# Starting point for the global baseline table: one row per critic
global_baseline <- critics_name

# Look up the rating one critic gave to one movie.
#
# x: movie title with all spaces removed (titles in df_movies_rate are
#    compared after their spaces are stripped the same way).
# y: critic name, matched against the `name` column of df_movies_rate.
#
# Returns a single value: the stored rating, or NA when the critic has no row
# for the movie or when the stored rating is NA or 0 (0 is treated as
# "not rated"). Reads the global data frame df_movies_rate.
get_movierate <- function(x, y) {
  matches <- df_movies_rate |>
    filter(str_replace_all(movie_title, " ", "") == x, name == y)
  ratings <- matches[["rating"]]

  # No row for this movie/critic pair: report "not rated" instead of handing
  # back a zero-length value that cannot be assigned into a table cell.
  if (length(ratings) == 0) {
    return(NA_real_)
  }

  # If the pair appears more than once, use the first row so the result is
  # always exactly one value.
  rating <- ratings[[1]]
  if (is.na(rating) || rating == 0) NA_real_ else rating
}

# Add one column per movie to global_baseline and fill it, critic by critic,
# with the value returned by get_movierate().
movie_titles <- as.character(Movies_name[[1]])
critic_names <- as.character(critics_name[[1]])

# Iterating over the values (and seq_along for rows) is safe when either
# table is empty, unlike 1:nrow(), which would count 1, 0.
for (movie_name in movie_titles) {
  # Start the movie's column as all NA
  global_baseline[[movie_name]] <- NA

  for (critic_row in seq_along(critic_names)) {
    # Address the cell by column name so the result does not depend on how
    # many columns global_baseline already has.
    global_baseline[critic_row, movie_name] <-
      get_movierate(movie_name, critic_names[critic_row])
  }
}

# Add each critic's average rating (user_avg) and collect every individual
# rating into numeric_values_all, which is used afterwards for the overall mean.

# Rating columns are everything except the critic label and the derived
# columns. Selecting them by name avoids coercing the Critic text to numeric
# (which produced NA plus a warning) and avoids the `2:ncol(x)-1` precedence
# trap, which R parses as `(2:ncol(x)) - 1`.
rating_cols <- setdiff(
  names(global_baseline),
  c("Critic", "user_avg", "user_avg_less_meanmovie")
)
rating_matrix <- as.matrix(global_baseline[rating_cols])

# Row-wise mean; NA entries (movies the critic did not rate) are skipped
global_baseline[["user_avg"]] <- rowMeans(rating_matrix, na.rm = TRUE)

# Every rating in the table, laid out critic by critic
numeric_values_all <- as.numeric(t(rating_matrix))


# Add user_avg_less_meanmovie: each critic's average rating minus the mean of
# all individual ratings in the table.
mean_value_general <- mean(numeric_values_all, na.rm = TRUE)

# One vectorised subtraction over the whole column; this also behaves
# correctly when the table has no rows, unlike a 1:nrow() loop.
global_baseline[["user_avg_less_meanmovie"]] <-
  global_baseline[["user_avg"]] - mean_value_general

global_baseline
## # A tibble: 5 × 9
##   Critic `Dune:PartTwo` KillersoftheFlowerMoon Mission:Impossible-D…¹ PoorThings
##   <chr>           <dbl>                  <dbl>                  <dbl>      <dbl>
## 1 Chris               1                      5                      3         NA
## 2 David               5                      4                      3          4
## 3 Juan                1                      5                      4          4
## 4 Micha…              1                      3                      2          4
## 5 Sam                 2                      4                      5          5
## # ℹ abbreviated name: ¹​`Mission:Impossible-DeadReckoningPartTwo`
## # ℹ 4 more variables: TheLastVoyageoftheDemeter <dbl>, TheMarvels <dbl>,
## #   user_avg <dbl>, user_avg_less_meanmovie <dbl>

Visualize Summary

# Per-movie average rating across critics; NA entries are skipped.
# The two derived per-critic columns are excluded so only movie columns remain.
# mean() is wrapped in a lambda because passing extra arguments through
# across()'s `...` is deprecated as of dplyr 1.1.0.
movie_avg <- global_baseline |>
  summarize(
    across(
      .cols = where(is.numeric) & !c("user_avg", "user_avg_less_meanmovie"),
      .fns = \(x) mean(x, na.rm = TRUE)
    )
  )

# Each movie's average expressed relative to the overall mean rating
movie_avg_less_mean <- movie_avg |>
  mutate(across(where(is.numeric), \(x) x - mean_value_general))

Global Baseline

#global_baseline

# Render the critic-by-movie table with styling, wrapped in a scroll box
global_baseline |>
  kable() |>
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) |>
  scroll_box(width = "100%", height = "100%")
Critic Dune:PartTwo KillersoftheFlowerMoon Mission:Impossible-DeadReckoningPartTwo PoorThings TheLastVoyageoftheDemeter TheMarvels user_avg user_avg_less_meanmovie
Chris 1 5 3 NA NA NA 3.000000 -0.5000000
David 5 4 3 4 5 3 4.000000 0.5000000
Juan 1 5 4 4 3 4 3.500000 0.0000000
Michael 1 3 2 4 5 1 2.666667 -0.8333333
Sam 2 4 5 5 NA 5 4.200000 0.7000000

Movie Average

# Render the per-movie averages with styling, wrapped in a scroll box
movie_avg |>
  kable() |>
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) |>
  scroll_box(width = "100%", height = "100%")
Dune:PartTwo KillersoftheFlowerMoon Mission:Impossible-DeadReckoningPartTwo PoorThings TheLastVoyageoftheDemeter TheMarvels
2 4.2 3.4 4.25 4.333333 3.25

Movie Average - mean movie

# Render the mean-centred movie averages with styling, wrapped in a scroll box
movie_avg_less_mean |>
  kable() |>
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) |>
  scroll_box(width = "100%", height = "100%")
Dune:PartTwo KillersoftheFlowerMoon Mission:Impossible-DeadReckoningPartTwo PoorThings TheLastVoyageoftheDemeter TheMarvels
-1.5 0.7 -0.1 0.75 0.8333333 -0.25