#library(tidyverse)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(DBI)
library(RMySQL)
## Warning: package 'RMySQL' was built under R version 4.3.2
password has been masked
lamadb <- dbConnect(RMySQL::MySQL(),
user = "chhiring.lama65",
password = Sys.getenv("PASSWORD_DB"),
dbname = "chhiring.lama65",
host = "cunydata607sql.mysql.database.azure.com")
dbListTables(lamadb)
## [1] "chess_tournament" "clean_company_ids" "clean_job_dets"
## [4] "company" "gbe_updated" "job"
## [7] "movie_rating"
table_name <- "movie_rating"
column_names <- dbListFields(lamadb, table_name)
query <- sprintf("SELECT * FROM %s", table_name)
table_data <- dbGetQuery(lamadb, query)
table_data
## ratingID CriticName Gender Age The_Creator The_Holdovers The_Marvels
## 1 1 Nancy Jain Female 30 3 <NA> 4
## 2 2 David Goldsmith Male 45 4 1 2
## 3 3 Putar Junee Male 28 2 3 <NA>
## 4 4 Jasprit Devi Female 35 1 <NA> <NA>
## 5 5 John Doe Male 40 5 4 3
## 6 6 Jane Smith Female 38 2 3 4
## 7 7 Alice Johnson Female 25 <NA> 4 2
## Wonka Society_of_the_Snow
## 1 5 <NA>
## 2 <NA> 5
## 3 3 4
## 4 2 3
## 5 <NA> 2
## 6 5 <NA>
## 7 3 5
table_name <- "movie_rating"
query <- paste("SELECT * FROM", table_name)
table_data <- dbGetQuery(lamadb, query)
table_data
## ratingID CriticName Gender Age The_Creator The_Holdovers The_Marvels
## 1 1 Nancy Jain Female 30 3 <NA> 4
## 2 2 David Goldsmith Male 45 4 1 2
## 3 3 Putar Junee Male 28 2 3 <NA>
## 4 4 Jasprit Devi Female 35 1 <NA> <NA>
## 5 5 John Doe Male 40 5 4 3
## 6 6 Jane Smith Female 38 2 3 4
## 7 7 Alice Johnson Female 25 <NA> 4 2
## Wonka Society_of_the_Snow
## 1 5 <NA>
## 2 <NA> 5
## 3 3 4
## 4 2 3
## 5 <NA> 2
## 6 5 <NA>
## 7 3 5
str(table_data)
## 'data.frame': 7 obs. of 9 variables:
## $ ratingID : int 1 2 3 4 5 6 7
## $ CriticName : chr "Nancy Jain" "David Goldsmith" "Putar Junee" "Jasprit Devi" ...
## $ Gender : chr "Female" "Male" "Male" "Female" ...
## $ Age : int 30 45 28 35 40 38 25
## $ The_Creator : chr "3" "4" "2" "1" ...
## $ The_Holdovers : chr NA "1" "3" NA ...
## $ The_Marvels : chr "4" "2" NA NA ...
## $ Wonka : chr "5" NA "3" "2" ...
## $ Society_of_the_Snow: chr NA "5" "4" "3" ...
Convert list columns to numeric vectors
table_data[, 5:9] <- lapply(table_data[, 5:9], function(x) unlist(x))
Convert rating columns to numeric
table_data[, 5:9] <- lapply(table_data[, 5:9], as.numeric)
global_mean <- mean(unlist(table_data[, 5:9]), na.rm = TRUE)
movie_means <- colMeans(table_data[, 5:9], na.rm = TRUE)
# Subtract movie means from global mean
baseline_ratings <- global_mean - movie_means
table_data$The_Creator[is.na(table_data$The_Creator)] <- baseline_ratings["The_Creator"]
table_data$The_Holdovers[is.na(table_data$The_Holdovers)] <- baseline_ratings["The_Holdovers"]
table_data$The_Marvels[is.na(table_data$The_Marvels)] <- baseline_ratings["The_Marvels"]
table_data$Wonka[is.na(table_data$Wonka)] <- baseline_ratings["Wonka"]
table_data$Society_of_the_Snow[is.na(table_data$Society_of_the_Snow)] <- baseline_ratings["Society_of_the_Snow"]
baseline_ratings
## The_Creator The_Holdovers The_Marvels Wonka
## 0.3974359 0.2307692 0.2307692 -0.3692308
## Society_of_the_Snow
## -0.5692308
# Display updated table_data
print(table_data)
dbWriteTable(lamadb, "gbe_updated", table_data, overwrite = TRUE)
## [1] TRUE
dbDisconnect(lamadb)
## [1] TRUE