Please check my sql folder in the repository I attempted to read the sql file and run it directly here but had no success. Therefore I broke it down into chunks. Another solution I want to try in the future is running a bash script, but I am currently using windows which complicated everything. I have ubuntu set up and need to set up my R env on there along with mysql
please reset the values #installing RMYSQL
#install.packages("RMySQL")
Running sql statements to create the db, create tables, and add rows into tables Stored my credentials and host in my .Renviron file
library(readr)
library(RMySQL)
## Warning: package 'RMySQL' was built under R version 4.3.3
## Loading required package: DBI
## Warning: package 'DBI' was built under R version 4.3.3
#current_dir <- getwd()
#sql_folder <- "sql"
#file_path <- file.path(current_dir, sql_folder, 'movie_db.sql')
#print(file_path)
#class(file_path)
#sql_script <- read_file(file_path)
#sql_script <- iconv(sql_script, from = "UTF-8", to = "UTF-8")
#cat(sql_script)
user <- Sys.getenv("DB_USER")
password <- Sys.getenv("DB_PASSWORD")
host <- Sys.getenv("DB_HOST")
connection <- dbConnect(MySQL(),
user = user,
password = password,
host = host)
#dbExecute(connection, sql_script)
# Execute the SQL script (this assumes the script contains valid SQL with statements like CREATE DATABASE, etc.)
dbExecute(connection, "CREATE SCHEMA IF NOT EXISTS ahmed_hassan_607_assignments")
## [1] 1
dbExecute(connection, "
CREATE TABLE ahmed_hassan_607_assignments.Movies (
movie_id INT PRIMARY KEY,
title VARCHAR(255) NOT NULL
)")
## [1] 0
dbExecute(connection, "
CREATE TABLE ahmed_hassan_607_assignments.People (
person_id INT PRIMARY KEY,
gender CHAR(1),
generation VARCHAR(50)
)")
## [1] 0
dbExecute(connection, "
CREATE TABLE ahmed_hassan_607_assignments.Ratings (
rating_id INT AUTO_INCREMENT PRIMARY KEY,
movie_id INT,
person_id INT,
rating INT CHECK (rating BETWEEN 1 AND 5),
FOREIGN KEY (movie_id) REFERENCES ahmed_hassan_607_assignments.Movies(movie_id),
FOREIGN KEY (person_id) REFERENCES ahmed_hassan_607_assignments.People(person_id)
)")
## [1] 0
# Insert data
dbExecute(connection, "
INSERT INTO ahmed_hassan_607_assignments.People (person_id, gender, generation) VALUES
(1, 'M', 'millennial'),
(2, 'M', 'millennial'),
(3, 'M', 'millennial'),
(4, 'M', 'gen z'),
(5, 'F', 'gen z')
")
## [1] 5
dbExecute(connection, "
INSERT INTO ahmed_hassan_607_assignments.Movies (movie_id, title) VALUES
(1, 'Deadpool & Wolverine'),
(2, 'It Ends With Us'),
(3, 'Inside Out 2'),
(4, 'Fly Me to the Moon'),
(5, 'MaxxXine'),
(6, 'Twisters')
")
## [1] 6
dbExecute(connection, "
INSERT INTO ahmed_hassan_607_assignments.Ratings (rating_id, movie_id, person_id, rating) VALUES
(1, 1, 1, 5),
(2, 1, 2, 5),
(3, 1, 3, 5),
(4, 1, 4, 5),
(5, 1, 5, 3),
(6, 2, 1, 3),
(7, 2, 2, NULL),
(8, 2, 3, NULL),
(9, 2, 4, 4),
(10, 2, 5, 5),
(11, 3, 1, NULL),
(12, 3, 2, 2),
(13, 3, 3, NULL),
(14, 3, 4, NULL),
(15, 3, 5, NULL),
(16, 4, 1, NULL),
(17, 4, 2, NULL),
(18, 4, 3, 3),
(19, 4, 4, 2),
(20, 4, 5, NULL),
(21, 5, 1, NULL),
(22, 5, 2, NULL),
(23, 5, 3, 3),
(24, 5, 4, 3),
(25, 5, 5, NULL),
(26, 6, 1, NULL),
(27, 6, 2, 1),
(28, 6, 3, NULL),
(29, 6, 4, 1),
(30, 6, 5, NULL)
")
## [1] 30
# After execution, disconnect from the server
dbDisconnect(connection)
## [1] TRUE
Testing if sql statement were pushed into the db
library(RMySQL)
user <- Sys.getenv("DB_USER")
password <- Sys.getenv("DB_PASSWORD")
host <- Sys.getenv("DB_HOST")
connection <- dbConnect(MySQL(),
user = user, # Replace with your MySQL username
password = password, # Replace with your MySQL password
host = host, # Replace with your MySQL host (e.g., 'localhost')
dbname = 'ahmed_hassan_607_assignments') # Replace with your MySQL database name
movies_df <- dbGetQuery(connection, "SELECT * FROM ahmed_hassan_607_assignments.Movies")
people_df <- dbGetQuery(connection, "SELECT * FROM ahmed_hassan_607_assignments.People")
ratings_df <- dbGetQuery(connection, "SELECT * FROM ahmed_hassan_607_assignments.Ratings")
#test to see if we successfully pulled from the db by running the following lines below
#head(movies_df)
#head(people_df)
#head(ratings_df)
#I would rather omit/drop the values than set them to 0 unless I am accounting for how many people didnt see the movie
#omitting null values
ratings_df <- na.omit(ratings_df)
print(ratings_df)
## rating_id movie_id person_id rating
## 1 1 1 1 5
## 2 2 1 2 5
## 3 3 1 3 5
## 4 4 1 4 5
## 5 5 1 5 3
## 6 6 2 1 3
## 9 9 2 4 4
## 10 10 2 5 5
## 12 12 3 2 2
## 18 18 4 3 3
## 19 19 4 4 2
## 23 23 5 3 3
## 24 24 5 4 3
## 27 27 6 2 1
## 29 29 6 4 1
#setting null values to zero
ratings_df <- dbGetQuery(connection, "SELECT * FROM ahmed_hassan_607_assignments.Ratings")
ratings_df$rating[is.na(ratings_df$rating)] <- 0
print(ratings_df)
## rating_id movie_id person_id rating
## 1 1 1 1 5
## 2 2 1 2 5
## 3 3 1 3 5
## 4 4 1 4 5
## 5 5 1 5 3
## 6 6 2 1 3
## 7 7 2 2 0
## 8 8 2 3 0
## 9 9 2 4 4
## 10 10 2 5 5
## 11 11 3 1 0
## 12 12 3 2 2
## 13 13 3 3 0
## 14 14 3 4 0
## 15 15 3 5 0
## 16 16 4 1 0
## 17 17 4 2 0
## 18 18 4 3 3
## 19 19 4 4 2
## 20 20 4 5 0
## 21 21 5 1 0
## 22 22 5 2 0
## 23 23 5 3 3
## 24 24 5 4 3
## 25 25 5 5 0
## 26 26 6 1 0
## 27 27 6 2 1
## 28 28 6 3 0
## 29 29 6 4 1
## 30 30 6 5 0