#Create file path of data in github
file_path <- "https://raw.githubusercontent.com/catfoodlover/Data607/main/Data607_HW2_Data_WilliamAiken.csv"
#Read in data
movie_df <- read_csv(file_path, show_col_types = FALSE)
#Rename columns
names(movie_df) <- c("Timestamp", "Ghost", "Pretty_Woman", "Home_Alone", "TMNT", "THFRO", "Total_Recall")
#Add ID column and drop the time stamp
movie_df <- movie_df %>% mutate(USERID = row_number()) %>% select(-c("Timestamp"))
#Reshape from wide to long to make it easier to normalize the data
temp <- melt(data = movie_df, id.vars = "USERID", measure.vars = c("Ghost", "Pretty_Woman", "Home_Alone", "TMNT", "THFRO", "Total_Recall"))
temp <- temp %>% mutate(value = ifelse(value %in% c("Strongly agree","Agree","Neutral","Strongly disagree", "Disagree"), value, 'NULL'))
temp$variable <- as.character(temp$variable)
temp <-
temp %>% mutate(
variable = case_when(
variable == "Pretty_Woman" ~ "Pretty Woman",
variable == "Home_Alone" ~ "Home Alone",
variable == "TMNT" ~ "Teenage Mutatant Ninja Turtles",
variable == "THFRO" ~ "The Hunt for Red October",
variable == "Total_Recall" ~ "Total Recall",
TRUE ~ variable))
#check to make sure we have a row for every respondent and movie
temp %>% group_by(variable) %>% count(value) %>% View()
#create ids for movie and response
temp$movieID<- temp %>% group_indices(variable)
## Warning: The `...` argument of `group_keys()` is deprecated as of dplyr 1.0.0.
## Please `group_by()` first
temp$responseID<- temp %>% group_indices(value)
#create movie table
movies <- temp %>% select(movieID, movie_name = variable) %>% distinct()
#create response table
responses <- temp %>% select(responseID, value) %>% distinct()
#create main table
main_tlb <- temp %>% select(USERID, movieID, responseID) %>% distinct()
#get my password for bit.i
password <- key_get("bit.io", "catfoodlover_demo_db_connection")
#connect to bit.io
con <- dbConnect(RPostgreSQL::PostgreSQL(), dbname = 'bitdotio',
host = 'db.bit.io',
port = 5432,
user = 'bitdotio',
password = password)
#Create movies table
DBI::dbSendQuery(con, 'CREATE TABLE IF NOT EXISTS "catfoodlover/demo_repo"."movies" (
movieID INTEGER,
movie_name TEXT
)')
## <PostgreSQLResult>
#Insert data into table
DBI::dbSendQuery(
con,
'INSERT INTO "catfoodlover/demo_repo"."movies" VALUES (1,\'Ghost\'), (3, \'Pretty Woman\'), (2, \'Home Alone\'), (4, \'Teenage Mutatant Ninja Turtles\'), (5, \'The Hunt for Red October\'), (6, \'Total Recall\');'
)
## <PostgreSQLResult>
#Create response table
DBI::dbSendQuery(
con,
'CREATE TABLE IF NOT EXISTS "catfoodlover/demo_repo"."responses" (
responseID INTEGER,
value TEXT
)'
)
## <PostgreSQLResult>
#Insert data into table
DBI::dbSendQuery(
con,
'INSERT INTO "catfoodlover/demo_repo"."responses" VALUES (1,\'Agree\'), (2, \'Disagree\'), (3, \'Neutral\'), (4, \'NULL\'), (5, \'Strongly agree\');'
)
## <PostgreSQLResult>
#Create main table
DBI::dbSendQuery(
con,
'CREATE TABLE IF NOT EXISTS "catfoodlover/demo_repo"."main" (
USERID INTEGER,
movieID INTEGER,
responseID INTEGER
)'
)
## <PostgreSQLResult>
DBI::dbSendQuery(
con,
'INSERT INTO "catfoodlover/demo_repo"."main" VALUES
(1,1,1),
(2,1,1),
(3,1,1),
(4,1,2),
(5,1,3),
(1,3,3),
(2,3,4),
(3,3,5),
(4,3,1),
(5,3,5),
(1,2,5),
(2,2,1),
(3,2,2),
(4,2,1),
(5,2,5),
(1,4,5),
(2,4,1),
(3,4,3),
(4,4,4),
(5,4,1),
(1,5,4),
(2,5,4),
(3,5,5),
(4,5,2),
(5,5,4),
(1,6,5),
(2,6,1),
(3,6,1),
(4,6,1),
(5,6,4);')
## <PostgreSQLResult>
d <- dbSendQuery(con, 'SELECT * FROM "catfoodlover/demo_repo"."main" AS main
LEFT JOIN "catfoodlover/demo_repo"."movies" AS movies
ON main.movieID = movies.movieID
LEFT JOIN "catfoodlover/demo_repo"."responses" AS response
ON main.responseID = response.responseID;')
data <- fetch(d)
#drop my tables
dbSendQuery(con, 'DROP TABLE "catfoodlover/demo_repo"."main";')
## <PostgreSQLResult>
dbSendQuery(con, 'DROP TABLE "catfoodlover/demo_repo"."movies";')
## <PostgreSQLResult>
dbSendQuery(con, 'DROP TABLE "catfoodlover/demo_repo"."responses";')
## <PostgreSQLResult>
data$value[data$value == 'NULL'] <- NA
data %>% select(movie_name, value) %>% mutate(value = factor(value, c("Strongly agree", "Agree", "Neutral", "Disagree", "Strongly Disagree"))) %>% tbl_summary(by = movie_name)
Characteristic | Ghost, N = 51 | Home Alone, N = 51 | Pretty Woman, N = 51 | Teenage Mutatant Ninja Turtles, N = 51 | The Hunt for Red October, N = 51 | Total Recall, N = 51 |
---|---|---|---|---|---|---|
value | ||||||
Strongly agree | 0 (0%) | 2 (40%) | 2 (50%) | 1 (25%) | 1 (50%) | 1 (25%) |
Agree | 3 (60%) | 2 (40%) | 1 (25%) | 2 (50%) | 0 (0%) | 3 (75%) |
Neutral | 1 (20%) | 0 (0%) | 1 (25%) | 1 (25%) | 0 (0%) | 0 (0%) |
Disagree | 1 (20%) | 1 (20%) | 0 (0%) | 0 (0%) | 1 (50%) | 0 (0%) |
Strongly Disagree | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
Unknown | 0 | 0 | 1 | 1 | 3 | 1 |
1
n (%)
|