Overview

Five participants were asked to complete a survey reporting their feelings about six recent movies:

  • Ghost
  • Home Alone
  • Pretty Woman
  • Teenage Mutant Ninja Turtles
  • The Hunt for Red October
  • Total Recall

I forgot to mention that it’s 1990 and these were the top six grossing movies of the year.

Methods

1. Participants were sent a Google survey via email

Link to sample survey

Answers were on a 5 point Likert scale. Questions could also be answered ‘Other’ or left blank.

  • Strongly Agree
  • Agree
  • Neutral
  • Disagree
  • Strongly Disagree

2. Results were uploaded as a csv to Github

Link to dataset

#Create file path of data in github
file_path <- "https://raw.githubusercontent.com/catfoodlover/Data607/main/Data607_HW2_Data_WilliamAiken.csv"

3. Created an account on bit.io, a free site for hosting SQL databases

Link to bit.io

4. Data was pulled from Github into R for processing

#Read in data
movie_df <- read_csv(file_path, show_col_types = FALSE)

#Rename columns
names(movie_df) <- c("Timestamp", "Ghost", "Pretty_Woman", "Home_Alone", "TMNT", "THFRO", "Total_Recall")

#Add ID column and drop the time stamp
movie_df <- movie_df %>% mutate(USERID = row_number()) %>% select(-c("Timestamp"))

#Reshape from wide to long to make it easier to normalize the data
temp <- melt(data = movie_df, id.vars = "USERID", measure.vars = c("Ghost", "Pretty_Woman", "Home_Alone", "TMNT", "THFRO", "Total_Recall"))

temp <- temp %>% mutate(value = ifelse(value %in% c("Strongly agree","Agree","Neutral","Strongly disagree", "Disagree"), value, 'NULL'))

temp$variable <- as.character(temp$variable)

temp <-
  temp %>% mutate(
    variable = case_when(
      variable == "Pretty_Woman" ~ "Pretty Woman",
      variable == "Home_Alone" ~ "Home Alone",
      variable == "TMNT" ~ "Teenage Mutatant Ninja Turtles",
      variable == "THFRO" ~ "The Hunt for Red October",
      variable == "Total_Recall" ~ "Total Recall",
      TRUE ~ variable))
      

#check to make sure we have a row for every respondent and movie
temp %>% group_by(variable) %>% count(value) %>% View()

#create ids for movie and response
temp$movieID<- temp %>% group_indices(variable)
## Warning: The `...` argument of `group_keys()` is deprecated as of dplyr 1.0.0.
## Please `group_by()` first
temp$responseID<- temp %>% group_indices(value)

5. Data was reshaped and split into 3 sub tables for data normalization

#create movie table
movies <- temp %>% select(movieID, movie_name = variable) %>% distinct()

#create response table
responses <- temp %>% select(responseID, value) %>% distinct()

#create main table
main_tlb <- temp %>% select(USERID, movieID, responseID) %>% distinct()

6. Tables were loaded to bit.io as postgres SQL statement

#get my password for bit.i
password <- key_get("bit.io", "catfoodlover_demo_db_connection")

#connect to bit.io
con <- dbConnect(RPostgreSQL::PostgreSQL(), dbname = 'bitdotio', 
                 host = 'db.bit.io',
                 port = 5432,
                 user = 'bitdotio',
                 password = password)


#Create movies table
DBI::dbSendQuery(con, 'CREATE TABLE IF NOT EXISTS "catfoodlover/demo_repo"."movies" (
  movieID INTEGER,
  movie_name TEXT
)')
## <PostgreSQLResult>
#Insert data into table
DBI::dbSendQuery(
  con,
  'INSERT INTO "catfoodlover/demo_repo"."movies" VALUES (1,\'Ghost\'), (3, \'Pretty Woman\'), (2, \'Home Alone\'), (4, \'Teenage Mutatant Ninja Turtles\'), (5, \'The Hunt for Red October\'), (6, \'Total Recall\');'
)
## <PostgreSQLResult>
#Create response table
DBI::dbSendQuery(
  con,
  'CREATE TABLE IF NOT EXISTS "catfoodlover/demo_repo"."responses" (
  responseID INTEGER,
  value TEXT
)'
)
## <PostgreSQLResult>
#Insert data into table
DBI::dbSendQuery(
  con,
  'INSERT INTO "catfoodlover/demo_repo"."responses" VALUES (1,\'Agree\'), (2, \'Disagree\'), (3, \'Neutral\'), (4, \'NULL\'), (5, \'Strongly agree\');'
)
## <PostgreSQLResult>
#Create main table
DBI::dbSendQuery(
  con,
  'CREATE TABLE IF NOT EXISTS "catfoodlover/demo_repo"."main" (
  USERID INTEGER,
  movieID INTEGER,
  responseID INTEGER
)'
)
## <PostgreSQLResult>
DBI::dbSendQuery(
  con,
  'INSERT INTO "catfoodlover/demo_repo"."main" VALUES
(1,1,1),
(2,1,1),
(3,1,1),
(4,1,2),
(5,1,3),
(1,3,3),
(2,3,4),
(3,3,5),
(4,3,1),
(5,3,5),
(1,2,5),
(2,2,1),
(3,2,2),
(4,2,1),
(5,2,5),
(1,4,5),
(2,4,1),
(3,4,3),
(4,4,4),
(5,4,1),
(1,5,4),
(2,5,4),
(3,5,5),
(4,5,2),
(5,5,4),
(1,6,5),
(2,6,1),
(3,6,1),
(4,6,1),
(5,6,4);')
## <PostgreSQLResult>

7. Data was rejoined and pulled from bit.io

d <- dbSendQuery(con, 'SELECT * FROM "catfoodlover/demo_repo"."main" AS main
                       LEFT JOIN "catfoodlover/demo_repo"."movies" AS movies
                       ON main.movieID = movies.movieID
                       LEFT JOIN "catfoodlover/demo_repo"."responses"  AS response
                       ON main.responseID = response.responseID;')

data <- fetch(d)

8. Tables were dropped

#drop my tables
dbSendQuery(con, 'DROP TABLE "catfoodlover/demo_repo"."main";')
## <PostgreSQLResult>
dbSendQuery(con, 'DROP TABLE "catfoodlover/demo_repo"."movies";')
## <PostgreSQLResult>
dbSendQuery(con, 'DROP TABLE "catfoodlover/demo_repo"."responses";')
## <PostgreSQLResult>

9. Data is summarised

data$value[data$value == 'NULL'] <- NA

data %>% select(movie_name, value) %>% mutate(value = factor(value, c("Strongly agree", "Agree", "Neutral", "Disagree", "Strongly Disagree"))) %>% tbl_summary(by = movie_name)
Characteristic Ghost, N = 51 Home Alone, N = 51 Pretty Woman, N = 51 Teenage Mutatant Ninja Turtles, N = 51 The Hunt for Red October, N = 51 Total Recall, N = 51
value
Strongly agree 0 (0%) 2 (40%) 2 (50%) 1 (25%) 1 (50%) 1 (25%)
Agree 3 (60%) 2 (40%) 1 (25%) 2 (50%) 0 (0%) 3 (75%)
Neutral 1 (20%) 0 (0%) 1 (25%) 1 (25%) 0 (0%) 0 (0%)
Disagree 1 (20%) 1 (20%) 0 (0%) 0 (0%) 1 (50%) 0 (0%)
Strongly Disagree 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Unknown 0 0 1 1 3 1

1 n (%)

Conclusions