Step 1: Load CSV

I collected information on my friends’ preferences using a Google form, so was then able to download a CSV with the responses preloaded.

#avoid filepath shenanigans by using file.choose(), (does not knit)
#f <- file.choose()
#df <- read.csv(f,header=TRUE,sep=",")
df <- read.csv("/Users/alice/Documents/movies.csv",header=TRUE,sep=",")

Step 2: Normalize data

require(reshape2)
## Loading required package: reshape2

One table, “friends”, deals only with who answered the question

emails <- df[,2]
emails
## [1] Alice   Rayna   Gladysa Myraida Inbar   Aaron   Alyssa  Andrea 
## Levels: Aaron Alice Alyssa Andrea Gladysa Inbar Myraida Rayna
friendsid <- c(1:length(emails))
friends <- data.frame(friendsid, emails)
friends
##   friendsid  emails
## 1         1   Alice
## 2         2   Rayna
## 3         3 Gladysa
## 4         4 Myraida
## 5         5   Inbar
## 6         6   Aaron
## 7         7  Alyssa
## 8         8  Andrea

One table, “movies”, is a list of the movies ranked

remove(movie.names,moviesid,movies)
## Warning in remove(movie.names, moviesid, movies): object 'movie.names' not
## found
## Warning in remove(movie.names, moviesid, movies): object 'moviesid' not
## found
## Warning in remove(movie.names, moviesid, movies): object 'movies' not found
titles <- colnames(df[3:8])
moviesid <- c(1:length(titles))
movies <- data.frame(moviesid, titles)
movies
##   moviesid             titles
## 1        1      Black.Panther
## 2        2             Moanna
## 3        3             Frozen
## 4        4           I..Tonya
## 5        5 Back.to.the.Future
## 6        6     What.About.Bob

A third matching table will contain the rank, friend, and movie information

rank.melt <- melt(df[2:8], id="Username")
## Warning: attributes are not identical across measure variables; they will
## be dropped
rankid <- c(1:length(rank.melt$variable))
rank <- data.frame(rankid,rank.melt)
colnames(rank)[3] <- "titles"
rank
##    rankid Username             titles     value
## 1       1    Alice      Black.Panther         1
## 2       2    Rayna      Black.Panther         3
## 3       3  Gladysa      Black.Panther         5
## 4       4  Myraida      Black.Panther  1 (Best)
## 5       5    Inbar      Black.Panther         3
## 6       6    Aaron      Black.Panther         3
## 7       7   Alyssa      Black.Panther         3
## 8       8   Andrea      Black.Panther  1 (Best)
## 9       9    Alice             Moanna         2
## 10     10    Rayna             Moanna         4
## 11     11  Gladysa             Moanna         2
## 12     12  Myraida             Moanna         5
## 13     13    Inbar             Moanna         4
## 14     14    Aaron             Moanna  1 (Best)
## 15     15   Alyssa             Moanna  1 (Best)
## 16     16   Andrea             Moanna         5
## 17     17    Alice             Frozen         3
## 18     18    Rayna             Frozen         5
## 19     19  Gladysa             Frozen         4
## 20     20  Myraida             Frozen         3
## 21     21    Inbar             Frozen         5
## 22     22    Aaron             Frozen         2
## 23     23   Alyssa             Frozen 6 (Worst)
## 24     24   Andrea             Frozen         4
## 25     25    Alice           I..Tonya         4
## 26     26    Rayna           I..Tonya         6
## 27     27  Gladysa           I..Tonya         3
## 28     28  Myraida           I..Tonya         4
## 29     29    Inbar           I..Tonya         2
## 30     30    Aaron           I..Tonya         4
## 31     31   Alyssa           I..Tonya         2
## 32     32   Andrea           I..Tonya         3
## 33     33    Alice Back.to.the.Future         5
## 34     34    Rayna Back.to.the.Future         2
## 35     35  Gladysa Back.to.the.Future 6 (Worst)
## 36     36  Myraida Back.to.the.Future         2
## 37     37    Inbar Back.to.the.Future  1 (Best)
## 38     38    Aaron Back.to.the.Future         5
## 39     39   Alyssa Back.to.the.Future         4
## 40     40   Andrea Back.to.the.Future         2
## 41     41    Alice     What.About.Bob 6 (Worst)
## 42     42    Rayna     What.About.Bob  1 (Best)
## 43     43  Gladysa     What.About.Bob  1 (Best)
## 44     44  Myraida     What.About.Bob 6 (Worst)
## 45     45    Inbar     What.About.Bob 6 (Worst)
## 46     46    Aaron     What.About.Bob 6 (Worst)
## 47     47   Alyssa     What.About.Bob         5
## 48     48   Andrea     What.About.Bob 6 (Worst)

Step 3: Setup SQLite, establish database connection

require(DBI)
## Loading required package: DBI
# Create an RSQLite database
mydb <- dbConnect(RSQLite::SQLite(), "my-db.sqlite")

Step 4: Write tables to SQLite database

dbWriteTable(mydb, "rank", rank, overwrite=TRUE)
dbWriteTable(mydb, "friends", friends, overwrite=TRUE)
dbWriteTable(mydb,"movies",movies, overwrite=TRUE)
dbListTables(mydb)
## [1] "friends" "movies"  "rank"    "summary"

Step 6: Using SQL, clean data

dbSendQuery(mydb, "UPDATE rank SET value='1' WHERE value='1 (Best)';")
## <SQLiteResult>
##   SQL  UPDATE rank SET value='1' WHERE value='1 (Best)';
##   ROWS Fetched: 0 [complete]
##        Changed: 7
dbSendQuery(mydb, "UPDATE rank SET value='6' WHERE value='6 (Worst)';")
## Warning: Closing open result set, pending rows
## <SQLiteResult>
##   SQL  UPDATE rank SET value='6' WHERE value='6 (Worst)';
##   ROWS Fetched: 0 [complete]
##        Changed: 7
dbGetQuery(mydb, "Select * FROM rank ORDER BY Username, value;")
## Warning: Closing open result set, pending rows
##    rankid Username             titles value
## 1      14    Aaron             Moanna     1
## 2      22    Aaron             Frozen     2
## 3       6    Aaron      Black.Panther     3
## 4      30    Aaron           I..Tonya     4
## 5      38    Aaron Back.to.the.Future     5
## 6      46    Aaron     What.About.Bob     6
## 7       1    Alice      Black.Panther     1
## 8       9    Alice             Moanna     2
## 9      17    Alice             Frozen     3
## 10     25    Alice           I..Tonya     4
## 11     33    Alice Back.to.the.Future     5
## 12     41    Alice     What.About.Bob     6
## 13     15   Alyssa             Moanna     1
## 14     31   Alyssa           I..Tonya     2
## 15      7   Alyssa      Black.Panther     3
## 16     39   Alyssa Back.to.the.Future     4
## 17     47   Alyssa     What.About.Bob     5
## 18     23   Alyssa             Frozen     6
## 19      8   Andrea      Black.Panther     1
## 20     40   Andrea Back.to.the.Future     2
## 21     32   Andrea           I..Tonya     3
## 22     24   Andrea             Frozen     4
## 23     16   Andrea             Moanna     5
## 24     48   Andrea     What.About.Bob     6
## 25     43  Gladysa     What.About.Bob     1
## 26     11  Gladysa             Moanna     2
## 27     27  Gladysa           I..Tonya     3
## 28     19  Gladysa             Frozen     4
## 29      3  Gladysa      Black.Panther     5
## 30     35  Gladysa Back.to.the.Future     6
## 31     37    Inbar Back.to.the.Future     1
## 32     29    Inbar           I..Tonya     2
## 33      5    Inbar      Black.Panther     3
## 34     13    Inbar             Moanna     4
## 35     21    Inbar             Frozen     5
## 36     45    Inbar     What.About.Bob     6
## 37      4  Myraida      Black.Panther     1
## 38     36  Myraida Back.to.the.Future     2
## 39     20  Myraida             Frozen     3
## 40     28  Myraida           I..Tonya     4
## 41     12  Myraida             Moanna     5
## 42     44  Myraida     What.About.Bob     6
## 43     42    Rayna     What.About.Bob     1
## 44     34    Rayna Back.to.the.Future     2
## 45      2    Rayna      Black.Panther     3
## 46     10    Rayna             Moanna     4
## 47     18    Rayna             Frozen     5
## 48     26    Rayna           I..Tonya     6

Step 7: Establish constraints using primary and foreign keys

Note: SQLite automatially generates a row called “rowid” which acts as an autoincrement primary key, so the added ID colummns are not strictly necessary. It might be good to set your own primary keys if you want to enforce cascading updates.

It is not possible to set a primary key or foreign in SQLite to an existing table, so the following is a workaround to enforce relationships between the imported tables as a workaround.

dbSendStatement(mydb, "ALTER TABLE friends RENAME TO old_friends;")
## <SQLiteResult>
##   SQL  ALTER TABLE friends RENAME TO old_friends;
##   ROWS Fetched: 0 [complete]
##        Changed: 0
dbSendStatement(mydb,"CREATE TABLE friends (
                      friendsid INTERGER NOT NULL, 
                      Username TEXT NOT NULL PRIMARY KEY);")
## Warning: Closing open result set, pending rows
## <SQLiteResult>
##   SQL  CREATE TABLE friends (
##                       friendsid INTERGER NOT NULL, 
##                       Username TEXT NOT NULL PRIMARY KEY);
##   ROWS Fetched: 0 [complete]
##        Changed: 0
dbSendStatement(mydb, "INSERT INTO friends SELECT * FROM old_friends;")
## Warning: Closing open result set, pending rows
## <SQLiteResult>
##   SQL  INSERT INTO friends SELECT * FROM old_friends;
##   ROWS Fetched: 0 [complete]
##        Changed: 8
dbSendStatement(mydb, "DROP TABLE old_friends;")
## Warning: Closing open result set, pending rows
## <SQLiteResult>
##   SQL  DROP TABLE old_friends;
##   ROWS Fetched: 0 [complete]
##        Changed: 0
dbGetQuery(mydb,"Select * FROM friends;")
## Warning: Closing open result set, pending rows
##   friendsid Username
## 1         1    Alice
## 2         2    Rayna
## 3         3  Gladysa
## 4         4  Myraida
## 5         5    Inbar
## 6         6    Aaron
## 7         7   Alyssa
## 8         8   Andrea
dbSendStatement(mydb, "ALTER TABLE movies RENAME TO old_movies;")
## <SQLiteResult>
##   SQL  ALTER TABLE movies RENAME TO old_movies;
##   ROWS Fetched: 0 [complete]
##        Changed: 0
dbSendStatement(mydb,"CREATE TABLE movies (
                      moviesid INTERGER NOT NULL, 
                      titles TEXT NOT NULL PRIMARY KEY
                      );"
              )
## Warning: Closing open result set, pending rows
## <SQLiteResult>
##   SQL  CREATE TABLE movies (
##                       moviesid INTERGER NOT NULL, 
##                       titles TEXT NOT NULL PRIMARY KEY
##                       );
##   ROWS Fetched: 0 [complete]
##        Changed: 0
dbSendStatement(mydb, "INSERT INTO movies SELECT * FROM old_movies;")
## Warning: Closing open result set, pending rows
## <SQLiteResult>
##   SQL  INSERT INTO movies SELECT * FROM old_movies;
##   ROWS Fetched: 0 [complete]
##        Changed: 6
dbSendStatement(mydb, "DROP TABLE old_movies;")
## Warning: Closing open result set, pending rows
## <SQLiteResult>
##   SQL  DROP TABLE old_movies;
##   ROWS Fetched: 0 [complete]
##        Changed: 0
dbGetQuery(mydb,"Select * FROM movies;")
## Warning: Closing open result set, pending rows
##   moviesid             titles
## 1        1      Black.Panther
## 2        2             Moanna
## 3        3             Frozen
## 4        4           I..Tonya
## 5        5 Back.to.the.Future
## 6        6     What.About.Bob
dbSendStatement(mydb, "ALTER TABLE rank RENAME TO old_rank;")
## <SQLiteResult>
##   SQL  ALTER TABLE rank RENAME TO old_rank;
##   ROWS Fetched: 0 [complete]
##        Changed: 0
dbSendStatement(mydb,"CREATE TABLE rank (
                      rankid INTERGER NOT NULL PRIMARY KEY, 
                      Username TEXT NOT NULL,
                      titles TEXT NOT NULL,
                      value TEXT NOT NULL,
                      FOREIGN KEY (titles) REFERENCES movies (titles) 
                        ON DELETE CASCADE ON UPDATE CASCADE,
                      FOREIGN KEY (Username) REFERENCES friends (Username) 
                        ON DELETE CASCADE ON UPDATE CASCADE
                      );"
)
## Warning: Closing open result set, pending rows
## <SQLiteResult>
##   SQL  CREATE TABLE rank (
##                       rankid INTERGER NOT NULL PRIMARY KEY, 
##                       Username TEXT NOT NULL,
##                       titles TEXT NOT NULL,
##                       value TEXT NOT NULL,
##                       FOREIGN KEY (titles) REFERENCES movies (titles) 
##                         ON DELETE CASCADE ON UPDATE CASCADE,
##                       FOREIGN KEY (Username) REFERENCES friends (Username) 
##                         ON DELETE CASCADE ON UPDATE CASCADE
##                       );
##   ROWS Fetched: 0 [complete]
##        Changed: 0
#Note: New columns names must be in same order as old column names for this to copy correctly
dbSendStatement(mydb, "INSERT INTO rank SELECT * FROM old_rank;")
## Warning: Closing open result set, pending rows
## <SQLiteResult>
##   SQL  INSERT INTO rank SELECT * FROM old_rank;
##   ROWS Fetched: 0 [complete]
##        Changed: 48
dbSendStatement(mydb, "DROP TABLE old_rank;")
## Warning: Closing open result set, pending rows
## <SQLiteResult>
##   SQL  DROP TABLE old_rank;
##   ROWS Fetched: 0 [complete]
##        Changed: 0
dbGetQuery(mydb,"Select * FROM rank;")
## Warning: Closing open result set, pending rows
##    rankid Username             titles value
## 1       1    Alice      Black.Panther     1
## 2       2    Rayna      Black.Panther     3
## 3       3  Gladysa      Black.Panther     5
## 4       4  Myraida      Black.Panther     1
## 5       5    Inbar      Black.Panther     3
## 6       6    Aaron      Black.Panther     3
## 7       7   Alyssa      Black.Panther     3
## 8       8   Andrea      Black.Panther     1
## 9       9    Alice             Moanna     2
## 10     10    Rayna             Moanna     4
## 11     11  Gladysa             Moanna     2
## 12     12  Myraida             Moanna     5
## 13     13    Inbar             Moanna     4
## 14     14    Aaron             Moanna     1
## 15     15   Alyssa             Moanna     1
## 16     16   Andrea             Moanna     5
## 17     17    Alice             Frozen     3
## 18     18    Rayna             Frozen     5
## 19     19  Gladysa             Frozen     4
## 20     20  Myraida             Frozen     3
## 21     21    Inbar             Frozen     5
## 22     22    Aaron             Frozen     2
## 23     23   Alyssa             Frozen     6
## 24     24   Andrea             Frozen     4
## 25     25    Alice           I..Tonya     4
## 26     26    Rayna           I..Tonya     6
## 27     27  Gladysa           I..Tonya     3
## 28     28  Myraida           I..Tonya     4
## 29     29    Inbar           I..Tonya     2
## 30     30    Aaron           I..Tonya     4
## 31     31   Alyssa           I..Tonya     2
## 32     32   Andrea           I..Tonya     3
## 33     33    Alice Back.to.the.Future     5
## 34     34    Rayna Back.to.the.Future     2
## 35     35  Gladysa Back.to.the.Future     6
## 36     36  Myraida Back.to.the.Future     2
## 37     37    Inbar Back.to.the.Future     1
## 38     38    Aaron Back.to.the.Future     5
## 39     39   Alyssa Back.to.the.Future     4
## 40     40   Andrea Back.to.the.Future     2
## 41     41    Alice     What.About.Bob     6
## 42     42    Rayna     What.About.Bob     1
## 43     43  Gladysa     What.About.Bob     1
## 44     44  Myraida     What.About.Bob     6
## 45     45    Inbar     What.About.Bob     6
## 46     46    Aaron     What.About.Bob     6
## 47     47   Alyssa     What.About.Bob     5
## 48     48   Andrea     What.About.Bob     6

The value of this is that we can now update the email addresses or movie titles in their respective tables and the results will cascade to the rank table. (Note: This will only work if the foreign key is a primary key in the parent table.)

We can also add additional informaiton to the movies or friends tables that is not strictly related to the rank, e.g. release date or first and last names.

#Statement needed to turn foreign keys constraint on
dbSendStatement(mydb, "PRAGMA foreign_keys = ON;")
## <SQLiteResult>
##   SQL  PRAGMA foreign_keys = ON;
##   ROWS Fetched: 0 [complete]
##        Changed: 0
dbSendStatement(mydb, "UPDATE movies SET titles='Black Panther' WHERE titles='Black.Panther';")
## Warning: Closing open result set, pending rows
## <SQLiteResult>
##   SQL  UPDATE movies SET titles='Black Panther' WHERE titles='Black.Panther';
##   ROWS Fetched: 0 [complete]
##        Changed: 9
dbSendStatement(mydb, "UPDATE movies SET titles='I, Tonya' WHERE titles='I..Tonya';")
## Warning: Closing open result set, pending rows
## <SQLiteResult>
##   SQL  UPDATE movies SET titles='I, Tonya' WHERE titles='I..Tonya';
##   ROWS Fetched: 0 [complete]
##        Changed: 9
dbSendStatement(mydb, "UPDATE movies SET titles='Back to the Future' WHERE titles='Back.to.the.Future';")
## Warning: Closing open result set, pending rows
## <SQLiteResult>
##   SQL  UPDATE movies SET titles='Back to the Future' WHERE titles='Back.to.the.Future';
##   ROWS Fetched: 0 [complete]
##        Changed: 9
dbSendStatement(mydb, "UPDATE movies SET titles='What About Bob?' WHERE titles='What.About.Bob';")
## Warning: Closing open result set, pending rows
## <SQLiteResult>
##   SQL  UPDATE movies SET titles='What About Bob?' WHERE titles='What.About.Bob';
##   ROWS Fetched: 0 [complete]
##        Changed: 9
dbGetQuery(mydb, "SELECT titles FROM rank GROUP BY titles;")
## Warning: Closing open result set, pending rows
##               titles
## 1 Back to the Future
## 2      Black Panther
## 3             Frozen
## 4           I, Tonya
## 5             Moanna
## 6    What About Bob?
#Note tht we updated movies, but saw the changes in rank. Pretty cool!

Step 8: Explore results in SQL

#What's the movie wiht the highest-ranked composite score?
best_average_rank <- "SELECT titles, AVG(value) FROM rank GROUP BY titles ORDER BY AVG(value) LIMIT 1;"
dbGetQuery(mydb,best_average_rank)
##          titles AVG(value)
## 1 Black Panther        2.5
#Which the movie have the most number one rankings?
most_number_one_ranks <- "SELECT titles, COUNT(value) AS 'Number One Rankings' FROM rank WHERE value==1 GROUP BY titles HAVING value==1 ORDER BY COUNT(value) DESC;"
dbGetQuery(mydb,most_number_one_ranks)
##               titles Number One Rankings
## 1      Black Panther                   3
## 2             Moanna                   2
## 3    What About Bob?                   2
## 4 Back to the Future                   1
#Which the movie have the most number 6 (worst) rankings?
most_worst_ranks <- "SELECT titles, COUNT(value) AS 'Count Ranked as Worst' FROM rank WHERE value==6 GROUP BY titles HAVING value==6 ORDER BY COUNT(value) DESC;"
dbGetQuery(mydb,most_worst_ranks)
##               titles Count Ranked as Worst
## 1    What About Bob?                     5
## 2 Back to the Future                     1
## 3             Frozen                     1
## 4           I, Tonya                     1
#:Note: What About Bob? is a GREAT movie. These rankings are a travesty.

Step 9: Import into R dataframe

rank <- dbGetQuery(mydb, "SELECT * FROM rank;")
movies <- dbGetQuery(mydb, "SELECT * FROM movies;")
friends <- dbGetQuery(mydb, "SELECT * FROM friends;")
rank
##    rankid Username             titles value
## 1       1    Alice      Black Panther     1
## 2       2    Rayna      Black Panther     3
## 3       3  Gladysa      Black Panther     5
## 4       4  Myraida      Black Panther     1
## 5       5    Inbar      Black Panther     3
## 6       6    Aaron      Black Panther     3
## 7       7   Alyssa      Black Panther     3
## 8       8   Andrea      Black Panther     1
## 9       9    Alice             Moanna     2
## 10     10    Rayna             Moanna     4
## 11     11  Gladysa             Moanna     2
## 12     12  Myraida             Moanna     5
## 13     13    Inbar             Moanna     4
## 14     14    Aaron             Moanna     1
## 15     15   Alyssa             Moanna     1
## 16     16   Andrea             Moanna     5
## 17     17    Alice             Frozen     3
## 18     18    Rayna             Frozen     5
## 19     19  Gladysa             Frozen     4
## 20     20  Myraida             Frozen     3
## 21     21    Inbar             Frozen     5
## 22     22    Aaron             Frozen     2
## 23     23   Alyssa             Frozen     6
## 24     24   Andrea             Frozen     4
## 25     25    Alice           I, Tonya     4
## 26     26    Rayna           I, Tonya     6
## 27     27  Gladysa           I, Tonya     3
## 28     28  Myraida           I, Tonya     4
## 29     29    Inbar           I, Tonya     2
## 30     30    Aaron           I, Tonya     4
## 31     31   Alyssa           I, Tonya     2
## 32     32   Andrea           I, Tonya     3
## 33     33    Alice Back to the Future     5
## 34     34    Rayna Back to the Future     2
## 35     35  Gladysa Back to the Future     6
## 36     36  Myraida Back to the Future     2
## 37     37    Inbar Back to the Future     1
## 38     38    Aaron Back to the Future     5
## 39     39   Alyssa Back to the Future     4
## 40     40   Andrea Back to the Future     2
## 41     41    Alice    What About Bob?     6
## 42     42    Rayna    What About Bob?     1
## 43     43  Gladysa    What About Bob?     1
## 44     44  Myraida    What About Bob?     6
## 45     45    Inbar    What About Bob?     6
## 46     46    Aaron    What About Bob?     6
## 47     47   Alyssa    What About Bob?     5
## 48     48   Andrea    What About Bob?     6
movies
##   moviesid             titles
## 1        1      Black Panther
## 2        2             Moanna
## 3        3             Frozen
## 4        4           I, Tonya
## 5        5 Back to the Future
## 6        6    What About Bob?
friends
##   friendsid Username
## 1         1    Alice
## 2         2    Rayna
## 3         3  Gladysa
## 4         4  Myraida
## 5         5    Inbar
## 6         6    Aaron
## 7         7   Alyssa
## 8         8   Andrea

Step 10: Explore data in R, using “tidy”" data

require("ggplot2")
## Loading required package: ggplot2
rank_sub <- rank[3:4]
rank_long <- melt(table(rank_sub))
colnames(rank_long) <- c("Title","Rank","Count")

p <- ggplot(rank_long, aes(x=as.factor(Rank), y=Count)) + labs(x="Rank (1 is Best, 6 is Worst)")
p + geom_bar(stat="identity", aes(fill=Title)) + facet_wrap(~Title)