## -- Attaching packages ---------------------------------------------------------------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1 v purrr 0.2.4
## v tibble 1.4.2 v dplyr 0.7.4
## v tidyr 0.8.0 v stringr 1.2.0
## v readr 1.1.1 v forcats 0.2.0
## -- Conflicts ------------------------------------------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## Loading required package: bitops
##
## Attaching package: 'RCurl'
## The following object is masked from 'package:tidyr':
##
## complete
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
# Fetch the ratings CSV from GitHub as text with getURL(), parse that text
# with read_csv(), and store the parsed table as a base data.frame.
data <- data.frame(
  read_csv(
    getURL("https://raw.githubusercontent.com/baroncurtin2/data607/master/ratings.csv")
  ),
  stringsAsFactors = FALSE
)
# The data is currently very wide. A fundamental of tidy data is that each
# variable is a column, each observation is a row, and each value is a cell.
# Gather every column except `name` into movie/rating pairs, dropping missing
# ratings, then sort the rows by name and movie.
tidydata <- arrange(
  gather(data, movie, rating, -name, na.rm = TRUE),
  name,
  movie
)
# Let's make some boxplots for fun: one box of ratings per movie, with each
# box outlined in its movie's colour.
ggplot(tidydata, aes(movie, rating, colour = movie)) +
  geom_boxplot()
Just for fun, we can conclude that Get Out was the highest-rated movie in this sample. Its ratings also deviated the least from the median/mean, as evidenced by the boxplot.