library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("dslabs")
# List every data set that ships with the dslabs package.
data(package = "dslabs")

# List the scripts bundled in the package's "script" directory.
system.file("script", package = "dslabs") |>
  list.files()
## [1] "make-admissions.R"
## [2] "make-brca.R"
## [3] "make-brexit_polls.R"
## [4] "make-calificaciones.R"
## [5] "make-death_prob.R"
## [6] "make-divorce_margarine.R"
## [7] "make-gapminder-rdas.R"
## [8] "make-greenhouse_gases.R"
## [9] "make-historic_co2.R"
## [10] "make-mice_weights.R"
## [11] "make-mnist_127.R"
## [12] "make-mnist_27.R"
## [13] "make-movielens.R"
## [14] "make-murders-rda.R"
## [15] "make-na_example-rda.R"
## [16] "make-nyc_regents_scores.R"
## [17] "make-olive.R"
## [18] "make-outlier_example.R"
## [19] "make-polls_2008.R"
## [20] "make-polls_us_election_2016.R"
## [21] "make-pr_death_counts.R"
## [22] "make-reported_heights-rda.R"
## [23] "make-research_funding_rates.R"
## [24] "make-results_us_election_2012.R"
## [25] "make-stars.R"
## [26] "make-temp_carbon.R"
## [27] "make-tissue-gene-expression.R"
## [28] "make-trump_tweets.R"
## [29] "make-weekly_us_contagious_diseases.R"
## [30] "save-gapminder-example-csv.R"
# Load the movielens data set (provided by dslabs) into the workspace.
data("movielens")

# Keep only the three columns used by the heat map, ordered by year.
movieratings <- arrange(
  select(movielens, "year", "genres", "rating"),
  year
)

# Keep rows whose genre is exactly one of these single labels; any other
# value of `genres` (e.g. a multi-genre combination) is filtered out.
movielens_f <- filter(
  movieratings,
  genres %in% c(
    "Action", "Adventure", "Animation", "Children", "Comedy", "Crime",
    "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror", "Musical",
    "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western"
  )
)

# Mean rating for every (year, genre) pair. No `.groups` argument is given,
# so summarise() reports that the result stays grouped by `year`.
movielens_hm <- movielens_f |>
  group_by(year, genres) |>
  summarise(mean_rating = mean(rating))
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
# Heat map of the mean rating per genre and year: one black-outlined tile per
# (year, genre) pair, filled on a gradient from red (low) to green (high).
# NOTE(review): the rendered output warns that 2 rows are dropped by
# geom_tile() -- presumably rows with a missing year; confirm.
ggplot(
  data = movielens_hm,
  mapping = aes(x = year, y = genres, fill = mean_rating)
) +
  geom_tile(color = "black") +
  scale_fill_gradient(name = "Average Rating", low = "red", high = "green") +
  labs(
    title = "Genre Ratings From 1917-2016, Excluding Multi Genre",
    # Source attribution looked up for the movielens data set in dslabs.
    caption = "Source: GroupLens Research lab",
    x = "Year",
    y = "Genre"
  ) +
  theme_minimal()
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_tile()`).
# Show the summary table (year, genres, mean_rating) that feeds the heat map.
movielens_hm
## # A tibble: 454 × 3
## # Groups: year [98]
## year genres mean_rating
## <int> <fct> <dbl>
## 1 1917 Comedy 4.25
## 2 1918 Comedy 4.25
## 3 1921 Comedy 4.38
## 4 1922 Comedy 4.25
## 5 1922 Horror 3.81
## 6 1923 Comedy 4.25
## 7 1924 Comedy 4.5
## 8 1924 Drama 3.83
## 9 1925 Comedy 4
## 10 1926 Drama 3.5
## # ℹ 444 more rows
This visualization was made with the movielens data set from the dslabs library, which contains movie ratings. I chose to make a heat map of the mean ratings of genres that are not multi-genre. First, I selected the columns I was going to use, which were year, genres, and rating. Next, I filtered for the genres that are not multi-genre. Then I grouped by year and genre and computed the mean rating. Lastly, I created a heat map where the x axis shows the year, the y axis shows the genre, and the colors go from red (low rating) to green (high rating). I found it interesting that comedy and drama had ratings for the most years, and that mystery only has ratings for 2 years even though those ratings were high.