Scraping stages

Choosing the website

I decided to scrape the IMDb website (https://www.imdb.com) and chose the following variables: (1) movie name, (2) year, (3) director, (4) user rating and (5) number of votes cast for the movie.

Such a dataset can be used to analyze a particular movie genre or another group of movies in order to identify viewer preferences, e.g. which movies are watched more often (number of votes), which are liked more (ratings), whether this depends on the director, etc. Of course, more variables would need to be included for a deeper analysis, especially if the dataset were to be used for building recommendation systems (e.g. age restrictions, duration and genres could be added).

Choosing the page

This page shows Russian movies released in the last 5 years (2018–2022), sorted by user rating from highest to lowest.

# Search results page: Russian feature films released 2018-2022,
# sorted by user rating in descending order.
url <- "https://www.imdb.com/search/title/?title_type=feature&release_date=2018-01-01,2022-12-31&countries=ru&sort=user_rating,desc"
# Download and parse the page so that elements can be selected from it.
page <- read_html(url)

Identifying elements of the page and CSS-tags

Finding the CSS selector for movie names.
# Text of every link inside a result's header, i.e. the movie titles.
movie_name <- html_text(html_nodes(page, ".lister-item-header a"))
Finding the CSS selector for the release year.
  # Text of the year label shown next to each title.
  year <- html_text(html_nodes(page, ".text-muted.unbold"))
Finding the CSS selector for user ratings.
  # Text of the bold rating value inside each IMDb rating widget.
  user_rating <- html_text(html_nodes(page, ".ratings-imdb-rating strong"))
Finding the CSS selector for director names.
  # Text of the first link in the paragraph that follows the muted text block.
  # NOTE(review): presumably this is always the director - confirm for entries
  # that list no director.
  director <- html_text(html_nodes(page, ".text-muted+ p a:nth-child(1)"))
Finding the CSS selector for user votes.
  # Text of the second <span> in each votes row, i.e. the vote count as text.
  votes <- html_text(html_nodes(page, ".sort-num_votes-visible span:nth-child(2)"))

Scraping several pages & creating a dataset

The search results span multiple pages. Since one page holds only 50 results, I decided to scrape the first 5 pages, which gives the 250 movies with the highest user ratings.
If we go to the second page, we can see how the URL changes (start=1 on the first page, while on the second it’s start=51). Thus, to scrape several pages, the loop needs to specify the result number at which each page starts.
# Scrape the first five result pages (50 movies each) into one data frame.
#
# The `start` query parameter selects the first result shown on a page, so the
# five pages begin at results 1, 51, 101, 151 and 201. A single loop over these
# offsets replaces five copy-pasted loops that each ran exactly once.
page_starts <- seq(from = 1, to = 201, by = 50)

# One data frame per page is stored here and combined once after the loop,
# so nothing is grown incrementally inside the loop.
page_tables <- vector("list", length(page_starts))

for (k in seq_along(page_starts)) {
  i <- page_starts[k]
  link <- paste0(
    "https://www.imdb.com/search/title/?title_type=feature&release_date=2018-01-01,2022-12-31&countries=ru&sort=user_rating,desc&start=",
    i, "&ref_=adv_nxt"
  )
  page <- read_html(link)

  movie_name <- page %>%
    html_nodes(".lister-item-header a") %>%
    html_text()

  year <- page %>%
    html_nodes(".text-muted.unbold") %>%
    html_text()

  user_rating <- page %>%
    html_nodes(".ratings-imdb-rating strong") %>%
    html_text()

  director <- page %>%
    html_nodes(".text-muted+ p a:nth-child(1)") %>%
    html_text()

  votes <- page %>%
    html_nodes(".sort-num_votes-visible span:nth-child(2)") %>%
    html_text()

  # NOTE(review): data.frame() needs all five vectors to line up; if a movie
  # lacks one of the fields the columns would be misaligned or the call would
  # fail - confirm every listed movie has all five fields.
  page_tables[[k]] <- data.frame(movie_name, year, user_rating, director, votes,
                                 stringsAsFactors = FALSE)

  # Pause between requests so the pages are not fetched back-to-back.
  Sys.sleep(5)
}

# Stack the per-page tables in page order.
ru_movies_imdb <- bind_rows(page_tables)

# Report the offset of the last page that was scraped.
print(paste("Page:", i))
## [1] "Page: 201"

Finally, we write the dataset to a CSV file and read it back in.

# Save the scraped table to disk.
write.csv(x = ru_movies_imdb, file = "ru_movies.csv")

# Load the saved table back; all later summaries work on `movies`.
movies <- read.csv(file = "ru_movies.csv")

Visualizations: summarizing variables

# Render the full dataset as a styled table with a caption.
kable_styling(
  kable(
    movies,
    caption = "Table 1. Sample of Russian movies on IMDb 2018-2022",
    align = "lcclc"
  )
)
Table 1. Sample of Russian movies on IMDb 2018-2022
X movie_name year user_rating director votes
1 Who is Prince Oak Oakleyski
9.6 I. Kolyada 55
2 Real Emperor Kandanai Maneesawath
9.4 I. Kolyada 61
3 Prince of Eurasia
9.4 I. Kolyada 61
4 Prince Oakleyski Eurasia - Royalwiki
9.2 Prince Oak Oakleyski 60
5 Зимы не будет
9.1 Dmitri Frolov 38
6 Gjirokastra
9.0 Yuriy Arabov 7
7 Призраки Московского метро
9.0 Sergey A. 28
8 Ужас Битцевского парка
9.0 Sergey A. 38
9 Vordum: Price of Death
8.9 Ivan Akhmetov 22
10 Молитва Ангела
8.9 Maria Solovyova 13
11 Monte Cristo Musical
8.8 Dongwon Lee 20
12 Орбиус
8.8 Sergey A. 427
13 Боевик на НТВ: Окончательное издание
8.8 Ilya Novikov 10
14 Большие змеи Улли-Кале
8.8 Aleksey Fedorchenko 7
15 Vordum: Price of Death
8.6 Ivan Akhmetov 34
16 Chaos from the Old World. Part II
8.6 Paul Miloslavsky 10
17 Навстречу мечте
8.5 Irina Gobozashvili 20
18 Лесной монстр
8.4 Sergey A. 470
19 Тупик. Дорога.
8.4 Anatoly K. Ivanov 48
20 Мортис
8.4 Sergey A. 27
21 Balaban
8.3 Aysulu Onaran 13
22 План 9 с Алиэкспресса
8.3 Diana Galimzyanova 8
23 КАРАнтин
8.3 Diana Ringo 12
24 В ожидании смерти
8.3 Sergey A. 1,160
25 Манюня в кино
8.3 Arman Marutyan 100
26 Все люди исчезли навсегда
8.3 Sergey A. 453
27 Who Wants to Live Forever?
8.3 Nicole Andreas 17
28 Проклятый лес
8.3 Sergey A. 446
29 Коронавирус. Апокалипсис
8.3 Sergey A. 157
30 Капитан Голливуд
8.3 Evgeniy Tatarov 7
31 Сиреноголовый
8.2 Sergey A. 1,320
32 Хаос
8.2 Sergey A. 146
33 Убийства в лесу мёртвых акул
8.1 Sergey A. 546
34 Full film deepfake. By Vnuk Elkina: Don’t threat to younger debil, sitting in the sorrow of the chernobyl.
8.1 Alexandr Prokhorov 12
35 Это Эдик. Сказка о подаренном и украденном детстве
8.0 Ivan Proskuryakov 30
36 Opasnaya Zhizn
8.0 Ivan 6
37 После
7.9 Sergey A. 435
38 Эластико: Двенадцатый игрок
7.9 Dmitriy Vlaskin 102
39 Смотреть кино онлайн бесплатно
7.8 Sergey A. 157
40 Майор Дрон и Чумной Доктор
7.8 Sergey A. 1,050
41 Кадиш
7.8 Konstantin Fam 30
42 Ведьма 2
7.8 Sergey A. 434
43 Major Dron and the plague doctor 2
7.8 Sergey A. 18
44 Ведьма
7.8 Sergey A. 439
45 Reprint
7.8 Ivan Dulepov 15
46 Чернобыль
7.7 Sergey A. 265
47 AK 47 - 2020
7.7 Konstantin Buslov 11
48 Поиск
7.6 Aneesh Chaganty 176,160
49 Вторжение
7.6 Sergey A. 195
50 Маленький воин
7.6 Ilya Ermolov 90
51 Всё хорошо
7.6 Elena Hazanova 12
52 Телевиzор
7.6 Sergey A. 45
53 Ресторан по понятиям. Фильм
7.6 David Dadunashvili 98
54 Антология ужасов 8
7.6 Sergey A. 45
55 Нуучча
7.5 Vladimir Munkuev 32
56 Пальмира
7.5 Ivan Bolotnikov 39
57 Лорик
7.5 Aleksey Zlobin 77
58 Анна Каренина. Мюзикл
7.5 Yeji Shin 32
59 Уроки фарси
7.4 Vadim Perelman 11,042
60 Дорогие товарищи!
7.4 Andrey Konchalovskiy 5,592
61 Vertigo
7.4 Valery Konin 7
62 Два холма. Фильм
7.4 Dmitry Gribanov 28
63 Приснись Мне
7.4 Roman Olkhovka 15
64 Бетонная акула
7.4 Sergey A. 1,101
65 У самого Белого моря
7.4 Aleksandr Zachinyayev 19
66 Счастье - это… Часть 2
7.4 Irina Basenko 55
67 Город-зад
7.4 Aleksandr Pozhenskiy 29
68 Лето
7.3 Kirill Serebrennikov 7,724
69 Капитан Волконогов бежал
7.3 Natasha Merkulova 1,816
70 Я иду играть
7.3 Anna Zaytseva 34
71 Джетлаг
7.3 Michael Idov 246
72 Модель
7.3 Olga Land 83
73 Лес мёртвых акул
7.3 Sergey A. 558
74 Can I Recognize Your Soul
7.3 Struggle da Preacher 11
75 Месть в лесу мёртвых акул
7.3 Sergey A. 541
76 Первый снег
7.3 Nataliya Konchalovskaya 16
77 Libertas
7.3 Artemio Benki 10
78 Парень из Голливуда, или Необыкновенные приключения Вени Везунчика
7.3 Roman Svetlov 80
79 Купе номер 6
7.2 Juho Kuosmanen 14,126
80 Дылда
7.2 Kantemir Balagov 11,976
81 Доктор Лиза
7.2 Oksana Karas 702
82 Не хороните меня без Ивана
7.2 Lyubov Borisova 89
83 Надо мною солнце не садится
7.2 Lyubov Borisova 160
84 Серебряные коньки
7.1 Michael Lockshin 7,041
85 Айка
7.1 Sergei Dvortsevoy 2,114
86 Сестренка
7.1 Aleksandr Galibin 453
87 Император
7.1 Alfia Habibullina 19
88 Омут
7.1 Denis Kryuchkov 65
89 Царь-птица
7.1 Eduard Novikov 47
90 Лето
7.1 Vadim Kostrov 23
91 UFO
7.1 Gennadiy Vyrypaev 18
92 Африка
7.1 Darya Binevskaya 23
93 В винном отражении
7.1 Vitaliy Muzychenka 39
94 Зови меня Дрозд
7.1 Pavel Mirzoev 13
95 Сокровенный человек
7.1 Roman Liberov 17
96 Хрусталь
7.0 Darya Zhuk 1,830
97 Нос, или Заговор «не таких»
7.0 Andrey Khrzhanovskiy 271
98 Пальма
6.9 Aleksandr Domogarov 994
99 Знахарь
6.9 Yaroslav Mochalov 30
100 Дочь рыбака
6.9 Ismail Safarali 24
101 Совесть
6.9 Aleksey Kozlov 35
102 Нелегал
6.9 Dmitrii Davydov 33
103 Т-34
6.8 Aleksey Sidorov 13,716
104 Папа, сдохни
6.8 Kirill Sokolov 5,121
105 Грех
6.8 Andrey Konchalovskiy 1,290
106 Брайтон 4
6.8 Levan Koguashvili 861
107 Лёд
6.8 Oleg Trofim 3,470
108 Человек из Подольска
6.8 Semyon Serzin 752
109 Завод
6.8 Yuriy Bykov 2,752
110 Война Анны
6.8 Aleksey Fedorchenko 605
111 Земля Эльзы
6.8 Yuliya Kolesnik 20
112 Жанна
6.8 Konstantin Statskiy 46
113 Неадекватные люди 2
6.8 Roman Karimov 805
114 Second Sun
6.8 Rinat Tashimov 7
115 Профайл
6.7 Timur Bekmambetov 6,592
116 Петровы в гриппе
6.7 Kirill Serebrennikov 2,857
117 Сказка
6.7 Aleksandr Sokurov 345
118 Аманат
6.7 Rauf Kubayev 485
119 Казнь
6.7 Lado Kvataniya 1,536
120 Холоп
6.7 Klim Shipenko 5,012
121 Батя
6.7 Dmitriy Efimovich 1,409
122 Ника
6.7 Vasilisa Kuzmina 216
123 Конференция
6.7 Ivan I. Tverdovskiy 406
124 Многоэтажка
6.7 Anton Maslov 341
125 Француз
6.7 Andrey Smirnov 278
126 Сердце мира
6.7 Nataliya Meshchaninova 619
127 Я вернусь
6.7 Darya Shumakova 20
128 Анна
6.6 Luc Besson 89,692
129 Балканский рубеж
6.6 Andrey Volgin 10,306
130 Текст
6.6 Klim Shipenko 3,340
131 Калашников
6.6 Konstantin Buslov 4,497
132 Солдатик
6.6 Viktoria Fanasiutina 772
133 Razzhimaya kulaki
6.6 Kira Kovalenko 1,541
134 Айта
6.6 Stepan Burnashev 198
135 Страна Саша
6.6 Yuliya Trofimova 103
136 Молодой человек
6.6 Aleksandr Fomin 646
137 Мама, я дома
6.6 Vladimir Bitokov 265
138 Человек, который удивил всех
6.6 Natasha Merkulova 942
139 Громкая связь
6.6 Alexey Nuzhny 1,412
140 Голиаф
6.6 Adilkhan Yerzhanov 69
141 Я буду жить
6.6 Eduard Bordukov 55
142 Молоко
6.6 Karen Oganesyan 99
143 История одного назначения
6.6 Avdotya Smirnova 581
144 В плену у сакуры
6.6 Masaki Inoue 31
145 Будь моим Кириллом
6.6 Alla Eliseeva 69
146 Гранд канкан
6.6 Mikhail Kosyrev-Nesterov 15
147 Выйти из группы
6.6 Maria Tumova 15
148 Седьмой пробег по контуру Земного шара
6.6 Vitaliy Suslin 14
149 Квнщики
6.6 Ilya Aksyonov 87
150 ЭТЮД #2
6.6 Andrey Burmistrov 9
151 Жена Чайковского
6.5 Kirill Serebrennikov 1,262
152 Лучшие в аду
6.5 Andrey Batov 692
153 ДАУ. Регенерация
6.5 Ilya Khrzhanovskiy 610
154 Огонь
6.5 Alexey Nuzhny 1,514
155 Подольские курсанты
6.5 Vadim Shmelyov 1,739
156 Непрощённый
6.5 Sarik Andreasyan 1,269
157 По-мужски
6.5 Maksim Kulagin 258
158 Лёд 2
6.5 Zhora Kryzhovnikov 821
159 Я худею
6.5 Alexey Nuzhny 2,415
160 Здоровый человек
6.5 Pyotr Todorovskiy 167
161 Межсезонье
6.5 Aleksandr Khant 356
162 Чиновник
6.5 Vladimir Motashnev 57
163 Юморист
6.5 Michael Idov 1,088
164 Обходные пути
6.5 Ekaterina Selenkina 64
165 Далекие близкие
6.5 Ivan Sosnin 153
166 Вечное новое
6.5 Andrey Leskin 7
167 Проклятый чиновник
6.5 Sarik Andreasyan 503
168 Счастье в конверте
6.5 Svetlana Sukhanova 101
169 Вечер шутов, или Серьезно с приветом
6.5 Liliya Trofimova 530
170 Спутник
6.4 Egor Abramenko 26,339
171 Собибор
6.4 Konstantin Khabenskiy 5,582
172 Китобой
6.4 Philipp Yuryev 1,502
173 Довлатов
6.4 Aleksey German Jr.  2,150
174 Дело
6.4 Aleksey German Jr.  373
175 Один вдох
6.4 Elena Hazanova 439
176 Саша
6.4 Vladimir Bek 18
177 На острие
6.4 Eduard Bordukov 410
178 Ыт
6.4 Stepan Burnashev 59
179 Ван Гоги
6.4 Sergey Livnev 393
180 Сквозь чёрное стекло
6.4 Konstantin Lopushanskiy 110
181 О чём говорят мужчины. Продолжение
6.4 Flyuza Farkhshatova 1,560
182 Бык
6.4 Boris Akopov 880
183 Простой карандаш
6.4 Natalya Nazarova 182
184 Хэппи-энд
6.4 Evgeniy Shelyakin 331
185 Глубокие реки
6.4 Vladimir Bitokov 190
186 Про Лёлю и Миньку
6.4 Anna Tchernakova 36
187 Слоны могут играть в футбол
6.4 Mikhail Segal 222
188 Керосин
6.4 Yusup Razykov 138
189 Osen
6.4 Vadim Kostrov 12
190 Все о нас
6.4 Sasha Tse 10
191 On the Dream’s Shore
6.4 Bair Uladaev 8
192 Опасные танцы
6.4 Ekaterina Dvigubskaya 21
193 Кома
6.3 Nikita Argunov 11,247
194 Майор Гром: Чумной Доктор
6.3 Oleg Trofim 13,223
195 ДАУ. Наташа
6.3 Ilya Khrzhanovskiy 1,514
196 Ваш репетитор
6.3 Anton Kolomeets 79
197 Чемпион мира
6.3 Aleksey Sidorov 610
198 ДАУ. Нора сын
6.3 Ilya Khrzhanovskiy 25
199 Лев Яшин. Вратарь моей мечты
6.3 Vasiliy Chiginskiy 930
200 Одесса
6.3 Valeriy Todorovskiy 565
201 Пугало
6.3 Dmitrii Davydov 506
202 Мальчик русский
6.3 Alexander Zolotukhin 805
203 Черный снег
6.3 Stepan Burnashev 140
204 Пара из будущего
6.3 Alexey Nuzhny 578
205 Свидетели
6.3 Konstantin Fam 133
206 Оторви и выбрось
6.3 Kirill Sokolov 372
207 Тренер
6.3 Danila Kozlovskiy 1,463
208 Марш утренней зари
6.3 Roman Kachanov 56
209 Трое
6.3 Anna Melikyan 225
210 Нефутбол
6.3 Maksim Sveshnikov 411
211 День слепого Валентина
6.3 Aleksandr Barshak 49
212 Блокадный дневник
6.3 Andrey Zaytsev 120
213 Накануне
6.3 Alisa Erokhina 34
214 Наша зима
6.3 Stepan Burnashev 18
215 Мой папа не подарок
6.3 Aleksandr Karpilovskiy 29
216 Самый Новый год!
6.3 Antonina Ruzhe 82
217 Узлы
6.3 Oleg Khamokov 15
218 Мёртвые ласточки
6.3 Natalia Pershina 93
219 Гупёшка
6.3 Vlad Furman 44
220 Istorii napisannye krovyu
6.3 Sergey A. 58
221 Molodi
6.3 Alexander Seliverstov 8
222 Красный призрак
6.2 Andrey Bogatyrev 1,863
223 Ганзель, Гретель и Агентство Магии
6.2 Alex Tsitsilin 4,613
224 Скажи ей
6.2 Aleksandr Molochnikov 247
225 Отель «Белград»
6.2 Konstantin Statskiy 1,703
226 Скиф
6.2 Rustam Mosafir 2,663
227 Подбросы
6.2 Ivan I. Tverdovskiy 604
228 Гудбай, Америка
6.2 Sarik Andreasyan 1,303
229 Продукты 24
6.2 Michael Borodin 159
230 Сторож
6.2 Yuriy Bykov 1,892
231 Дочь рыбака
6.2 Uldus Bakhtiozina 77
232 Остерегайся псов
6.2 Nadia Bedzhanova 128
233 Штурм
6.2 Adilkhan Yerzhanov 85
234 Экспресс
6.2 Ruslan Bratov 147
235 Дунай
6.2 Lyubov Mulmenko 85
236 Маруся фореvа!
6.2 Aleksandr Galibin 18
237 Смотри как я
6.2 Egor Salnikov 32
238 Лена и справедливость
6.2 Ekaterina Vesheva 30
239 Селфи#Selfie
6.2 Maksim Boev 19
240 Половина неба
6.2 Sara Blecher 16
241 Иваново счастье
6.2 Ivan Sosnin 105
242 Старые шишки
6.2 Andrei Shavkero 62
243 Два билета домой
6.2 Dmitriy Meskhiev 83
244 Разговорник
6.2 Sergey Sentsov 148
245 Я свободен
6.2 Ilya Severov 28
246 День мёртвых
6.2 Viktor Ryzhakov 94
247 АРМЕН и Я
6.2 Maxim Airapetov 32
248 Дылда
6.2 Anastasiya Zhakulina 6
249 Лётчик
6.1 Renat Davletyarov 1,488
250 Братство
6.1 Pavel Lungin 1,247
# Fig. 1: histogram of the number of user votes per movie.
#
# `votes` holds text with thousands separators (e.g. "1,160"), which
# as.numeric() cannot parse: those values became NA and every movie with
# 1,000 or more votes was silently dropped from the plot. The commas are
# therefore removed before converting.
#
# With all movies included the counts run from single digits up to six
# figures, so a log10 x-axis with a fixed number of bins is used; fixed
# 50-vote-wide bins would be unreadable over that range.
ggplot(
  movies,
  aes(x = as.numeric(gsub(",", "", as.character(votes), fixed = TRUE)))
) +
  geom_histogram(bins = 30, fill = "#DE3163", color = "#811331") +
  scale_x_log10() +
  labs(
    x = "Votes",
    y = "Frequency"
  ) +
  theme_minimal()
Fig. 1. Distribution of User Votes

Fig. 1. Distribution of User Votes

# Fig. 2: histogram of user ratings, in bins 0.2 rating points wide.
ggplot(
  data = movies,
  mapping = aes(x = as.numeric(as.character(user_rating)))
) +
  geom_histogram(color = "#A95C68", fill = "#F88379", binwidth = 0.2) +
  labs(y = "Frequency", x = "Ratings") +
  theme_minimal()
Fig. 2. Distribution of User Ratings

Fig. 2. Distribution of User Ratings

# Build a clean, ordered release-year factor from the raw year label.
#
# Raw labels look like "(2018)" or "(I) (2018)": an optional roman-numeral
# disambiguation tag followed by the year in parentheses. Instead of listing
# label variants by hand (any variant not listed would end up as NA), take the
# four-digit year from the last parenthesised group. Labels that contain no
# such group are left as they are and become NA in the factor below.
year_label <- as.character(movies$year)
movies$years <- sub("^.*\\(([0-9]{4})\\).*$", "\\1", year_label)

# Ordered factor so the years appear chronologically on plot axes.
movies$years <- factor(movies$years, ordered = TRUE,
                       levels = c("2018", "2019", "2020", "2021", "2022"))
# Fig. 3: number of movies per release year, one coloured bar per year.
ggplot(movies, aes(x = years, fill = years)) +
  geom_bar(position = "dodge") +
  scale_fill_brewer(palette = "Set2") +
  labs(y = "Number of movies released", x = "Year") +
  # The fill colour only repeats the x-axis, so the legend is hidden.
  theme(legend.position = "none")
Fig. 3. Distribution of Movie Releases

Fig. 3. Distribution of Movie Releases