HELLO!
WELCOME TO MY WEBSITE
Teknik Informatika, UIN Maulana Malik Ibrahim Malang | Lalu Egiq Fahalik Anggara (220605110066) | Kelas C
KALKULUS by Prof. Dr. Suhartono, M.Kom
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.2
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(dslabs)
## Warning: package 'dslabs' was built under R version 4.2.2
# Re-wrap `movielens` as a tibble so that printing shows only the first
# rows and the columns that fit on screen, then display it.
movielens <- movielens %>% as_tibble()
movielens
## # A tibble: 100,004 × 7
## movieId title year genres userId rating times…¹
## <int> <chr> <int> <fct> <int> <dbl> <int>
## 1 31 Dangerous Minds 1995 Drama 1 2.5 1.26e9
## 2 1029 Dumbo 1941 Anima… 1 3 1.26e9
## 3 1061 Sleepers 1996 Thril… 1 3 1.26e9
## 4 1129 Escape from New York 1981 Actio… 1 2 1.26e9
## 5 1172 Cinema Paradiso (Nuovo cinema Par… 1989 Drama 1 4 1.26e9
## 6 1263 Deer Hunter, The 1978 Drama… 1 2 1.26e9
## 7 1287 Ben-Hur 1959 Actio… 1 2 1.26e9
## 8 1293 Gandhi 1982 Drama 1 2 1.26e9
## 9 1339 Dracula (Bram Stoker's Dracula) 1992 Fanta… 1 3.5 1.26e9
## 10 1343 Cape Fear 1991 Thril… 1 2 1.26e9
## # … with 99,994 more rows, and abbreviated variable name ¹timestamp
# Transposed overview: one line per column with its type and first values.
dplyr::glimpse(movielens)
## Rows: 100,004
## Columns: 7
## $ movieId <int> 31, 1029, 1061, 1129, 1172, 1263, 1287, 1293, 1339, 1343, 13…
## $ title <chr> "Dangerous Minds", "Dumbo", "Sleepers", "Escape from New Yor…
## $ year <int> 1995, 1941, 1996, 1981, 1989, 1978, 1959, 1982, 1992, 1991, …
## $ genres <fct> Drama, Animation|Children|Drama|Musical, Thriller, Action|Ad…
## $ userId <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ rating <dbl> 2.5, 3.0, 3.0, 2.0, 4.0, 2.0, 2.0, 2.0, 3.5, 2.0, 2.5, 1.0, …
## $ timestamp <int> 1260759144, 1260759179, 1260759182, 1260759185, 1260759205, …
base R
# Draw 15 reproducible uniform random numbers between 0 and 100.
set.seed(123)
number_data <- runif(n = 15, min = 0, max = 100)
# Nested base-R calls are evaluated inside-out: round to two decimals,
# sort in descending order, then keep the first six values.
# `digits` is spelled out in full rather than relying on partial
# argument matching (`digit`).
head(sort(round(number_data, digits = 2), decreasing = TRUE))
## [1] 95.68 94.05 89.24 88.30 78.83 67.76
operator pipe
# The same computation written as a left-to-right pipeline; the `.`
# placeholder marks where the value from the previous step is inserted.
number_data %>%
  round(., digits = 2) %>%
  sort(., decreasing = TRUE) %>%
  head(., n = 6)
## [1] 95.68 94.05 89.24 88.30 78.83 67.76
# Keep the three adjacent columns from `title` through `genres`
# (title, year, genres), addressed by name.
movielens %>%
  select(title:genres)
## # A tibble: 100,004 × 3
## title year genres
## <chr> <int> <fct>
## 1 Dangerous Minds 1995 Drama
## 2 Dumbo 1941 Animation|Children|Drama|Music…
## 3 Sleepers 1996 Thriller
## 4 Escape from New York 1981 Action|Adventure|Sci-Fi|Thrill…
## 5 Cinema Paradiso (Nuovo cinema Paradiso) 1989 Drama
## 6 Deer Hunter, The 1978 Drama|War
## 7 Ben-Hur 1959 Action|Adventure|Drama
## 8 Gandhi 1982 Drama
## 9 Dracula (Bram Stoker's Dracula) 1992 Fantasy|Horror|Romance|Thriller
## 10 Cape Fear 1991 Thriller
## # … with 99,994 more rows
# Keep the second to fourth columns, addressed by position.
movielens %>%
  select(2:4)
## # A tibble: 100,004 × 3
## title year genres
## <chr> <int> <fct>
## 1 Dangerous Minds 1995 Drama
## 2 Dumbo 1941 Animation|Children|Drama|Music…
## 3 Sleepers 1996 Thriller
## 4 Escape from New York 1981 Action|Adventure|Sci-Fi|Thrill…
## 5 Cinema Paradiso (Nuovo cinema Paradiso) 1989 Drama
## 6 Deer Hunter, The 1978 Drama|War
## 7 Ben-Hur 1959 Action|Adventure|Drama
## 8 Gandhi 1982 Drama
## 9 Dracula (Bram Stoker's Dracula) 1992 Fantasy|Horror|Romance|Thriller
## 10 Cape Fear 1991 Thriller
## # … with 99,994 more rows
# Drop title, year and genres; every other column is kept.
movielens %>%
  select(-c(title, year, genres))
## # A tibble: 100,004 × 4
## movieId userId rating timestamp
## <int> <int> <dbl> <int>
## 1 31 1 2.5 1260759144
## 2 1029 1 3 1260759179
## 3 1061 1 3 1260759182
## 4 1129 1 2 1260759185
## 5 1172 1 4 1260759205
## 6 1263 1 2 1260759151
## 7 1287 1 2 1260759187
## 8 1293 1 2 1260759148
## 9 1339 1 3.5 1260759125
## 10 1343 1 2 1260759131
## # … with 99,994 more rows
# Keep three columns and expose `title` under the name `movie_title`.
movielens %>%
  select(title, year, genres) %>%
  rename(movie_title = title)
## # A tibble: 100,004 × 3
## movie_title year genres
## <chr> <int> <fct>
## 1 Dangerous Minds 1995 Drama
## 2 Dumbo 1941 Animation|Children|Drama|Music…
## 3 Sleepers 1996 Thriller
## 4 Escape from New York 1981 Action|Adventure|Sci-Fi|Thrill…
## 5 Cinema Paradiso (Nuovo cinema Paradiso) 1989 Drama
## 6 Deer Hunter, The 1978 Drama|War
## 7 Ben-Hur 1959 Action|Adventure|Drama
## 8 Gandhi 1982 Drama
## 9 Dracula (Bram Stoker's Dracula) 1992 Fantasy|Horror|Romance|Thriller
## 10 Cape Fear 1991 Thriller
## # … with 99,994 more rows
# Keep only the rows whose `year` is 1995.
movielens %>%
  filter(year == 1995L)
## # A tibble: 6,635 × 7
## movieId title year genres userId rating times…¹
## <int> <chr> <int> <fct> <int> <dbl> <int>
## 1 31 Dangerous Minds 1995 Drama 1 2.5 1.26e9
## 2 10 GoldenEye 1995 Action|Adventure|… 2 4 8.35e8
## 3 17 Sense and Sensibility 1995 Drama|Romance 2 5 8.35e8
## 4 39 Clueless 1995 Comedy|Romance 2 5 8.35e8
## 5 47 Seven (a.k.a. Se7en) 1995 Mystery|Thriller 2 4 8.35e8
## 6 50 Usual Suspects, The 1995 Crime|Mystery|Thr… 2 4 8.35e8
## 7 52 Mighty Aphrodite 1995 Comedy|Drama|Roma… 2 3 8.35e8
## 8 62 Mr. Holland's Opus 1995 Drama 2 3 8.35e8
## 9 110 Braveheart 1995 Action|Drama|War 2 4 8.35e8
## 10 144 Brothers McMullen, The 1995 Comedy 2 3 8.35e8
## # … with 6,625 more rows, and abbreviated variable name ¹timestamp
# Rows from 1995 or 1996 whose genre label is exactly "Drama".
# Conditions separated by commas are combined with a logical AND.
movielens %>%
  filter(year %in% 1995:1996, genres == "Drama")
## # A tibble: 582 × 7
## movieId title year genres userId rating timestamp
## <int> <chr> <int> <fct> <int> <dbl> <int>
## 1 31 Dangerous Minds 1995 Drama 1 2.5 1260759144
## 2 62 Mr. Holland's Opus 1995 Drama 2 3 835355749
## 3 1358 Sling Blade 1996 Drama 6 2 1109258181
## 4 31 Dangerous Minds 1995 Drama 7 3 851868750
## 5 40 Cry, the Beloved Country 1995 Drama 7 4 851866901
## 6 1358 Sling Blade 1996 Drama 8 0.5 1154474527
## 7 26 Othello 1995 Drama 9 3 938628655
## 8 1358 Sling Blade 1996 Drama 9 4 938628450
## 9 1358 Sling Blade 1996 Drama 10 5 942766420
## 10 1423 Hearts and Minds 1996 Drama 10 4 942766420
## # … with 572 more rows
# Same filter as a two-step pipeline: after keeping only pure "Drama"
# rows from 1995-1996 the `genres` column is constant, so it is dropped.
movielens %>%
  filter(year %in% 1995:1996, genres == "Drama") %>%
  select(!genres)
## # A tibble: 582 × 6
## movieId title year userId rating timestamp
## <int> <chr> <int> <int> <dbl> <int>
## 1 31 Dangerous Minds 1995 1 2.5 1260759144
## 2 62 Mr. Holland's Opus 1995 2 3 835355749
## 3 1358 Sling Blade 1996 6 2 1109258181
## 4 31 Dangerous Minds 1995 7 3 851868750
## 5 40 Cry, the Beloved Country 1995 7 4 851866901
## 6 1358 Sling Blade 1996 8 0.5 1154474527
## 7 26 Othello 1995 9 3 938628655
## 8 1358 Sling Blade 1996 9 4 938628450
## 9 1358 Sling Blade 1996 10 5 942766420
## 10 1423 Hearts and Minds 1996 10 4 942766420
## # … with 572 more rows
# Convert `timestamp` (seconds counted from the 1970-01-01 origin) into a
# date-time column `ts`, then drop the raw integer column.
# The time zone is pinned so the printed clock times do not depend on the
# session's local zone; "Asia/Jakarta" (UTC+7) matches the times shown in
# the rendered output of this document.
movielens %>%
  mutate(
    ts = as.POSIXct(
      timestamp,
      origin = "1970-01-01",
      tz = "Asia/Jakarta"
    )
  ) %>%
  select(-timestamp)
## # A tibble: 100,004 × 7
## movieId title year genres userId rating ts
## <int> <chr> <int> <fct> <int> <dbl> <dttm>
## 1 31 Dangerous Minds 1995 Drama 1 2.5 2009-12-14 09:52:24
## 2 1029 Dumbo 1941 Anima… 1 3 2009-12-14 09:52:59
## 3 1061 Sleepers 1996 Thril… 1 3 2009-12-14 09:53:02
## 4 1129 Escape from New York 1981 Actio… 1 2 2009-12-14 09:53:05
## 5 1172 Cinema Paradiso (Nuov… 1989 Drama 1 4 2009-12-14 09:53:25
## 6 1263 Deer Hunter, The 1978 Drama… 1 2 2009-12-14 09:52:31
## 7 1287 Ben-Hur 1959 Actio… 1 2 2009-12-14 09:53:07
## 8 1293 Gandhi 1982 Drama 1 2 2009-12-14 09:52:28
## 9 1339 Dracula (Bram Stoker'… 1992 Fanta… 1 3.5 2009-12-14 09:52:05
## 10 1343 Cape Fear 1991 Thril… 1 2 2009-12-14 09:52:11
## # … with 99,994 more rows
# Flag every row whose genre label contains the literal text "Drama"
# (this also matches combined labels such as "Drama|War").
movielens %>%
  mutate(isDrama = grepl(pattern = "Drama", x = genres, fixed = TRUE))
## # A tibble: 100,004 × 8
## movieId title year genres userId rating times…¹ isDrama
## <int> <chr> <int> <fct> <int> <dbl> <int> <lgl>
## 1 31 Dangerous Minds 1995 Drama 1 2.5 1.26e9 TRUE
## 2 1029 Dumbo 1941 Anima… 1 3 1.26e9 TRUE
## 3 1061 Sleepers 1996 Thril… 1 3 1.26e9 FALSE
## 4 1129 Escape from New York 1981 Actio… 1 2 1.26e9 FALSE
## 5 1172 Cinema Paradiso (Nuovo ci… 1989 Drama 1 4 1.26e9 TRUE
## 6 1263 Deer Hunter, The 1978 Drama… 1 2 1.26e9 TRUE
## 7 1287 Ben-Hur 1959 Actio… 1 2 1.26e9 TRUE
## 8 1293 Gandhi 1982 Drama 1 2 1.26e9 TRUE
## 9 1339 Dracula (Bram Stoker's Dr… 1992 Fanta… 1 3.5 1.26e9 FALSE
## 10 1343 Cape Fear 1991 Thril… 1 2 1.26e9 FALSE
## # … with 99,994 more rows, and abbreviated variable name ¹timestamp
# Add both derived columns in a single mutate() call:
#   ts      - `timestamp` (seconds from the 1970-01-01 origin) as date-time
#   isDrama - TRUE when the genre label contains "Drama"
# The time zone is pinned so the printed clock times do not depend on the
# session's local zone; "Asia/Jakarta" (UTC+7) matches the times shown in
# the rendered output of this document.
movielens %>%
  mutate(
    ts = as.POSIXct(
      timestamp,
      origin = "1970-01-01",
      tz = "Asia/Jakarta"
    ),
    isDrama = grepl("Drama", genres)
  ) %>%
  select(-timestamp)
## # A tibble: 100,004 × 8
## movieId title year genres userId rating ts isDrama
## <int> <chr> <int> <fct> <int> <dbl> <dttm> <lgl>
## 1 31 Dangerous Min… 1995 Drama 1 2.5 2009-12-14 09:52:24 TRUE
## 2 1029 Dumbo 1941 Anima… 1 3 2009-12-14 09:52:59 TRUE
## 3 1061 Sleepers 1996 Thril… 1 3 2009-12-14 09:53:02 FALSE
## 4 1129 Escape from N… 1981 Actio… 1 2 2009-12-14 09:53:05 FALSE
## 5 1172 Cinema Paradi… 1989 Drama 1 4 2009-12-14 09:53:25 TRUE
## 6 1263 Deer Hunter, … 1978 Drama… 1 2 2009-12-14 09:52:31 TRUE
## 7 1287 Ben-Hur 1959 Actio… 1 2 2009-12-14 09:53:07 TRUE
## 8 1293 Gandhi 1982 Drama 1 2 2009-12-14 09:52:28 TRUE
## 9 1339 Dracula (Bram… 1992 Fanta… 1 3.5 2009-12-14 09:52:05 FALSE
## 10 1343 Cape Fear 1991 Thril… 1 2 2009-12-14 09:52:11 FALSE
## # … with 99,994 more rows
# Collapse the whole table into a single summary row:
# number of distinct titles, number of rows, and mean rating.
movielens %>%
  summarise(
    uniqueTitle = length(unique(title)),
    totalReview = n(),
    avgRating = mean(rating)
  )
## # A tibble: 1 × 3
## uniqueTitle totalReview avgRating
## <int> <int> <dbl>
## 1 8832 100004 3.54
# The same three statistics, computed once per release year
# (one output row per distinct `year`).
movielens %>%
  group_by(year) %>%
  summarise(
    uniqueTitle = length(unique(title)),
    totalReview = length(rating),
    avgRating = mean(rating)
  )
## # A tibble: 104 × 4
## year uniqueTitle totalReview avgRating
## <int> <int> <int> <dbl>
## 1 1902 1 6 4.33
## 2 1915 1 2 3
## 3 1916 1 1 3.5
## 4 1917 1 2 4.25
## 5 1918 1 2 4.25
## 6 1919 1 1 3
## 7 1920 3 15 3.7
## 8 1921 5 12 4.42
## 9 1922 6 28 3.80
## 10 1923 3 3 4.17
## # … with 94 more rows
# Attach the per-year statistics to every row (mutate keeps all rows,
# unlike summarise) for films released before 1920.
# Filtering on the grouping key first is equivalent to filtering last:
# each remaining year still contains all of its rows, so the per-year
# values are unchanged. The result stays grouped by `year`.
movielens %>%
  filter(year < 1920) %>%
  group_by(year) %>%
  mutate(
    uniqueTitle = n_distinct(title),
    totalReview = n(),
    avgRating = mean(rating)
  )
## # A tibble: 14 × 10
## # Groups: year [6]
## movieId title year genres userId rating times…¹ uniqu…² total…³ avgRa…⁴
## <int> <chr> <int> <fct> <int> <dbl> <int> <int> <int> <dbl>
## 1 7065 Birth of … 1915 Drama… 262 2.5 1.43e9 1 2 3
## 2 32898 Trip to t… 1902 Actio… 262 3 1.43e9 1 6 4.33
## 3 32898 Trip to t… 1902 Actio… 299 4.5 1.34e9 1 6 4.33
## 4 32898 Trip to t… 1902 Actio… 378 4 1.44e9 1 6 4.33
## 5 3309 Dog's Lif… 1918 Comedy 468 4.5 1.30e9 1 2 4.25
## 6 7065 Birth of … 1915 Drama… 468 3.5 1.30e9 1 2 3
## 7 8511 Immigrant… 1917 Comedy 468 4.5 1.30e9 1 2 4.25
## 8 32898 Trip to t… 1902 Actio… 468 4.5 1.30e9 1 6 4.33
## 9 62383 20,000 Le… 1916 Actio… 468 3.5 1.30e9 1 1 3.5
## 10 72626 Billy Bla… 1919 Comed… 468 3 1.30e9 1 1 3
## 11 32898 Trip to t… 1902 Actio… 481 5 1.44e9 1 6 4.33
## 12 32898 Trip to t… 1902 Actio… 547 5 1.43e9 1 6 4.33
## 13 3309 Dog's Lif… 1918 Comedy 554 4 1.01e9 1 2 4.25
## 14 8511 Immigrant… 1917 Comedy 648 4 1.18e9 1 2 4.25
## # … with abbreviated variable names ¹timestamp, ²uniqueTitle, ³totalReview,
## # ⁴avgRating
# Per-year statistics on every row for films released before 1920,
# listed in ascending order of release year.
# Filtering and sorting happen before grouping; mutate() preserves row
# order, so the result is the same grouped, year-sorted table.
movielens %>%
  filter(year < 1920) %>%
  arrange(year) %>%
  group_by(year) %>%
  mutate(
    uniqueTitle = n_distinct(title),
    totalReview = n(),
    avgRating = mean(rating)
  )
## # A tibble: 14 × 10
## # Groups: year [6]
## movieId title year genres userId rating times…¹ uniqu…² total…³ avgRa…⁴
## <int> <chr> <int> <fct> <int> <dbl> <int> <int> <int> <dbl>
## 1 32898 Trip to t… 1902 Actio… 262 3 1.43e9 1 6 4.33
## 2 32898 Trip to t… 1902 Actio… 299 4.5 1.34e9 1 6 4.33
## 3 32898 Trip to t… 1902 Actio… 378 4 1.44e9 1 6 4.33
## 4 32898 Trip to t… 1902 Actio… 468 4.5 1.30e9 1 6 4.33
## 5 32898 Trip to t… 1902 Actio… 481 5 1.44e9 1 6 4.33
## 6 32898 Trip to t… 1902 Actio… 547 5 1.43e9 1 6 4.33
## 7 7065 Birth of … 1915 Drama… 262 2.5 1.43e9 1 2 3
## 8 7065 Birth of … 1915 Drama… 468 3.5 1.30e9 1 2 3
## 9 62383 20,000 Le… 1916 Actio… 468 3.5 1.30e9 1 1 3.5
## 10 8511 Immigrant… 1917 Comedy 468 4.5 1.30e9 1 2 4.25
## 11 8511 Immigrant… 1917 Comedy 648 4 1.18e9 1 2 4.25
## 12 3309 Dog's Lif… 1918 Comedy 468 4.5 1.30e9 1 2 4.25
## 13 3309 Dog's Lif… 1918 Comedy 554 4 1.01e9 1 2 4.25
## 14 72626 Billy Bla… 1919 Comed… 468 3 1.30e9 1 1 3
## # … with abbreviated variable names ¹timestamp, ²uniqueTitle, ³totalReview,
## # ⁴avgRating
# For every title whose genre label contains "Drama", count how many
# ratings were given in the film's release year (`firstYear`) and how
# many in a later year (`nextYear`), then list the titles with the most
# release-year ratings first.
# The calendar year of a rating is read from `timestamp` (seconds from
# the 1970-01-01 origin). The time zone is pinned so that ratings close
# to a year boundary land in the same year on every machine;
# "Asia/Jakarta" (UTC+7) matches the offset visible in this document's
# rendered timestamps.
movielens %>%
  filter(grepl("Drama", genres)) %>%
  mutate(
    ratedAt = as.POSIXct(
      timestamp,
      origin = "1970-01-01",
      tz = "Asia/Jakarta"
    ),
    yearRating = as.numeric(format(ratedAt, "%Y")),
    firstYear = year == yearRating,
    nextYear = year < yearRating
  ) %>%
  group_by(title) %>%
  # Summing logical columns counts their TRUE values.
  summarise(firstYear = sum(firstYear), nextYear = sum(nextYear)) %>%
  arrange(desc(firstYear))
## # A tibble: 4,249 × 3
## title firstYear nextYear
## <chr> <int> <int>
## 1 Fargo 19 205
## 2 Gladiator 19 153
## 3 American Beauty 18 202
## 4 Blair Witch Project, The 18 68
## 5 Ex Machina 18 8
## 6 High Fidelity 18 70
## 7 Dark Knight, The 17 104
## 8 Sixth Sense, The 17 176
## 9 Erin Brockovich 16 69
## 10 Eraser 14 55
## # … with 4,239 more rows
# For every release year, pick the title with the highest mean rating;
# ties are broken by the larger number of ratings.
# `.groups = "drop"` states the grouping of the summary explicitly, so
# summarise() does not have to announce which grouping it kept.
movielens %>%
  group_by(year, title) %>%
  summarise(avgRating = mean(rating), nRating = n(), .groups = "drop") %>%
  # Sort so that the best title of each year comes first within its year.
  arrange(year, desc(avgRating), desc(nRating)) %>%
  group_by(year) %>%
  slice_head(n = 1) %>%
  ungroup() %>%
  select(-nRating)
## # A tibble: 104 × 3
## year title avgRating
## <int> <chr> <dbl>
## 1 1902 Trip to the Moon, A (Voyage dans la lune, Le) 4.33
## 2 1915 Birth of a Nation, The 3
## 3 1916 20,000 Leagues Under the Sea 3.5
## 4 1917 Immigrant, The 4.25
## 5 1918 Dog's Life, A 4.25
## 6 1919 Billy Blazes, Esq. 3
## 7 1920 Cabinet of Dr. Caligari, The (Cabinet des Dr. Caligari., Das) 4
## 8 1921 Goat, The 5
## 9 1922 Cops 5
## 10 1923 Our Hospitality 4.5
## # … with 94 more rows
DAFTAR PUSTAKA
Bache, Stefan Milton, and Hadley Wickham. 2020. Magrittr: A Forward-Pipe Operator for R. https://CRAN.R-project.org/package=magrittr.
Harper, F. Maxwell, and Joseph A. Konstan. 2015. “The MovieLens Datasets: History and Context.” ACM Trans. Interact. Intell. Syst. 5 (4). https://doi.org/10.1145/2827872.
Stobierski, Tim. 2021. “Data Wrangling: What It Is & Why It’s Important.” Harvard Business School Online. https://online.hbs.edu/blog/post/data-wrangling.
The OHI Team. 2019. “Introduction to Open Data Science.” Ocean Health Index. https://ohi-science.org/data-science-training/.
Wickham, Hadley, Mara Averick, Jennifer Bryan, Winston Chang, Lucy D’Agostino McGowan, Romain François, Garrett Grolemund, et al. 2019. “Welcome to the tidyverse.” Journal of Open Source Software 4 (43): 1686. https://doi.org/10.21105/joss.01686.
Wickham, Hadley, Romain François, Lionel Henry, and Kirill Müller. 2021a. “A Grammar of Data Manipulation: Dplyr.” RStudio. https://dplyr.tidyverse.org/.
———. 2021b. Dplyr: A Grammar of Data Manipulation. https://CRAN.R-project.org/package=dplyr.
Wickham, Hadley, and Garrett Grolemund. 2017. R for Data Science: Import, Tidy, Transform, Visualize, and Model Data. 1st ed. Paperback; O’Reilly Media. http://r4ds.had.co.nz/.