# Load the required packages
library(readr)
## Warning: package 'readr' was built under R version 3.6.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.3
## -- Attaching packages --------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.1 v dplyr 1.0.0
## v tibble 3.0.1 v stringr 1.4.0
## v tidyr 1.1.0 v forcats 0.5.0
## v purrr 0.3.4
## Warning: package 'ggplot2' was built under R version 3.6.3
## Warning: package 'tibble' was built under R version 3.6.3
## Warning: package 'tidyr' was built under R version 3.6.3
## Warning: package 'purrr' was built under R version 3.6.3
## Warning: package 'dplyr' was built under R version 3.6.3
## Warning: package 'stringr' was built under R version 3.6.3
## Warning: package 'forcats' was built under R version 3.6.3
## -- Conflicts ------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(skimr)
## Warning: package 'skimr' was built under R version 3.6.3
library(DataExplorer)
## Warning: package 'DataExplorer' was built under R version 3.6.3
library(tidyr)
library(dplyr)
library(ggplot2)
library(purrr)
# Read the Spotify data set from its CSV file into `spotify`.
# NOTE(review): the path is an absolute, machine-specific location; the script
# only runs as-is where this file exists at exactly this path.
library(readr)
spotify <- read_csv(
  file = "C:/Users/LENOVO/Downloads/edar-master/edar-master/data/spotify.csv"
)
## Parsed with column specification:
## cols(
## .default = col_double(),
## id = col_character(),
## name = col_character(),
## album.id = col_character(),
## album.name = col_character(),
## artist = col_character()
## )
## See spec(...) for full column specifications.
# Open the data in the spreadsheet-style data viewer.
View(spotify)
# Look at the spotify data set.
# The spotify data has 21 columns and 1,225 rows, made up of character and
# numeric columns (see the glimpse() output below).
# NOTE(review): the original note here also described a data set of factor and
# numeric columns with 10 variables/features and 1168 records; that does not
# match this file and looks like a leftover from another data set.
# Open the tidyr "pivot" vignette in the help browser.
# NOTE(review): nothing in the visible script pivots the data - confirm this
# call is still wanted.
vignette("pivot", package = "tidyr")
## starting httpd help server ... done
# Compact column-by-column overview: name, type and first values of each column.
glimpse(spotify)
## Rows: 1,225
## Columns: 21
## $ id <chr> "3I1JTx525DKElzlTYOBfZN", "0GxQ1A5L9xnMOytbP6eKB...
## $ name <chr> "Best 4 U", "What Lovers Do (feat. SZA)", "Wait"...
## $ popularity <dbl> 54, 74, 64, 58, 54, 55, 53, 54, 68, 53, 53, 55, ...
## $ album.id <chr> "1Li4rADxSxjT2g4xqUcMYh", "1Li4rADxSxjT2g4xqUcMY...
## $ album.name <chr> "Red Pill Blues (Deluxe)", "Red Pill Blues (Delu...
## $ album.total_tracks <dbl> 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, ...
## $ track_number <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1...
## $ duration_ms <dbl> 239751, 199849, 190642, 216930, 196120, 193603, ...
## $ danceability <dbl> 0.526, 0.799, 0.655, 0.652, 0.759, 0.934, 0.812,...
## $ energy <dbl> 0.608, 0.597, 0.603, 0.555, 0.604, 0.564, 0.670,...
## $ key <dbl> 10, 5, 8, 9, 8, 11, 5, 10, 0, 0, 1, 0, 11, 7, 9,...
## $ loudness <dbl> -5.776, -5.131, -5.014, -6.608, -6.663, -4.806, ...
## $ mode <dbl> 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, ...
## $ speechiness <dbl> 0.1690, 0.0611, 0.0555, 0.0320, 0.0510, 0.0638, ...
## $ acousticness <dbl> 0.12700, 0.07880, 0.09590, 0.13700, 0.14100, 0.4...
## $ instrumentalness <dbl> 0.00e+00, 5.66e-06, 0.00e+00, 2.18e-05, 0.00e+00...
## $ liveness <dbl> 0.1130, 0.1000, 0.1070, 0.0900, 0.1490, 0.1010, ...
## $ valence <dbl> 0.3720, 0.4190, 0.4520, 0.1070, 0.4180, 0.5430, ...
## $ tempo <dbl> 93.311, 110.001, 126.088, 103.043, 121.096, 115....
## $ time_signature <dbl> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...
## $ artist <chr> "Maroon 5", "Maroon 5", "Maroon 5", "Maroon 5", ...
# Recode the numerically coded musical attributes as labelled factors.
# In this data `key` takes the values 0-11 and `mode` the values 0/1 (see the
# glimpse() output); the labels below are listed in that numeric order
# (0 = c, 1 = c#, ..., 11 = b; 0 = minor, 1 = major).
key_labs <- c("c", "c#", "d", "d#", "e", "f",
              "f#", "g", "g#", "a", "a#", "b")
mode_labs <- c("minor", "major")

# The levels are given explicitly so that every label stays tied to its numeric
# code. Without `levels`, factor() derives them from the values that happen to
# be present, so a data set lacking one key or one mode would no longer match
# the number of labels and factor() would stop with an "invalid 'labels'"
# error. Codes outside the listed levels become NA instead.
spotify <- spotify %>%
  mutate(
    time_signature = factor(time_signature),
    key = factor(key, levels = 0:11, labels = key_labs),
    mode = factor(mode, levels = 0:1, labels = mode_labs)
  )

# Per-column summary of the recoded data.
summary(spotify)
## id name popularity album.id
## Length:1225 Length:1225 Min. : 0.00 Length:1225
## Class :character Class :character 1st Qu.:21.00 Class :character
## Mode :character Mode :character Median :27.00 Mode :character
## Mean :29.95
## 3rd Qu.:36.00
## Max. :82.00
##
## album.name album.total_tracks track_number duration_ms
## Length:1225 Min. : 6.00 Min. : 1.000 Min. : 4000
## Class :character 1st Qu.:14.00 1st Qu.: 4.000 1st Qu.: 186786
## Mode :character Median :16.00 Median : 7.000 Median : 223733
## Mean :18.29 Mean : 7.638 Mean : 233450
## 3rd Qu.:22.00 3rd Qu.:11.000 3rd Qu.: 271600
## Max. :41.00 Max. :30.000 Max. :2054800
##
## danceability energy key loudness mode
## Min. :0.0000 Min. :0.00174 d :203 Min. :-33.592 minor:384
## 1st Qu.:0.3410 1st Qu.:0.48500 g :162 1st Qu.:-10.853 major:841
## Median :0.5000 Median :0.69500 a :161 Median : -7.701
## Mean :0.4983 Mean :0.64788 e :146 Mean : -8.606
## 3rd Qu.:0.6560 3rd Qu.:0.84100 c :128 3rd Qu.: -5.595
## Max. :0.9460 Max. :0.99600 f : 87 Max. : -1.872
## (Other):338
## speechiness acousticness instrumentalness liveness
## Min. :0.00000 Min. :0.0000 Min. :0.0000000 Min. :0.0000
## 1st Qu.:0.03580 1st Qu.:0.0330 1st Qu.:0.0000000 1st Qu.:0.1070
## Median :0.05080 Median :0.1450 Median :0.0000084 Median :0.2030
## Mean :0.09702 Mean :0.2664 Mean :0.0485253 Mean :0.3338
## 3rd Qu.:0.08970 3rd Qu.:0.4340 3rd Qu.:0.0008080 3rd Qu.:0.4540
## Max. :0.96400 Max. :0.9840 Max. :1.0000000 Max. :1.0000
##
## valence tempo time_signature artist
## Min. :0.0000 Min. : 0.0 0: 4 Length:1225
## 1st Qu.:0.2670 1st Qu.: 95.3 1: 4 Class :character
## Median :0.4190 Median :116.8 3: 120 Mode :character
## Mean :0.4454 Mean :117.8 4:1080
## 3rd Qu.:0.6160 3rd Qu.:140.0 5: 17
## Max. :0.9730 Max. :207.5
##
# Detailed per-column summary of the data, grouped by column type
# (character, factor, numeric).
# skimr is already attached at the top of the script; repeating library() here
# is harmless.
library(skimr)
skim(spotify)
| Name | spotify |
| Number of rows | 1225 |
| Number of columns | 21 |
| _______________________ | |
| Column type frequency: | |
| character | 5 |
| factor | 3 |
| numeric | 13 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| id | 0 | 1 | 22 | 22 | 0 | 1225 | 0 |
| name | 0 | 1 | 3 | 88 | 0 | 961 | 0 |
| album.id | 0 | 1 | 22 | 22 | 0 | 76 | 0 |
| album.name | 0 | 1 | 4 | 67 | 0 | 68 | 0 |
| artist | 0 | 1 | 5 | 10 | 0 | 3 | 0 |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| key | 0 | 1 | FALSE | 12 | d: 203, g: 162, a: 161, e: 146 |
| mode | 0 | 1 | FALSE | 2 | maj: 841, min: 384 |
| time_signature | 0 | 1 | FALSE | 5 | 4: 1080, 3: 120, 5: 17, 0: 4 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| popularity | 0 | 1 | 29.95 | 13.89 | 0.00 | 21.00 | 27.00 | 36.00 | 82.00 | ▁▇▃▁▁ |
| album.total_tracks | 0 | 1 | 18.29 | 6.87 | 6.00 | 14.00 | 16.00 | 22.00 | 41.00 | ▅▇▃▂▁ |
| track_number | 0 | 1 | 7.64 | 5.03 | 1.00 | 4.00 | 7.00 | 11.00 | 30.00 | ▇▆▂▁▁ |
| duration_ms | 0 | 1 | 233450.19 | 113887.88 | 4000.00 | 186786.00 | 223733.00 | 271600.00 | 2054800.00 | ▇▁▁▁▁ |
| danceability | 0 | 1 | 0.50 | 0.19 | 0.00 | 0.34 | 0.50 | 0.66 | 0.95 | ▁▇▇▇▂ |
| energy | 0 | 1 | 0.65 | 0.23 | 0.00 | 0.48 | 0.70 | 0.84 | 1.00 | ▁▃▅▇▇ |
| loudness | 0 | 1 | -8.61 | 4.16 | -33.59 | -10.85 | -7.70 | -5.59 | -1.87 | ▁▁▁▆▇ |
| speechiness | 0 | 1 | 0.10 | 0.16 | 0.00 | 0.04 | 0.05 | 0.09 | 0.96 | ▇▁▁▁▁ |
| acousticness | 0 | 1 | 0.27 | 0.28 | 0.00 | 0.03 | 0.14 | 0.43 | 0.98 | ▇▂▁▂▁ |
| instrumentalness | 0 | 1 | 0.05 | 0.18 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| liveness | 0 | 1 | 0.33 | 0.30 | 0.00 | 0.11 | 0.20 | 0.45 | 1.00 | ▇▃▁▁▂ |
| valence | 0 | 1 | 0.45 | 0.24 | 0.00 | 0.27 | 0.42 | 0.62 | 0.97 | ▅▇▇▆▃ |
| tempo | 0 | 1 | 117.85 | 30.07 | 0.00 | 95.30 | 116.77 | 139.95 | 207.55 | ▁▂▇▅▁ |
# The overview charts for the spotify data show no missing columns and no
# missing observations.
# NOTE(review): ggplot(spotify) is called without aesthetics or layers, so it
# presumably draws only an empty plot - confirm whether it is needed.
ggplot(spotify)
plot_intro(spotify)
plot_missing(spotify)
# Bar charts of the categorical columns: the most frequent key is d, the most
# frequent mode is major, the most frequent time signature is 4, and the artist
# with the most tracks is the band Queen (author's reading of the artist chart).
plot_bar(spotify)
## 4 columns ignored with more than 50 categories.
## id: 1225 categories
## name: 961 categories
## album.id: 76 categories
## album.name: 68 categories
# Highest values read off the histograms - presumably the height (track count)
# of the tallest bar rather than a value of the variable, since several of
# these variables only range from 0 to 1; TODO confirm against the plots:
#   acousticness 200, album.total_tracks 250, danceability 80,
#   duration_ms more than 400, energy 100, instrumentalness 900,
#   liveness 200, loudness 150 and above, popularity 150,
#   speechiness less than 600, tempo above 100, track_number 100,
#   valence 80.
plot_histogram(spotify)
# Correlation heatmap of the features.
plot_correlation(spotify)
## 4 features with more than 20 categories ignored!
## id: 1225 categories
## name: 961 categories
## album.id: 76 categories
## album.name: 68 categories