# Memuat (load) package yang dibutuhkan

library(readr)
## Warning: package 'readr' was built under R version 3.6.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.3
## -- Attaching packages --------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.1     v dplyr   1.0.0
## v tibble  3.0.1     v stringr 1.4.0
## v tidyr   1.1.0     v forcats 0.5.0
## v purrr   0.3.4
## Warning: package 'ggplot2' was built under R version 3.6.3
## Warning: package 'tibble' was built under R version 3.6.3
## Warning: package 'tidyr' was built under R version 3.6.3
## Warning: package 'purrr' was built under R version 3.6.3
## Warning: package 'dplyr' was built under R version 3.6.3
## Warning: package 'stringr' was built under R version 3.6.3
## Warning: package 'forcats' was built under R version 3.6.3
## -- Conflicts ------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(skimr)
## Warning: package 'skimr' was built under R version 3.6.3
library(DataExplorer)
## Warning: package 'DataExplorer' was built under R version 3.6.3
library(tidyr)
library(dplyr)
library(ggplot2)
library(purrr)

# Load the Spotify dataset from its CSV file into the `spotify` tibble.
#
# The location defaults to the original author's local download folder. Set
# the SPOTIFY_CSV environment variable to point at another copy of the file so
# the script can run on a different machine without being edited.
library(readr)
spotify_csv <- Sys.getenv(
  "SPOTIFY_CSV",
  unset = "C:/Users/LENOVO/Downloads/edar-master/edar-master/data/spotify.csv"
)
spotify <- read_csv(spotify_csv)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   id = col_character(),
##   name = col_character(),
##   album.id = col_character(),
##   album.name = col_character(),
##   artist = col_character()
## )
## See spec(...) for full column specifications.
# Open the spreadsheet-style data viewer only in an interactive session:
# View() needs a GUI and can fail when the script is run in batch mode.
if (interactive()) {
  View(spotify)
}

#Lihat dataset spotify

# Dataset spotify terdiri dari 21 kolom dan 1.225 baris data. Saat dibaca, tipe datanya berupa karakter (5 kolom) dan numerik (16 kolom); kolom key, mode, dan time_signature kemudian diubah menjadi factor.

# Open the tidyr "pivot" vignette for reference. This starts the help browser,
# so it is limited to interactive sessions; print() makes the display explicit
# because the call now sits inside an `if` block.
if (interactive()) {
  print(vignette("pivot", package = "tidyr"))
}
## starting httpd help server ... done
# Compact column-by-column preview of the data: name, type and first values.
dplyr::glimpse(spotify)
## Rows: 1,225
## Columns: 21
## $ id                 <chr> "3I1JTx525DKElzlTYOBfZN", "0GxQ1A5L9xnMOytbP6eKB...
## $ name               <chr> "Best 4 U", "What Lovers Do (feat. SZA)", "Wait"...
## $ popularity         <dbl> 54, 74, 64, 58, 54, 55, 53, 54, 68, 53, 53, 55, ...
## $ album.id           <chr> "1Li4rADxSxjT2g4xqUcMYh", "1Li4rADxSxjT2g4xqUcMY...
## $ album.name         <chr> "Red Pill Blues (Deluxe)", "Red Pill Blues (Delu...
## $ album.total_tracks <dbl> 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, ...
## $ track_number       <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1...
## $ duration_ms        <dbl> 239751, 199849, 190642, 216930, 196120, 193603, ...
## $ danceability       <dbl> 0.526, 0.799, 0.655, 0.652, 0.759, 0.934, 0.812,...
## $ energy             <dbl> 0.608, 0.597, 0.603, 0.555, 0.604, 0.564, 0.670,...
## $ key                <dbl> 10, 5, 8, 9, 8, 11, 5, 10, 0, 0, 1, 0, 11, 7, 9,...
## $ loudness           <dbl> -5.776, -5.131, -5.014, -6.608, -6.663, -4.806, ...
## $ mode               <dbl> 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, ...
## $ speechiness        <dbl> 0.1690, 0.0611, 0.0555, 0.0320, 0.0510, 0.0638, ...
## $ acousticness       <dbl> 0.12700, 0.07880, 0.09590, 0.13700, 0.14100, 0.4...
## $ instrumentalness   <dbl> 0.00e+00, 5.66e-06, 0.00e+00, 2.18e-05, 0.00e+00...
## $ liveness           <dbl> 0.1130, 0.1000, 0.1070, 0.0900, 0.1490, 0.1010, ...
## $ valence            <dbl> 0.3720, 0.4190, 0.4520, 0.1070, 0.4180, 0.5430, ...
## $ tempo              <dbl> 93.311, 110.001, 126.088, 103.043, 121.096, 115....
## $ time_signature     <dbl> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...
## $ artist             <chr> "Maroon 5", "Maroon 5", "Maroon 5", "Maroon 5", ...
# Labels for the integer-coded musical features. Position i in each vector is
# the label for code i - 1 (key codes 0..11, mode codes 0..1).
key_labs <- c("c", "c#", "d", "d#", "e", "f",
              "f#", "g", "g#", "a", "a#", "b")
mode_labs <- c("minor", "major")

# Turn zero-based integer codes into a labelled factor.
#
# The levels are pinned to 0..(length(labels) - 1) so that each code always
# receives the same label, even when some codes never occur in the data.
# Without explicit levels, factor() assigns labels to the sorted values that
# happen to be present and fails when their count differs from the labels.
# A column that is already a factor is returned unchanged so this section can
# be re-run in the same session without turning every value into NA.
label_codes <- function(x, labels) {
  if (is.factor(x)) {
    return(x)
  }
  factor(x, levels = seq_along(labels) - 1L, labels = labels)
}

spotify <- spotify %>%
  mutate(
    time_signature = factor(time_signature),
    key = label_codes(key, key_labs),
    mode = label_codes(mode, mode_labs)
  )

# Per-column summary after the recoding (counts for factors, quantiles for
# numeric columns).
summary(spotify)
##       id                name             popularity      album.id        
##  Length:1225        Length:1225        Min.   : 0.00   Length:1225       
##  Class :character   Class :character   1st Qu.:21.00   Class :character  
##  Mode  :character   Mode  :character   Median :27.00   Mode  :character  
##                                        Mean   :29.95                     
##                                        3rd Qu.:36.00                     
##                                        Max.   :82.00                     
##                                                                          
##   album.name        album.total_tracks  track_number     duration_ms     
##  Length:1225        Min.   : 6.00      Min.   : 1.000   Min.   :   4000  
##  Class :character   1st Qu.:14.00      1st Qu.: 4.000   1st Qu.: 186786  
##  Mode  :character   Median :16.00      Median : 7.000   Median : 223733  
##                     Mean   :18.29      Mean   : 7.638   Mean   : 233450  
##                     3rd Qu.:22.00      3rd Qu.:11.000   3rd Qu.: 271600  
##                     Max.   :41.00      Max.   :30.000   Max.   :2054800  
##                                                                          
##   danceability        energy             key         loudness          mode    
##  Min.   :0.0000   Min.   :0.00174   d      :203   Min.   :-33.592   minor:384  
##  1st Qu.:0.3410   1st Qu.:0.48500   g      :162   1st Qu.:-10.853   major:841  
##  Median :0.5000   Median :0.69500   a      :161   Median : -7.701              
##  Mean   :0.4983   Mean   :0.64788   e      :146   Mean   : -8.606              
##  3rd Qu.:0.6560   3rd Qu.:0.84100   c      :128   3rd Qu.: -5.595              
##  Max.   :0.9460   Max.   :0.99600   f      : 87   Max.   : -1.872              
##                                     (Other):338                                
##   speechiness       acousticness    instrumentalness       liveness     
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.0000000   Min.   :0.0000  
##  1st Qu.:0.03580   1st Qu.:0.0330   1st Qu.:0.0000000   1st Qu.:0.1070  
##  Median :0.05080   Median :0.1450   Median :0.0000084   Median :0.2030  
##  Mean   :0.09702   Mean   :0.2664   Mean   :0.0485253   Mean   :0.3338  
##  3rd Qu.:0.08970   3rd Qu.:0.4340   3rd Qu.:0.0008080   3rd Qu.:0.4540  
##  Max.   :0.96400   Max.   :0.9840   Max.   :1.0000000   Max.   :1.0000  
##                                                                         
##     valence           tempo       time_signature    artist         
##  Min.   :0.0000   Min.   :  0.0   0:   4         Length:1225       
##  1st Qu.:0.2670   1st Qu.: 95.3   1:   4         Class :character  
##  Median :0.4190   Median :116.8   3: 120         Mode  :character  
##  Mean   :0.4454   Mean   :117.8   4:1080                           
##  3rd Qu.:0.6160   3rd Qu.:140.0   5:  17                           
##  Max.   :0.9730   Max.   :207.5                                    
## 
# Summary statistics for every column, reported per variable type.
# The data frame is passed directly rather than piped so the summary header
# keeps reporting the data name as `spotify`.
library("skimr")
skimr::skim(data = spotify)
## Data summary
## Name spotify
## Number of rows 1225
## Number of columns 21
## _______________________
## Column type frequency:
## character 5
## factor 3
## numeric 13
## ________________________
## Group variables None
##
## Variable type: character
##
## skim_variable n_missing complete_rate min max empty n_unique whitespace
## id 0 1 22 22 0 1225 0
## name 0 1 3 88 0 961 0
## album.id 0 1 22 22 0 76 0
## album.name 0 1 4 67 0 68 0
## artist 0 1 5 10 0 3 0
##
## Variable type: factor
##
## skim_variable n_missing complete_rate ordered n_unique top_counts
## key 0 1 FALSE 12 d: 203, g: 162, a: 161, e: 146
## mode 0 1 FALSE 2 maj: 841, min: 384
## time_signature 0 1 FALSE 5 4: 1080, 3: 120, 5: 17, 0: 4
##
## Variable type: numeric
##
## skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
## popularity 0 1 29.95 13.89 0.00 21.00 27.00 36.00 82.00 ▁▇▃▁▁
## album.total_tracks 0 1 18.29 6.87 6.00 14.00 16.00 22.00 41.00 ▅▇▃▂▁
## track_number 0 1 7.64 5.03 1.00 4.00 7.00 11.00 30.00 ▇▆▂▁▁
## duration_ms 0 1 233450.19 113887.88 4000.00 186786.00 223733.00 271600.00 2054800.00 ▇▁▁▁▁
## danceability 0 1 0.50 0.19 0.00 0.34 0.50 0.66 0.95 ▁▇▇▇▂
## energy 0 1 0.65 0.23 0.00 0.48 0.70 0.84 1.00 ▁▃▅▇▇
## loudness 0 1 -8.61 4.16 -33.59 -10.85 -7.70 -5.59 -1.87 ▁▁▁▆▇
## speechiness 0 1 0.10 0.16 0.00 0.04 0.05 0.09 0.96 ▇▁▁▁▁
## acousticness 0 1 0.27 0.28 0.00 0.03 0.14 0.43 0.98 ▇▂▁▂▁
## instrumentalness 0 1 0.05 0.18 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
## liveness 0 1 0.33 0.30 0.00 0.11 0.20 0.45 1.00 ▇▃▁▁▂
## valence 0 1 0.45 0.24 0.00 0.27 0.42 0.62 0.97 ▅▇▇▆▃
## tempo 0 1 117.85 30.07 0.00 95.30 116.77 139.95 207.55 ▁▂▇▅▁
# Overview charts for the dataset. Original author's reading of them: spotify
# has no missing columns and no missing observations.
# The bare ggplot(spotify) call that used to precede these plots had no
# aesthetics or layers, so it only rendered an empty panel; it is dropped.
plot_intro(spotify)

plot_missing(spotify)

# Bar charts of the discrete columns. Original author's reading of the charts:
# the most frequent key is d, the most frequent mode is major, the most
# frequent time signature is 4, and the artist with the most tracks is Queen.
plot_bar(data = spotify)
## 4 columns ignored with more than 50 categories.
## id: 1225 categories
## name: 961 categories
## album.id: 76 categories
## album.name: 68 categories

# Histograms of the continuous columns. Original author's notes on the highest
# value seen in each panel (NOTE(review): these look like tallest-bar counts
# read off the plots; not re-verified):
#   acousticness ~200, album.total_tracks ~250, danceability ~80,
#   duration_ms above 400, energy ~100, instrumentalness ~900, liveness ~200,
#   loudness 150 and up, popularity ~150, speechiness below 600,
#   tempo above 100, track_number ~100, valence ~80.
plot_histogram(data = spotify)

# Correlation plot across the dataset's features. High-cardinality columns are
# skipped by the function itself, as reported in the message that follows.
plot_correlation(data = spotify)
## 4 features with more than 20 categories ignored!
## id: 1225 categories
## name: 961 categories
## album.id: 76 categories
## album.name: 68 categories