This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data <- read.csv("/Users/yashuvaishu/Downloads/Spotify.csv")
If we want we can take selected data but I am considering my full data.
''df_1 <-select(data1,trackName,artistName,msPlayed,energy,key,loudness)
print(df_1)
summary(df_1)
num_samples <- 7
# Create a list to store sub samples
sample_list <- list()
for (i in 1:num_samples) {
# Randomly select rows with replacement
sample_indices <- sample(1:nrow(data), size = nrow(data) * 0.5, replace = TRUE)
# Create a sub sample data frame
subsample_df <- data[sample_indices, ]
# Store it in the list
sample_list[[i]] <- subsample_df
}
for (col in 1:6) {
if (!is.null(sample_list[[col]])) {
print(head(sample_list[[col]]))
}
}
## trackName artistName msPlayed genre
## 6329 In Your Eyes The Weeknd 2380596 canadian contemporary r&b
## 810 Blue Bird Piano Kato 91118 anime lo-fi
## 1433 FAKE SMILE GREEN ORXNGE 145354 drift phonk
## 5943 Go Flex Post Malone 179613 dfw rap
## 4205 You Said Connor Price 3944172 singer-songwriter pop
## 4421 Alone in My Car NIKI DEMAR 89717 social media pop
## danceability energy key loudness speechiness valence tempo
## 6329 0.667 0.7210 7 -5.349 0.0330 0.703 100.018
## 810 0.313 0.0211 6 -28.785 0.0602 0.369 68.130
## 1433 0.594 0.9280 11 -3.080 0.0649 0.389 119.988
## 5943 0.665 0.4980 5 -8.185 0.0832 0.127 81.967
## 4205 0.726 0.4810 0 -8.254 0.2070 0.224 79.978
## 4421 0.625 0.5740 0 -7.343 0.0356 0.305 92.089
## id duration_ms
## 6329 5GxZOe2B3P8hPeaHiDfP3y 237522
## 810 6W4gN6gKT1FVAptTGJ6JJf 84500
## 1433 2vxSwL6IltooHhdhfrD2rH 145354
## 5943 5yuShbu70mtHXY0yLzCQLQ 179613
## 4205 2k5rWMAz9RvFS3k3kk3cc5 135000
## 4421 6zDdR5SUiRPmAhQIxnsbQU 193173
## trackName artistName msPlayed
## 3125 Rest Charlotte Gainsbourg 218720
## 3425 So Far Away (feat. Jamie Scott & Romy Dya) Martin Garrix 80580
## 5858 Foundations Curtis Schweitzer 204106
## 4301 A New Beginning Yasumu 143378
## 7192 PARAMOUR (feat. AURORA) Sub Urban 1448740
## 473 drivers license Olivia Rodrigo 2550439
## genre danceability energy key loudness speechiness valence
## 3125 art pop 0.629 0.538 9 -13.242 0.0438 0.3630
## 3425 dutch edm 0.526 0.520 6 -7.985 0.0569 0.1300
## 5858 indie game soundtrack 0.442 0.315 7 -20.382 0.0483 0.0706
## 4301 lo-fi study 0.695 0.287 0 -14.608 0.0499 0.3790
## 7192 modern indie pop 0.716 0.465 2 -7.818 0.2360 0.5750
## 473 pop 0.561 0.431 10 -8.810 0.0578 0.1370
## tempo id duration_ms
## 3125 91.988 3tayEO7e0Cv953xKHC1WvP 218720
## 3425 149.119 0OlnLZY4cmQzT6ZGttvWBM 183637
## 5858 120.063 4pIUe7nlQWBvQ7NzMCcrUC 204107
## 4301 128.028 0xtbVIWkbfu5G6TgCVmvVn 143379
## 7192 165.836 3PIla3hOLQxw5l6GW2jdZu 168434
## 473 143.875 5wANPM4fQCJwkGd4rN57mH 242013
## trackName artistName msPlayed
## 5312 Chori Chori Chupke Chupke Udit Narayan 1941488
## 8160 Traveling Alone Tom The Mail Man 160165
## 8364 What If StreamBeats Originals 212906
## 6003 Haan Tu Hain Pritam 10731
## 2286 Left and Right (Feat. Jung Kook of BTS) Charlie Puth 3025182
## 6516 Kusanagi ODESZA 208000
## genre danceability energy key loudness speechiness valence
## 5312 chutney 0.679 0.534 6 -9.398 0.0762 0.790
## 8160 modern indie pop 0.691 0.595 8 -6.865 0.0377 0.415
## 8364 game mood 0.396 0.686 0 -7.243 0.0374 0.391
## 6003 filmi 0.415 0.904 2 -3.586 0.1040 0.729
## 2286 pop 0.881 0.592 2 -4.898 0.0324 0.719
## 6516 chillwave 0.335 0.330 0 -8.302 0.0310 0.040
## tempo id duration_ms
## 5312 158.809 5QDJF339x2VaamzFq47J88 373133
## 8160 121.004 00c6I4e1BL1vqbD97KIh50 160165
## 8364 77.908 5hZZpyKrq8WB2jVlCv7GJU 212977
## 6003 83.648 4vgCpNUUcpEIBifidhQOnR 324880
## 2286 101.058 0mBP9X2gPCuapvpZ7TGDk3 154487
## 6516 75.070 3i2Q3cFdFzJ39nV9sAt3Pc 208000
## trackName artistName msPlayed genre danceability
## 3077 Randy Dandy-O The Longest Johns 1657 shanty 0.631
## 2855 Only For You Lund 457865 cloud rap 0.725
## 2436 Lucy Still Woozy 142274 bedroom pop 0.647
## 6148 Holiday HRVY 411796 pop 0.700
## 1800 Hell To The Stars Harry Hudson 221943 pop r&b 0.613
## 1580 Friends Chase Atlantic 287387 alternative r&b 0.422
## energy key loudness speechiness valence tempo id
## 3077 0.669 7 -4.171 0.1270 0.8800 110.094 5j1afatmPZ6MxqpLdSWAGn
## 2855 0.523 0 -5.258 0.0315 0.0948 130.006 3QXzwDjRimQIGqLgQqMMNG
## 2436 0.537 6 -12.006 0.2100 0.8460 84.460 5RxpYHVbGJPOvSEATQyg9P
## 6148 0.724 9 -4.812 0.0375 0.4880 128.028 3aSyt9938O2QVZfbkNFS4w
## 1800 0.550 2 -6.591 0.0255 0.2570 99.029 7zglIQAMFHMBJD7QSRii6C
## 1580 0.609 11 -6.361 0.0335 0.0911 107.340 2jQiSYrwJehQAcuaaQrXnS
## duration_ms
## 3077 126493
## 2855 228938
## 2436 142500
## 6148 207605
## 1800 221943
## 1580 230011
## trackName artistName msPlayed
## 6627 Line By Line (feat. Maren Morris) JP Saxe 206029
## 8203 Uma Thurman Fall Out Boy 226728
## 6538 La Traviata: Overture André Rieu 512711
## 1180 Dard Dilo Ke Shahid Mallya 28080
## 256 Baba O'Riley The Who 171815
## 7228 Party All Night Yo Yo Honey Singh 291248
## genre danceability energy key loudness speechiness valence
## 6627 alt z 0.645 0.486 0 -7.299 0.0425 0.4530
## 8203 emo 0.623 0.962 4 -2.616 0.0895 0.6760
## 6538 easy listening 0.262 0.166 11 -19.585 0.0403 0.0545
## 1180 filmi 0.587 0.415 7 -7.086 0.0270 0.3000
## 256 album rock 0.489 0.724 5 -8.367 0.0352 0.1500
## 7228 filmi 0.854 0.738 6 -5.388 0.2310 0.8040
## tempo id duration_ms
## 6627 72.536 2gA74HvN6NKFrhgzpd5oNE 208187
## 8203 149.963 5PUawWFG1oIS2NwEcyHaCr 211573
## 6538 122.740 5fXzyFs4FEGy9SAHrarvNW 213267
## 1180 86.066 3fMbCqlyxx5zKvyicIAGO3 304274
## 256 117.292 3qiyyUfYe7CRYLucrPmulD 300400
## 7228 129.989 0fGImK9uWSruIRhKGyjUC0 282793
## trackName artistName msPlayed
## 4546 Baba O'Riley - ConfidentialMX Remix The Who 69538
## 3722 The Hype Twenty One Pilots 194
## 6910 Moonlight XXXTENTACION 3050606
## 5521 Devil Barren Gates 533503
## 7974 Thank You Sasha Alex Sloan 311543
## 3984 WITHOUT A HEAD SPURIA 186057
## genre danceability energy key loudness speechiness valence
## 4546 album rock 0.293 0.369 5 -17.075 0.0425 0.221
## 3722 modern rock 0.530 0.816 9 -6.149 0.0503 0.371
## 6910 emo rap 0.921 0.537 9 -5.723 0.0804 0.711
## 5521 traprun 0.507 0.864 6 -1.421 0.1680 0.410
## 7974 alt z 0.341 0.519 7 -7.007 0.0615 0.221
## 3984 aggressive phonk 0.578 0.998 0 3.010 0.1550 0.547
## tempo id duration_ms
## 4546 117.652 5iq0Tela3gm73ZW72XZ5dm 153987
## 3722 87.019 7I3skNaQdvZSS7zXY2VHId 265373
## 6910 128.009 0JP9xo3adEtGSdUEISiszL 135090
## 5521 150.075 3lj59hy6FrRDyYNo8Onuud 176000
## 7974 189.764 63C9gvk9V2Qhipa73tegPS 190280
## 3984 102.480 1nhlcMuUhvvVY80i75ZI5k 120163
## Performing Summary ##
summary_data <- lapply(sample_list, function(subsample_df){
summary_df <-summary(subsample_df)
})
for (i in 1: num_samples){
cat("### Subsample", i, "summary statisics \n")
print(head(summary_data[i]))
}
## ### Subsample 1 summary statisics
## [[1]]
## trackName artistName msPlayed genre
## Length:4255 Length:4255 Min. : 0 Length:4255
## Class :character Class :character 1st Qu.: 141668 Class :character
## Mode :character Mode :character Median : 269586 Mode :character
## Mean : 1580681
## 3rd Qu.: 1230068
## Max. :158367130
## danceability energy key loudness
## Min. :0.0631 Min. :0.00256 Min. : 0.000 Min. :-38.793
## 1st Qu.:0.5060 1st Qu.:0.40800 1st Qu.: 2.000 1st Qu.: -9.803
## Median :0.6250 Median :0.58900 Median : 5.000 Median : -7.119
## Mean :0.6018 Mean :0.56629 Mean : 5.296 Mean : -8.542
## 3rd Qu.:0.7130 3rd Qu.:0.75100 3rd Qu.: 8.000 3rd Qu.: -5.332
## Max. :0.9760 Max. :0.99700 Max. :11.000 Max. : 1.106
## speechiness valence tempo id
## Min. :0.02370 Min. :0.0000 Min. : 36.73 Length:4255
## 1st Qu.:0.03610 1st Qu.:0.2420 1st Qu.: 95.96 Class :character
## Median :0.04730 Median :0.4100 Median :118.03 Mode :character
## Mean :0.07705 Mean :0.4383 Mean :118.49
## 3rd Qu.:0.08115 3rd Qu.:0.6200 3rd Qu.:138.50
## Max. :0.94100 Max. :0.9860 Max. :236.20
## duration_ms
## Min. : 17075
## 1st Qu.: 165102
## Median : 197355
## Mean : 205405
## 3rd Qu.: 230954
## Max. :1847210
##
## ### Subsample 2 summary statisics
## [[1]]
## trackName artistName msPlayed genre
## Length:4255 Length:4255 Min. : 0 Length:4255
## Class :character Class :character 1st Qu.: 139660 Class :character
## Mode :character Mode :character Median : 280040 Mode :character
## Mean : 1630994
## 3rd Qu.: 1327410
## Max. :158367130
## danceability energy key loudness
## Min. :0.0000 Min. :0.00108 Min. : 0.00 Min. :-42.044
## 1st Qu.:0.5100 1st Qu.:0.40900 1st Qu.: 2.00 1st Qu.: -9.931
## Median :0.6250 Median :0.59300 Median : 5.00 Median : -7.063
## Mean :0.6026 Mean :0.56420 Mean : 5.22 Mean : -8.619
## 3rd Qu.:0.7110 3rd Qu.:0.74400 3rd Qu.: 9.00 3rd Qu.: -5.338
## Max. :0.9760 Max. :0.99800 Max. :11.00 Max. : 3.010
## speechiness valence tempo id
## Min. :0.00000 Min. :0.0000 Min. : 0.00 Length:4255
## 1st Qu.:0.03630 1st Qu.:0.2450 1st Qu.: 96.88 Class :character
## Median :0.04860 Median :0.4150 Median :118.01 Mode :character
## Mean :0.07682 Mean :0.4387 Mean :118.76
## 3rd Qu.:0.08020 3rd Qu.:0.6185 3rd Qu.:138.95
## Max. :0.58700 Max. :0.9720 Max. :236.20
## duration_ms
## Min. : 10027
## 1st Qu.: 164262
## Median : 195240
## Mean : 203683
## 3rd Qu.: 229526
## Max. :1847210
##
## ### Subsample 3 summary statisics
## [[1]]
## trackName artistName msPlayed genre
## Length:4255 Length:4255 Min. : 0 Length:4255
## Class :character Class :character 1st Qu.: 136813 Class :character
## Mode :character Mode :character Median : 273382 Mode :character
## Mean : 1412892
## 3rd Qu.: 1307592
## Max. :158367130
## danceability energy key loudness
## Min. :0.0000 Min. :0.00281 Min. : 0.000 Min. :-39.219
## 1st Qu.:0.5020 1st Qu.:0.40550 1st Qu.: 2.000 1st Qu.:-10.090
## Median :0.6200 Median :0.58600 Median : 5.000 Median : -7.119
## Mean :0.6001 Mean :0.56225 Mean : 5.182 Mean : -8.615
## 3rd Qu.:0.7120 3rd Qu.:0.74200 3rd Qu.: 8.000 3rd Qu.: -5.294
## Max. :0.9650 Max. :0.99900 Max. :11.000 Max. : 1.106
## speechiness valence tempo id
## Min. :0.00000 Min. :0.0000 Min. : 0.00 Length:4255
## 1st Qu.:0.03570 1st Qu.:0.2390 1st Qu.: 97.97 Class :character
## Median :0.04730 Median :0.4150 Median :119.89 Mode :character
## Mean :0.07792 Mean :0.4368 Mean :119.56
## 3rd Qu.:0.07800 3rd Qu.:0.6210 3rd Qu.:139.82
## Max. :0.94100 Max. :0.9860 Max. :236.20
## duration_ms
## Min. : 10027
## 1st Qu.: 161266
## Median : 194227
## Mean : 203949
## 3rd Qu.: 230646
## Max. :1847210
##
## ### Subsample 4 summary statisics
## [[1]]
## trackName artistName msPlayed genre
## Length:4255 Length:4255 Min. : 0 Length:4255
## Class :character Class :character 1st Qu.: 141167 Class :character
## Mode :character Mode :character Median : 277714 Mode :character
## Mean : 1626080
## 3rd Qu.: 1178395
## Max. :158367130
## danceability energy key loudness
## Min. :0.0000 Min. :0.00256 Min. : 0.000 Min. :-38.222
## 1st Qu.:0.5100 1st Qu.:0.41200 1st Qu.: 2.000 1st Qu.: -9.965
## Median :0.6260 Median :0.59100 Median : 6.000 Median : -7.116
## Mean :0.6038 Mean :0.56840 Mean : 5.396 Mean : -8.510
## 3rd Qu.:0.7125 3rd Qu.:0.75400 3rd Qu.: 9.000 3rd Qu.: -5.271
## Max. :0.9650 Max. :0.99900 Max. :11.000 Max. : 3.010
## speechiness valence tempo id
## Min. :0.00000 Min. :0.0000 Min. : 0.00 Length:4255
## 1st Qu.:0.03610 1st Qu.:0.2440 1st Qu.: 97.97 Class :character
## Median :0.04730 Median :0.4150 Median :118.97 Mode :character
## Mean :0.07653 Mean :0.4397 Mean :118.93
## 3rd Qu.:0.07740 3rd Qu.:0.6240 3rd Qu.:138.72
## Max. :0.77700 Max. :0.9810 Max. :236.20
## duration_ms
## Min. : 17075
## 1st Qu.: 161878
## Median : 195692
## Mean : 204041
## 3rd Qu.: 230880
## Max. :1847210
##
## ### Subsample 5 summary statisics
## [[1]]
## trackName artistName msPlayed genre
## Length:4255 Length:4255 Min. : 0 Length:4255
## Class :character Class :character 1st Qu.: 141764 Class :character
## Mode :character Mode :character Median : 277714 Mode :character
## Mean : 1388617
## 3rd Qu.: 1143454
## Max. :102503285
## danceability energy key loudness
## Min. :0.0000 Min. :0.00108 Min. : 0.000 Min. :-42.044
## 1st Qu.:0.5155 1st Qu.:0.41050 1st Qu.: 2.000 1st Qu.:-10.058
## Median :0.6280 Median :0.58600 Median : 5.000 Median : -7.142
## Mean :0.6084 Mean :0.56512 Mean : 5.233 Mean : -8.582
## 3rd Qu.:0.7165 3rd Qu.:0.75200 3rd Qu.: 8.000 3rd Qu.: -5.322
## Max. :0.9760 Max. :0.99900 Max. :11.000 Max. : 1.106
## speechiness valence tempo id
## Min. :0.00000 Min. :0.0000 Min. : 0.00 Length:4255
## 1st Qu.:0.03620 1st Qu.:0.2470 1st Qu.: 97.09 Class :character
## Median :0.04830 Median :0.4160 Median :118.03 Mode :character
## Mean :0.07750 Mean :0.4438 Mean :118.84
## 3rd Qu.:0.08005 3rd Qu.:0.6390 3rd Qu.:138.03
## Max. :0.77700 Max. :0.9860 Max. :208.11
## duration_ms
## Min. : 10027
## 1st Qu.: 161947
## Median : 194267
## Mean : 203386
## 3rd Qu.: 229682
## Max. :1847210
##
## ### Subsample 6 summary statisics
## [[1]]
## trackName artistName msPlayed genre
## Length:4255 Length:4255 Min. : 0 Length:4255
## Class :character Class :character 1st Qu.: 137984 Class :character
## Mode :character Mode :character Median : 268630 Mode :character
## Mean : 1525876
## 3rd Qu.: 1205351
## Max. :158367130
## danceability energy key loudness
## Min. :0.0000 Min. :0.00256 Min. : 0.00 Min. :-39.219
## 1st Qu.:0.5075 1st Qu.:0.40800 1st Qu.: 2.00 1st Qu.: -9.975
## Median :0.6230 Median :0.59100 Median : 5.00 Median : -7.117
## Mean :0.6016 Mean :0.56658 Mean : 5.17 Mean : -8.580
## 3rd Qu.:0.7150 3rd Qu.:0.75500 3rd Qu.: 8.00 3rd Qu.: -5.282
## Max. :0.9630 Max. :0.99800 Max. :11.00 Max. : 3.010
## speechiness valence tempo id
## Min. :0.00000 Min. :0.0000 Min. : 0.00 Length:4255
## 1st Qu.:0.03590 1st Qu.:0.2385 1st Qu.: 97.95 Class :character
## Median :0.04830 Median :0.4110 Median :119.95 Mode :character
## Mean :0.07748 Mean :0.4349 Mean :119.51
## 3rd Qu.:0.08230 3rd Qu.:0.6135 3rd Qu.:139.86
## Max. :0.94100 Max. :0.9860 Max. :210.16
## duration_ms
## Min. : 10027
## 1st Qu.: 163133
## Median : 196787
## Mean : 203960
## 3rd Qu.: 231500
## Max. :1394312
##
## ### Subsample 7 summary statisics
## [[1]]
## trackName artistName msPlayed genre
## Length:4255 Length:4255 Min. : 0 Length:4255
## Class :character Class :character 1st Qu.: 136811 Class :character
## Mode :character Mode :character Median : 266375 Mode :character
## Mean : 1568773
## 3rd Qu.: 1089508
## Max. :158367130
## danceability energy key loudness
## Min. :0.0000 Min. :0.00108 Min. : 0.000 Min. :-42.044
## 1st Qu.:0.5020 1st Qu.:0.40600 1st Qu.: 2.000 1st Qu.:-10.034
## Median :0.6200 Median :0.59400 Median : 5.000 Median : -7.177
## Mean :0.5993 Mean :0.56556 Mean : 5.282 Mean : -8.633
## 3rd Qu.:0.7170 3rd Qu.:0.75300 3rd Qu.: 8.000 3rd Qu.: -5.303
## Max. :0.9760 Max. :0.99700 Max. :11.000 Max. : 1.106
## speechiness valence tempo id
## Min. :0.00000 Min. :0.0000 Min. : 0.00 Length:4255
## 1st Qu.:0.03630 1st Qu.:0.2460 1st Qu.: 97.51 Class :character
## Median :0.04770 Median :0.4100 Median :118.97 Mode :character
## Mean :0.07864 Mean :0.4395 Mean :119.16
## 3rd Qu.:0.08110 3rd Qu.:0.6290 3rd Qu.:138.96
## Max. :0.94100 Max. :0.9710 Max. :210.16
## duration_ms
## Min. : 10027
## 1st Qu.: 162890
## Median : 195952
## Mean : 203688
## 3rd Qu.: 233464
## Max. :1847210
summary_stats <- lapply( sample_list[[i]], summary)
Area <- lapply(sample_list, function(subsample_df) {
ggplot(subsample_df, aes(x = key, y= msPlayed )) +
geom_area(binwidth = 1, fill = 'steelblue', color = 'green') +
labs(title = "Area for Keys vs msPlayed", x = 'Value', y = 'Frequency')
})
## Warning in geom_area(binwidth = 1, fill = "steelblue", color = "green"): Ignoring unknown parameters: `binwidth`
## Ignoring unknown parameters: `binwidth`
## Ignoring unknown parameters: `binwidth`
## Ignoring unknown parameters: `binwidth`
## Ignoring unknown parameters: `binwidth`
## Ignoring unknown parameters: `binwidth`
## Ignoring unknown parameters: `binwidth`
for (i in 1:num_samples) {
cat("Subsample", i, "summary statistics:\n")
print(summary_stats[[i]])
print(Area[[i]])
}
## Subsample 1 summary statistics:
## Length Class Mode
## 4255 character character
## Subsample 2 summary statistics:
## Length Class Mode
## 4255 character character
## Subsample 3 summary statistics:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 136811 266375 1568773 1089508 158367130
## Subsample 4 summary statistics:
## Length Class Mode
## 4255 character character
## Subsample 5 summary statistics:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.5020 0.6200 0.5993 0.7170 0.9760
## Subsample 6 summary statistics:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00108 0.40600 0.59400 0.56556 0.75300 0.99700
## Subsample 7 summary statistics:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 2.000 5.000 5.282 8.000 11.000
means <- lapply(sample_list, function(subsample_df) {
})
sds <- lapply(sample_list, function(subsample_df) {
})
for (i in 1:num_samples) {
cat("Subsample", i, "Mean of Key:", means[[i]], "\n")
cat("Subsample", i, "varience of Key:", sds[[i]], "\n")
}
## Subsample 1 Mean of Key:
## Subsample 1 varience of Key:
## Subsample 2 Mean of Key:
## Subsample 2 varience of Key:
## Subsample 3 Mean of Key:
## Subsample 3 varience of Key:
## Subsample 4 Mean of Key:
## Subsample 4 varience of Key:
## Subsample 5 Mean of Key:
## Subsample 5 varience of Key:
## Subsample 6 Mean of Key:
## Subsample 6 varience of Key:
## Subsample 7 Mean of Key:
## Subsample 7 varience of Key:
The mean and variance of the sub samples has minimal to no deviation compared to other sub samples. This means that the data within the column is consistent.