This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Load the songs dataset from a local CSV export into a data frame.
# NOTE(review): in_deezer_playlists and in_shazam_charts load as character
# (values such as "3,631" contain thousands separators) - confirm whether they
# should be converted to numeric before any analysis that uses them.
data <- read.csv("C:/Users/java/Downloads/Book1.csv")
##Summary
# Print per-column summary statistics: min/quartiles/mean/max for numeric
# columns, and length/class/mode for character columns.
summary(data)
## track_name artist.s._name artist_count released_year
## Length:167 Length:167 Min. :1.000 Min. :1975
## Class :character Class :character 1st Qu.:1.000 1st Qu.:2018
## Mode :character Mode :character Median :1.000 Median :2022
## Mean :1.587 Mean :2019
## 3rd Qu.:2.000 3rd Qu.:2023
## Max. :8.000 Max. :2023
## released_month released_day in_spotify_playlists in_spotify_charts
## Min. : 1.000 Min. : 1.00 Min. : 31 Min. : 6.00
## 1st Qu.: 3.000 1st Qu.: 6.00 1st Qu.: 872 1st Qu.: 24.50
## Median : 5.000 Median :14.00 Median : 2988 Median : 38.00
## Mean : 5.419 Mean :14.07 Mean : 6878 Mean : 42.68
## 3rd Qu.: 7.000 3rd Qu.:22.00 3rd Qu.: 8477 3rd Qu.: 52.50
## Max. :12.000 Max. :31.00 Max. :43899 Max. :147.00
## streams in_apple_playlists in_apple_charts in_deezer_playlists
## Min. :2.762e+03 Min. : 0.00 Min. : 0.0 Length:167
## 1st Qu.:1.208e+08 1st Qu.: 21.50 1st Qu.: 69.0 Class :character
## Median :4.117e+08 Median : 60.00 Median :105.0 Mode :character
## Mean :7.171e+08 Mean : 94.52 Mean :100.4
## 3rd Qu.:1.113e+09 3rd Qu.:110.00 3rd Qu.:124.5
## Max. :3.704e+09 Max. :672.00 Max. :263.0
## in_deezer_charts in_shazam_charts bpm key
## Min. : 0.000 Length:167 Min. : 67.0 Length:167
## 1st Qu.: 1.000 Class :character 1st Qu.:103.0 Class :character
## Median : 4.000 Mode :character Median :125.0 Mode :character
## Mean : 7.976 Mean :125.6
## 3rd Qu.:12.000 3rd Qu.:140.0
## Max. :58.000 Max. :206.0
## mode danceability_. valence_. energy_.
## Length:167 Min. :34.00 Min. :10.00 Min. : 9.00
## Class :character 1st Qu.:56.00 1st Qu.:36.00 1st Qu.:58.50
## Mode :character Median :67.00 Median :52.00 Median :68.00
## Mean :66.99 Mean :53.47 Mean :66.76
## 3rd Qu.:78.00 3rd Qu.:72.50 3rd Qu.:77.50
## Max. :93.00 Max. :96.00 Max. :97.00
## acousticness_. instrumentalness_. liveness_. speechiness_.
## Min. : 0.00 Min. : 0.000 Min. : 3.00 Min. : 2.000
## 1st Qu.: 6.00 1st Qu.: 0.000 1st Qu.:10.00 1st Qu.: 4.000
## Median :16.00 Median : 0.000 Median :12.00 Median : 5.000
## Mean :24.34 Mean : 1.228 Mean :17.19 Mean : 7.365
## 3rd Qu.:38.50 3rd Qu.: 0.000 3rd Qu.:23.00 3rd Qu.: 7.000
## Max. :96.00 Max. :63.000 Max. :83.00 Max. :34.000
##structure
# Print a compact overview of the data frame: its dimensions plus the type and
# first few values of every column.
str(data)
## 'data.frame': 167 obs. of 24 variables:
## $ track_name : chr "Seven (feat. Latto) (Explicit Ver.)" "LALA" "vampire" "Cruel Summer" ...
## $ artist.s._name : chr "Latto, Jung Kook" "Myke Towers" "Olivia Rodrigo" "Taylor Swift" ...
## $ artist_count : int 2 1 1 1 1 2 2 1 1 2 ...
## $ released_year : int 2023 2023 2023 2019 2023 2023 2023 2023 2023 2023 ...
## $ released_month : int 7 3 6 8 5 6 3 7 5 3 ...
## $ released_day : int 14 23 30 23 18 1 16 7 15 17 ...
## $ in_spotify_playlists: int 553 1474 1397 7858 3133 2186 3090 714 1096 2953 ...
## $ in_spotify_charts : int 147 48 113 100 50 91 50 43 83 44 ...
## $ streams : num 1.41e+08 1.34e+08 1.40e+08 8.01e+08 3.03e+08 ...
## $ in_apple_playlists : int 43 48 94 116 84 67 34 25 60 49 ...
## $ in_apple_charts : int 263 126 207 207 133 213 222 89 210 110 ...
## $ in_deezer_playlists : chr "45" "58" "91" "125" ...
## $ in_deezer_charts : int 10 14 14 12 15 17 13 13 11 13 ...
## $ in_shazam_charts : chr "826" "382" "949" "548" ...
## $ bpm : int 125 92 138 170 144 141 148 100 130 170 ...
## $ key : chr "B" "C#" "F" "A" ...
## $ mode : chr "Major" "Major" "Major" "Major" ...
## $ danceability_. : int 80 71 51 55 65 92 67 67 85 81 ...
## $ valence_. : int 89 61 32 58 23 66 83 26 22 56 ...
## $ energy_. : int 83 74 53 72 80 58 76 71 62 48 ...
## $ acousticness_. : int 31 7 17 11 14 19 48 37 12 21 ...
## $ instrumentalness_. : int 0 0 0 0 63 0 0 0 0 0 ...
## $ liveness_. : int 8 10 31 11 11 8 8 11 28 8 ...
## $ speechiness_. : int 4 4 6 15 6 24 3 4 9 33 ...
# Preview the first six rows of the data frame.
head(data)
## track_name artist.s._name artist_count
## 1 Seven (feat. Latto) (Explicit Ver.) Latto, Jung Kook 2
## 2 LALA Myke Towers 1
## 3 vampire Olivia Rodrigo 1
## 4 Cruel Summer Taylor Swift 1
## 5 WHERE SHE GOES Bad Bunny 1
## 6 Sprinter Dave, Central Cee 2
## released_year released_month released_day in_spotify_playlists
## 1 2023 7 14 553
## 2 2023 3 23 1474
## 3 2023 6 30 1397
## 4 2019 8 23 7858
## 5 2023 5 18 3133
## 6 2023 6 1 2186
## in_spotify_charts streams in_apple_playlists in_apple_charts
## 1 147 141381703 43 263
## 2 48 133716286 48 126
## 3 113 140003974 94 207
## 4 100 800840817 116 207
## 5 50 303236322 84 133
## 6 91 183706234 67 213
## in_deezer_playlists in_deezer_charts in_shazam_charts bpm key mode
## 1 45 10 826 125 B Major
## 2 58 14 382 92 C# Major
## 3 91 14 949 138 F Major
## 4 125 12 548 170 A Major
## 5 87 15 425 144 A Minor
## 6 88 17 946 141 C# Major
## danceability_. valence_. energy_. acousticness_. instrumentalness_.
## 1 80 89 83 31 0
## 2 71 61 74 7 0
## 3 51 32 53 17 0
## 4 55 58 72 11 0
## 5 65 23 80 14 63
## 6 92 66 58 19 0
## liveness_. speechiness_.
## 1 8 4
## 2 10 4
## 3 31 6
## 4 11 15
## 5 11 6
## 6 8 24
# Preview the last six rows of the data frame.
tail(data)
## track_name artist.s._name artist_count
## 162 Gasolina Daddy Yankee 1
## 163 One Dance Drake, WizKid, Kyla 3
## 164 Enchanted Taylor Swift 1
## 165 Save Your Tears The Weeknd 1
## 166 Sure Thing Miguel 1
## 167 Every Breath You Take - Remastered 2003 The Police 1
## released_year released_month released_day in_spotify_playlists
## 162 2004 7 13 6457
## 163 2016 4 4 43257
## 164 2010 1 1 4564
## 165 2020 3 20 12688
## 166 2010 5 25 13801
## 167 1983 1 6 22439
## in_spotify_charts streams in_apple_playlists in_apple_charts
## 162 18 657723613 98 95
## 163 24 2713922350 433 107
## 164 16 621660989 24 101
## 165 13 1591223784 197 115
## 166 19 950906471 137 125
## 167 19 1593270737 211 74
## in_deezer_playlists in_deezer_charts in_shazam_charts bpm key mode
## 162 453 0 454 96 Major
## 163 3,631 0 26 104 C# Major
## 164 113 0 40 164 G# Major
## 165 112 0 200 118 Major
## 166 435 6 285 81 B Minor
## 167 929 0 129 117 C# Major
## danceability_. valence_. energy_. acousticness_. instrumentalness_.
## 162 86 74 80 33 0
## 163 77 36 63 1 0
## 164 45 24 62 8 0
## 165 68 61 82 2 0
## 166 68 51 60 3 0
## 167 82 73 45 54 0
## liveness_. speechiness_.
## 162 8 6
## 163 36 5
## 164 16 3
## 165 50 3
## 166 19 10
## 167 7 3
library(ggplot2)
# Data layer: attach the data frame to an otherwise empty plot and give it a
# title. No aesthetics or geoms yet, so only a blank titled panel is drawn.
ggplot(data = data) +
  labs(title = "most streamed songs plot")
# Aesthetic layer: map chart count to x, playlist count to y and bpm to colour.
# Still no geom, so the axes are drawn but no points appear.
ggplot(
  data = data,
  mapping = aes(
    x = in_spotify_charts,
    y = in_spotify_playlists,
    colour = bpm
  )
) +
  labs(title = "most streamed songs")
# Geometric layer: scatter plot of playlist count against chart count, with
# each point coloured by bpm on a continuous scale.
ggplot(
  data = data,
  mapping = aes(x = in_spotify_charts, y = in_spotify_playlists, colour = bpm)
) +
  geom_point() +
  labs(
    title = "in_spotify_playlists vs in_spotify_charts",
    x = "in_spotify_charts",
    y = "in_spotify_playlists"
  )
# Same scatter plot, but bpm is encoded as point size instead of colour.
ggplot(
  data = data,
  mapping = aes(x = in_spotify_charts, y = in_spotify_playlists, size = bpm)
) +
  geom_point() +
  labs(
    title = "in_spotify_playlists vs in_spotify_charts",
    x = "in_spotify_charts",
    y = "in_spotify_playlists"
  )
# Scatter plot of playlist count against chart count, coloured by bpm (treated
# as a discrete factor) and shaped by release month.
#
# ggplot2's default shape palette only covers 6 discrete values. With 12
# release months the remaining levels got no shape and their points were
# dropped (the previous render warned that 55 rows were removed). Supplying an
# explicit 12-value shape scale keeps every song on the plot.
ggplot(
  data = data,
  mapping = aes(
    x = in_spotify_charts,
    y = in_spotify_playlists,
    col = factor(bpm),
    shape = factor(released_month)
  )
) +
  geom_point() +
  # One distinct point shape per calendar month (R plotting symbols 1 to 12).
  scale_shape_manual(values = 1:12) +
  labs(
    title = " in_spotify_playlists vs in_spotify_charts",
    x = "in_spotify_charts",
    y = "in_spotify_playlists"
  )
# Histogram plot: distribution of the number of Spotify playlists per song.
#
# in_spotify_playlists ranges from 31 to 43899 (see the summary output), so a
# bin width of 5 produced thousands of one-song slivers whose black outlines
# hid the shape of the distribution. A width of 1000 gives roughly 45 bins.
ggplot(data = data, mapping = aes(x = in_spotify_playlists)) +
  geom_histogram(binwidth = 1000, color = "black", fill = "lightblue") +
  labs(
    title = "Histogram of in_spotify_playlists",
    x = "in_spotify_playlists",
    y = "Count"
  )
# Pie chart of the share of songs released in each month.
# carb: song count per release month; share: rounded percentage per month;
# data.labels: "<month> <share>%" text drawn next to each slice.
carb <- table(data$released_month)
share <- round(carb / sum(carb) * 100)
data.labels <- paste0(names(carb), " ", share, "%")
pie(
  carb,
  labels = data.labels,
  clockwise = TRUE,
  col = heat.colors(length(data.labels)),
  main = "released month"
)
# Histogram of release month: one unit-wide bar per month showing how many
# songs were released in it.
ggplot(data = data, mapping = aes(x = released_month)) +
  geom_histogram(binwidth = 1, color = "black", fill = "lightblue") +
  labs(
    title = "histogram of no.of songs released month in each month",
    x = "released_month",
    y = "count"
  )