Load library

library(dplyr)
library(data.table)
library(ggplot2)
library(treemap)
library(googleVis)
library(flexclust)
library(FactoMineR)
library(factoextra)
Load Data
songs=fread('../data/songs.csv')
songs
Artist_top10.table=songs %>% 
  filter(.,Top10==1) %>%
  group_by(.,artistname) %>%
  summarise(
    num=n()
  )
Artist_top10.table
Artist_top10.table[order(Artist_top10.table$num,decreasing = T),] %>% 
  head(10) %>% 
  ggplot(data=.) +
  geom_bar(aes(x=substr(artistname,start=0,stop=5),y=num,fill=artistname),stat = "identity")+
  xlab('ArtistName')+
  ylab('#Top10 songs ')

Analysis the most Keys

treemap

Key=c('C','#C','D','#D','E','F','#F','G','#G','A','#A','B')
songs$Key_str=Key[songs$key+1]
mostkey.data=songs %>% 
  group_by(.,Key_str) %>%
  summarise(
    num=n()
  )
mostkey.data
treemap(mostkey.data,
        index='Key_str',
        vSize = 'num',
        type='value',
        vColor = 'num', 
        palette = rainbow(6),
        title='#Key'
)

Analysis the most tempo types

x <- songs$tempo
songs$tempo_type=case_when(
  x >=66 & x<76 ~ "Adagio",
  x >=76 & x<108 ~ "Andante",
  x >=108 & x<120 ~ "Moderato",
  x >=120 & x<156 ~ "Allegro",
  x >=156 & x<176 ~ "Vivace",
  x >=176  ~ "Presto",
  TRUE ~"Others"
)
songs$tlabel=case_when(
  x >=66 & x<76 ~ "66-76",
  x >=76 & x<108 ~ "76-108",
  x >=108 & x<120 ~ "108-120",
  x >=120 & x<156 ~ "120-156",
  x >=156 & x<176 ~ "156-176",
  x >=176  ~ ">176",
  TRUE ~"< 66 BPM"
)
most_temptype.table=songs %>% 
  group_by(.,tempo_type,tlabel) %>%
  summarise(
    num=n()
    
  )
most_temptype.table
most_temptype.table %>%
ggplot(data=., aes(x=reorder(tempo_type,num), y=num))+
  geom_bar(aes(y=num),stat="identity", alpha=0.8,fill="skyblue" )+ 
  ylab("Count")+ xlab("Tempo Type")+
  ggtitle("What is the most popular Tempo type? ")+
  geom_text(aes(label=tlabel), vjust=1, color="maroon", size=3.5)+ 
  theme_minimal()

Analysis Temple、Key、Top10

temp_key_top10.table= songs %>%
  filter(Top10==1) %>%
  group_by(.,tempo_type,Key_str) %>%
  summarise(
    num=n()
  )
temp_key_top10.table

Heat map

temp_key_top10.table %>%
ggplot(., aes(x=tempo_type, y=Key_str, fill = num)) + 
    geom_tile(colour = "white")  +
    scale_fill_gradient(low="skyblue", high="Pink") +
    labs(x="Tempo", y=NULL, title="Heatmap of #Top10" ,fill="#Top10")

分群分析:

#dist.method:Euclidean
#hclust.method:Complete
songs[,c(8,9,11:13)]%>% names()
[1] "loudness"       "tempo"          "key"            "key_confidence" "energy"        
hc = songs[,c(8,9,11:13)] %>% scale %>% dist %>% hclust
plot(hc,labels=FALSE)
rect.hclust(hc, k=3, border="red")

開啟平行運算

# library(doParallel)
# clust = makeCluster(detectCores())
# registerDoParallel(clust); getDoParWorkers()

層級式分群視覺化

library(FactoMineR)
library(factoextra)
fviz_dend(
  hc, k=3, show_labels=F, rect=T, rect_fill=T,
  labels_track_height=0,
  palette="ucscgb", rect_border="ucscgb")

視覺化分群

songs$group = cutree(hc, k=3) %>% factor
ggplot(songs, aes(x=tempo, y=loudness, col=group)) +
  geom_point(size=3, alpha=0.5) + 
  theme_light()

PCA降維視覺化

songs[,c(8,9,11:13)] %>% PCA(graph=F) %>% fviz_pca_biplot(
  label="var", col.ind=songs$group,
  pointshape=19, mean.point=F,
  addEllipses=T, ellipse.level=0.7,
  ellipse.type = "convex", palette="ucscgb",
  repel=T
  )

切割群組

grp = cutree(hc, k = 3)
table(grp)
grp
   1    2    3 
4706 2725  143 

群組屬性

sapply(split(songs[,c(8,9,13:15)], grp), colMeans) %>% round(3)
                  1       2       3
loudness     -8.206  -9.290 -19.908
tempo        94.588 130.682  82.628
energy        0.687   0.680   0.192
pitch         0.010   0.011   0.014
timbre_0_min  4.048   4.272   3.720

資料視覺化

table(grp)
grp
   1    2    3 
4706 2725  143 
layout(matrix(c(1,2,2), 3, 1))
par(mar=c(2,3,1,1), cex=0.8)
table(grp) %>% barplot(col=3:5, names.arg=paste0("Group-",1:3))
par(mar=c(6,3,2,1))
sapply(split(songs[,c(8,9,13:15)], grp), colMeans) %>% t %>% 
  barplot(beside=T, col=3:5, las=2)

LS0tDQp0aXRsZTogIue1seioiOWtuOe/kuiIh+WIhuaekOacn+acq+WwiOmhjCINCmF1dGhvcjogPGVtPkF1dGhvcjrlionogrLpipg8L2VtPg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQogICANCi0tLQ0KDQoNCiMjIyMjTG9hZCBsaWJyYXJ5DQoNCmBgYHtyfQ0KDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeShkYXRhLnRhYmxlKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeSh0cmVlbWFwKQ0KbGlicmFyeShnb29nbGVWaXMpDQpsaWJyYXJ5KGZsZXhjbHVzdCkNCmxpYnJhcnkoRmFjdG9NaW5lUikNCmxpYnJhcnkoZmFjdG9leHRyYSkNCmBgYA0KDQoNCiMjIyMjTG9hZCBEYXRhDQoNCmBgYHtyfQ0KDQpzb25ncz1mcmVhZCgnLi4vZGF0YS9zb25ncy5jc3YnKQ0Kc29uZ3MNCg0KDQpgYGANCg0KDQoNCmBgYHtyfQ0KDQpBcnRpc3RfdG9wMTAudGFibGU9c29uZ3MgJT4lIA0KICBmaWx0ZXIoLixUb3AxMD09MSkgJT4lDQogIGdyb3VwX2J5KC4sYXJ0aXN0bmFtZSkgJT4lDQogIHN1bW1hcmlzZSgNCiAgICBudW09bigpDQogICkNCkFydGlzdF90b3AxMC50YWJsZQ0KYGBgDQoNCg0KYGBge3J9DQoNCkFydGlzdF90b3AxMC50YWJsZVtvcmRlcihBcnRpc3RfdG9wMTAudGFibGUkbnVtLGRlY3JlYXNpbmcgPSBUKSxdICU+JSANCiAgaGVhZCgxMCkgJT4lIA0KICBnZ3Bsb3QoZGF0YT0uKSArDQogIGdlb21fYmFyKGFlcyh4PXN1YnN0cihhcnRpc3RuYW1lLHN0YXJ0PTAsc3RvcD01KSx5PW51bSxmaWxsPWFydGlzdG5hbWUpLHN0YXQgPSAiaWRlbnRpdHkiKSsNCiAgeGxhYignQXJ0aXN0TmFtZScpKw0KICB5bGFiKCcjVG9wMTAgc29uZ3MgJykNCg0KYGBgDQoNCg0KDQojIyNBbmFseXNpcyB0aGUgbW9zdCBLZXlzDQoNCiMjIyN0cmVlbWFwDQpgYGB7cn0NCg0KS2V5PWMoJ0MnLCcjQycsJ0QnLCcjRCcsJ0UnLCdGJywnI0YnLCdHJywnI0cnLCdBJywnI0EnLCdCJykNCg0Kc29uZ3MkS2V5X3N0cj1LZXlbc29uZ3Mka2V5KzFdDQoNCg0KYGBgDQoNCg0KYGBge3J9DQoNCm1vc3RrZXkuZGF0YT1zb25ncyAlPiUgDQogIGdyb3VwX2J5KC4sS2V5X3N0cikgJT4lDQogIHN1bW1hcmlzZSgNCiAgICBudW09bigpDQogICkNCm1vc3RrZXkuZGF0YQ0KYGBgDQoNCg0KYGBge3J9DQp0cmVlbWFwKG1vc3RrZXkuZGF0YSwNCiAgICAgICAgaW5kZXg9J0tleV9zdHInLA0KICAgICAgICB2U2l6ZSA9ICdudW0nLA0KICAgICAgICB0eXBlPSd2YWx1ZScsDQogICAgICAgIHZDb2xvciA9ICdudW0nLCANCiAgICAgICAgcGFsZXR0ZSA9IHJhaW5ib3coNiksDQogICAgICAgIHRpdGxlPScjS2V5Jw0KKQ0KDQpgYGANCg0KIyMjQW5hbHlzaXMgdGhlIG1vc3QgdGVtcG8gdHlwZXMNCg0KDQpgYGB7cn0NCg0KeCA8LSBzb25ncyR0ZW1wbw0Kc29uZ3MkdGVtcG9fdHlwZT1jYXNlX3doZW4oDQogIHggPj02NiAmIHg8NzYgfiAiQWRhZ2lvIiwNCiAgeCA+PTc2ICYgeDwxMDggfiAiQW5kYW50ZSIsDQogIHggPj0xMDggJiB4PDEyMCB+ICJNb2RlcmF0byIsDQogIHggPj0xMjAgJiB4PDE1NiB+ICJBbGxlZ3JvIiwNCiAgeCA+PTE1NiAmIHg8MTc2IH4gIlZpdmFjZSIsDQogIHggPj0xNzYgIH4gIlByZXN0byIsDQogIFRSVUUgfiJPdGhlcnMiDQopDQoNCnNvbmdzJHRsYWJlbD1jYXNlX3doZW4oDQogIHggPj02NiAmIHg8NzYgfiAiNjYtNzYiLA0KICB4ID49NzYgJiB4PDEwOCB+ICI3Ni0xMDgiLA0KICB4ID49MTA4ICYgeDwxMjAgfiAiMTA4LTEyMCIsDQogIHggPj0xMjAgJiB4PDE1NiB+ICIxMjAtMTU2IiwNCiAgeCA+PTE1NiAmIHg8MTc2IH4gIjE1Ni0xNzYiLA0KICB4ID49MTc2ICB+ICI+MTc2IiwNCiAgVFJVRSB+IjwgNjYgQlBNIg0KKQ0KYGBgDQoNCg0KYGBge3J9DQoNCm1vc3RfdGVtcHR5cGUudGFibGU9c29uZ3MgJT4lIA0KICBncm91cF9ieSguLHRlbXBvX3R5cGUsdGxhYmVsKSAlPiUNCiAgc3VtbWFyaXNlKA0KICAgIG51bT1uKCkNCiAgICANCiAgKQ0KDQptb3N0X3RlbXB0eXBlLnRhYmxlDQoNCmBgYA0KDQpgYGB7cn0NCm1vc3RfdGVtcHR5cGUudGFibGUgJT4lDQpnZ3Bsb3QoZGF0YT0uLCBhZXMoeD1yZW9yZGVyKHRlbXBvX3R5cGUsbnVtKSwgeT1udW0pKSsNCiAgZ2VvbV9iYXIoYWVzKHk9bnVtKSxzdGF0PSJpZGVudGl0eSIsIGFscGhhPTAuOCxmaWxsPSJza3libHVlIiApKyANCiAgeWxhYigiQ291bnQiKSsgeGxhYigiVGVtcG8gVHlwZSIpKw0KICBnZ3RpdGxlKCJXaGF0IGlzIHRoZSBtb3N0IHBvcHVsYXIgVGVtcG8gdHlwZT8gIikrDQogIGdlb21fdGV4dChhZXMobGFiZWw9dGxhYmVsKSwgdmp1c3Q9MSwgY29sb3I9Im1hcm9vbiIsIHNpemU9My41KSsgDQogIHRoZW1lX21pbmltYWwoKQ0KDQpgYGANCg0KIyMjQW5hbHlzaXMgVGVtcGxl44CBS2V544CBVG9wMTANCg0KYGBge3J9DQp0ZW1wX2tleV90b3AxMC50YWJsZT0gc29uZ3MgJT4lDQogIGZpbHRlcihUb3AxMD09MSkgJT4lDQogIGdyb3VwX2J5KC4sdGVtcG9fdHlwZSxLZXlfc3RyKSAlPiUNCiAgc3VtbWFyaXNlKA0KICAgIG51bT1uKCkNCiAgKQ0KdGVtcF9rZXlfdG9wMTAudGFibGUNCmBgYA0KDQoNCiMjIyNIZWF0IG1hcA0KDQpgYGB7cn0NCnRlbXBfa2V5X3RvcDEwLnRhYmxlICU+JQ0KZ2dwbG90KC4sIGFlcyh4PXRlbXBvX3R5cGUsIHk9S2V5X3N0ciwgZmlsbCA9IG51bSkpICsgDQogICAgZ2VvbV90aWxlKGNvbG91ciA9ICJ3aGl0ZSIpICArDQogICAgc2NhbGVfZmlsbF9ncmFkaWVudChsb3c9InNreWJsdWUiLCBoaWdoPSJQaW5rIikgKw0KICAgIGxhYnMoeD0iVGVtcG8iLCB5PU5VTEwsIHRpdGxlPSJIZWF0bWFwIG9mICNUb3AxMCIgLGZpbGw9IiNUb3AxMCIpDQoNCmBgYA0KDQoNCg0KDQojIyPliIbnvqTliIbmnpA6DQoNCmBgYHtyfQ0KI2Rpc3QubWV0aG9kOkV1Y2xpZGVhbg0KI2hjbHVzdC5tZXRob2Q6Q29tcGxldGUNCg0Kc29uZ3NbLGMoOCw5LDExOjEzKV0lPiUgbmFtZXMoKQ0KaGMgPSBzb25nc1ssYyg4LDksMTE6MTMpXSAlPiUgc2NhbGUgJT4lIGRpc3QgJT4lIGhjbHVzdA0KcGxvdChoYyxsYWJlbHM9RkFMU0UpDQpyZWN0LmhjbHVzdChoYywgaz0zLCBib3JkZXI9InJlZCIpDQoNCg0KYGBgDQoNCiPplovllZ/lubPooYzpgYvnrpcNCg0KYGBge3J9DQojIGxpYnJhcnkoZG9QYXJhbGxlbCkNCiMgY2x1c3QgPSBtYWtlQ2x1c3RlcihkZXRlY3RDb3JlcygpKQ0KIyByZWdpc3RlckRvUGFyYWxsZWwoY2x1c3QpOyBnZXREb1BhcldvcmtlcnMoKQ0KYGBgDQoNCg0KI+WxpOe0muW8j+WIhue+pOimluimuuWMlg0KDQpgYGB7cn0NCiMgbGlicmFyeShGYWN0b01pbmVSKQ0KIyBsaWJyYXJ5KGZhY3RvZXh0cmEpDQojIA0KIyBmdml6X2RlbmQoDQojICAgaGMsIGs9Mywgc2hvd19sYWJlbHM9RiwgcmVjdD1ULCByZWN0X2ZpbGw9VCwNCiMgICBsYWJlbHNfdHJhY2tfaGVpZ2h0PTAsDQojICAgcGFsZXR0ZT0idWNzY2diIiwgcmVjdF9ib3JkZXI9InVjc2NnYiIpDQoNCg0KYGBgDQoNCg0KDQoj6KaW6Ka65YyW5YiG576kDQoNCmBgYHtyfQ0Kc29uZ3MkZ3JvdXAgPSBjdXRyZWUoaGMsIGs9MykgJT4lIGZhY3Rvcg0KZ2dwbG90KHNvbmdzLCBhZXMoeD10ZW1wbywgeT1sb3VkbmVzcywgY29sPWdyb3VwKSkgKw0KICBnZW9tX3BvaW50KHNpemU9MywgYWxwaGE9MC41KSArIA0KICB0aGVtZV9saWdodCgpDQpgYGANCg0KDQoNCg0KDQoNCg0KDQoNCiNQQ0HpmY3ntq3oppboprrljJYNCg0KDQoNCmBgYHtyIGZpZy5oZWlnaHQ9NywgZmlnLndpZHRoPTl9DQoNCnNvbmdzWyxjKDgsOSwxMToxMyldICU+JSBQQ0EoZ3JhcGg9RikgJT4lIGZ2aXpfcGNhX2JpcGxvdCgNCiAgbGFiZWw9InZhciIsIGNvbC5pbmQ9c29uZ3MkZ3JvdXAsDQogIHBvaW50c2hhcGU9MTksIG1lYW4ucG9pbnQ9RiwNCiAgYWRkRWxsaXBzZXM9VCwgZWxsaXBzZS5sZXZlbD0wLjcsDQogIGVsbGlwc2UudHlwZSA9ICJjb252ZXgiLCBwYWxldHRlPSJ1Y3NjZ2IiLA0KICByZXBlbD1UDQogICkNCmBgYA0KDQoNCiPliIflibLnvqTntYQNCg0KYGBge3J9DQpncnAgPSBjdXRyZWUoaGMsIGsgPSAzKQ0KdGFibGUoZ3JwKQ0KYGBgDQoNCiPnvqTntYTlsazmgKcNCg0KYGBge3J9DQpzYXBwbHkoc3BsaXQoc29uZ3NbLGMoOCw5LDEzOjE1KV0sIGdycCksIGNvbE1lYW5zKSAlPiUgcm91bmQoMykNCmBgYA0KDQoj6LOH5paZ6KaW6Ka65YyWDQoNCmBgYHtyfQ0KbGF5b3V0KG1hdHJpeChjKDEsMiwyKSwgMywgMSkpDQpwYXIobWFyPWMoMiwzLDEsMSksIGNleD0wLjgpDQp0YWJsZShncnApICU+JSBiYXJwbG90KGNvbD0zOjUsIG5hbWVzLmFyZz1wYXN0ZTAoIkdyb3VwLSIsMTozKSkNCnBhcihtYXI9Yyg2LDMsMiwxKSkNCmBgYA0KDQoNCg0KDQoNCg0KYGBge3J9DQoNCmxheW91dChtYXRyaXgoYygxLDIsMiksIDMsIDEpKQ0KcGFyKG1hcj1jKDIsMywxLDEpLCBjZXg9MC44KQ0KdGFibGUoZ3JwKSAlPiUgYmFycGxvdChjb2w9Mzo1LCBuYW1lcy5hcmc9cGFzdGUwKCJHcm91cC0iLDE6MykpDQpwYXIobWFyPWMoNiwzLDIsMSkpDQoNCnNhcHBseShzcGxpdChzb25nc1ssYyg4LDksMTM6MTUpXSwgZ3JwKSwgY29sTWVhbnMpICU+JSB0ICU+JSANCiAgYmFycGxvdChiZXNpZGU9VCwgY29sPTM6NSwgbGFzPTIpDQoNCmBgYA0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KPHN0eWxlPg0KDQplbSB7DQogICAgY29sb3I6ICNGRkVBNkM7DQogICAgYmFja2dyb3VuZDogIzdEN0Q3RDsNCn0NCg0KLmNhcHRpb24gew0KICBjb2xvcjogIzc3NzsNCiAgbWFyZ2luLXRvcDogMTBweDsNCn0NCnAgY29kZSB7DQogIHdoaXRlLXNwYWNlOiBpbmhlcml0Ow0KfQ0KcHJlIHsNCiAgd29yZC1icmVhazogbm9ybWFsOw0KICB3b3JkLXdyYXA6IG5vcm1hbDsNCiAgbGluZS1oZWlnaHQ6IDE7DQp9DQpwcmUgY29kZSB7DQogIHdoaXRlLXNwYWNlOiBpbmhlcml0Ow0KfQ0KcCxsaSB7DQogIGZvbnQtZmFtaWx5OiAiVHJlYnVjaGV0IE1TIiwgIuW+rui7n+ato+m7kemrlCIsICJNaWNyb3NvZnQgSmhlbmdIZWkiOw0KfQ0KDQoucnsNCiAgbGluZS1oZWlnaHQ6IDEuMjsNCn0NCg0KLnFpeiB7DQogIGxpbmUtaGVpZ2h0OiAxLjc1Ow0KICBiYWNrZ3JvdW5kOiAjZjBmMGYwOw0KICBib3JkZXItbGVmdDogMTJweCBzb2xpZCAjY2NmZmNjOw0KICBwYWRkaW5nOiA0cHg7DQogIHBhZGRpbmctbGVmdDogMTBweDsNCiAgY29sb3I6ICMwMDk5MDA7DQp9DQoNCnRpdGxlew0KICBjb2xvcjogI2NjMDAwMDsNCiAgZm9udC1mYW1pbHk6ICJUcmVidWNoZXQgTVMiLCAi5b6u6Luf5q2j6buR6auUIiwgIk1pY3Jvc29mdCBKaGVuZ0hlaSI7DQp9DQoNCmJvZHl7DQogIGZvbnQtZmFtaWx5OiAiVHJlYnVjaGV0IE1TIiwgIuW+rui7n+ato+m7kemrlCIsICJNaWNyb3NvZnQgSmhlbmdIZWkiOw0KfQ0KDQpoMSxoMixoMyxoNCxoNXsNCiAgY29sb3I6ICMwMDY2ZmY7DQogIGZvbnQtZmFtaWx5OiAiVHJlYnVjaGV0IE1TIiwgIuW+rui7n+ato+m7kemrlCIsICJNaWNyb3NvZnQgSmhlbmdIZWkiOw0KfQ0KDQoNCmgzew0KICBjb2xvcjogI2IzNmIwMDsNCiAgYmFja2dyb3VuZDogI2ZmZTBiMzsNCiAgbGluZS1oZWlnaHQ6IDI7DQogIGZvbnQtd2VpZ2h0OiBib2xkOw0KfQ0KDQpoNXsNCiAgY29sb3I6ICMwMDYwMDA7DQogIGJhY2tncm91bmQ6ICNmOGY4Zjg7DQogIGxpbmUtaGVpZ2h0OiAxLjU7DQogIGZvbnQtd2VpZ2h0OiBib2xkOw0KfQ0KDQpoNiB7DQogICAgY29sb3I6ICMwMDYwMDA7DQogICAgYmFja2dyb3VuZDogIzAwZmZmZjsNCiAgICBsaW5lLWhlaWdodDogMjsNCiAgICBmb250LXdlaWdodDogYm9sZDsNCn0NCg0KPC9zdHlsZT4=