1.1 - Objetivo
1.2 - Hipóteses
2 - Fonte de dados
https://www.kaggle.com/leonardopena/top50spotify2019
3 - Descrição
Tarefas
Metodologia
library(readr)
library(ggplot2)
# Load the Top 50 Spotify dataset from the local CSV copy into `top50`.
# The recorded warning below shows that the first CSV column has no header;
# readr names it X1, and it is read as character.
top50 <- read_csv(
  file = "C:/Users/ferre/Desktop/estatisticas/Spotify/top50 concertado.csv",
  col_types = cols(X1 = col_character())
)
## Warning: Missing column names filled in: 'X1' [1]
# Exploratory overview of `top50`. Each statement prints its result at top
# level; the `##` lines after it are the output recorded when the report was
# knitted (column alignment was lost in the export).

# Per-column summary: length/class for text columns, quartiles for numeric ones.
summary(top50)
## X1 Sexo Grupo.ou.Solo Nome.da.Trilha
## Length:50 Length:50 Length:50 Length:50
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Artista Gênero.da.Faixa Batidas.por.Minute Energia
## Length:50 Length:50 Min. : 85.0 Min. :32.00
## Class :character Class :character 1st Qu.: 96.0 1st Qu.:55.25
## Mode :character Mode :character Median :104.5 Median :66.50
## Mean :120.1 Mean :64.06
## 3rd Qu.:137.5 3rd Qu.:74.75
## Max. :190.0 Max. :88.00
## Dançabilidade Volume.dB Ao.Vivo Valencia.
## Min. :29.00 Min. :-11.00 Min. : 5.00 Min. :10.00
## 1st Qu.:67.00 1st Qu.: -6.75 1st Qu.: 8.00 1st Qu.:38.25
## Median :73.50 Median : -6.00 Median :11.00 Median :55.50
## Mean :71.38 Mean : -5.66 Mean :14.66 Mean :54.60
## 3rd Qu.:79.75 3rd Qu.: -4.00 3rd Qu.:15.75 3rd Qu.:69.50
## Max. :90.00 Max. : -2.00 Max. :58.00 Max. :95.00
## Duração. Acústica. Discurso. Popularidade
## Min. :115.0 Min. : 1.00 Min. : 3.00 Min. :70.00
## 1st Qu.:176.8 1st Qu.: 8.25 1st Qu.: 5.00 1st Qu.:86.00
## Median :198.0 Median :15.00 Median : 7.00 Median :88.00
## Mean :201.0 Mean :22.16 Mean :12.48 Mean :87.50
## 3rd Qu.:217.5 3rd Qu.:33.75 3rd Qu.:15.00 3rd Qu.:90.75
## Max. :309.0 Max. :75.00 Max. :46.00 Max. :95.00

# Number of tracks by artist sex.
table(top50[["Sexo"]])
##
## Feminino Masculino
## 10 40

# Number of tracks by group vs. solo act.
table(top50[["Grupo.ou.Solo"]])
##
## Grupo Solo
## 4 46

# Number of tracks per genre.
table(top50[["Gênero.da.Faixa"]])
##
## atl hip hop australian pop big room boy band
## 1 1 1 1
## brostep canadian hip hop canadian pop country rap
## 2 3 2 2
## dance pop dfw rap edm electropop
## 8 2 3 2
## escape room latin panamanian pop pop
## 1 5 2 7
## pop house r&b en espanol reggaeton reggaeton flow
## 1 1 2 2
## trap music
## 1

# Number of tracks per artist.
table(top50[["Artista"]])
##
## Ali Gatie Anuel AA Ariana Grande Bad Bunny
## 1 1 2 1
## Billie Eilish Chris Brown Daddy Yankee DJ Snake
## 2 1 1 1
## Drake Ed Sheeran J Balvin Jhay Cortez
## 1 4 2 1
## Jonas Brothers Katy Perry Khalid Kygo
## 1 1 1 1
## Lady Gaga Lauv Lewis Capaldi Lil Nas X
## 1 1 1 2
## Lil Tecca Lizzo Lunay Maluma
## 1 1 1 1
## Marshmello Martin Garrix MEDUZA Nicky Jam
## 2 1 1 1
## Post Malone ROSALÍA Sam Smith Sech
## 2 1 1 2
## Shawn Mendes Taylor Swift The Chainsmokers Tones and I
## 2 1 2 1
## Y2K Young Thug
## 1 1
# Frequency tables for the categorical columns, stored under their own names.
# They are not referenced again in the visible part of the script.
tabela_sexo <- table(top50$Sexo)
tabela_gpsolo <- table(top50$Grupo.ou.Solo)
tabela_genero <- table(top50$Gênero.da.Faixa)
tabela_artista <- table(top50$Artista)

# NOTE: in the exported text each plot below was fused onto the last line of
# the previous statement (e.g. `...)ggplot(top50) +`), which R cannot parse.
# Every statement now starts on its own line.

# Bar chart: number of tracks per artist sex.
ggplot(top50) +
  aes(x = Sexo, fill = Sexo) +
  geom_bar() +
  scale_fill_hue(direction = -1) +
  labs(
    x = "Sexo",
    y = "Quantidade",
    title = "Relação entre o sexo dos artistas",
    subtitle = "Feminino/Masculino"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 20L, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 14L, hjust = 0.5),
    axis.title.y = element_text(size = 14L, face = "bold"),
    axis.title.x = element_text(size = 14L, face = "bold")
  )

# Horizontal bar chart: number of tracks per genre, one fixed colour per genre.
# `values` is a named character vector (genre -> colour); the original passed
# a list with the same names and colours.
ggplot(top50) +
  aes(x = Gênero.da.Faixa, fill = Gênero.da.Faixa) +
  geom_bar() +
  scale_fill_manual(
    values = c(
      `atl hip hop` = "#F8766D",
      `australian pop` = "#E98141",
      `big room` = "#DA8C15",
      `boy band` = "#C69600",
      brostep = "#ACA000",
      `canadian hip hop` = "#93AA00",
      `canadian pop` = "#58B016",
      `country rap` = "#1DB62C",
      `dance pop` = "#00BB4C",
      `dfw rap` = "#00BE75",
      edm = "#00C19F",
      electropop = "#00BDBA",
      `escape room` = "#00BAD5",
      latin = "#13B3E8",
      `panamanian pop` = "#3AA7F3",
      pop = "#619CFF",
      `pop house` = "#918BFD",
      `r&b en espanol` = "#C27AFB",
      reggaeton = "#E26EEF",
      `reggaeton flow` = "#F067D9",
      `trap music` = "#FF61C3"
    )
  ) +
  labs(
    x = "Gênero Musical",
    y = "Quantidade",
    title = "Gênero Musical das Faixas"
  ) +
  coord_flip() +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(size = 20L, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 14L, hjust = 0.5),
    axis.title.y = element_text(size = 14L, face = "bold"),
    axis.title.x = element_text(size = 14L, face = "bold")
  )

# Horizontal bar chart per genre where each track is weighted by `Discurso.`,
# so a bar's length is the sum of `Discurso.` over that genre's tracks.
ggplot(top50) +
  aes(
    x = Gênero.da.Faixa,
    fill = Gênero.da.Faixa,
    weight = Discurso.
  ) +
  geom_bar() +
  scale_fill_hue(direction = 1) +
  labs(
    x = "Gênero Musical",
    y = "Discurso",
    title = "Gênero Musical com mais diversidade de palavras"
  ) +
  coord_flip() +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 20L, face = "bold", hjust = 0.5),
    axis.title.y = element_text(size = 14L, face = "bold"),
    axis.title.x = element_text(size = 14L, face = "bold")
  )

# Box plot: distribution of `Discurso.` for each artist sex.
ggplot(top50) +
  aes(x = Sexo, y = Discurso.) +
  geom_boxplot(shape = "circle", fill = "#28CBB6") +
  labs(
    x = "Sexo",
    y = "Discurso",
    title = "Relação entre Sexo e Discurso"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 20L, face = "bold", hjust = 0.5),
    axis.title.y = element_text(size = 14L, face = "bold"),
    axis.title.x = element_text(size = 14L, face = "bold")
  )

# Box plot: popularity of group vs. solo acts, drawn on a log2 y axis.
ggplot(top50) +
  aes(x = Grupo.ou.Solo, y = Popularidade) +
  geom_boxplot(shape = "circle", fill = "orange") +
  scale_y_continuous(trans = "log2") +
  theme_minimal()