# Nominal qualitative variables: Track, Artist
# Discrete quantitative variables: Popularity, Year
# Continuous quantitative variables: Duration
##Graficado de base datos 1980s classic
library(dslabs)
library(readr)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(viridis)
## Cargando paquete requerido: viridisLite
library(viridisLite)
library(RColorBrewer)
library(ggplot2)
# External dataset: https://www.kaggle.com/datasets/thebumpkin/1980s-classic
# The CSV is read relative to the current working directory.
bd_ana <- read.csv("1980sClassics.csv")

# Pie chart ----
# Bin Popularity into 5 equal-width intervals labelled
# "Category 1" .. "Category 5".
bd_ana <- bd_ana %>%
  mutate(
    Popularity_Category = cut(
      Popularity,
      breaks = 5,
      labels = paste0("Category ", 1:5)
    )
  )

# Share of tracks per category, as a percentage rounded to one decimal.
tabla_Categoria <- table(bd_ana$Popularity_Category)
porcentaje_categoria <- round(tabla_Categoria * 100 / sum(tabla_Categoria), 1)

# One colour per category. Requesting only 4 colours for 5 categories made
# pie() and legend() recycle the palette, so two categories shared a colour.
colores_categoria <- brewer.pal(length(porcentaje_categoria), "Accent")

# Pie chart with percentage labels on the slices.
pie(
  porcentaje_categoria,
  labels = paste(porcentaje_categoria, "%"),
  main = "CATEGORIAS DE POPULARIDAD",
  col = colores_categoria,
  border = colores_categoria
)

# Legend mapping each colour to its category name.
legend(
  "bottomright",
  legend = names(porcentaje_categoria),
  fill = colores_categoria,
  cex = 0.6,
  border = colores_categoria
)
# Bar chart ----
# Count tracks per artist, then keep the 10 artists with the most tracks.
conteo_por_artista <- summarise(group_by(bd_ana, Artist), Track_Count = n())
artist_counts <- head(arrange(conteo_por_artista, desc(Track_Count)), 10)

# Four viridis colours, recycled across the bars for both fill and outline.
paleta_barras <- viridis(4)

# Draw the bars; las = 2 rotates the axis labels so artist names stay legible.
barplot(
  height = artist_counts$Track_Count,
  names.arg = artist_counts$Artist,
  col = paleta_barras,
  border = paleta_barras,
  las = 2,
  ylim = c(0, 20),
  xlab = "Artista",
  ylab = "Frecuencia",
  main = "Gráfico de Barras Frecuencia por Artista"
)

# Horizontal reference lines only (nx = NA suppresses the vertical ones).
grid(nx = NA, ny = NULL, col = "lightgray", lty = "dotted", lwd = par("lwd"))
# Histogram data ----
# Split each Duration string at its first ":"; the part before it is read as
# minutes and the part after it as seconds (presumably "m:ss" - confirm
# against the CSV).
texto_duracion <- bd_ana$Duration
posicion_separador <- regexpr(":", texto_duracion)
parte_minutos <- as.numeric(substr(texto_duracion, 1, posicion_separador - 1))
parte_segundos <- as.numeric(
  substr(texto_duracion, posicion_separador + 1, nchar(texto_duracion))
)

# Add the total duration in seconds, flag rows whose Year contains "1989"
# (the flag column is named `collab`), and keep only the flagged rows.
duracion_year <- bd_ana %>%
  mutate(
    Duration_Seconds = parte_minutos * 60 + parte_segundos,
    collab = grepl("1989", Year)
  ) %>%
  filter(collab)
# Frequency histogram of the 1989 track durations (in seconds), with the
# count printed above each bar (labels = TRUE).
segundos_1989 <- duracion_year$Duration_Seconds
paleta_hist <- brewer.pal(9, "Set1")
hist(
  segundos_1989,
  breaks = "Sturges",
  labels = TRUE,
  xlim = c(0, 500),
  ylim = c(0, 40),
  col = paleta_hist,
  border = paleta_hist,
  xlab = "Duracion canciones",
  main = "Histograma de Duracion Canciones año 1989"
)
# Density histogram ----
# Same durations on a density scale (probability = TRUE), with a kernel
# density estimate drawn on top as a black curve.
segundos_densidad <- duracion_year$Duration_Seconds
paleta_densidad <- brewer.pal(9, "Set1")
hist(
  segundos_densidad,
  probability = TRUE,
  breaks = "Sturges",
  xlim = c(0, 500),
  ylim = c(0, 0.008),
  col = paleta_densidad,
  border = paleta_densidad,
  xlab = "Duracion canciones",
  main = "Histograma Densidad de Duracion Canciones año 1989"
)
curva_densidad <- density(segundos_densidad)
lines(curva_densidad, col = "black", lwd = 2)
# Boxplot ----
# Print the summary statistics of the 1989 durations (in seconds).
summary(duracion_year$Duration_Seconds)

# Keep the summary so individual statistics can be pulled out by name.
resumen <- summary(duracion_year$Duration_Seconds)
minimo <- resumen["Min."]
primer_cuartil <- resumen["1st Qu."]
mediana <- resumen["Median"]
media <- mean(duracion_year$Duration_Seconds, na.rm = TRUE)
tercer_cuartil <- resumen["3rd Qu."]
maximo <- resumen["Max."]

# Single boxplot; outline = FALSE hides the outlier points.
boxplot(
  duracion_year$Duration_Seconds,
  col = "Blue",
  ylab = "Tasa de duración de canciones",
  outline = FALSE,
  main = "Boxplot",
  ylim = c(0, 450)
)

# A lone box is centred at x = 1, so every marker is placed explicitly at
# x = 1 instead of relying on points()/text() using the index as x.
# Mark the mean in black, labelled with its value.
points(1, media, col = "black", pch = 10)
text(1, media, labels = paste("Media:", round(media, 2)), pos = 4, cex = 0.8)

# Summary values and their display labels.
valores <- c(minimo, primer_cuartil, mediana, media, tercer_cuartil, maximo)
labels <- c("Min", "1st Qu.", "Mediana", "Media", "3rd Qu.", "Max.")

# Mark the remaining statistics in red with one vectorised call each. The
# mean is skipped here: it is already drawn above, and drawing it again
# stacked a second label and a red marker on top of the first ones.
sin_media <- labels != "Media"
x_caja <- rep(1, sum(sin_media))
points(x_caja, valores[sin_media], col = "red", pch = 10)
text(x_caja, valores[sin_media], labels = labels[sin_media], pos = 4, cex = 0.8)

# Frequency table of the popularity categories over the full dataset.
tabla_cate <- table(bd_ana$Popularity_Category)
# Comparison: scatter plot ----
# Popularity (x) against duration in seconds (y) for the 1989 tracks, with
# each point coloured by its popularity category.
x <- duracion_year$Popularity
y <- duracion_year$Duration_Seconds
niveles_categoria <- levels(duracion_year$Popularity_Category)
plot(
  x, y,
  main = "Popularidad vs Duracion canciones",
  # Integer factor codes index the active palette: level i gets colour i.
  col = as.integer(duracion_year$Popularity_Category),
  pch = 20,
  xlab = "Popularidad",
  ylab = "Duracion Segundos"
)
# Legend colours must follow level order to match the points. Using
# unique() on the factor gave order of first appearance (and possibly fewer
# entries than levels), so the keys did not line up with the labels.
legend(
  "bottomright",
  legend = niveles_categoria,
  fill = seq_along(niveles_categoria),
  cex = 0.75
)
# Boxplot of Duration_Seconds by Popularity_Category ----
# One box per popularity category for the 1989 tracks; outlier points are
# hidden (outline = FALSE).
duracion_media <- mean(duracion_year$Duration_Seconds)
boxplot(
  Duration_Seconds ~ Popularity_Category,
  data = duracion_year,
  col = brewer.pal(9, "Set1"),
  outline = FALSE,
  # Same 0-450 axis as the single boxplot of these durations. The previous
  # 0-300 limit cut the plot off below the recorded maximum (405 s) and
  # barely above the third quartile (293.2 s).
  ylim = c(0, 450),
  main = "Boxplot Duraccion vs Popularity_Category"
)
# Horizontal reference line at the overall mean duration, with its value
# printed in blue.
abline(h = duracion_media, col = "blue", lwd = 2)
text(x = 1, y = 100, labels = paste("", round(duracion_media, 2)), col = "blue")
# Combined report ----
# Draw the scatter plot and the per-category boxplot side by side. The
# previous graphics settings are saved so the 1 x 2 layout does not leak
# into later plots.
par_previo <- par(mfrow = c(1, 2))

# Left panel: popularity vs duration, coloured by popularity category.
x <- duracion_year$Popularity
y <- duracion_year$Duration_Seconds
niveles_panel <- levels(duracion_year$Popularity_Category)
plot(
  x, y,
  main = "Popularidad vs Duracion canciones",
  # Integer factor codes index the active palette: level i gets colour i.
  col = as.integer(duracion_year$Popularity_Category),
  pch = 20,
  xlab = "Popularidad",
  ylab = "Duracion Segundos"
)
# Legend colours follow level order so they match the points; unique() on
# the factor returned order of first appearance and mislabelled the keys.
legend(
  "bottomright",
  legend = niveles_panel,
  fill = seq_along(niveles_panel),
  cex = 0.75
)

# Right panel: duration by popularity category, with the overall mean as a
# blue reference line and its value printed in blue.
media_panel <- mean(duracion_year$Duration_Seconds)
boxplot(
  Duration_Seconds ~ Popularity_Category,
  data = duracion_year,
  col = brewer.pal(9, "Set1"),
  outline = FALSE,
  # 0-450 covers the recorded maximum duration (405 s); the previous 0-300
  # limit clipped the upper part of the boxes.
  ylim = c(0, 450),
  main = "Boxplot Duraccion vs Popularity_Category"
)
abline(h = media_panel, col = "blue", lwd = 2)
text(x = 1, y = 100, labels = paste("", round(media_panel, 2)), col = "blue")

# Restore the graphics settings that were active before the panel.
par(par_previo)