Act2

library(readxl)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Load the song data from the named sheet of the Excel workbook.
spotify <- read_excel(
  "Spotify 2024-1.xlsx",
  sheet = "Most Streamed Spotify Songs 202"
)

# Modelling table: drop every row where the response (Explicit) or the
# predictor (Score) is missing, then keep only those two columns.
spotify_modelo <- spotify %>%
  filter(!(is.na(Explicit) | is.na(Score))) %>%
  select(Explicit, Score)

# Fit a binomial GLM with a logit link (logistic regression) of Explicit on
# Score using the NA-free modelling table, then print the coefficient table
# and deviance summary.
modelo_logit <- glm(Explicit ~ Score, data = spotify_modelo, family = binomial(link = "logit"))
summary(modelo_logit)

## 
## Call:
## glm(formula = Explicit ~ Score, family = binomial(link = "logit"), 
##     data = spotify_modelo)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.6721349  0.0452379  -14.86  < 2e-16 ***
## Score        0.0021821  0.0007822    2.79  0.00528 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6005.7  on 4599  degrees of freedom
## Residual deviance: 5997.9  on 4598  degrees of freedom
## AIC: 6001.9
## 
## Number of Fisher Scoring iterations: 4

# Fit the same Explicit ~ Score specification with a probit link on the same
# data, so the two link functions can be compared directly.
modelo_probit <- glm(Explicit ~ Score, data = spotify_modelo, family = binomial(link = "probit"))
summary(modelo_probit)

## 
## Call:
## glm(formula = Explicit ~ Score, family = binomial(link = "probit"), 
##     data = spotify_modelo)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.4181568  0.0278873 -14.995  < 2e-16 ***
## Score        0.0013476  0.0004856   2.775  0.00552 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6005.7  on 4599  degrees of freedom
## Residual deviance: 5997.9  on 4598  degrees of freedom
## AIC: 6001.9
## 
## Number of Fisher Scoring iterations: 4

# Tabulate the degrees of freedom and AIC of both fits side by side
# (lower AIC indicates the better trade-off between fit and complexity).
AIC(modelo_logit, modelo_probit)

##               df      AIC
## modelo_logit   2 6001.864
## modelo_probit  2 6001.859

# Extract the estimated intercept and Score slope of the logit model
# (on the log-odds scale).
coef(modelo_logit)

##  (Intercept)        Score 
## -0.672134857  0.002182056

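"El AIC de ambos modelos es prácticamente igual (6001.864 en logit y 6001.859 en probit); el AIC solo sirve para comparar modelos entre sí, y un valor menor es mejor"

## [1] "El AIC de ambos modelos es prácticamente igual (6001.864 en logit y 6001.859 en probit); el AIC solo sirve para comparar modelos entre sí, y un valor menor es mejor"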

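"En el análisis, el modelo más favorable fue el LOGIT por su interpretación más directa (sus coeficientes son log-odds), dado que su AIC es prácticamente igual al del probit"

## [1] "En el análisis, el modelo más favorable fue el LOGIT por su interpretación más directa (sus coeficientes son log-odds), dado que su AIC es prácticamente igual al del probit"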

"La fórmula de LOGIT utilizada fue la siguiente: logit(p) = −0.672 + 0.00218 · Score"

## [1] "La fórmula de LOGIT utilizada fue la siguiente: logit(p) = −0.672 + 0.00218 · Score"

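"En conclusión, a medida que aumenta el Score también aumenta la probabilidad estimada de que una canción sea explícita (Explicit)"

## [1] "En conclusión, a medida que aumenta el Score también aumenta la probabilidad estimada de que una canción sea explícita (Explicit)"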

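"Antes de ajustar los modelos se limpiaron los datos: se eliminaron las filas con valores faltantes en Explicit o Score"

## [1] "Antes de ajustar los modelos se limpiaron los datos: se eliminaron las filas con valores faltantes en Explicit o Score"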

Act2

2025-06-06