library(visdat)
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
# Visual overview (via visdat) of the first 10,000 rows of the listening history.
# head() stops at the last available row; indexing with 1:10000 on a shorter
# data frame would pad it with all-NA rows (or fail, depending on the table
# class), which would then be drawn as spurious missing data.
vis_dat(head(spotify_history, 10000))
library(stringr)
# Clean the listening history in one pipeline:
#   * parse `ts` into POSIXct (UTC) using the "%Y-%m-%d %H:%M:%S" layout,
#   * trim surrounding whitespace and lower-case the artist, track and
#     album names,
#   * drop rows that are exact duplicates of an earlier row.
spotify_history <- spotify_history %>%
  mutate(
    ts = as.POSIXct(ts, format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
    across(
      c(artist_name, track_name, album_name),
      function(txt) str_to_lower(str_trim(txt))
    )
  ) %>%
  unique()

# Persist the cleaned table; row names are omitted from the CSV.
write.csv(
  spotify_history,
  file = "spotify_history_cleaned_r.csv",
  row.names = FALSE
)
Attaching package: 'MASS'
The following object is masked from 'package:dplyr':
select
library(dplyr)
# Reload the cleaned listening history from the CSV written earlier.
spotify_history_cleaned <- read.csv("spotify_history_cleaned_r.csv")

# Convert the categorical predictors to factors so lm() dummy-codes them.
spotify_history_cleaned <- spotify_history_cleaned %>%
  mutate(
    across(
      c(platform, reason_start, reason_end, shuffle, skipped),
      as.factor
    )
  )

# Full linear model: ms_played regressed on every categorical predictor.
full_model <- lm(
  ms_played ~ platform + reason_start + reason_end + shuffle + skipped,
  data = spotify_history_cleaned
)

# Backward elimination by AIC. stepAIC() is reached through the MASS
# namespace so this step works without attaching MASS, which would mask
# dplyr::select().
best_model <- MASS::stepAIC(full_model, direction = "backward", trace = FALSE)
# Report the model selected by backward step-wise AIC.
print("The best model found by step-wise AIC is:")
# Show the selected model's call, coefficients and fit statistics; without
# this the heading above is printed but no model follows it. The explicit
# print() keeps the output visible when the script is run via source().
print(summary(best_model))
[1] "The best model found by step-wise AIC is:"
Call:
lm(formula = ms_played ~ platform + reason_start + reason_end +
skipped, data = spotify_history_cleaned)
Residuals:
Min 1Q Median 3Q Max
-226173 -31164 -12927 18687 1404991
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 83655.2 7533.9 11.104 < 2e-16 ***
platformcast to device 2395.7 1274.1 1.880 0.060064 .
platformiOS 4212.8 1290.5 3.264 0.001097 **
platformmac 7334.6 2007.7 3.653 0.000259 ***
platformweb player -1227.3 6015.9 -0.204 0.838343
platformwindows -5565.4 1685.5 -3.302 0.000960 ***
reason_startappload -48929.3 6322.0 -7.740 1.00e-14 ***
reason_startautoplay -36100.8 69580.4 -0.519 0.603876
reason_startbackbtn 11386.8 6394.3 1.781 0.074949 .
reason_startclickrow 11806.2 6256.9 1.887 0.059176 .
reason_startendplay -93554.4 48657.1 -1.923 0.054516 .
reason_startfwdbtn -8936.0 6235.6 -1.433 0.151846
reason_startnextbtn -15398.7 20569.9 -0.749 0.454098
reason_startplaybtn -9859.0 6481.3 -1.521 0.128228
reason_startpopup 188852.8 37074.9 5.094 3.51e-07 ***
reason_startremote -29895.2 7061.3 -4.234 2.30e-05 ***
reason_starttrackdone 18714.1 6218.4 3.009 0.002618 **
reason_starttrackerror -4957.9 8823.3 -0.562 0.574176
reason_startunknown 20128.3 15658.1 1.285 0.198624
reason_endappload 17501.7 26691.7 0.656 0.512021
reason_endbackbtn -73121.9 7041.6 -10.384 < 2e-16 ***
reason_endclickrow -43142.2 14817.0 -2.912 0.003596 **
reason_endendplay -33045.6 6911.1 -4.781 1.74e-06 ***
reason_endfwdbtn -59999.5 6890.9 -8.707 < 2e-16 ***
reason_endlogout 19318.3 6965.1 2.774 0.005545 **
reason_endnextbtn -42220.5 20975.8 -2.013 0.044136 *
reason_endpopup -28817.5 34176.0 -0.843 0.399114
reason_endreload 11784.0 35581.9 0.331 0.740510
reason_endremote -33539.6 8273.1 -4.054 5.04e-05 ***
reason_endtrackdone 121407.9 6886.9 17.629 < 2e-16 ***
reason_endtrackerror -90533.3 34942.0 -2.591 0.009572 **
reason_endunexpected-exit -23321.8 9323.2 -2.501 0.012369 *
reason_endunexpected-exit-while-paused 12325.2 7079.0 1.741 0.081668 .
reason_endunknown 6968.0 8075.2 0.863 0.388201
skippedTRUE 2052.5 834.6 2.459 0.013925 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 68230 on 148640 degrees of freedom
Multiple R-squared: 0.6647, Adjusted R-squared: 0.6647
F-statistic: 8668 on 34 and 148640 DF, p-value: < 2.2e-16