Homework 8: Cleaning Spotify streaming history and selecting a regression model by step-wise AIC

# Load the raw Spotify streaming-history export into a data frame.
spotify_history <- read.csv(
  file = "~/Downloads/archive/spotify_history.csv"
)
library(visdat)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
# Plot an overview of (at most) the first 10,000 rows of the raw data.
# head() is used instead of `[1:10000, ]` because indexing past the last row
# of a data frame pads the result with all-NA rows, which would appear as
# spurious missing data whenever the data set has fewer than 10,000 rows.
vis_dat(head(spotify_history, n = 10000))

library(stringr)
# Convert the `ts` column from text to POSIXct date-times, read as UTC.
# Values that do not match the given format are turned into NA.
spotify_history <- mutate(
  spotify_history,
  ts = as.POSIXct(
    ts,
    tz = "UTC",
    format = "%Y-%m-%d %H:%M:%S"
  )
)

# Normalise the artist, track and album name columns: trim surrounding
# whitespace first, then convert to lower case.
spotify_history <- spotify_history %>%
  mutate(
    across(
      c(artist_name, track_name, album_name),
      function(text) str_to_lower(str_trim(text))
    )
  )

# Remove rows that exactly duplicate an earlier row (the first one is kept).
spotify_history <- spotify_history[!duplicated(spotify_history), , drop = FALSE]

# Save the cleaned data in the working directory, without a row-name column.
write.csv(
  x = spotify_history,
  file = "spotify_history_cleaned_r.csv",
  row.names = FALSE
)
library(MASS)

Attaching package: 'MASS'
The following object is masked from 'package:dplyr':

    select
library(dplyr)


# Read the cleaned CSV back in from the working directory.
# NOTE(review): with read.csv() defaults, empty strings in character columns
# stay as "" rather than NA, so any blank reason_start / reason_end values
# would become a factor level of their own below -- confirm this is intended.
spotify_history_cleaned <- read.csv(file = "spotify_history_cleaned_r.csv")

# Convert the categorical predictors used by the model to factors.
spotify_history_cleaned <- mutate(
  spotify_history_cleaned,
  across(
    c(platform, reason_start, reason_end, shuffle, skipped),
    as.factor
  )
)


# Full linear model: ms_played regressed on every categorical predictor.
full_model <- lm(
  formula = ms_played ~ platform + reason_start + reason_end +
    shuffle + skipped,
  data = spotify_history_cleaned
)

# Backward step-wise selection by AIC, starting from the full model;
# trace = FALSE suppresses the per-step progress output.
best_model <- stepAIC(
  full_model,
  direction = "backward",
  trace = FALSE
)

# Print a heading for the model summary that follows.
print(x = "The best model found by step-wise AIC is:")
[1] "The best model found by step-wise AIC is:"
# Show coefficients, residual summary and fit statistics of the chosen model.
summary(object = best_model)

Call:
lm(formula = ms_played ~ platform + reason_start + reason_end + 
    skipped, data = spotify_history_cleaned)

Residuals:
    Min      1Q  Median      3Q     Max 
-226173  -31164  -12927   18687 1404991 

Coefficients:
                                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)                             83655.2     7533.9  11.104  < 2e-16 ***
platformcast to device                   2395.7     1274.1   1.880 0.060064 .  
platformiOS                              4212.8     1290.5   3.264 0.001097 ** 
platformmac                              7334.6     2007.7   3.653 0.000259 ***
platformweb player                      -1227.3     6015.9  -0.204 0.838343    
platformwindows                         -5565.4     1685.5  -3.302 0.000960 ***
reason_startappload                    -48929.3     6322.0  -7.740 1.00e-14 ***
reason_startautoplay                   -36100.8    69580.4  -0.519 0.603876    
reason_startbackbtn                     11386.8     6394.3   1.781 0.074949 .  
reason_startclickrow                    11806.2     6256.9   1.887 0.059176 .  
reason_startendplay                    -93554.4    48657.1  -1.923 0.054516 .  
reason_startfwdbtn                      -8936.0     6235.6  -1.433 0.151846    
reason_startnextbtn                    -15398.7    20569.9  -0.749 0.454098    
reason_startplaybtn                     -9859.0     6481.3  -1.521 0.128228    
reason_startpopup                      188852.8    37074.9   5.094 3.51e-07 ***
reason_startremote                     -29895.2     7061.3  -4.234 2.30e-05 ***
reason_starttrackdone                   18714.1     6218.4   3.009 0.002618 ** 
reason_starttrackerror                  -4957.9     8823.3  -0.562 0.574176    
reason_startunknown                     20128.3    15658.1   1.285 0.198624    
reason_endappload                       17501.7    26691.7   0.656 0.512021    
reason_endbackbtn                      -73121.9     7041.6 -10.384  < 2e-16 ***
reason_endclickrow                     -43142.2    14817.0  -2.912 0.003596 ** 
reason_endendplay                      -33045.6     6911.1  -4.781 1.74e-06 ***
reason_endfwdbtn                       -59999.5     6890.9  -8.707  < 2e-16 ***
reason_endlogout                        19318.3     6965.1   2.774 0.005545 ** 
reason_endnextbtn                      -42220.5    20975.8  -2.013 0.044136 *  
reason_endpopup                        -28817.5    34176.0  -0.843 0.399114    
reason_endreload                        11784.0    35581.9   0.331 0.740510    
reason_endremote                       -33539.6     8273.1  -4.054 5.04e-05 ***
reason_endtrackdone                    121407.9     6886.9  17.629  < 2e-16 ***
reason_endtrackerror                   -90533.3    34942.0  -2.591 0.009572 ** 
reason_endunexpected-exit              -23321.8     9323.2  -2.501 0.012369 *  
reason_endunexpected-exit-while-paused  12325.2     7079.0   1.741 0.081668 .  
reason_endunknown                        6968.0     8075.2   0.863 0.388201    
skippedTRUE                              2052.5      834.6   2.459 0.013925 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 68230 on 148640 degrees of freedom
Multiple R-squared:  0.6647,    Adjusted R-squared:  0.6647 
F-statistic:  8668 on 34 and 148640 DF,  p-value: < 2.2e-16