# Model 1 ----
# Load data: `pd` holds the tracks whose `target` is still to be predicted.
pd <- read.csv(file = "predict.csv", header = TRUE)
# Preview the first six rows.
head(pd, n = 6L)
## target trackID acousticness danceability duration_ms energy instrumentalness
## 1 NA 1480 0.006470 0.695 182693 0.818 4.87e-06
## 2 NA 1453 0.021400 0.587 198426 0.772 6.98e-01
## 3 NA 129 0.045200 0.746 238146 0.416 7.77e-05
## 4 NA 1976 0.000102 0.446 190787 0.900 1.89e-05
## 5 NA 84 0.193000 0.867 460400 0.733 2.19e-04
## 6 NA 279 0.384000 0.478 281774 0.600 8.08e-01
## key liveness loudness mode speechiness tempo time_signature valence
## 1 5 0.0219 -5.379 0 0.0334 119.965 4 0.9230
## 2 4 0.4470 -4.439 0 0.0375 127.978 4 0.2010
## 3 11 0.1200 -6.607 1 0.0482 132.036 4 0.2160
## 4 10 0.0404 -3.541 0 0.0489 187.961 4 0.8840
## 5 10 0.7490 -7.540 0 0.2370 97.835 4 0.7610
## 6 5 0.0987 -11.025 0 0.0356 143.980 4 0.0635
## song_title artist
## 1 Hard Times Paramore
## 2 Legacy Dimitri Vangelis & Wyman
## 3 Rewind Kelela
## 4 When We Stand Together Nickelback
## 5 The Payback James Brown
## 6 Like Lust MOVEMENT
# Read the test and training sets (both CSVs carry a header row).
test <- read.csv(file = "test.csv", header = TRUE)
train <- read.csv(file = "train.csv", header = TRUE)
# Class types: column-by-column overview of the prediction set.
summary(object = pd)
## target trackID acousticness danceability
## Mode:logical Min. : 2.0 Min. :0.0000079 Min. :0.1910
## NA's:100 1st Qu.: 462.5 1st Qu.:0.0051725 1st Qu.:0.5110
## Median :1059.5 Median :0.0607000 Median :0.6140
## Mean :1000.9 Mean :0.1786756 Mean :0.6126
## 3rd Qu.:1557.5 3rd Qu.:0.2562500 3rd Qu.:0.7270
## Max. :2015.0 Max. :0.9350000 Max. :0.8760
##
## duration_ms energy instrumentalness key
## Min. :155013 Min. :0.1250 Min. :0.0000000 Min. : 0.00
## 1st Qu.:194434 1st Qu.:0.6128 1st Qu.:0.0000000 1st Qu.: 2.00
## Median :224714 Median :0.7525 Median :0.0001325 Median : 5.00
## Mean :231787 Mean :0.7178 Mean :0.1579494 Mean : 5.34
## 3rd Qu.:250160 3rd Qu.:0.8615 3rd Qu.:0.0720750 3rd Qu.: 8.25
## Max. :460400 Max. :0.9920 Max. :0.9300000 Max. :11.00
##
## liveness loudness mode speechiness
## Min. :0.02190 Min. :-21.001 Min. :0.00 Min. :0.02740
## 1st Qu.:0.09112 1st Qu.: -7.454 1st Qu.:0.00 1st Qu.:0.03698
## Median :0.12900 Median : -5.647 Median :1.00 Median :0.05320
## Mean :0.19989 Mean : -6.306 Mean :0.57 Mean :0.08603
## 3rd Qu.:0.26850 3rd Qu.: -4.374 3rd Qu.:1.00 3rd Qu.:0.10975
## Max. :0.88700 Max. : -1.188 Max. :1.00 Max. :0.45500
##
## tempo time_signature valence song_title
## Min. : 72.28 Min. :3.00 Min. :0.0397 A Hundred Ropes: 1
## 1st Qu.: 97.93 1st Qu.:4.00 1st Qu.:0.2888 Ain't Nobody : 1
## Median :122.54 Median :4.00 Median :0.5160 Angel Eyes : 1
## Mean :121.82 Mean :3.97 Mean :0.4923 Archangel : 1
## 3rd Qu.:134.26 3rd Qu.:4.00 3rd Qu.:0.6830 Balance : 1
## Max. :203.82 Max. :4.00 Max. :0.9670 Before : 1
## (Other) :94
## artist
## Duke Dumont: 2
## James Brown: 2
## Santigold : 2
## Washed Out : 2
## *NSYNC : 1
## A$AP Ferg : 1
## (Other) :90
# Column types for the training set: free text stays character, the
# categorical audio features (mode, time_signature, key) become factors.
train$artist <- as.character(train$artist)
train$song_title <- as.character(train$song_title)
train$mode <- as.factor(train$mode)
train$time_signature <- as.factor(train$time_signature)
train$key <- as.factor(train$key)
# trackID is a plain integer identifier, so use as.character(); the
# numeric_version S3 method is meant for numeric_version objects and should
# not be called directly on an ordinary integer column.
train$trackID <- as.character(train$trackID)

# Same conversions for the prediction set.
pd$artist <- as.character(pd$artist)
pd$song_title <- as.character(pd$song_title)
pd$trackID <- as.character(pd$trackID)
# Build the factors against the training levels by VALUE. Assigning
# `levels(pd$x) <- levels(train$x)` renames levels by position, so when pd
# contains fewer distinct values than train (time_signature in pd only spans
# 3-4 in the summary above) the values are silently relabelled. Values that
# never occur in train become NA here instead of being mislabelled.
pd$mode <- factor(pd$mode, levels = levels(train$mode))
pd$time_signature <- factor(
  pd$time_signature,
  levels = levels(train$time_signature)
)
pd$key <- factor(pd$key, levels = levels(train$key))
# target is intentionally left numeric for the lm() fit that follows:
#train$target <- as.factor(train$target)
# Model 1: ordinary least squares on the 0/1 target, using every audio
# feature as a predictor.
model <- lm(
  formula = target ~ acousticness + danceability + duration_ms + energy +
    instrumentalness + key + liveness + loudness + mode + speechiness +
    tempo + time_signature + valence,
  data = train
)
# Coefficient table and fit statistics.
summary(model)
##
## Call:
## lm(formula = target ~ acousticness + danceability + duration_ms +
## energy + instrumentalness + key + liveness + loudness + mode +
## speechiness + tempo + time_signature + valence, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.87540 -0.42526 -0.00754 0.43042 1.01521
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.011e-01 4.995e-01 -0.403 0.68732
## acousticness -3.450e-01 7.691e-02 -4.486 8.06e-06 ***
## danceability 3.283e-01 1.123e-01 2.924 0.00354 **
## duration_ms 4.709e-07 1.924e-07 2.447 0.01457 *
## energy 3.192e-02 1.285e-01 0.248 0.80384
## instrumentalness 2.808e-01 6.088e-02 4.613 4.48e-06 ***
## key1 6.762e-03 6.096e-02 0.111 0.91170
## key2 1.875e-01 6.461e-02 2.902 0.00379 **
## key3 -1.669e-01 9.161e-02 -1.822 0.06877 .
## key4 -5.255e-03 7.790e-02 -0.067 0.94622
## key5 4.099e-02 6.821e-02 0.601 0.54799
## key6 -1.313e-02 6.907e-02 -0.190 0.84931
## key7 7.871e-02 6.370e-02 1.236 0.21692
## key8 -3.903e-02 7.116e-02 -0.549 0.58343
## key9 5.426e-02 6.617e-02 0.820 0.41242
## key10 3.502e-02 7.025e-02 0.499 0.61823
## key11 8.898e-02 6.806e-02 1.307 0.19135
## liveness -4.940e-02 1.023e-01 -0.483 0.62935
## loudness -2.059e-02 6.444e-03 -3.195 0.00144 **
## mode1 -5.568e-02 3.150e-02 -1.767 0.07747 .
## speechiness 8.461e-01 1.683e-01 5.029 5.82e-07 ***
## tempo 1.654e-04 5.560e-04 0.297 0.76617
## time_signature3 5.365e-02 4.781e-01 0.112 0.91066
## time_signature4 8.842e-02 4.732e-01 0.187 0.85180
## time_signature5 -1.293e-02 4.902e-01 -0.026 0.97896
## valence 1.294e-01 7.177e-02 1.803 0.07169 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4665 on 1033 degrees of freedom
## Multiple R-squared: 0.151, Adjusted R-squared: 0.1304
## F-statistic: 7.347 on 25 and 1033 DF, p-value: < 2.2e-16
# Predict: score the prediction set with the linear model.
Prediction <- predict(model, newdata = pd)
# Keep the raw score next to the features.
pd[["Prediction"]] <- unname(Prediction)
# Scores of 0.5 or more are labelled 1, everything else 0.
pd[["target"]] <- as.numeric(pd[["Prediction"]] >= 0.5)
# Subset: only the first two columns (target, trackID) are submitted.
pd <- pd[seq_len(2L)]
# Export the submission file without row names.
write.csv(pd, file = "Entry1-Sariah Nokes.csv", row.names = FALSE)
# Model 2 ----
# Load data: start again from a fresh copy of the prediction set `pd`.
pd <- read.csv(file = "predict.csv", header = TRUE)
# Show the leading six rows as a sanity check.
head(pd, n = 6L)
## target trackID acousticness danceability duration_ms energy instrumentalness
## 1 NA 1480 0.006470 0.695 182693 0.818 4.87e-06
## 2 NA 1453 0.021400 0.587 198426 0.772 6.98e-01
## 3 NA 129 0.045200 0.746 238146 0.416 7.77e-05
## 4 NA 1976 0.000102 0.446 190787 0.900 1.89e-05
## 5 NA 84 0.193000 0.867 460400 0.733 2.19e-04
## 6 NA 279 0.384000 0.478 281774 0.600 8.08e-01
## key liveness loudness mode speechiness tempo time_signature valence
## 1 5 0.0219 -5.379 0 0.0334 119.965 4 0.9230
## 2 4 0.4470 -4.439 0 0.0375 127.978 4 0.2010
## 3 11 0.1200 -6.607 1 0.0482 132.036 4 0.2160
## 4 10 0.0404 -3.541 0 0.0489 187.961 4 0.8840
## 5 10 0.7490 -7.540 0 0.2370 97.835 4 0.7610
## 6 5 0.0987 -11.025 0 0.0356 143.980 4 0.0635
## song_title artist
## 1 Hard Times Paramore
## 2 Legacy Dimitri Vangelis & Wyman
## 3 Rewind Kelela
## 4 When We Stand Together Nickelback
## 5 The Payback James Brown
## 6 Like Lust MOVEMENT
# Reload the test and training data from their CSV files.
test <- read.csv(file = "test.csv", header = TRUE)
train <- read.csv(file = "train.csv", header = TRUE)
# Class types: inspect each column of the prediction set.
summary(object = pd)
## target trackID acousticness danceability
## Mode:logical Min. : 2.0 Min. :0.0000079 Min. :0.1910
## NA's:100 1st Qu.: 462.5 1st Qu.:0.0051725 1st Qu.:0.5110
## Median :1059.5 Median :0.0607000 Median :0.6140
## Mean :1000.9 Mean :0.1786756 Mean :0.6126
## 3rd Qu.:1557.5 3rd Qu.:0.2562500 3rd Qu.:0.7270
## Max. :2015.0 Max. :0.9350000 Max. :0.8760
##
## duration_ms energy instrumentalness key
## Min. :155013 Min. :0.1250 Min. :0.0000000 Min. : 0.00
## 1st Qu.:194434 1st Qu.:0.6128 1st Qu.:0.0000000 1st Qu.: 2.00
## Median :224714 Median :0.7525 Median :0.0001325 Median : 5.00
## Mean :231787 Mean :0.7178 Mean :0.1579494 Mean : 5.34
## 3rd Qu.:250160 3rd Qu.:0.8615 3rd Qu.:0.0720750 3rd Qu.: 8.25
## Max. :460400 Max. :0.9920 Max. :0.9300000 Max. :11.00
##
## liveness loudness mode speechiness
## Min. :0.02190 Min. :-21.001 Min. :0.00 Min. :0.02740
## 1st Qu.:0.09112 1st Qu.: -7.454 1st Qu.:0.00 1st Qu.:0.03698
## Median :0.12900 Median : -5.647 Median :1.00 Median :0.05320
## Mean :0.19989 Mean : -6.306 Mean :0.57 Mean :0.08603
## 3rd Qu.:0.26850 3rd Qu.: -4.374 3rd Qu.:1.00 3rd Qu.:0.10975
## Max. :0.88700 Max. : -1.188 Max. :1.00 Max. :0.45500
##
## tempo time_signature valence song_title
## Min. : 72.28 Min. :3.00 Min. :0.0397 A Hundred Ropes: 1
## 1st Qu.: 97.93 1st Qu.:4.00 1st Qu.:0.2888 Ain't Nobody : 1
## Median :122.54 Median :4.00 Median :0.5160 Angel Eyes : 1
## Mean :121.82 Mean :3.97 Mean :0.4923 Archangel : 1
## 3rd Qu.:134.26 3rd Qu.:4.00 3rd Qu.:0.6830 Balance : 1
## Max. :203.82 Max. :4.00 Max. :0.9670 Before : 1
## (Other) :94
## artist
## Duke Dumont: 2
## James Brown: 2
## Santigold : 2
## Washed Out : 2
## *NSYNC : 1
## A$AP Ferg : 1
## (Other) :90
# Training-set column types: text columns as character, categorical audio
# features as factors.
train$artist <- as.character(train$artist)
train$song_title <- as.character(train$song_title)
train$mode <- as.factor(train$mode)
train$time_signature <- as.factor(train$time_signature)
train$key <- as.factor(train$key)
# trackID is an integer identifier; as.character() is the right conversion
# (the numeric_version method is not meant to be called on plain integers).
train$trackID <- as.character(train$trackID)

# Prediction-set column types.
pd$artist <- as.character(pd$artist)
pd$song_title <- as.character(pd$song_title)
pd$trackID <- as.character(pd$trackID)
# Match factor levels to the training set by value. `levels(x) <- ...`
# relabels by position, which renames the values whenever pd has fewer
# distinct values than train (pd's time_signature only covers 3-4 in the
# summary above). Values absent from train become NA rather than mislabelled.
pd$mode <- factor(pd$mode, levels = levels(train$mode))
pd$time_signature <- factor(
  pd$time_signature,
  levels = levels(train$time_signature)
)
pd$key <- factor(pd$key, levels = levels(train$key))
# The binomial glm() is fitted on a two-level factor response.
train$target <- as.factor(train$target)
# Model 2: logistic regression of target on all audio features.
logitMod <- glm(
  formula = target ~ acousticness + danceability + duration_ms + energy +
    instrumentalness + key + liveness + loudness + mode + speechiness +
    tempo + time_signature + valence,
  family = binomial(link = "logit"),
  data = train
)
# Prediction: predict() on a glm returns the linear predictor by default, so
# plogis() converts it to a probability.
predicted <- plogis(predict(logitMod, newdata = pd))
# Attach the logistic-regression probabilities. The original bound the
# global `Prediction`, which at this point still holds an earlier model's
# output, so this entry never reflected logitMod.
pd$Prediction <- unname(predicted)
# If-else statement: probabilities of at least 0.5 are classified as 1.
pd$target <- ifelse(pd$Prediction >= 0.5, 1, 0)
# Subset/export data: keep the submission columns by name, not position.
pd <- pd[c("target", "trackID")]
# Export
write.csv(pd, "Entry2-Sariah Nokes.csv", row.names = FALSE)
# Model 3 ----
# Load data: re-read the prediction set `pd` from disk.
pd <- read.csv(file = "predict.csv", header = TRUE)
# Print the top six rows.
head(pd, n = 6L)
## target trackID acousticness danceability duration_ms energy instrumentalness
## 1 NA 1480 0.006470 0.695 182693 0.818 4.87e-06
## 2 NA 1453 0.021400 0.587 198426 0.772 6.98e-01
## 3 NA 129 0.045200 0.746 238146 0.416 7.77e-05
## 4 NA 1976 0.000102 0.446 190787 0.900 1.89e-05
## 5 NA 84 0.193000 0.867 460400 0.733 2.19e-04
## 6 NA 279 0.384000 0.478 281774 0.600 8.08e-01
## key liveness loudness mode speechiness tempo time_signature valence
## 1 5 0.0219 -5.379 0 0.0334 119.965 4 0.9230
## 2 4 0.4470 -4.439 0 0.0375 127.978 4 0.2010
## 3 11 0.1200 -6.607 1 0.0482 132.036 4 0.2160
## 4 10 0.0404 -3.541 0 0.0489 187.961 4 0.8840
## 5 10 0.7490 -7.540 0 0.2370 97.835 4 0.7610
## 6 5 0.0987 -11.025 0 0.0356 143.980 4 0.0635
## song_title artist
## 1 Hard Times Paramore
## 2 Legacy Dimitri Vangelis & Wyman
## 3 Rewind Kelela
## 4 When We Stand Together Nickelback
## 5 The Payback James Brown
## 6 Like Lust MOVEMENT
# Bring in the test and training sets again for this model.
test <- read.csv(file = "test.csv", header = TRUE)
train <- read.csv(file = "train.csv", header = TRUE)
# Class types: summarise every column of the prediction set.
summary(object = pd)
## target trackID acousticness danceability
## Mode:logical Min. : 2.0 Min. :0.0000079 Min. :0.1910
## NA's:100 1st Qu.: 462.5 1st Qu.:0.0051725 1st Qu.:0.5110
## Median :1059.5 Median :0.0607000 Median :0.6140
## Mean :1000.9 Mean :0.1786756 Mean :0.6126
## 3rd Qu.:1557.5 3rd Qu.:0.2562500 3rd Qu.:0.7270
## Max. :2015.0 Max. :0.9350000 Max. :0.8760
##
## duration_ms energy instrumentalness key
## Min. :155013 Min. :0.1250 Min. :0.0000000 Min. : 0.00
## 1st Qu.:194434 1st Qu.:0.6128 1st Qu.:0.0000000 1st Qu.: 2.00
## Median :224714 Median :0.7525 Median :0.0001325 Median : 5.00
## Mean :231787 Mean :0.7178 Mean :0.1579494 Mean : 5.34
## 3rd Qu.:250160 3rd Qu.:0.8615 3rd Qu.:0.0720750 3rd Qu.: 8.25
## Max. :460400 Max. :0.9920 Max. :0.9300000 Max. :11.00
##
## liveness loudness mode speechiness
## Min. :0.02190 Min. :-21.001 Min. :0.00 Min. :0.02740
## 1st Qu.:0.09112 1st Qu.: -7.454 1st Qu.:0.00 1st Qu.:0.03698
## Median :0.12900 Median : -5.647 Median :1.00 Median :0.05320
## Mean :0.19989 Mean : -6.306 Mean :0.57 Mean :0.08603
## 3rd Qu.:0.26850 3rd Qu.: -4.374 3rd Qu.:1.00 3rd Qu.:0.10975
## Max. :0.88700 Max. : -1.188 Max. :1.00 Max. :0.45500
##
## tempo time_signature valence song_title
## Min. : 72.28 Min. :3.00 Min. :0.0397 A Hundred Ropes: 1
## 1st Qu.: 97.93 1st Qu.:4.00 1st Qu.:0.2888 Ain't Nobody : 1
## Median :122.54 Median :4.00 Median :0.5160 Angel Eyes : 1
## Mean :121.82 Mean :3.97 Mean :0.4923 Archangel : 1
## 3rd Qu.:134.26 3rd Qu.:4.00 3rd Qu.:0.6830 Balance : 1
## Max. :203.82 Max. :4.00 Max. :0.9670 Before : 1
## (Other) :94
## artist
## Duke Dumont: 2
## James Brown: 2
## Santigold : 2
## Washed Out : 2
## *NSYNC : 1
## A$AP Ferg : 1
## (Other) :90
# Training-set column types: character for free text, factor for the
# categorical audio features.
train$artist <- as.character(train$artist)
train$song_title <- as.character(train$song_title)
train$mode <- as.factor(train$mode)
train$time_signature <- as.factor(train$time_signature)
train$key <- as.factor(train$key)
# trackID is an integer identifier; convert with as.character() instead of
# calling the numeric_version method on a plain integer column.
train$trackID <- as.character(train$trackID)

# Prediction-set column types.
pd$artist <- as.character(pd$artist)
pd$song_title <- as.character(pd$song_title)
pd$trackID <- as.character(pd$trackID)
# Align the factors with the training levels by value. Overwriting
# `levels(x)` renames levels positionally and mislabels the data whenever pd
# has fewer distinct values than train (pd's time_signature only spans 3-4
# in the summary above). Unseen values become NA instead.
pd$mode <- factor(pd$mode, levels = levels(train$mode))
pd$time_signature <- factor(
  pd$time_signature,
  levels = levels(train$time_signature)
)
pd$key <- factor(pd$key, levels = levels(train$key))
# A factor response makes randomForest() fit a classifier.
train$target <- as.factor(train$target)
# Model 3: random forest classifier on all audio features.
# randomForest() lives in the randomForest package, which must be attached.
library(randomForest)
# The forest is grown from random bootstrap samples; fix the seed so the
# exported entry is reproducible from run to run.
set.seed(1)
rfMod <- randomForest(
  target ~ acousticness + danceability + duration_ms + energy +
    instrumentalness + key + liveness + loudness + mode + speechiness +
    tempo + time_signature + valence,
  data = train
)
# Prediction: score with the random forest itself. The original called
# predict(model, pd), where `model` is the lm fit from Model 1, so rfMod was
# never used and unthresholded numeric scores were written as the target.
Prediction <- predict(rfMod, newdata = pd)
# predict() returns the predicted class as a factor ("0"/"1") because the
# response is a factor; store it as 0/1 integers like the other entries.
pd$target <- as.integer(as.character(Prediction))
# Subset/export data: keep the submission columns by name, not position.
pd <- pd[c("target", "trackID")]
# Export
write.csv(pd, "Entry3-Sariah Nokes.csv", row.names = FALSE)