#Model 1

# Load the rows to be scored into `pd` and preview the first six records.
pd <- read.csv(file = "predict.csv", header = TRUE)
head(x = pd, n = 6L)
##   target trackID acousticness danceability duration_ms energy instrumentalness
## 1     NA    1480     0.006470        0.695      182693  0.818         4.87e-06
## 2     NA    1453     0.021400        0.587      198426  0.772         6.98e-01
## 3     NA     129     0.045200        0.746      238146  0.416         7.77e-05
## 4     NA    1976     0.000102        0.446      190787  0.900         1.89e-05
## 5     NA      84     0.193000        0.867      460400  0.733         2.19e-04
## 6     NA     279     0.384000        0.478      281774  0.600         8.08e-01
##   key liveness loudness mode speechiness   tempo time_signature valence
## 1   5   0.0219   -5.379    0      0.0334 119.965              4  0.9230
## 2   4   0.4470   -4.439    0      0.0375 127.978              4  0.2010
## 3  11   0.1200   -6.607    1      0.0482 132.036              4  0.2160
## 4  10   0.0404   -3.541    0      0.0489 187.961              4  0.8840
## 5  10   0.7490   -7.540    0      0.2370  97.835              4  0.7610
## 6   5   0.0987  -11.025    0      0.0356 143.980              4  0.0635
##               song_title                   artist
## 1             Hard Times                 Paramore
## 2                 Legacy Dimitri Vangelis & Wyman
## 3                 Rewind                   Kelela
## 4 When We Stand Together               Nickelback
## 5            The Payback              James Brown
## 6              Like Lust                 MOVEMENT
# Hold-out data; `test` is not referenced again in the visible part of this
# script.
test <- read.csv(file = "test.csv", header = TRUE)

# Labelled data used to fit the model below.
train <- read.csv(file = "train.csv", header = TRUE)

# Class types: inspect how each column of the scoring data was parsed.

summary(object = pd)
##   target           trackID        acousticness        danceability   
##  Mode:logical   Min.   :   2.0   Min.   :0.0000079   Min.   :0.1910  
##  NA's:100       1st Qu.: 462.5   1st Qu.:0.0051725   1st Qu.:0.5110  
##                 Median :1059.5   Median :0.0607000   Median :0.6140  
##                 Mean   :1000.9   Mean   :0.1786756   Mean   :0.6126  
##                 3rd Qu.:1557.5   3rd Qu.:0.2562500   3rd Qu.:0.7270  
##                 Max.   :2015.0   Max.   :0.9350000   Max.   :0.8760  
##                                                                      
##   duration_ms         energy       instrumentalness         key       
##  Min.   :155013   Min.   :0.1250   Min.   :0.0000000   Min.   : 0.00  
##  1st Qu.:194434   1st Qu.:0.6128   1st Qu.:0.0000000   1st Qu.: 2.00  
##  Median :224714   Median :0.7525   Median :0.0001325   Median : 5.00  
##  Mean   :231787   Mean   :0.7178   Mean   :0.1579494   Mean   : 5.34  
##  3rd Qu.:250160   3rd Qu.:0.8615   3rd Qu.:0.0720750   3rd Qu.: 8.25  
##  Max.   :460400   Max.   :0.9920   Max.   :0.9300000   Max.   :11.00  
##                                                                       
##     liveness          loudness            mode       speechiness     
##  Min.   :0.02190   Min.   :-21.001   Min.   :0.00   Min.   :0.02740  
##  1st Qu.:0.09112   1st Qu.: -7.454   1st Qu.:0.00   1st Qu.:0.03698  
##  Median :0.12900   Median : -5.647   Median :1.00   Median :0.05320  
##  Mean   :0.19989   Mean   : -6.306   Mean   :0.57   Mean   :0.08603  
##  3rd Qu.:0.26850   3rd Qu.: -4.374   3rd Qu.:1.00   3rd Qu.:0.10975  
##  Max.   :0.88700   Max.   : -1.188   Max.   :1.00   Max.   :0.45500  
##                                                                      
##      tempo        time_signature    valence                 song_title
##  Min.   : 72.28   Min.   :3.00   Min.   :0.0397   A Hundred Ropes: 1  
##  1st Qu.: 97.93   1st Qu.:4.00   1st Qu.:0.2888   Ain't Nobody   : 1  
##  Median :122.54   Median :4.00   Median :0.5160   Angel Eyes     : 1  
##  Mean   :121.82   Mean   :3.97   Mean   :0.4923   Archangel      : 1  
##  3rd Qu.:134.26   3rd Qu.:4.00   3rd Qu.:0.6830   Balance        : 1  
##  Max.   :203.82   Max.   :4.00   Max.   :0.9670   Before         : 1  
##                                                   (Other)        :94  
##          artist  
##  Duke Dumont: 2  
##  James Brown: 2  
##  Santigold  : 2  
##  Washed Out : 2  
##  *NSYNC     : 1  
##  A$AP Ferg  : 1  
##  (Other)    :90
# Coerce columns to the types the model expects.
# Free-text columns stay character; categorical audio features become factors.
train$artist <- as.character(train$artist)
train$song_title <- as.character(train$song_title)
train$mode <- as.factor(train$mode)
train$time_signature <- as.factor(train$time_signature)
train$key <- as.factor(train$key)
# trackID is an identifier, not a quantity. Plain as.character() is used
# instead of calling the numeric_version S3 method directly on a numeric
# column, which goes through format() and can pad IDs with leading spaces.
train$trackID <- as.character(train$trackID)

pd$artist <- as.character(pd$artist)
pd$song_title <- as.character(pd$song_title)
pd$trackID <- as.character(pd$trackID)

# Build the scoring factors against the TRAINING levels by value.
# Assigning `levels(pd$x) <- levels(train$x)` renames levels by position:
# predict.csv only contains time signatures 3 and 4, while the fitted model
# has levels 1, 3, 4, 5, so 3 would be relabelled "1" and 4 relabelled "3".
# factor(..., levels = ) keeps every value's own label; a value never seen in
# training becomes NA instead of being silently mislabelled.
pd$mode <- factor(pd$mode, levels = levels(train$mode))
pd$time_signature <- factor(
  pd$time_signature,
  levels = levels(train$time_signature)
)
pd$key <- factor(pd$key, levels = levels(train$key))

# Left disabled for Model 1: lm() below needs a numeric response.
#train$target <- as.factor(train$target)

# Model 1: ordinary least squares on the target, using every audio feature.
# key, mode and time_signature enter as factors (dummy-coded by lm()).
model <- lm(
  target ~ acousticness + danceability + duration_ms + energy +
    instrumentalness + key + liveness + loudness + mode + speechiness +
    tempo + time_signature + valence,
  data = train
)
summary(model)
## 
## Call:
## lm(formula = target ~ acousticness + danceability + duration_ms + 
##     energy + instrumentalness + key + liveness + loudness + mode + 
##     speechiness + tempo + time_signature + valence, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.87540 -0.42526 -0.00754  0.43042  1.01521 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -2.011e-01  4.995e-01  -0.403  0.68732    
## acousticness     -3.450e-01  7.691e-02  -4.486 8.06e-06 ***
## danceability      3.283e-01  1.123e-01   2.924  0.00354 ** 
## duration_ms       4.709e-07  1.924e-07   2.447  0.01457 *  
## energy            3.192e-02  1.285e-01   0.248  0.80384    
## instrumentalness  2.808e-01  6.088e-02   4.613 4.48e-06 ***
## key1              6.762e-03  6.096e-02   0.111  0.91170    
## key2              1.875e-01  6.461e-02   2.902  0.00379 ** 
## key3             -1.669e-01  9.161e-02  -1.822  0.06877 .  
## key4             -5.255e-03  7.790e-02  -0.067  0.94622    
## key5              4.099e-02  6.821e-02   0.601  0.54799    
## key6             -1.313e-02  6.907e-02  -0.190  0.84931    
## key7              7.871e-02  6.370e-02   1.236  0.21692    
## key8             -3.903e-02  7.116e-02  -0.549  0.58343    
## key9              5.426e-02  6.617e-02   0.820  0.41242    
## key10             3.502e-02  7.025e-02   0.499  0.61823    
## key11             8.898e-02  6.806e-02   1.307  0.19135    
## liveness         -4.940e-02  1.023e-01  -0.483  0.62935    
## loudness         -2.059e-02  6.444e-03  -3.195  0.00144 ** 
## mode1            -5.568e-02  3.150e-02  -1.767  0.07747 .  
## speechiness       8.461e-01  1.683e-01   5.029 5.82e-07 ***
## tempo             1.654e-04  5.560e-04   0.297  0.76617    
## time_signature3   5.365e-02  4.781e-01   0.112  0.91066    
## time_signature4   8.842e-02  4.732e-01   0.187  0.85180    
## time_signature5  -1.293e-02  4.902e-01  -0.026  0.97896    
## valence           1.294e-01  7.177e-02   1.803  0.07169 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4665 on 1033 degrees of freedom
## Multiple R-squared:  0.151,  Adjusted R-squared:  0.1304 
## F-statistic: 7.347 on 25 and 1033 DF,  p-value: < 2.2e-16
# Score the prediction set with the linear model and attach the fitted values.
Prediction <- predict(model, newdata = pd)
pd <- cbind(pd, Prediction)

# Convert the fitted value to a class label: 1 at or above 0.5, otherwise 0.
pd[["target"]] <- ifelse(pd[["Prediction"]] >= 0.5, 1, 0)

# Keep only the first two columns for the submission file.
pd <- pd[seq_len(2L)]

# Write the submission without row names.
write.csv(pd, file = "Entry1-Sariah Nokes.csv", row.names = FALSE)

#Model 2

# Reload the scoring data so Model 2 starts from an unmodified `pd`.
pd <- read.csv(file = "predict.csv", header = TRUE)
head(x = pd, n = 6L)
##   target trackID acousticness danceability duration_ms energy instrumentalness
## 1     NA    1480     0.006470        0.695      182693  0.818         4.87e-06
## 2     NA    1453     0.021400        0.587      198426  0.772         6.98e-01
## 3     NA     129     0.045200        0.746      238146  0.416         7.77e-05
## 4     NA    1976     0.000102        0.446      190787  0.900         1.89e-05
## 5     NA      84     0.193000        0.867      460400  0.733         2.19e-04
## 6     NA     279     0.384000        0.478      281774  0.600         8.08e-01
##   key liveness loudness mode speechiness   tempo time_signature valence
## 1   5   0.0219   -5.379    0      0.0334 119.965              4  0.9230
## 2   4   0.4470   -4.439    0      0.0375 127.978              4  0.2010
## 3  11   0.1200   -6.607    1      0.0482 132.036              4  0.2160
## 4  10   0.0404   -3.541    0      0.0489 187.961              4  0.8840
## 5  10   0.7490   -7.540    0      0.2370  97.835              4  0.7610
## 6   5   0.0987  -11.025    0      0.0356 143.980              4  0.0635
##               song_title                   artist
## 1             Hard Times                 Paramore
## 2                 Legacy Dimitri Vangelis & Wyman
## 3                 Rewind                   Kelela
## 4 When We Stand Together               Nickelback
## 5            The Payback              James Brown
## 6              Like Lust                 MOVEMENT
# Hold-out data; `test` is not referenced again in the visible part of this
# script.
test <- read.csv(file = "test.csv", header = TRUE)

# Fresh copy of the labelled data for the logistic model.
train <- read.csv(file = "train.csv", header = TRUE)

# Class types: inspect how each column of the scoring data was parsed.

summary(object = pd)
##   target           trackID        acousticness        danceability   
##  Mode:logical   Min.   :   2.0   Min.   :0.0000079   Min.   :0.1910  
##  NA's:100       1st Qu.: 462.5   1st Qu.:0.0051725   1st Qu.:0.5110  
##                 Median :1059.5   Median :0.0607000   Median :0.6140  
##                 Mean   :1000.9   Mean   :0.1786756   Mean   :0.6126  
##                 3rd Qu.:1557.5   3rd Qu.:0.2562500   3rd Qu.:0.7270  
##                 Max.   :2015.0   Max.   :0.9350000   Max.   :0.8760  
##                                                                      
##   duration_ms         energy       instrumentalness         key       
##  Min.   :155013   Min.   :0.1250   Min.   :0.0000000   Min.   : 0.00  
##  1st Qu.:194434   1st Qu.:0.6128   1st Qu.:0.0000000   1st Qu.: 2.00  
##  Median :224714   Median :0.7525   Median :0.0001325   Median : 5.00  
##  Mean   :231787   Mean   :0.7178   Mean   :0.1579494   Mean   : 5.34  
##  3rd Qu.:250160   3rd Qu.:0.8615   3rd Qu.:0.0720750   3rd Qu.: 8.25  
##  Max.   :460400   Max.   :0.9920   Max.   :0.9300000   Max.   :11.00  
##                                                                       
##     liveness          loudness            mode       speechiness     
##  Min.   :0.02190   Min.   :-21.001   Min.   :0.00   Min.   :0.02740  
##  1st Qu.:0.09112   1st Qu.: -7.454   1st Qu.:0.00   1st Qu.:0.03698  
##  Median :0.12900   Median : -5.647   Median :1.00   Median :0.05320  
##  Mean   :0.19989   Mean   : -6.306   Mean   :0.57   Mean   :0.08603  
##  3rd Qu.:0.26850   3rd Qu.: -4.374   3rd Qu.:1.00   3rd Qu.:0.10975  
##  Max.   :0.88700   Max.   : -1.188   Max.   :1.00   Max.   :0.45500  
##                                                                      
##      tempo        time_signature    valence                 song_title
##  Min.   : 72.28   Min.   :3.00   Min.   :0.0397   A Hundred Ropes: 1  
##  1st Qu.: 97.93   1st Qu.:4.00   1st Qu.:0.2888   Ain't Nobody   : 1  
##  Median :122.54   Median :4.00   Median :0.5160   Angel Eyes     : 1  
##  Mean   :121.82   Mean   :3.97   Mean   :0.4923   Archangel      : 1  
##  3rd Qu.:134.26   3rd Qu.:4.00   3rd Qu.:0.6830   Balance        : 1  
##  Max.   :203.82   Max.   :4.00   Max.   :0.9670   Before         : 1  
##                                                   (Other)        :94  
##          artist  
##  Duke Dumont: 2  
##  James Brown: 2  
##  Santigold  : 2  
##  Washed Out : 2  
##  *NSYNC     : 1  
##  A$AP Ferg  : 1  
##  (Other)    :90
# Coerce columns to the types the model expects.
# Free-text columns stay character; categorical audio features become factors.
train$artist <- as.character(train$artist)
train$song_title <- as.character(train$song_title)
train$mode <- as.factor(train$mode)
train$time_signature <- as.factor(train$time_signature)
train$key <- as.factor(train$key)
# trackID is an identifier, not a quantity. Plain as.character() is used
# instead of calling the numeric_version S3 method directly on a numeric
# column, which goes through format() and can pad IDs with leading spaces.
train$trackID <- as.character(train$trackID)

pd$artist <- as.character(pd$artist)
pd$song_title <- as.character(pd$song_title)
pd$trackID <- as.character(pd$trackID)

# Build the scoring factors against the TRAINING levels by value.
# Assigning `levels(pd$x) <- levels(train$x)` renames levels by position, so
# when predict.csv lacks some training levels (e.g. time signatures) the
# remaining values are silently relabelled. factor(..., levels = ) keeps each
# value's own label; a value never seen in training becomes NA.
pd$mode <- factor(pd$mode, levels = levels(train$mode))
pd$time_signature <- factor(
  pd$time_signature,
  levels = levels(train$time_signature)
)
pd$key <- factor(pd$key, levels = levels(train$key))

# The logistic model below is fitted on a factor response.
train$target <- as.factor(train$target)


#Model2

# Logistic regression of target on every audio feature. The response was
# converted to a factor above; key, mode and time_signature are factors too.
logitMod <- glm(
  target ~ acousticness + danceability + duration_ms + energy +
    instrumentalness + key + liveness + loudness + mode + speechiness +
    tempo + time_signature + valence,
  data = train,
  family = binomial(link = "logit")
)

#Prediction
# type = "response" returns fitted probabilities directly (equivalent to
# applying plogis() to the default link-scale predictions).
predicted <- predict(logitMod, newdata = pd, type = "response")

# Attach THIS model's probabilities. Previously the leftover `Prediction`
# vector from Model 1 was bound here, so the Entry2 file repeated the linear
# model's answers and `predicted` was never used.
pd <- cbind(pd, Prediction = predicted)

#If else statement
# Classify as 1 when the predicted probability is at least 0.5, otherwise 0.
pd$target <- ifelse(pd$Prediction >= 0.5, 1, 0)

#Subset/Export Data
# Keep only the submission columns (the first two columns of predict.csv).
pd <- pd[c("target", "trackID")]

#export
write.csv(pd, "Entry2-Sariah Nokes.csv", row.names = FALSE)

#Model 3

# Reload the scoring data so Model 3 starts from an unmodified `pd`.
pd <- read.csv(file = "predict.csv", header = TRUE)
head(x = pd, n = 6L)
##   target trackID acousticness danceability duration_ms energy instrumentalness
## 1     NA    1480     0.006470        0.695      182693  0.818         4.87e-06
## 2     NA    1453     0.021400        0.587      198426  0.772         6.98e-01
## 3     NA     129     0.045200        0.746      238146  0.416         7.77e-05
## 4     NA    1976     0.000102        0.446      190787  0.900         1.89e-05
## 5     NA      84     0.193000        0.867      460400  0.733         2.19e-04
## 6     NA     279     0.384000        0.478      281774  0.600         8.08e-01
##   key liveness loudness mode speechiness   tempo time_signature valence
## 1   5   0.0219   -5.379    0      0.0334 119.965              4  0.9230
## 2   4   0.4470   -4.439    0      0.0375 127.978              4  0.2010
## 3  11   0.1200   -6.607    1      0.0482 132.036              4  0.2160
## 4  10   0.0404   -3.541    0      0.0489 187.961              4  0.8840
## 5  10   0.7490   -7.540    0      0.2370  97.835              4  0.7610
## 6   5   0.0987  -11.025    0      0.0356 143.980              4  0.0635
##               song_title                   artist
## 1             Hard Times                 Paramore
## 2                 Legacy Dimitri Vangelis & Wyman
## 3                 Rewind                   Kelela
## 4 When We Stand Together               Nickelback
## 5            The Payback              James Brown
## 6              Like Lust                 MOVEMENT
# Hold-out data; `test` is not referenced again in the visible part of this
# script.
test <- read.csv(file = "test.csv", header = TRUE)

# Fresh copy of the labelled data for the random forest.
train <- read.csv(file = "train.csv", header = TRUE)

# Class types: inspect how each column of the scoring data was parsed.

summary(object = pd)
##   target           trackID        acousticness        danceability   
##  Mode:logical   Min.   :   2.0   Min.   :0.0000079   Min.   :0.1910  
##  NA's:100       1st Qu.: 462.5   1st Qu.:0.0051725   1st Qu.:0.5110  
##                 Median :1059.5   Median :0.0607000   Median :0.6140  
##                 Mean   :1000.9   Mean   :0.1786756   Mean   :0.6126  
##                 3rd Qu.:1557.5   3rd Qu.:0.2562500   3rd Qu.:0.7270  
##                 Max.   :2015.0   Max.   :0.9350000   Max.   :0.8760  
##                                                                      
##   duration_ms         energy       instrumentalness         key       
##  Min.   :155013   Min.   :0.1250   Min.   :0.0000000   Min.   : 0.00  
##  1st Qu.:194434   1st Qu.:0.6128   1st Qu.:0.0000000   1st Qu.: 2.00  
##  Median :224714   Median :0.7525   Median :0.0001325   Median : 5.00  
##  Mean   :231787   Mean   :0.7178   Mean   :0.1579494   Mean   : 5.34  
##  3rd Qu.:250160   3rd Qu.:0.8615   3rd Qu.:0.0720750   3rd Qu.: 8.25  
##  Max.   :460400   Max.   :0.9920   Max.   :0.9300000   Max.   :11.00  
##                                                                       
##     liveness          loudness            mode       speechiness     
##  Min.   :0.02190   Min.   :-21.001   Min.   :0.00   Min.   :0.02740  
##  1st Qu.:0.09112   1st Qu.: -7.454   1st Qu.:0.00   1st Qu.:0.03698  
##  Median :0.12900   Median : -5.647   Median :1.00   Median :0.05320  
##  Mean   :0.19989   Mean   : -6.306   Mean   :0.57   Mean   :0.08603  
##  3rd Qu.:0.26850   3rd Qu.: -4.374   3rd Qu.:1.00   3rd Qu.:0.10975  
##  Max.   :0.88700   Max.   : -1.188   Max.   :1.00   Max.   :0.45500  
##                                                                      
##      tempo        time_signature    valence                 song_title
##  Min.   : 72.28   Min.   :3.00   Min.   :0.0397   A Hundred Ropes: 1  
##  1st Qu.: 97.93   1st Qu.:4.00   1st Qu.:0.2888   Ain't Nobody   : 1  
##  Median :122.54   Median :4.00   Median :0.5160   Angel Eyes     : 1  
##  Mean   :121.82   Mean   :3.97   Mean   :0.4923   Archangel      : 1  
##  3rd Qu.:134.26   3rd Qu.:4.00   3rd Qu.:0.6830   Balance        : 1  
##  Max.   :203.82   Max.   :4.00   Max.   :0.9670   Before         : 1  
##                                                   (Other)        :94  
##          artist  
##  Duke Dumont: 2  
##  James Brown: 2  
##  Santigold  : 2  
##  Washed Out : 2  
##  *NSYNC     : 1  
##  A$AP Ferg  : 1  
##  (Other)    :90
# Coerce columns to the types the model expects.
# Free-text columns stay character; categorical audio features become factors.
train$artist <- as.character(train$artist)
train$song_title <- as.character(train$song_title)
train$mode <- as.factor(train$mode)
train$time_signature <- as.factor(train$time_signature)
train$key <- as.factor(train$key)
# trackID is an identifier, not a quantity. Plain as.character() is used
# instead of calling the numeric_version S3 method directly on a numeric
# column, which goes through format() and can pad IDs with leading spaces.
train$trackID <- as.character(train$trackID)

pd$artist <- as.character(pd$artist)
pd$song_title <- as.character(pd$song_title)
pd$trackID <- as.character(pd$trackID)

# Build the scoring factors against the TRAINING levels by value.
# Assigning `levels(pd$x) <- levels(train$x)` renames levels by position, so
# when predict.csv lacks some training levels (e.g. time signatures) the
# remaining values are silently relabelled. factor(..., levels = ) keeps each
# value's own label and still gives pd the same level set as train; a value
# never seen in training becomes NA.
pd$mode <- factor(pd$mode, levels = levels(train$mode))
pd$time_signature <- factor(
  pd$time_signature,
  levels = levels(train$time_signature)
)
pd$key <- factor(pd$key, levels = levels(train$key))

# A factor response makes the random forest below a classifier.
train$target <- as.factor(train$target)

#Model3

# randomForest() is called below but the package is never attached in the
# visible part of this script, so attach it here.
library(randomForest)

# Random forest on every audio feature. With the factor response created
# above this is fitted as a classification forest.
rfMod <- randomForest(
  target ~ acousticness + danceability + duration_ms + energy +
    instrumentalness + key + liveness + loudness + mode + speechiness +
    tempo + time_signature + valence,
  data = train
)

#Prediction
# Score with the forest itself. Previously this called predict() on `model`,
# the linear model from Model 1, so rfMod was never used and the Entry3 file
# held raw linear-model fitted values instead of class labels.
Prediction <- predict(rfMod, newdata = pd)
pd <- cbind(pd, Prediction)

# predict() on a classification forest returns a factor of class labels.
# Convert via character so the labels themselves, not the internal factor
# codes, are written. Assumes the labels are "0"/"1" - TODO confirm against
# train.csv.
pd$target <- as.integer(as.character(pd$Prediction))

#Subset/Export Data
# Keep only the submission columns (the first two columns of predict.csv).
pd <- pd[c("target", "trackID")]

#export
write.csv(pd, "Entry3-Sariah Nokes.csv", row.names = FALSE)