Assignment 04: Hands On with ANN

# Import all the relevant libraries

library(tm)
library(gmodels)
library(Matrix)
library(qdap)
library(keras)
library(tensorflow)
library(readr)
library(tfruns)
library(ggplot2)
library(tidyr)
library(dplyr)
library(corrplot)
library(caret)
library(neuralnet)
library(GGally)
library(NLP)

Problem 2: Predicting Baseball players’ salaries

  1. Download the dataset hitters.csv and explore the overall structure of the dataset using the str() function. Get summary statistics for each variable. Answer the following questions:

    How many observations do you have in the data?

    How many categorical and numeric variables do you have in your data?

    Is there any missing value?

    Draw the histogram of salary. Interpret what you see in the histogram.

# Read the Hitters CSV from the working directory into a data frame
hitters <- read.csv(file = "hitters-1.csv")
# Compact overview of the data frame: dimensions, column types, first values
str(object = hitters)
## 'data.frame':    322 obs. of  20 variables:
##  $ AtBat    : int  293 315 479 496 321 594 185 298 323 401 ...
##  $ Hits     : int  66 81 130 141 87 169 37 73 81 92 ...
##  $ HmRun    : int  1 7 18 20 10 4 1 0 6 17 ...
##  $ Runs     : int  30 24 66 65 39 74 23 24 26 49 ...
##  $ RBI      : int  29 38 72 78 42 51 8 24 32 66 ...
##  $ Walks    : int  14 39 76 37 30 35 21 7 8 65 ...
##  $ Years    : int  1 14 3 11 2 11 2 3 2 13 ...
##  $ CAtBat   : int  293 3449 1624 5628 396 4408 214 509 341 5206 ...
##  $ CHits    : int  66 835 457 1575 101 1133 42 108 86 1332 ...
##  $ CHmRun   : int  1 69 63 225 12 19 1 0 6 253 ...
##  $ CRuns    : int  30 321 224 828 48 501 30 41 32 784 ...
##  $ CRBI     : int  29 414 266 838 46 336 9 37 34 890 ...
##  $ CWalks   : int  14 375 263 354 33 194 24 12 8 866 ...
##  $ League   : chr  "A" "N" "A" "N" ...
##  $ Division : chr  "E" "W" "W" "E" ...
##  $ PutOuts  : int  446 632 880 200 805 282 76 121 143 0 ...
##  $ Assists  : int  33 43 82 11 40 421 127 283 290 0 ...
##  $ Errors   : int  20 10 14 3 4 25 7 9 19 0 ...
##  $ Salary   : num  NA 475 480 500 91.5 750 70 100 75 1100 ...
##  $ NewLeague: chr  "A" "N" "A" "N" ...
# Per-column summary: quartiles and mean for numeric columns, length and
# class for character columns, plus the NA count where values are missing
summary(object = hitters)
##      AtBat            Hits         HmRun            Runs       
##  Min.   : 16.0   Min.   :  1   Min.   : 0.00   Min.   :  0.00  
##  1st Qu.:255.2   1st Qu.: 64   1st Qu.: 4.00   1st Qu.: 30.25  
##  Median :379.5   Median : 96   Median : 8.00   Median : 48.00  
##  Mean   :380.9   Mean   :101   Mean   :10.77   Mean   : 50.91  
##  3rd Qu.:512.0   3rd Qu.:137   3rd Qu.:16.00   3rd Qu.: 69.00  
##  Max.   :687.0   Max.   :238   Max.   :40.00   Max.   :130.00  
##                                                                
##       RBI             Walks            Years            CAtBat       
##  Min.   :  0.00   Min.   :  0.00   Min.   : 1.000   Min.   :   19.0  
##  1st Qu.: 28.00   1st Qu.: 22.00   1st Qu.: 4.000   1st Qu.:  816.8  
##  Median : 44.00   Median : 35.00   Median : 6.000   Median : 1928.0  
##  Mean   : 48.03   Mean   : 38.74   Mean   : 7.444   Mean   : 2648.7  
##  3rd Qu.: 64.75   3rd Qu.: 53.00   3rd Qu.:11.000   3rd Qu.: 3924.2  
##  Max.   :121.00   Max.   :105.00   Max.   :24.000   Max.   :14053.0  
##                                                                      
##      CHits            CHmRun           CRuns             CRBI        
##  Min.   :   4.0   Min.   :  0.00   Min.   :   1.0   Min.   :   0.00  
##  1st Qu.: 209.0   1st Qu.: 14.00   1st Qu.: 100.2   1st Qu.:  88.75  
##  Median : 508.0   Median : 37.50   Median : 247.0   Median : 220.50  
##  Mean   : 717.6   Mean   : 69.49   Mean   : 358.8   Mean   : 330.12  
##  3rd Qu.:1059.2   3rd Qu.: 90.00   3rd Qu.: 526.2   3rd Qu.: 426.25  
##  Max.   :4256.0   Max.   :548.00   Max.   :2165.0   Max.   :1659.00  
##                                                                      
##      CWalks           League            Division            PutOuts      
##  Min.   :   0.00   Length:322         Length:322         Min.   :   0.0  
##  1st Qu.:  67.25   Class :character   Class :character   1st Qu.: 109.2  
##  Median : 170.50   Mode  :character   Mode  :character   Median : 212.0  
##  Mean   : 260.24                                         Mean   : 288.9  
##  3rd Qu.: 339.25                                         3rd Qu.: 325.0  
##  Max.   :1566.00                                         Max.   :1378.0  
##                                                                          
##     Assists          Errors          Salary        NewLeague        
##  Min.   :  0.0   Min.   : 0.00   Min.   :  67.5   Length:322        
##  1st Qu.:  7.0   1st Qu.: 3.00   1st Qu.: 190.0   Class :character  
##  Median : 39.5   Median : 6.00   Median : 425.0   Mode  :character  
##  Mean   :106.9   Mean   : 8.04   Mean   : 535.9                     
##  3rd Qu.:166.0   3rd Qu.:11.00   3rd Qu.: 750.0                     
##  Max.   :492.0   Max.   :32.00   Max.   :2460.0                     
##                                  NA's   :59

1.1 Number of observations

# Number of observations = first element of the data frame's dimensions
dim(hitters)[1]
## [1] 322

The dataset consists of 322 observations and 20 variables.

1.2 Types of Variables

Numeric Variables

# Names of the numeric (integer or double) columns.
# vapply() replaces sapply() so the mask is always a logical vector,
# whatever the shape of the input data frame.
cont_vars <- names(hitters)[vapply(hitters, is.numeric, logical(1))]
# Show the names as a one-column table
as.data.frame(cont_vars)
##    cont_vars
## 1      AtBat
## 2       Hits
## 3      HmRun
## 4       Runs
## 5        RBI
## 6      Walks
## 7      Years
## 8     CAtBat
## 9      CHits
## 10    CHmRun
## 11     CRuns
## 12      CRBI
## 13    CWalks
## 14   PutOuts
## 15   Assists
## 16    Errors
## 17    Salary

Categorical Variable:

# Names of the character (categorical) columns.
# vapply() replaces sapply() so the mask is always a logical vector,
# whatever the shape of the input data frame.
cat_vars <- names(hitters)[vapply(hitters, is.character, logical(1))]
# Show the names as a one-column table
as.data.frame(cat_vars)
##    cat_vars
## 1    League
## 2  Division
## 3 NewLeague

1.3 Find missing values if there is any

# Count the missing (NA) values in each column and show them as a table.
# The printed column header is the deparsed expression itself.
as.data.frame(colSums(is.na(hitters)))
##           colSums(is.na(hitters))
## AtBat                           0
## Hits                            0
## HmRun                           0
## Runs                            0
## RBI                             0
## Walks                           0
## Years                           0
## CAtBat                          0
## CHits                           0
## CHmRun                          0
## CRuns                           0
## CRBI                            0
## CWalks                          0
## League                          0
## Division                        0
## PutOuts                         0
## Assists                         0
## Errors                          0
## Salary                         59
## NewLeague                       0

Only the Salary column has missing values (59 of them).

1.4 Plot the distribution of Salary using histogram

# Histogram of Salary: 20 bins, blue bars with white outlines
ggplot(data = hitters, mapping = aes(x = Salary)) +
  geom_histogram(bins = 20, fill = "steelblue", color = "white") +
  ggtitle("Histogram of Salary") +
  xlab("Salary") +
  ylab("Frequency")

The histogram of Salary shows that the distribution of salaries is right-skewed, with a long tail to the right. Most of the salaries are concentrated in the lower range, with very few players earning higher salaries. This suggests that there is a significant disparity in salaries among baseball players, with only a small number of elite players earning very high salaries, while the majority of players earn much lower salaries.

  1. Remove the observations for which the Salary value is missing
# Drop every row that contains an NA (only Salary has any at this point)
hitters <- na.omit(object = hitters)
# Confirm that no column has missing values left
colSums(x = is.na(hitters))
##     AtBat      Hits     HmRun      Runs       RBI     Walks     Years    CAtBat 
##         0         0         0         0         0         0         0         0 
##     CHits    CHmRun     CRuns      CRBI    CWalks    League  Division   PutOuts 
##         0         0         0         0         0         0         0         0 
##   Assists    Errors    Salary NewLeague 
##         0         0         0         0
  1. Which predictors are most strongly correlated with Salary? Use scatter plots, side-by-side box plots, t-tests, and a correlation matrix to answer this question.

i) Scatter plot

# Salary first, followed by the sixteen numeric predictors
cols <- c(
  "Salary",
  "AtBat", "Hits", "HmRun", "Runs", "RBI", "Walks", "Years",
  "CAtBat", "CHits", "CHmRun", "CRuns", "CRBI", "CWalks",
  "PutOuts", "Assists", "Errors"
)

# Pairwise plot matrix of the selected columns
ggpairs(data = hitters[cols])

ii) side-by-side Box plots

# Categorical predictors to compare against Salary
predictors <- c("League", "Division", "NewLeague")
data <- hitters[c(predictors, "Salary")]
# Draw one box plot of Salary per predictor, side by side in a single row.
# The previous graphics settings are saved and restored afterwards so that
# later base-graphics plots are not squeezed into one panel of this layout.
old_par <- par(mfrow = c(1, length(predictors)))
for (predictor in predictors) {
  # reformulate() builds the formula Salary ~ <predictor> from the column
  # name, avoiding eval(parse(text = ...))
  boxplot(reformulate(predictor, response = "Salary"), data = data,
          main = predictor, xlab = predictor, ylab = "Salary")
}
par(old_par)

iii) t-test:

# Names of all numeric columns (Salary included)
continuous_variables <- names(hitters)[
  vapply(hitters, is.numeric, logical(1))
]

# Numeric predictors: test whether the correlation with Salary is zero.
# cor.test() reports this as a t statistic, so it is a t-test of
# association. (A two-sample t.test(x, Salary) would only compare the two
# means, which says nothing about association.) Salary itself is skipped.
# NOTE: rendered output produced before this change will differ.
for (var in setdiff(continuous_variables, "Salary")) {
  print(paste0("T-test for association between Salary and ", var, ":"))
  print(cor.test(hitters[[var]], hitters$Salary))
  print("_____________________________________________________________________")
}

# Categorical predictors: Welch two-sample t-test comparing mean Salary
# between the two groups of each predictor.
categorical_variables <- names(hitters)[
  vapply(hitters, is.character, logical(1))
]
for (var in categorical_variables) {
  # A two-sample t-test needs exactly two groups; skip anything else
  if (length(unique(hitters[[var]])) != 2) {
    next
  }
  print(paste0("T-test for association between Salary and ", var, ":"))
  print(t.test(reformulate(var, response = "Salary"), data = hitters))
  print("_____________________________________________________________________")
}
## [1] "T-test for association between Salary and AtBat:"
## 
##  Welch Two Sample t-test
## 
## data:  hitters[[var]] and hitters$Salary
## t = -4.5206, df = 317.24, p-value = 8.72e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -189.85666  -74.70993
## sample estimates:
## mean of x mean of y 
##  403.6426  535.9259 
## 
## [1] "_____________________________________________________________________"
## [1] "T-test for association between Salary and Hits:"
## 
##  Welch Two Sample t-test
## 
## data:  hitters[[var]] and hitters$Salary
## t = -15.313, df = 267.24, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -483.139 -373.055
## sample estimates:
## mean of x mean of y 
##  107.8289  535.9259 
## 
## [1] "_____________________________________________________________________"
## [1] "T-test for association between Salary and HmRun:"
## 
##  Welch Two Sample t-test
## 
## data:  hitters[[var]] and hitters$Salary
## t = -18.845, df = 262.2, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -579.0899 -469.5223
## sample estimates:
## mean of x mean of y 
##  11.61977 535.92588 
## 
## [1] "_____________________________________________________________________"
## [1] "T-test for association between Salary and Runs:"
## 
##  Welch Two Sample t-test
## 
## data:  hitters[[var]] and hitters$Salary
## t = -17.27, df = 263.68, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -536.0404 -426.3209
## sample estimates:
## mean of x mean of y 
##  54.74525 535.92588 
## 
## [1] "_____________________________________________________________________"
## [1] "T-test for association between Salary and RBI:"
## 
##  Welch Two Sample t-test
## 
## data:  hitters[[var]] and hitters$Salary
## t = -17.387, df = 263.72, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -539.3013 -429.5771
## sample estimates:
## mean of x mean of y 
##  51.48669 535.92588 
## 
## [1] "_____________________________________________________________________"
## [1] "T-test for association between Salary and Walks:"
## 
##  Welch Two Sample t-test
## 
## data:  hitters[[var]] and hitters$Salary
## t = -17.767, df = 263.21, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -549.6477 -439.9759
## sample estimates:
## mean of x mean of y 
##  41.11407 535.92588 
## 
## [1] "_____________________________________________________________________"
## [1] "T-test for association between Salary and Years:"
## 
##  Welch Two Sample t-test
## 
## data:  hitters[[var]] and hitters$Salary
## t = -19.002, df = 262.06, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -583.3908 -473.8374
## sample estimates:
##  mean of x  mean of y 
##   7.311787 535.925882 
## 
## [1] "_____________________________________________________________________"
## [1] "T-test for association between Salary and CAtBat:"
## 
##  Welch Two Sample t-test
## 
## data:  hitters[[var]] and hitters$Salary
## t = 14.763, df = 282.36, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  1838.730 2404.506
## sample estimates:
## mean of x mean of y 
## 2657.5437  535.9259 
## 
## [1] "_____________________________________________________________________"
## [1] "T-test for association between Salary and CHits:"
## 
##  Welch Two Sample t-test
## 
## data:  hitters[[var]] and hitters$Salary
## t = 3.8249, df = 467.57, p-value = 0.0001486
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   90.56889 281.95197
## sample estimates:
## mean of x mean of y 
##  722.1863  535.9259 
## 
## [1] "_____________________________________________________________________"
## [1] "T-test for association between Salary and CHmRun:"
## 
##  Welch Two Sample t-test
## 
## data:  hitters[[var]] and hitters$Salary
## t = -16.505, df = 279.38, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -522.3458 -411.0269
## sample estimates:
## mean of x mean of y 
##  69.23954 535.92588 
## 
## [1] "_____________________________________________________________________"
## [1] "T-test for association between Salary and CRuns:"
## 
##  Welch Two Sample t-test
## 
## data:  hitters[[var]] and hitters$Salary
## t = -5.0626, df = 480.86, p-value = 5.901e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -242.5126 -106.8981
## sample estimates:
## mean of x mean of y 
##  361.2205  535.9259 
## 
## [1] "_____________________________________________________________________"
## [1] "T-test for association between Salary and CRBI:"
## 
##  Welch Two Sample t-test
## 
## data:  hitters[[var]] and hitters$Salary
## t = -6.0045, df = 475.01, p-value = 3.817e-09
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -272.7598 -138.2555
## sample estimates:
## mean of x mean of y 
##  330.4183  535.9259 
## 
## [1] "_____________________________________________________________________"
## [1] "T-test for association between Salary and CWalks:"
## 
##  Welch Two Sample t-test
## 
## data:  hitters[[var]] and hitters$Salary
## t = -8.5523, df = 422.67, p-value = 2.237e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -339.0150 -212.3045
## sample estimates:
## mean of x mean of y 
##  260.2662  535.9259 
## 
## [1] "_____________________________________________________________________"
## [1] "T-test for association between Salary and PutOuts:"
## 
##  Welch Two Sample t-test
## 
## data:  hitters[[var]] and hitters$Salary
## t = -7.4903, df = 437.72, p-value = 3.827e-13
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -309.5574 -180.8723
## sample estimates:
## mean of x mean of y 
##  290.7110  535.9259 
## 
## [1] "_____________________________________________________________________"
## [1] "T-test for association between Salary and Assists:"
## 
##  Welch Two Sample t-test
## 
## data:  hitters[[var]] and hitters$Salary
## t = -14.277, df = 315.62, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -474.6566 -359.6742
## sample estimates:
## mean of x mean of y 
##  118.7605  535.9259 
## 
## [1] "_____________________________________________________________________"
## [1] "T-test for association between Salary and Errors:"
## 
##  Welch Two Sample t-test
## 
## data:  hitters[[var]] and hitters$Salary
## t = -18.955, df = 262.11, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -582.1122 -472.5533
## sample estimates:
##  mean of x  mean of y 
##   8.593156 535.925882 
## 
## [1] "_____________________________________________________________________"
## [1] "T-test for association between Salary and Salary:"
## 
##  Welch Two Sample t-test
## 
## data:  hitters[[var]] and hitters$Salary
## t = 0, df = 524, p-value = 1
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -77.28235  77.28235
## sample estimates:
## mean of x mean of y 
##  535.9259  535.9259 
## 
## [1] "_____________________________________________________________________"

Based on the t-test results, the variables reported as most strongly associated with Salary are Hits, HmRun, Runs, RBI, Walks, Years, CAtBat, and CHmRun. The p-values for all of these variables are less than 0.05. Note, however, that each test above compares the mean of a variable with the mean of Salary, so a small p-value shows only that the two means differ (the variables are measured on different scales); it does not by itself show that the variable is associated with Salary.

iv) Correlation Matrix

# Select only the numeric columns. vapply() guarantees a logical mask, and
# drop = FALSE keeps a data frame even if only one column is numeric.
num_cols <- vapply(hitters, is.numeric, logical(1))
hitters_num <- hitters[, num_cols, drop = FALSE]

# Pairwise correlations between all numeric columns
cor_mat <- cor(hitters_num)

# Plot the upper triangle of the correlation matrix as a colour heatmap
corrplot(cor_mat, method = "color", type = "upper", tl.col = "black")

# Print the full matrix of coefficients
cor_mat
##             AtBat       Hits        HmRun        Runs        RBI     Walks
## AtBat   1.0000000 0.96396913  0.555102154  0.89982910 0.79601539 0.6244481
## Hits    0.9639691 1.00000000  0.530627358  0.91063014 0.78847819 0.5873105
## HmRun   0.5551022 0.53062736  1.000000000  0.63107588 0.84910743 0.4404537
## Runs    0.8998291 0.91063014  0.631075883  1.00000000 0.77869235 0.6970151
## RBI     0.7960154 0.78847819  0.849107434  0.77869235 1.00000000 0.5695048
## Walks   0.6244481 0.58731051  0.440453717  0.69701510 0.56950476 1.0000000
## Years   0.0127255 0.01859809  0.113488420 -0.01197495 0.12966795 0.1347927
## CAtBat  0.2071663 0.20667761  0.217463613  0.17181080 0.27812591 0.2694500
## CHits   0.2253415 0.23560577  0.217495691  0.19132697 0.29213714 0.2707951
## CHmRun  0.2124215 0.18936425  0.492525845  0.22970104 0.44218969 0.3495822
## CRuns   0.2372778 0.23889610  0.258346846  0.23783121 0.30722616 0.3329766
## CRBI    0.2213932 0.21938423  0.349858379  0.20233548 0.38777657 0.3126968
## CWalks  0.1329257 0.12297073  0.227183183  0.16370021 0.23361884 0.4291399
## PutOuts 0.3096075 0.29968754  0.250931497  0.27115986 0.31206456 0.2808555
## Assists 0.3421174 0.30397495 -0.161601753  0.17925786 0.06290174 0.1025226
## Errors  0.3255770 0.27987618 -0.009743082  0.19260879 0.15015469 0.0819372
## Salary  0.3947709 0.43867474  0.343028078  0.41985856 0.44945709 0.4438673
##               Years       CAtBat       CHits      CHmRun       CRuns
## AtBat    0.01272550  0.207166254  0.22534146  0.21242155  0.23727777
## Hits     0.01859809  0.206677608  0.23560577  0.18936425  0.23889610
## HmRun    0.11348842  0.217463613  0.21749569  0.49252584  0.25834685
## Runs    -0.01197495  0.171810798  0.19132697  0.22970104  0.23783121
## RBI      0.12966795  0.278125914  0.29213714  0.44218969  0.30722616
## Walks    0.13479270  0.269449974  0.27079505  0.34958216  0.33297657
## Years    1.00000000  0.915680692  0.89784449  0.72237071  0.87664855
## CAtBat   0.91568069  1.000000000  0.99505681  0.80167609  0.98274694
## CHits    0.89784449  0.995056810  1.00000000  0.78665204  0.98454184
## CHmRun   0.72237071  0.801676089  0.78665204  1.00000000  0.82562483
## CRuns    0.87664855  0.982746941  0.98454184  0.82562483  1.00000000
## CRBI     0.86380936  0.950730141  0.94679739  0.92790264  0.94567701
## CWalks   0.83752373  0.906711655  0.89071842  0.81087827  0.92776846
## PutOuts -0.02001921  0.053392514  0.06734799  0.09382223  0.05908718
## Assists -0.08511772 -0.007897271 -0.01314420 -0.18888646 -0.03889509
## Errors  -0.15651196 -0.070477521 -0.06803583 -0.16536941 -0.09408054
## Salary   0.40065699  0.526135310  0.54890956  0.52493056  0.56267771
##                CRBI      CWalks     PutOuts      Assists       Errors
## AtBat    0.22139318  0.13292568  0.30960746  0.342117377  0.325576978
## Hits     0.21938423  0.12297073  0.29968754  0.303974950  0.279876183
## HmRun    0.34985838  0.22718318  0.25093150 -0.161601753 -0.009743082
## Runs     0.20233548  0.16370021  0.27115986  0.179257859  0.192608787
## RBI      0.38777657  0.23361884  0.31206456  0.062901737  0.150154692
## Walks    0.31269680  0.42913990  0.28085548  0.102522559  0.081937197
## Years    0.86380936  0.83752373 -0.02001921 -0.085117725 -0.156511957
## CAtBat   0.95073014  0.90671165  0.05339251 -0.007897271 -0.070477521
## CHits    0.94679739  0.89071842  0.06734799 -0.013144204 -0.068035829
## CHmRun   0.92790264  0.81087827  0.09382223 -0.188886464 -0.165369407
## CRuns    0.94567701  0.92776846  0.05908718 -0.038895093 -0.094080542
## CRBI     1.00000000  0.88913701  0.09537515 -0.096558877 -0.115316131
## CWalks   0.88913701  1.00000000  0.05816016 -0.066243445 -0.129935875
## PutOuts  0.09537515  0.05816016  1.00000000 -0.043390143  0.075305857
## Assists -0.09655888 -0.06624345 -0.04339014  1.000000000  0.703504693
## Errors  -0.11531613 -0.12993587  0.07530586  0.703504693  1.000000000
## Salary   0.56696569  0.48982204  0.30048036  0.025436136 -0.005400702
##               Salary
## AtBat    0.394770945
## Hits     0.438674738
## HmRun    0.343028078
## Runs     0.419858559
## RBI      0.449457088
## Walks    0.443867260
## Years    0.400656994
## CAtBat   0.526135310
## CHits    0.548909559
## CHmRun   0.524930560
## CRuns    0.562677711
## CRBI     0.566965686
## CWalks   0.489822036
## PutOuts  0.300480356
## Assists  0.025436136
## Errors  -0.005400702
## Salary   1.000000000

Variables that show strong association with salary, according to the correlation matrix, are:

Salary is most positively correlated with “CAtBat”, “CHits”, “CHmRun”, and “CRuns” (CRBI, at about 0.57, is in fact the highest), while Errors shows a very slight negative correlation (about −0.005, i.e. essentially none).

Based on these statistical tests, we are going to consider all the variables except AtBat, PutOuts, Assists, and Errors, which are left out of the selection below.

# Columns carried forward: twelve numeric predictors, the three categorical
# predictors, and Salary (in this display order)
hitters_final <- hitters[c(
  "Hits", "Runs", "RBI", "Walks", "Years",
  "CHits", "CRuns", "CRBI", "CWalks",
  "HmRun", "CAtBat", "CHmRun",
  "League", "Division", "NewLeague",
  "Salary"
)]
hitters_final
##     Hits Runs RBI Walks Years CHits CRuns CRBI CWalks HmRun CAtBat CHmRun
## 2     81   24  38    39    14   835   321  414    375     7   3449     69
## 3    130   66  72    76     3   457   224  266    263    18   1624     63
## 4    141   65  78    37    11  1575   828  838    354    20   5628    225
## 5     87   39  42    30     2   101    48   46     33    10    396     12
## 6    169   74  51    35    11  1133   501  336    194     4   4408     19
## 7     37   23   8    21     2    42    30    9     24     1    214      1
## 8     73   24  24     7     3   108    41   37     12     0    509      0
## 9     81   26  32     8     2    86    32   34      8     6    341      6
## 10    92   49  66    65    13  1332   784  890    866    17   5206    253
## 11   159  107  75    59    10  1300   702  504    488    21   4631     90
## 12    53   31  26    27     9   467   192  186    161     4   1876     15
## 13   113   48  61    47     4   392   205  204    203    13   1512     41
## 14    60   30  11    22     6   510   309  103    207     0   1941      4
## 15    43   29  27    30    13   825   376  290    238     7   3231     36
## 17   158   89  75    73    15  2273  1045  993    732    20   8068    177
## 18    46   24   8    15     5   102    65   23     39     2    479      5
## 20    32   16  22    14     8   180    67   82     56     8    727     24
## 21    92   72  48    65     1    92    72   48     65    16    413     16
## 22   109   55  43    62     1   109    55   43     62     3    426      3
## 24   116   60  62    74     6   489   242  251    240    16   1924     67
## 25   168   73 102    40    18  2464  1008 1072    402    18   8424    164
## 26   163   92  51    70     6   747   442  198    317     4   2695     17
## 27    73   32  18    22     7   491   291  108    180     4   1931     13
## 28   129   50  56    40    10   604   246  327    166    10   2331     61
## 29   152   92  37    81     5   633   349  182    308     6   2308     32
## 30   137   90  95    90    14  1382   763  734    784    20   5201    166
## 32   108   55  36    22     3   149    80   46     31     6    591      8
## 34   168   83  80    56     5   452   219  208    136    17   1646     44
## 35    49   23  25    12     7   308   126  132     66     6   1309     27
## 36   106   38  60    30    14  1906   859  803    571    10   6207    146
## 38    60   24  25    15     2    78    34   29     18     5    350      5
## 41    41   15  21    33     2    50    20   29     45     1    232      4
## 44    46   32  19     9     4   160    86   76     32     7    694     32
## 46    68   28  26    22     6   236   108  117    118     2    999     21
## 47   132   57  49    33     3   273   113  121     80     7    932     24
## 48    57   34  32     9     5   192   117  107     51     8    756     32
## 50   146   71  70    84     6   715   352  342    289    13   2648     77
## 51   101   42  63    22    17  1767  1003  977    619    14   6521    281
## 52    53   30  29    23     2    59    32   32     27     2    226      2
## 54   168   80  72    39     9  1307   634  563    319    19   4478    113
## 55   101   45  53    39    12  1429   747  666    526    12   5150    166
## 56   102   49  85    20     6   231    99  138     64    18    950     29
## 57    58   28  25    35     4   333   164  179    194     9   1335     49
## 59    78   32  41    12    12   968   409  321    170     6   3742     35
## 60   177   98  81    70     6   927   529  472    313    25   3210    133
## 61   113   58  69    16     1   113    58   69     16    24    416     24
## 62    44   21  23    15    16  1634   698  661    777     6   6631     98
## 63    56   27  15    11     4   270   116   64     57     0   1115      1
## 64    53   31  15    22     4   210   118   69    114     1    926      9
## 66   139   93  94    62    17  1982  1141 1179    727    31   7546    315
## 68    53   29  22    21     3   120    57   40     39     5    514      8
## 69   142   67  86    45     4   205    99  103     78    20    815     22
## 71    81   42  30    26    17  2198   950  909    690     3   8247    100
## 73   131   69  96    52    14  1397   712  815    548    26   5347    221
## 74   122   78  85    91    18  1947  1175 1152   1380    29   7761    347
## 75   137   86  97    97    15  1785  1082  949    989    26   6661    291
## 76   119   57  46    13     9  1046   461  301    112     6   3651     32
## 77    97   55  29    39     4   353   196  110    117     4   1258     16
## 79   103   59  47    39     6   555   285  274    186    15   2174     80
## 80    96   37  29    23     4   290   123  108     55     4   1064     11
## 82    70   49  35    43    15  1661  1019  608    820     7   6311    154
## 83   238  117 113    53     5   737   349  401    171    31   2223     93
## 85   163   89  83    75    11  1388   813  822    617    29   5017    266
## 86    83   50  39    56     9   948   575  528    635     9   3828    145
## 87   174   89 116    56    14  2024   978 1093    495    31   6727    247
## 88    82   44  45    47     2   113    61   70     63    16    428     25
## 89    41   21  29    22    16  1338   746  805    875    12   5409    181
## 90   114   67  57    48     4   298   160  123    122    13   1350     28
## 91    83   39  46    16     5   405   156  159     76    13   1457     28
## 92   123   76  93    72     4   471   292  343    267    27   1810    108
## 93    78   35  35    32     1    78    35   35     32     7    317      7
## 94   138   76  96    61     3   164    87  110     71    25    592     28
## 96   119   54  58    36    12   594   287  294    227    13   2133     41
## 97   148   90 104    77    14  2083  1135 1234    791    24   7287    305
## 99   115   97  71    68     3   184   156  119     99    27    711     45
## 100  110   70  47    36     7   544   335  174    258    15   2130     38
## 101  151   61  84    78    10  1679   884 1015    709    17   5624    275
## 103   49   41  23    18     8   336   166  122    106     2   1350      7
## 108  154   76  84    43    14  1583   743  693    300    22   6100    131
## 109  198  101 108    41     5   610   297  319    117    31   2129     92
## 110   51   19  18    11     1    51    19   18     11     4    215      4
## 111  128   70  73    80    14  2095  1072 1050    695    16   6675    209
## 112   76   33  52    37     5   351   195  219    214    16   1506     71
## 113  125   81 105    62    13  1646   847  999    680    24   6063    271
## 114  152   91 101    64     3   260   148  173     95    31    985     53
## 116  171   91 108    52     6   728   361  401    224    34   2862    107
## 117  118   63  54    30     4   187   102   80     50    12    793     14
## 118   77   45  47    26    16  1910   915 1067    546    14   6840    259
## 119   94   42  36    66     9   866   429  365    410     4   3573     59
## 120   85   30  44    20     8   568   216  208     93     3   2140     16
## 121   96   49  46    60    15  1972  1070  955    921    21   6986    231
## 122   77   36  55    41    20  2172  1172 1267   1057    16   8716    384
## 123  139   93  58    69     5   369   247  126    198     5   1469     12
## 124   84   62  33    47     5   376   284  141    219    11   1516     42
## 125  126   42  44    35    11  1578   703  519    256     2   5562     44
## 127   78   37  51    29     5   453   211  280    138    13   1649     73
## 128  120   54  51    31     8   900   444  419    240     5   3118     92
## 129  158   70  84    42     5   636   265  316    134    15   2358     58
## 130  169   72  88    38     7  1077   492  589    263    21   3754    140
## 131  104   50  58    25     7   822   313  377    187    14   2954     55
## 132   54   30  39    31     5   299   145  154    128    10   1185     40
## 133   70   22  37    18    18  2081   935 1088    643     7   7186    190
## 134   99   46  24    29     4   129    72   31     48     1    618      1
## 135   39   18  30    15     9   151    80   97     61     5    639     16
## 136   40   23  11    18     3   125    58   37     47     4    524      7
## 137  170  107 108    69     6   634   371  376    238    40   2325    128
## 138  103   48  36    40    15  1193   581  421    325     6   4338     70
## 140  103   65  32    71     2   103    67   32     71     2    440      2
## 141  144   85 117    65     2   173   101  130     69    33    696     38
## 142  200  108 121    32     4   404   210  222     68    29   1447     57
## 143   55   34  23    45    12  1213   702  705    625     9   4405    194
## 144  133   48  72    55    17  2147   980 1032    854    10   7472    153
## 146  132   61  74    41     6   671   273  383    226    21   2641     97
## 147   39   18  31    22    14   543   304  268    298     8   2128     56
## 148  183   80  74    32     5   715   330  326    158    10   2482     27
## 149  136   58  38    26    11  1066   450  367    241     7   3871     40
## 150   70   32  51    28    15  1130   544  462    551    13   4040     97
## 152   41   26  21    19     2    68    45   39     35     4    288      9
## 153   86   33  38    45     1    86    33   38     45     4    278      4
## 154   95   48  42    20    10   808   379  428    221     6   2964     81
## 155  147   58  88    47    10   730   302  351    174    23   2744     97
## 156  102   56  34    34     5   167    89   48     54     3    670      4
## 157   94   37  32    26    13  1330   616  522    436     5   4618     57
## 160  163   83 107    32     3   377   181  227     82    27   1437     65
## 162  174   67  78    58     6   880   366  337    218     7   3053     32
## 163   39   13   9    16     3    44    18   10     18     2    196      2
## 164  200   98 110    62    13  2163  1104 1289    564    20   7127    351
## 165   66   31  26    32    14   979   518  324    382     5   3910     33
## 166   76   35  60    25     3   151    68   94     55    13    630     24
## 167  157   90  78    26     4   541   310  226     91    16   2020     52
## 168   92   54  49    18     6   325   188  135     63    11   1354     30
## 169   73   23  37    16     4   108    38   46     28     5    450      6
## 171   91   41  42    57    13  1397   578  579    644    12   5590     83
## 173  101   46  43    61     3   218    96   72     91     3    948      6
## 175   47   20  28    18    11   890   419  382    304     5   3327     74
## 176  184   83  79    38     5   462   219  195     82    20   1689     40
## 177   58   34  23    22     1    58    34   23     22     6    181      6
## 178  118   84  86    68     8   750   433  420    309    28   2723    126
## 179  150   69  58    35    14  1839   983  707    600    21   6126    121
## 180  171   94  83    94    13  1840   969  900    917    13   6090    128
## 181  147   85  91    71     6   815   405  474    319    29   2816    117
## 182   74   34  29    22    10  1062   505  456    283     4   3919     85
## 183  161   89  96    66     4   470   233  260    155    26   1789     65
## 184   91   51  43    33     2    94    51   44     33    12    342     12
## 185  159   72  79    53     9   880   363  477    295    12   3082     83
## 186  136   62  48    83    10   970   408  303    414     5   3423     20
## 187   85   69  64    88     7   214   150  156    187    24    911     64
## 188  223  119  96    34     3   587   262  201     91    31   1928     35
## 189   64   31  26    30     1    64    31   26     30     0    279      0
## 190  127   66  65    67     7   844   436  458    377    20   3006    116
## 191  127   77  45    58     2   187   117   64     88     8    667      9
## 192   70   33  37    27    12  1222   557  483    307     8   4479     94
## 193  141   77  47    37    15  1240   615  430    340    11   4291     84
## 194   52   26  28    21     6   191   113  119     87     9    805     30
## 195  149   89  86    64     7   928   513  471    351    21   3558    102
## 196   84   53  62    38    10  1123   577  700    334    22   4273    212
## 197  128   67  94    52    13  1552   740  840    452    28   5829    210
## 199   92   42  60    21     3   185    88  112     50    18    682     36
## 201  157   95  73    63    10  1320   724  522    576    20   4704     93
## 202   54   27  25    33     1    54    27   25     33     2    216      2
## 203  179   94  60    65     5   476   216  163    166     4   1696     12
## 205  131   77  55    34     7   549   300  263    153    19   2051     62
## 206   56   22  36    19     2    58    24   37     19    12    216     12
## 207   93   47  30    30     2   230   121   69     68     8    969     14
## 208  148   64  78    49    13  1000   445  491    301    14   3400    113
## 210  131   68  77    33     6   398   210  203    136    18   1416     47
## 212   65   30  36    27     9   698   315  325    189     8   2815     55
## 213   54   25  14    12     1    54    25   14     12     3    209      3
## 214   71   18  30    36     3    76    20   36     45     3    344      3
## 215   77   47  53    27     6   516   247  288    161    19   1928     90
## 216  120   71  71    54     3   259   150  167    114    28   1085     54
## 217   60   28  33    18     3   170    80   75     36     8    638     17
## 218    1    0   0     0     2     9     6    7      4     0     41      2
## 219   94   36  26    62     7   519   181  199    288     5   1968     26
## 220   43   26  35    39     3   116    59   55     78    10    498     14
## 221   75   38  23    26     3   160    71   33     44     6    580      8
## 222  167   89  49    57     4   232   132   83     79     8    822     19
## 223  110   61  45    32     7   834   451  249    168     9   3015     40
## 224   76   34  37    15     4   408   198  120    113     7   1644     16
## 225   93   43  42    49     5   323   181  177    157     9   1258     54
## 227  137   58  47    12     2   271   129   80     24     2   1038      3
## 228  152  105  49    65     2   249   168   91    101    18    978     36
## 230  144   67  54    79     9  1169   583  374    528     0   4739     13
## 231   80   45  48    63     7   359   176  202    175    15   1493     61
## 232  163   88  50    77     4   470   245  167    174    12   1556     38
## 233   83   43  41    30    14  1543   751  714    535     9   5885    104
## 234  135   82  88    55     1   135    82   88     55    30    540     30
## 235  123   62  55    40     9  1203   676  390    364     9   4139     79
## 237   52   15  25    30    24  4256  2165 1314   1566     0  14053    160
## 238   56   41  19    21     5   329   166  125    105     6   1257     24
## 239  154   61  48    29     6   566   250  252    178     6   1966     29
## 240   72   33  31    26     5    82    41   32     26     0    354      0
## 241   77   35  29    33    12  1358   630  435    403     5   4933     48
## 242   96   50  45    39     5   344   178  192    136     7   1394     43
## 243   56   22  18    15    12   665   266  304    198     4   2796     43
## 244   70   42  36    44    16  1845   965 1128    990    13   7058    312
## 245  108   75  86    72     3   142   102  109    102    33    652     44
## 246   68   42  29    45    18   939   438  380    466    13   3949     78
## 248  110   45  49    46     9   658   249  322    274     9   2331     50
## 249  160  130  74    89     8  1182   862  417    708    28   4071    103
## 250  101   65  58    92    20  2510  1509 1659   1342    18   9528    548
## 252   82   42  60    35     5   408   238  299    157    21   1770    115
## 253  145   51  76    40    11  1102   410  497    284    11   3967     67
## 256   76   35  39    13     6   234   102   96     80     5    912     12
## 257   52   31  27    17    12  1323   643  445    459     8   5134     56
## 258   90   50  45    43    10   614   295  273    269    11   2288     43
## 259  135   52  44    52     9   895   377  284    296     8   3368     39
## 260   68   32  22    24     1    68    32   22     24     2    312      2
## 261  119   57  33    21     7   882   365  280    165     8   3358     36
## 262   27    8  10     5     4    49    16   13     14     3    239      3
## 263   68   42  42    61     6   238   128  104    172     5    961     16
## 264  178   68  76    46     6   902   494  345    242    14   3146     74
## 265   86   38  28    36     4   267    94   71     76     1   1089      3
## 266   57   32  25    18     3   170    98   54     62     8    653     17
## 267  101   50  55    22     1   101    50   55     22    16    382     16
## 268  113   59  57    68    12  1369   713  660    735    20   5348    155
## 269  149   73  47    42     1   149    73   47     42     7    549      7
## 270   63   25  33    16    10   667   315  259    204     3   2682     38
## 272  163   82  46    62    13  2019  1043  827    535     9   7037    153
## 273  117   54  88    43     6   412   204  276    155    29   1750    100
## 274   66   20  28    13     3    80    27   31     15     5    290      5
## 275  140   73  77    60     4   185    93  106     86    16    730     22
## 276  112   54  54    35     2   160    76   75     49    18    680     24
## 277  145   66  68    21     2   210   106   86     40    17    831     21
## 278  159   82  50    47     6   426   218  149    163     3   1619     11
## 279  142   58  81    23    18  2583  1138 1299    478    21   8759    271
## 280   96   44  36    65     4   148    68   56     99     0    711      1
## 281  103   53  33    52     2   123    63   39     58     8    507      8
## 282  122   67  45    51     4   403   211  146    155     1   1716     12
## 283  210   91  56    59     6   872   420  230    274     6   3070     19
## 285  169   88  73    53     8   841   450  342    373    17   3181     61
## 286   76   42  25    20     8   657   324  300    179     3   2658     48
## 287  152   69  75    53     6   686   369  384    321    23   2765    133
## 288  213   91  65    27     4   448   196  137     89    10   1518     15
## 289  103   48  28    54     8   493   207  162    198     3   1897      9
## 290   70   26  23    30     4   220    83   82     86     1    888      9
## 291  211  107  59    52     5   770   352  230    193    14   2364     27
## 292   68   26  30    29     7   339   135  163    128     8   1337     32
## 294  141   48  61    73     8   874   421  349    359     2   3162     16
## 295  120   53  44    21     4   227   106   80     52    17    927     22
## 296    4    2   3     1     1     4     2    3      1     1     19      1
## 297   43   24  17    20     7   219   105   99     71     2    854     12
## 298   47   21  29    24     6   256   129  139    106    10   1136     42
## 300   61   17  22     3    17  1145   488  491    244     4   4061     83
## 301  147   56  52    53     7   821   307  340    174    10   2872     63
## 302  138   56  59    34     3   357   149  161     87     1   1399      7
## 304  113   76  52    76     5   397   226  149    191     5   1546     17
## 305   42   17  14    15    10  1150   579  363    406     3   4086     57
## 307   32   14  25    12    19  2402  1048 1348    819     4   8396    242
## 308   69   35  31    32     4   355   180  148    158     4   1359     31
## 309  112   50  71    44     7   771   338  406    239    18   3031    110
## 310  139   94  29    60     2   309   201   69    110     0   1236      1
## 311  186  107  98    74     6   753   399  366    286    19   2728     69
## 312   81   37  44    37     7   566   279  257    246     5   2268     41
## 313  124   67  27    36     7   506   272  125    194     1   1775      6
## 314  207  107  71   105     5   978   474  322    417     8   2778     32
## 315  117   66  41    34     1   117    66   41     34    11    408     11
## 316  172   82 100    57     1   172    82  100     57    22    593     22
## 318  127   65  48    37     5   806   379  311    138     7   2703     32
## 319  136   76  50    94    12  1511   897  451    875     5   5511     39
## 320  126   61  43    52     6   433   217   93    146     3   1700      7
## 321  144   85  60    78     8   857   470  420    332     9   3198     97
## 322  170   77  44    31    11  1457   775  357    249     9   4908     30
##     League Division NewLeague   Salary
## 2        N        W         N  475.000
## 3        A        W         A  480.000
## 4        N        E         N  500.000
## 5        N        E         N   91.500
## 6        A        W         A  750.000
## 7        N        E         A   70.000
## 8        A        W         A  100.000
## 9        N        W         N   75.000
## 10       A        E         A 1100.000
## 11       A        E         A  517.143
## 12       N        W         N  512.500
## 13       N        E         N  550.000
## 14       A        E         A  700.000
## 15       N        E         N  240.000
## 17       N        W         N  775.000
## 18       A        W         A  175.000
## 20       N        W         N  135.000
## 21       N        E         N  100.000
## 22       A        W         N  115.000
## 24       N        W         N  600.000
## 25       A        E         A  776.667
## 26       A        E         A  765.000
## 27       N        E         N  708.333
## 28       N        W         N  750.000
## 29       N        W         N  625.000
## 30       A        W         A  900.000
## 32       N        W         N  110.000
## 34       A        E         A  612.500
## 35       A        W         A  300.000
## 36       N        W         N  850.000
## 38       N        W         N   90.000
## 41       A        E         A   67.500
## 44       A        E         A  180.000
## 46       A        E         A  305.000
## 47       N        W         N  215.000
## 48       A        E         A  247.500
## 50       N        W         N  815.000
## 51       A        W         A  875.000
## 52       N        E         N   70.000
## 54       A        W         A 1200.000
## 55       A        E         A  675.000
## 56       N        W         N  415.000
## 57       N        W         N  340.000
## 59       N        W         N  416.667
## 60       A        E         A 1350.000
## 61       A        E         A   90.000
## 62       N        E         N  275.000
## 63       A        W         A  230.000
## 64       N        W         N  225.000
## 66       A        E         A  950.000
## 68       A        W         A   75.000
## 69       A        E         A  105.000
## 71       N        W         N  320.000
## 73       A        W         A  850.000
## 74       A        E         A  535.000
## 75       A        E         A  933.333
## 76       A        E         N  850.000
## 77       N        W         A  210.000
## 79       A        W         A  325.000
## 80       A        W         A  275.000
## 82       N        E         N  450.000
## 83       A        E         A 1975.000
## 85       N        W         N 1900.000
## 86       A        W         A  600.000
## 87       N        W         N 1041.667
## 88       A        E         A  110.000
## 89       A        W         A  260.000
## 90       A        W         A  475.000
## 91       A        W         A  431.500
## 92       N        E         N 1220.000
## 93       A        E         A   70.000
## 94       A        W         A  145.000
## 96       N        W         N  595.000
## 97       A        E         A 1861.460
## 99       N        W         N  300.000
## 100      N        W         N  490.000
## 101      A        E         A 2460.000
## 103      A        E         A  375.000
## 108      A        W         A  750.000
## 109      A        E         A 1175.000
## 110      A        E         A   70.000
## 111      A        W         A 1500.000
## 112      N        W         A  385.000
## 113      N        E         N 1925.571
## 114      N        W         N  215.000
## 116      A        W         A  900.000
## 117      A        W         A  155.000
## 118      A        W         A  700.000
## 119      N        W         N  535.000
## 120      A        E         A  362.500
## 121      N        E         N  733.333
## 122      N        W         N  200.000
## 123      A        W         A  400.000
## 124      N        E         A  400.000
## 125      N        W         N  737.500
## 127      A        W         A  500.000
## 128      A        W         A  600.000
## 129      N        E         N  662.500
## 130      A        W         A  950.000
## 131      N        E         N  750.000
## 132      N        E         N  297.500
## 133      A        W         A  325.000
## 134      A        W         A   87.500
## 135      N        W         N  175.000
## 136      N        E         N   90.000
## 137      A        E         A 1237.500
## 138      A        E         A  430.000
## 140      A        W         N  100.000
## 141      A        W         A  165.000
## 142      A        E         A  250.000
## 143      N        E         N 1300.000
## 144      N        W         N  773.333
## 146      N        E         N 1008.333
## 147      A        E         A  275.000
## 148      A        E         A  775.000
## 149      A        E         A  850.000
## 150      A        E         A  365.000
## 152      A        W         A   95.000
## 153      N        W         N  110.000
## 154      N        W         N  100.000
## 155      N        E         N  277.500
## 156      A        W         A   80.000
## 157      N        E         N  600.000
## 160      A        W         A  200.000
## 162      N        E         N  657.000
## 163      A        W         N   75.000
## 164      A        E         A 2412.500
## 165      N        W         A  250.000
## 166      N        E         N  155.000
## 167      N        E         N  640.000
## 168      A        E         A  300.000
## 169      A        W         A  110.000
## 171      A        W         N  825.000
## 173      N        W         N  195.000
## 175      N        W         N  450.000
## 176      N        W         N  630.000
## 177      N        W         N   86.500
## 178      A        E         A 1300.000
## 179      A        E         N 1000.000
## 180      N        E         N 1800.000
## 181      A        W         A 1310.000
## 182      N        W         N  737.500
## 183      N        W         N  625.000
## 184      N        E         N  125.000
## 185      N        E         N 1043.333
## 186      N        W         N  725.000
## 187      A        W         A  300.000
## 188      A        W         A  365.000
## 189      N        W         N   75.000
## 190      N        E         N 1183.333
## 191      N        E         N  202.500
## 192      A        E         A  225.000
## 193      A        E         A  525.000
## 194      N        W         N  265.000
## 195      A        E         A  787.500
## 196      A        E         N  800.000
## 197      A        W         A  587.500
## 199      A        E         A  145.000
## 201      A        E         A  420.000
## 202      N        W         N   75.000
## 203      A        E         A  575.000
## 205      A        W         A  780.000
## 206      N        E         N   90.000
## 207      N        W         N  150.000
## 208      A        E         N  700.000
## 210      A        E         A  550.000
## 212      N        E         A  650.000
## 213      A        W         A   68.000
## 214      N        E         N  100.000
## 215      N        W         N  670.000
## 216      A        E         A  175.000
## 217      A        W         A  137.000
## 218      N        E         N 2127.333
## 219      N        W         N  875.000
## 220      A        W         A  120.000
## 221      N        E         N  140.000
## 222      N        E         N  210.000
## 223      N        E         N  800.000
## 224      N        W         N  240.000
## 225      A        E         A  350.000
## 227      A        W         A  175.000
## 228      A        W         A  200.000
## 230      N        E         N 1940.000
## 231      N        W         N  700.000
## 232      A        W         A  750.000
## 233      N        W         N  450.000
## 234      A        W         A  172.000
## 235      A        E         A 1260.000
## 237      N        W         N  750.000
## 238      A        E         A  190.000
## 239      A        E         A  580.000
## 240      N        E         N  130.000
## 241      A        W         A  450.000
## 242      A        W         A  300.000
## 243      A        E         A  250.000
## 244      N        E         A 1050.000
## 245      A        E         A  215.000
## 246      A        E         A  400.000
## 248      A        E         A  560.000
## 249      A        E         A 1670.000
## 250      A        W         A  487.500
## 252      A        W         A  425.000
## 253      N        E         A  500.000
## 256      A        E         A  250.000
## 257      A        E         A  400.000
## 258      A        E         A  450.000
## 259      N        W         N  750.000
## 260      A        E         A   70.000
## 261      N        W         N  875.000
## 262      N        E         N  190.000
## 263      N        E         N  191.000
## 264      N        E         N  740.000
## 265      N        E         N  250.000
## 266      N        E         N  140.000
## 267      A        W         A   97.500
## 268      A        W         A  740.000
## 269      N        W         N  140.000
## 270      A        W         A  341.667
## 272      A        E         A 1000.000
## 273      A        W         A  100.000
## 274      A        W         A   90.000
## 275      N        E         N  200.000
## 276      A        W         A  135.000
## 277      N        E         N  155.000
## 278      A        W         A  475.000
## 279      N        W         N 1450.000
## 280      N        E         N  150.000
## 281      A        W         A  105.000
## 282      A        W         A  350.000
## 283      N        W         N   90.000
## 285      A        E         A  530.000
## 286      A        E         A  341.667
## 287      A        W         A  940.000
## 288      A        E         A  350.000
## 289      N        W         N  326.667
## 290      N        E         N  250.000
## 291      N        W         N  740.000
## 292      N        W         A  425.000
## 294      N        E         N  925.000
## 295      A        W         A  185.000
## 296      N        W         A  920.000
## 297      N        E         N  286.667
## 298      A        W         A  245.000
## 300      A        W         A  235.000
## 301      N        E         N 1150.000
## 302      N        E         N  160.000
## 304      A        W         A  425.000
## 305      N        W         N  900.000
## 307      N        W         N  500.000
## 308      N        E         N  277.500
## 309      N        E         N  750.000
## 310      N        E         N  160.000
## 311      N        E         N 1300.000
## 312      N        E         N  525.000
## 313      N        E         N  550.000
## 314      A        E         A 1600.000
## 315      N        W         N  120.000
## 316      A        W         A  165.000
## 318      N        E         N  700.000
## 319      A        E         A  875.000
## 320      A        W         A  385.000
## 321      A        E         A  960.000
## 322      A        W         A 1000.000
  1. Use set.seed(1) to set the random seed so I can reproduce your results.
# Fix the RNG seed so the random train/test partition that follows is reproducible
set.seed(1)
  1. Use caret’s “createDataPartition” to split the data into hitters_train and hitters_test (use 90% for training and 10% for testing).

    inTrain = createDataPartition(hitters$Salary, p=0.9, list=FALSE)

    hitters_train = hitters[inTrain,]

    hitters_test = hitters[-inTrain,]

# Create data partition.
# The row indices are drawn from the same data frame they are used to subset
# below. Drawing them from `hitters$Salary` and applying them to
# `hitters_final` only works if both frames have identical rows.
# NOTE(review): assumes `hitters_final` is the cleaned frame derived from
# `hitters` and still carries the Salary column -- confirm.
inTrain <- createDataPartition(hitters_final$Salary, p = 0.9, list = FALSE)

# Create training (rows in inTrain) and test (all remaining rows) sets
hitters_train <- hitters_final[inTrain, ]
hitters_test <- hitters_final[-inTrain, ]
  1. Neural networks do not accept categorical variables, so we must encode the categorical variables before training the network. All the categorical variables in this dataset are binary (i.e., have two levels), so you can encode them by simply using the ifelse function to convert each to a numeric variable with the two values 0 and 1.
# Recode the two-level categorical columns as 0/1 numeric indicators.
# For each column, the level listed here maps to 0 and any other value to 1.
zero_level <- c(NewLeague = "A", League = "A", Division = "E")
for (col_name in names(zero_level)) {
  hitters_train[[col_name]] <-
    ifelse(hitters_train[[col_name]] == zero_level[[col_name]], 0, 1)
  hitters_test[[col_name]] <-
    ifelse(hitters_test[[col_name]] == zero_level[[col_name]], 0, 1)
}
  1. Replace the salary column with log(salary) where log is the logarithm function. This will be the attribute we want to predict.
# Put the outcome on the log scale: overwrite Salary with log(Salary)
# in both the training and the test partitions.
hitters_train[["Salary"]] <- log(hitters_train[["Salary"]])
hitters_test[["Salary"]] <- log(hitters_test[["Salary"]])
  1. Use set.seed(1) and further divide the hitters_train data into 90% training and 10% validation using caret’s “createDataPartition” function.
# Re-seed, then hold out the rows not selected by the 90% partition of
# hitters_train as the validation set.
set.seed(1)

inTrain <- createDataPartition(
  y = hitters_train$Salary,
  p = 0.9,
  list = FALSE
)

validation <- hitters_train[-inTrain, ]
train <- hitters_train[inTrain, ]
  1. Scale the numeric attributes in the training data (except for the outcome variable, Salary). Use the column means and column standard deviations from the training data to scale both the validation and test data (please refer to slide 81, lecture 9). Note: You should NOT scale the dummy variables you created in step 6. You can append the categorical variables to your scaled numeric variables.
# Columns to standardize: every numeric column except the outcome (Salary)
# and the 0/1 dummy variables.
unscaled_names <- c("Salary", "Division", "League", "NewLeague")
numeric_cols <- vapply(hitters_train, is.numeric, logical(1)) &
  !(names(hitters_train) %in% unscaled_names)

# Training-set column means and standard deviations; these are reused to
# scale the validation and test sets.
means <- colMeans(train[, numeric_cols])
sds <- apply(train[, numeric_cols], 2, sd)

# Scale `df`'s numeric columns with the training statistics, append the
# unscaled columns, and return the result as a matrix.
scale_with_train_stats <- function(df) {
  scaled <- as.data.frame(
    scale(df[, numeric_cols], center = means, scale = sds)
  )
  as.matrix(cbind(scaled, df[, !numeric_cols]))
}

# Training data: standardize with scale()'s defaults, then append the
# unscaled columns and convert to a matrix.
scaled_train <- as.matrix(
  cbind(as.data.frame(scale(train[, numeric_cols])), train[, !numeric_cols])
)
scaled_valid <- scale_with_train_stats(validation)
scaled_test <- scale_with_train_stats(hitters_test)
  1. Create an ANN model to predict log(salary) from the other attributes. Use at least two hidden layers. Use tfruns to tune your model’s hyper-parameters, including the number of nodes in each hidden layer, the activation function in each hidden layer, batch_size, learning_rate, and the number of epochs. Validate each model on the validation set. Answer the following questions:
  • Print the returned value from tf_runs to see the metrics for each run. Which run ( which hyper-parameter combination) gave the best mean squared error on the validation data?

  • Print the learning curve for your best model. Does your best model still overfit?

  • Does your validation_loss stop decreasing after several epochs? If so, at roughly which epoch does your validation_loss stop decreasing?

# Separate the predictors (every column except Salary) from the response
# (Salary). drop = FALSE keeps the one-column response as a matrix.
predictor_names <- setdiff(colnames(scaled_train), "Salary")

X_train <- scaled_train[, predictor_names, drop = FALSE]
y_train <- scaled_train[, "Salary", drop = FALSE]

X_test <- scaled_test[, setdiff(colnames(scaled_test), "Salary"), drop = FALSE]
y_test <- scaled_test[, "Salary", drop = FALSE]

X_val <- scaled_valid[, setdiff(colnames(scaled_valid), "Salary"), drop = FALSE]
y_val <- scaled_valid[, "Salary", drop = FALSE]
# Hyper-parameter grid passed as flags to the training script.
# sample = 0.02 asks tuning_run() to run only a sampled 2% of the flag
# combinations rather than the full grid.
flag_grid <- list(
  nodes = c(32, 64, 128),
  learning_rate = c(0.01, 0.001, 0.0001),
  batch_size = c(50, 100, 200, 500),
  epochs = c(30, 50, 100),
  activation = c("relu", "sigmoid", "tanh")
)
runs <- tuning_run(file = "problem2_nn.R", flags = flag_grid, sample = 0.02)
## 
## > FLAGS <- flags(flag_numeric("nodes", 64), flag_numeric("batch_size", 
## +     32), flag_string("activation", "relu"), flag_numeric("learning_rate", 
## + .... [TRUNCATED] 
## 
## > model = keras_model_sequential()
## 
## > model %>% layer_dense(units = FLAGS$nodes, input_shape = dim(X_train)[2]) %>% 
## +     layer_dense(units = 1)
## 
## > model %>% compile(optimizer = optimizer_adam(learning_rate = FLAGS$learning_rate), 
## +     loss = "mse", metrics = list("mse"))
## 
## > model %>% fit(X_train, y_train, epochs = FLAGS$epochs, 
## +     batch_size = FLAGS$batch_size, validation_data = list(X_val, 
## +         y_val))
## 
## > FLAGS <- flags(flag_numeric("nodes", 64), flag_numeric("batch_size", 
## +     32), flag_string("activation", "relu"), flag_numeric("learning_rate", 
## + .... [TRUNCATED] 
## 
## > model = keras_model_sequential()
## 
## > model %>% layer_dense(units = FLAGS$nodes, input_shape = dim(X_train)[2]) %>% 
## +     layer_dense(units = 1)
## 
## > model %>% compile(optimizer = optimizer_adam(learning_rate = FLAGS$learning_rate), 
## +     loss = "mse", metrics = list("mse"))
## 
## > model %>% fit(X_train, y_train, epochs = FLAGS$epochs, 
## +     batch_size = FLAGS$batch_size, validation_data = list(X_val, 
## +         y_val))
## 
## > FLAGS <- flags(flag_numeric("nodes", 64), flag_numeric("batch_size", 
## +     32), flag_string("activation", "relu"), flag_numeric("learning_rate", 
## + .... [TRUNCATED] 
## 
## > model = keras_model_sequential()
## 
## > model %>% layer_dense(units = FLAGS$nodes, input_shape = dim(X_train)[2]) %>% 
## +     layer_dense(units = 1)
## 
## > model %>% compile(optimizer = optimizer_adam(learning_rate = FLAGS$learning_rate), 
## +     loss = "mse", metrics = list("mse"))
## 
## > model %>% fit(X_train, y_train, epochs = FLAGS$epochs, 
## +     batch_size = FLAGS$batch_size, validation_data = list(X_val, 
## +         y_val))
## 
## > FLAGS <- flags(flag_numeric("nodes", 64), flag_numeric("batch_size", 
## +     32), flag_string("activation", "relu"), flag_numeric("learning_rate", 
## + .... [TRUNCATED] 
## 
## > model = keras_model_sequential()
## 
## > model %>% layer_dense(units = FLAGS$nodes, input_shape = dim(X_train)[2]) %>% 
## +     layer_dense(units = 1)
## 
## > model %>% compile(optimizer = optimizer_adam(learning_rate = FLAGS$learning_rate), 
## +     loss = "mse", metrics = list("mse"))
## 
## > model %>% fit(X_train, y_train, epochs = FLAGS$epochs, 
## +     batch_size = FLAGS$batch_size, validation_data = list(X_val, 
## +         y_val))
## 
## > FLAGS <- flags(flag_numeric("nodes", 64), flag_numeric("batch_size", 
## +     32), flag_string("activation", "relu"), flag_numeric("learning_rate", 
## + .... [TRUNCATED] 
## 
## > model = keras_model_sequential()
## 
## > model %>% layer_dense(units = FLAGS$nodes, input_shape = dim(X_train)[2]) %>% 
## +     layer_dense(units = 1)
## 
## > model %>% compile(optimizer = optimizer_adam(learning_rate = FLAGS$learning_rate), 
## +     loss = "mse", metrics = list("mse"))
## 
## > model %>% fit(X_train, y_train, epochs = FLAGS$epochs, 
## +     batch_size = FLAGS$batch_size, validation_data = list(X_val, 
## +         y_val))
## 
## > FLAGS <- flags(flag_numeric("nodes", 64), flag_numeric("batch_size", 
## +     32), flag_string("activation", "relu"), flag_numeric("learning_rate", 
## + .... [TRUNCATED] 
## 
## > model = keras_model_sequential()
## 
## > model %>% layer_dense(units = FLAGS$nodes, input_shape = dim(X_train)[2]) %>% 
## +     layer_dense(units = 1)
## 
## > model %>% compile(optimizer = optimizer_adam(learning_rate = FLAGS$learning_rate), 
## +     loss = "mse", metrics = list("mse"))
## 
## > model %>% fit(X_train, y_train, epochs = FLAGS$epochs, 
## +     batch_size = FLAGS$batch_size, validation_data = list(X_val, 
## +         y_val))
## 
## > FLAGS <- flags(flag_numeric("nodes", 64), flag_numeric("batch_size", 
## +     32), flag_string("activation", "relu"), flag_numeric("learning_rate", 
## + .... [TRUNCATED] 
## 
## > model = keras_model_sequential()
## 
## > model %>% layer_dense(units = FLAGS$nodes, input_shape = dim(X_train)[2]) %>% 
## +     layer_dense(units = 1)
## 
## > model %>% compile(optimizer = optimizer_adam(learning_rate = FLAGS$learning_rate), 
## +     loss = "mse", metrics = list("mse"))
## 
## > model %>% fit(X_train, y_train, epochs = FLAGS$epochs, 
## +     batch_size = FLAGS$batch_size, validation_data = list(X_val, 
## +         y_val))
# Print the data frame of per-run results returned by tuning_run()
runs
## Data frame: 7 x 25 
##                     run_dir metric_loss metric_mse metric_val_loss
## 1 runs/2023-03-27T01-00-38Z      0.4165     0.4165          0.4393
## 2 runs/2023-03-27T01-00-36Z      6.5761     6.5761          6.9082
## 3 runs/2023-03-27T01-00-33Z      3.7079     3.7079          4.6020
## 4 runs/2023-03-27T01-00-31Z     31.1762    31.1762         29.9040
## 5 runs/2023-03-27T01-00-26Z      0.5776     0.5776          0.3117
## 6 runs/2023-03-27T01-00-24Z     19.4368    19.4368         18.9947
## 7 runs/2023-03-27T01-00-12Z     33.0509    33.0509         34.0548
##   metric_val_mse
## 1         0.4393
## 2         6.9082
## 3         4.6020
## 4        29.9040
## 5         0.3117
## 6        18.9947
## 7        34.0548
## # ... with 20 more columns:
## #   flag_nodes, flag_batch_size, flag_activation, flag_learning_rate,
## #   flag_epochs, epochs, epochs_completed, metrics, model, loss_function,
## #   optimizer, learning_rate, script, start, end, completed, output,
## #   source_code, context, type
# Open the run with the lowest validation loss. The rows of `runs` are not
# ordered by metric_val_loss, so the first row is not necessarily the best run.
view_run(runs$run_dir[which.min(runs$metric_val_loss)])

Yes, the validation loss decreases continuously, and after the 90th epoch the loss becomes flat; that is, it stops decreasing.

Best Model:

  • nodes: 128

  • batch size: 50

  • activation function: sigmoid

  • learning rate: 0.0001

  • epochs: 50

Kindly look at the P2BestModel file.

  1. Measure the performance of your best model (after tuning) on the test set and compute its RMSE. Note that you must reverse the log transformation by taking the exp (exponent) of the predictions returned by the neural network model and compare them to the original salary values (without log transformation). Doing this helps us get the RMSE in the original scale.
# Rebuild and refit the tuned network from scratch.
# NOTE(review): the "Best Model" summary in the write-up lists a learning rate
# of 0.0001 and 50 epochs, while this refit uses 0.001 and 100 -- confirm which
# values are intended.
best_model <- keras_model_sequential()

# One sigmoid hidden layer (128 units) followed by a single-unit output layer
best_model %>%
  layer_dense(
    units = 128,
    activation = "sigmoid",
    input_shape = ncol(X_train)
  ) %>%
  layer_dense(units = 1)

# Adam optimizer; mean squared error is both the loss and the tracked metric
best_model %>%
  compile(
    optimizer = optimizer_adam(learning_rate = 0.001),
    loss = "mse",
    metrics = list("mse")
  )

# Fit on the training data only (no validation data is passed here)
history <- best_model %>%
  fit(
    X_train, y_train,
    epochs = 100,
    batch_size = 50
  )

# Plot the training history recorded by fit()
plot(history)

# Score the test set with the refit network and flatten to a plain vector
y_pred <- as.vector(predict(best_model, X_test))

# Undo the log transformation on both predictions and targets
y_pred_orig <- exp(y_pred)
y_test_orig <- exp(y_test)

# Root mean squared error on the back-transformed values
rmse <- sqrt(mean((y_test_orig - y_pred_orig)^2))
cat(paste("The RMSE value of Neural Net model is: ", rmse))
## The RMSE value of Neural Net model is:  368.745935194109
  1. Use a simple (or stepwise) linear regression model to predict the salary. Train and test your model on the same data you used to train and test your best neural network model. Compare the RMSE of the linear model on the test data with the RMSE of the neural network model. How does your neural network model compare to a simple linear model?
# Baseline: linear regression of Salary on all remaining columns
linear_model <- lm(Salary ~ ., data = as.data.frame(scaled_train))

# Predict for the test rows, then undo the log with exp()
y_pred_lm <- scaled_test %>%
  as.data.frame() %>%
  predict(linear_model, newdata = .)
y_pred_lm_orig <- exp(y_pred_lm)

# RMSE against the back-transformed test targets
rmse_lm <- sqrt(mean((y_test_orig - y_pred_lm_orig)^2))
cat(paste("The RMSE value of linear model is: ", rmse_lm))
## The RMSE value of linear model is:  457.745470302265

The root mean square error (RMSE) is a measure of the difference between the predicted and actual values in a regression analysis. A lower RMSE value indicates better performance of the model.

In this case, the RMSE value of the neural net model is 368.7459352, while that of the linear model is 457.7454703. This means that the neural net model performs better than the linear model as it has a lower RMSE value.