2D Clustering of NFL Interception Percentage vs College Career Statistics Using K-means(2)

The following code applies K-means clustering to pairs of statistics for each quarterback: one of their overall college career statistics plotted against a statistic from their first season playing in the NFL.

# Load the quarterback table; the selections below show it holds both the
# college ("c_"-prefixed) and the NFL columns for each player.
stats.all <- read.csv("C:/Users/ird/Desktop/dataMiningFinalProject/qb_stats.csv")

# College view: player name followed by the college career statistics.
stats.college <- stats.all[c(
    "name",
    "c_avg_cmpp", "c_ya", "c_aya", "c_rate", "c_pct",
    "c_avg_att", "c_avg_tds", "c_avg_inter", "c_avg_yds",
    "c_numyrs"
)]

# NFL view: player name followed by the NFL statistics. Column order matters
# to code that selects an NFL column by position.
stats.nfl <- stats.all[c(
    "name", "year", "age", "height", "weight", "rating",
    "completions", "yds_per_comp", "yds_lost_by_sack", "ints",
    "net_yds_per_att", "games_played", "longest_pass",
    "adj_net_yds_per_att", "td_percentage", "completion_percentage",
    "games_started", "int_percentage", "avg_yds_per_att", "avg_value",
    "comebacks", "gwds", "sacked", "wins", "tds", "yds_per_att", "yds",
    "qbr", "yds_per_game", "att", "perc_times_sk"
)]

# K-means clustering of one NFL statistic against each college statistic.
#
# For every column of `stats.college` from `college.data.col` onward, the loop
# pairs that column with the selected `stats.nfl` column, drops rows with
# missing values, clusters the resulting 2-D points into `k` groups and draws
# a scatter plot coloured by cluster membership.
#
# NOTE(review): `capitalize()` and `stats.college.title` are not defined in the
# visible code; they are assumed to come from an earlier setup chunk - confirm.
k <- 2
college.data.col <- 2  # first college statistic; column 1 is "name"
nfl.data.col <- 18  # select an nfl target variable (18 = "int_percentage")

# Pieces that do not change between iterations are computed once.
nfl.name <- colnames(stats.nfl)[nfl.data.col]
nfl.label <- paste("NFL", capitalize(nfl.name))
# Derive the title suffix from `k` so it stays correct if `k` is changed.
method.label <- paste0("using K-means(", k, ")")

# kmeans() picks its starting centres at random; fix the seed so the cluster
# assignments (and therefore the plot colours) are the same on every render.
set.seed(1)

for (i in college.data.col:ncol(stats.college)) {
    college.name <- colnames(stats.college)[i]
    cat(college.name, nfl.name, "\n")

    df <- data.frame(stats.college[, i], stats.nfl[, nfl.data.col])
    colnames(df) <- c(college.name, nfl.name)
    df <- na.omit(df)

    # kmeans() stops with an error when there are fewer distinct points than
    # clusters; skip such a pair instead of aborting the remaining plots.
    if (nrow(unique(df)) < k) {
        warning("Skipping ", college.name, " vs ", nfl.name,
            ": fewer than ", k, " distinct complete observations",
            call. = FALSE)
        next
    }

    # Several random starts make the result far less dependent on one
    # unlucky initialisation.
    fit <- kmeans(df, centers = k, nstart = 25)

    college.label <- capitalize(stats.college.title[i])
    plot(df, col = fit$cluster,
        main = paste(nfl.label, "vs", college.label, method.label),
        xlab = paste("College", college.label),
        ylab = nfl.label,
        cex.lab = 1.25)
}

## c_avg_cmpp int_percentage

plot of chunk unnamed-chunk-2

## c_ya int_percentage

plot of chunk unnamed-chunk-2

## c_aya int_percentage

plot of chunk unnamed-chunk-2

## c_rate int_percentage

plot of chunk unnamed-chunk-2

## c_pct int_percentage

plot of chunk unnamed-chunk-2

## c_avg_att int_percentage

plot of chunk unnamed-chunk-2

## c_avg_tds int_percentage

plot of chunk unnamed-chunk-2

## c_avg_inter int_percentage

plot of chunk unnamed-chunk-2

## c_avg_yds int_percentage

plot of chunk unnamed-chunk-2

## c_numyrs int_percentage

plot of chunk unnamed-chunk-2