Looking at some batter data from Fangraphs.

Setting up the basic framework for the projection model. First, we'll source a file that reads in the data and does some basic data cleaning and management, as well as load relavent libraries.

library(boot)
source("readfgData.R")

There are 967 players in the dataset.

Here is a quick look at the distribution of some variables and their corresponding normal approximations, FStrike, the percentage of first strikes seen by the batter, SwStr, the batter's swinging strike rate, and ZContact, the batter's contact percentage within the strikezone.

par(mfrow = c(1, 3))
hist(fg$FStrike, breaks = 20, freq = FALSE, main = "Distribution of the Percentage of \n First Strikes Seen", 
    cex.main = 1.3, ylab = "", xlab = "First Strike Percentage", cex.lab = 1.5)
normFit <- dnorm(seq(min(fg$FStrike), max(fg$FStrike), length = 100), mean = mean(fg$FStrike), 
    sd = sd(fg$FStrike))
points(seq(min(fg$FStrike), max(fg$FStrike), length = 100), normFit, type = "l", 
    col = "red")
hist(fg$SwStr, breaks = 20, freq = FALSE, main = "Distribution of Batter's Swinging Strike Rates", 
    cex.main = 1.3, ylab = "", xlab = "Swinging Strike Percentage", cex.lab = 1.5)
normFit <- dnorm(seq(min(fg$SwStr), max(fg$SwStr), length = 100), mean = mean(fg$SwStr), 
    sd = sd(fg$SwStr))
points(seq(min(fg$SwStr), max(fg$SwStr), length = 100), normFit, type = "l", 
    col = "red")
hist(fg$ZContact, breaks = 20, freq = FALSE, main = "Distribution of Batter's Contact Rate \n on Pitches Within The Stike Zone", 
    cex.main = 1.3, ylab = "", xlab = "In-Zone Contact Rate", cex.lab = 1.5)
normFit <- dnorm(seq(min(fg$ZContact), max(fg$ZContact), length = 100), mean = mean(fg$ZContact), 
    sd = sd(fg$ZContact))
points(seq(min(fg$ZContact), max(fg$ZContact), length = 100), normFit, type = "l", 
    col = "red")

plot of chunk unnamed-chunk-2


fg[fg$playerid == min(fg$playerid), ]
##     Season            Name    Team playerid  PA   AVG OSwing ZSwing Swing
## 23    2006 Alfredo Amezaga Marlins        1 378 0.260  0.223  0.628 0.449
## 457   2007 Alfredo Amezaga Marlins        1 448 0.263  0.237  0.640 0.447
## 896   2008 Alfredo Amezaga Marlins        1 337 0.264  0.250  0.643 0.461
##     OContact ZContact Contact  Zone FStrike SwStr   G  AB   H Singles
## 23     0.701    0.885   0.845 0.559   0.579 0.067 132 334  87      72
## 457    0.714    0.938   0.881 0.522   0.565 0.052 133 400 105      80
## 896    0.667    0.918   0.855 0.536   0.599 0.067 125 311  82      61
##     Doubles Triples HR  R RBI BB IBB SO HBP SF SH GDP SB CS BBpercent
## 23        9       3  3 42  19 33   4 46   3  1  7   5 20 12     0.087
## 457      14       9  2 46  30 35   0 52   4  5  4   4 13  7     0.078
## 896      13       5  3 41  32 19   1 47   3  0  4   6  8  2     0.056
##     Kpercent BBperK   OBP   SLG   OPS   ISO Spd BABIP  UBR  wSB wRC  wRAA
## 23     0.122   0.72 0.332 0.332 0.664 0.072 5.9 0.294  3.8 -1.4  36 -11.5
## 457    0.116   0.67 0.324 0.358 0.682 0.095 6.4 0.293 -0.3 -0.8  45  -9.9
## 896    0.139   0.40 0.312 0.367 0.679 0.103 6.8 0.303  2.6  0.5  33  -7.5
##      wOBA wRCadd PowFac
## 23  0.296     75  1.277
## 457 0.305     79  1.361
## 896 0.301     77  1.390

A relavent thing to look at it how stable some of the data is from year to year. Let's look at the year-to-year correlation for data throughout this dataset.

for (var in 5:length(names(fg))) {
    fgyear <- fg[fg$Season == 2006, ]
    varPair <- c()
    for (year in 2007:2013) {
        fglyear <- fgyear
        fgyear <- fg[fg$Season == year, ]
        for (pid in intersect(fgyear$playerid, fglyear$playerid)) {
            varPair <- rbind(varPair, c(fglyear[fglyear$playerid == pid, var], 
                fgyear[fgyear$playerid == pid, var]))
        }
    }
    cat("The year-to-year correlation for ", names(fg)[var], "is: ", cor(varPair[, 
        2], varPair[, 1]), "\n")
}
## The year-to-year correlation for  PA is:  0.5881 
## The year-to-year correlation for  AVG is:  0.3944 
## The year-to-year correlation for  OSwing is:  0.8024 
## The year-to-year correlation for  ZSwing is:  0.7921 
## The year-to-year correlation for  Swing is:  0.8025 
## The year-to-year correlation for  OContact is:  0.8096 
## The year-to-year correlation for  ZContact is:  0.8055 
## The year-to-year correlation for  Contact is:  0.8657 
## The year-to-year correlation for  Zone is:  0.7341 
## The year-to-year correlation for  FStrike is:  0.4482 
## The year-to-year correlation for  SwStr is:  0.8592 
## The year-to-year correlation for  G is:  0.4428 
## The year-to-year correlation for  AB is:  0.5821 
## The year-to-year correlation for  H is:  0.6088 
## The year-to-year correlation for  Singles is:  0.6073 
## The year-to-year correlation for  Doubles is:  0.5382 
## The year-to-year correlation for  Triples is:  0.5068 
## The year-to-year correlation for  HR is:  0.6775 
## The year-to-year correlation for  R is:  0.6266 
## The year-to-year correlation for  RBI is:  0.6403 
## The year-to-year correlation for  BB is:  0.6852 
## The year-to-year correlation for  IBB is:  0.6509 
## The year-to-year correlation for  SO is:  0.6379 
## The year-to-year correlation for  HBP is:  0.5522 
## The year-to-year correlation for  SF is:  0.3251 
## The year-to-year correlation for  SH is:  0.6132 
## The year-to-year correlation for  GDP is:  0.5267 
## The year-to-year correlation for  SB is:  0.7562 
## The year-to-year correlation for  CS is:  0.5961 
## The year-to-year correlation for  BBpercent is:  0.6663 
## The year-to-year correlation for  Kpercent is:  0.8188 
## The year-to-year correlation for  BBperK is:  0.6575 
## The year-to-year correlation for  OBP is:  0.4784 
## The year-to-year correlation for  SLG is:  0.5034 
## The year-to-year correlation for  OPS is:  0.4909 
## The year-to-year correlation for  ISO is:  0.6265 
## The year-to-year correlation for  Spd is:  0.7322 
## The year-to-year correlation for  BABIP is:  0.2759 
## The year-to-year correlation for  UBR is:  0.4773 
## The year-to-year correlation for  wSB is:  0.4677 
## The year-to-year correlation for  wRC is:  0.6373 
## The year-to-year correlation for  wRAA is:  0.5503 
## The year-to-year correlation for  wOBA is:  0.4747 
## The year-to-year correlation for  wRCadd is:  0.4544 
## The year-to-year correlation for  PowFac is:  0.6678

Here's some stuff I added just now for you Jake.

par(mfrow = c(1, 2))
HRregress <- lm(HR ~ PowFac + SB + HBP, data = fg)
plot(fg$PowFac, fg$HR, pch = 19, col = "blue", main = "Basic Scatterplot", xlab = "Power Factor", 
    ylab = "HRs", cex.main = 1.5, cex.lab = 1.3)
plot(fg$PowFac, HRregress$residuals, main = "Residuals of the Regression of HR \n on Power Factor, Stolen Bases, and HBP", 
    ylab = "Residuals", xlab = "Power Factor", cex.lab = 1.3, pch = 19, col = "dark red")

plot of chunk unnamed-chunk-4


summary(HRregress)
## 
## Call:
## lm(formula = HR ~ PowFac + SB + HBP, data = fg)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.818  -3.646  -0.306   3.298  28.834 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -38.8337     0.7705   -50.4   <2e-16 ***
## PowFac       29.4040     0.4874    60.3   <2e-16 ***
## SB            0.1490     0.0113    13.2   <2e-16 ***
## HBP           0.6769     0.0304    22.3   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.14 on 3550 degrees of freedom
## Multiple R-squared:  0.588,  Adjusted R-squared:  0.587 
## F-statistic: 1.69e+03 on 3 and 3550 DF,  p-value: <2e-16

Some quick tests of Bayesian updating for a players info.

Player ID 1744 is Miguel Cabrera. Lets try using the league-wide distribution of Power Factor to be the prior and use his season data to get a posterior distribution for his “true” power factor for each season.

First let's figure out a good approximation to the prior distribution of power factor:

hist(fg$PowFac, breaks = 20, freq = FALSE, main = "Distribution of Power Factor", 
    cex.main = 1.3, ylab = "", xlab = "Power Factor", cex.lab = 1.5)
normFit <- dnorm(seq(min(fg$PowFac), max(fg$PowFac), length = 100), mean = mean(fg$PowFac), 
    sd = sd(fg$PowFac))
points(seq(min(fg$PowFac), max(fg$PowFac), length = 100), normFit, type = "l", 
    col = "red")

plot of chunk unnamed-chunk-5

As we can see, the distribution of power factor for qualifying MLB players between 2006 and 2013 can be reasonably appoximated by a Normal distribution with a mean of 1.5709 and a standard deviation of 0.2192. The player with the maximum power factor was Jose Bautista in 2010

So now power factor is basically the average number of bases per hit a player achieved. As a random variable who is the mean of a collection of random variables (the value for the number of bases for each hit), it can be treated as normal with a standard devation of \( \frac{ \sigma}{\sqrt{n}} \) where \( \sigma \) here will be the prior standard deviation and \( n \) is the number of hits in the season. Strictly this is not the proper treatment for \( \sigma \) but will be adequate for the first look.

Because we have a normal prior and normal likelihood (a conjugate pair), we can do all the calculations very straightforwardly. So now we can get our posterior for Cabrera for each season as:

player <- fg[fg$playerid == 1744, ]
priorMean <- mean(fg$PowFac)
priorSD <- sd(fg$PowFac)
postMean <- rep(0, length(player$Season))
postSD <- rep(0, length(player$Season))
min(fg$PowFac)
## [1] 1
cols <- rgb(1:9/9, 1:9/18, 1 - 1:9/9, 0.35)
# Plot the prior distribution
datRange <- seq(min(fg$PowFac), max(fg$PowFac), length = 200)
plot(datRange, dnorm(datRange, priorMean, priorSD)/max(dnorm(datRange, priorMean, 
    priorSD)), type = "l", col = cols[1], lwd = 3, xlab = "Power Factor", ylab = "", 
    main = "Prior and Yearly Posterior Distributions for \n Miguel Cabrera's Power Factor", 
    cex.main = 1.3, cex.lab = 1.5, ylim = c(0, 1.1))

for (ind in 1:length(player$Season)) {
    yearMean <- player$PowFac[ind]
    yearSD <- priorSD/sqrt(player$H[ind])
    postMean[ind] <- (priorMean/priorSD^2 + yearMean/yearSD^2)/(1/priorSD^2 + 
        1/yearSD^2)
    postSD[ind] <- sqrt(1/(1/priorSD^2 + 1/yearSD^2))
    polygon(datRange, dnorm(datRange, postMean[ind], postSD[ind])/max(dnorm(datRange, 
        postMean[ind], postSD[ind])), col = cols[ind + 1])
    cat("The posterior for Miguel Cabrera's Power Factor in ", player$Season[ind], 
        " has a mean of ", postMean[ind], "with a standard deviation of", postSD[ind], 
        "\n")
}
## The posterior for Miguel Cabrera's Power Factor in  2006  has a mean of  1.675 with a standard deviation of 0.01566 
## The posterior for Miguel Cabrera's Power Factor in  2007  has a mean of  1.765 with a standard deviation of 0.01594 
## The posterior for Miguel Cabrera's Power Factor in  2008  has a mean of  1.838 with a standard deviation of 0.01629 
## The posterior for Miguel Cabrera's Power Factor in  2009  has a mean of  1.688 with a standard deviation of 0.01554 
## The posterior for Miguel Cabrera's Power Factor in  2010  has a mean of  1.895 with a standard deviation of 0.01629 
## The posterior for Miguel Cabrera's Power Factor in  2011  has a mean of  1.703 with a standard deviation of 0.01558 
## The posterior for Miguel Cabrera's Power Factor in  2012  has a mean of  1.835 with a standard deviation of 0.01527 
## The posterior for Miguel Cabrera's Power Factor in  2013  has a mean of  1.826 with a standard deviation of 0.01574

legend("topleft", c("Prior", player$Season), col = cols, lty = 1, lwd = 3)

plot of chunk unnamed-chunk-6

plot(player$Season, postMean, pch = 19, cex = 1.4, main = "Posterior Power Factor vs Season For Miguel Cabrera", 
    xlab = "Season", ylab = "Power Factor", cex.lab = 1.4, ylim = c(min(fg$PowFac), 
        max(fg$PowFac)))
arrows(player$Season, postMean - 2 * postSD, player$Season, postMean + 2 * postSD, 
    angle = 90, code = 3)

plot of chunk unnamed-chunk-6

This analysis assumes that Cabrera's power factor is constant throughout each year. We see that Cabrera's power factor is not terribly consistent across years. Most notably, there were two seasons in which Cabrera's power factor was well below what we would expect from the adjacent years.

Let's take a look at a few other players:

for (pName in c("Hanley Ramirez", "Grady Sizemore", "Giancarlo Stanton", "Jay Bruce", 
    "Jose Bautista", "Elvis Andrus")) {
    player <- fg[fg$Name == pName, ]
    playerName = unique(as.character(player$Name))
    priorMean <- mean(fg$PowFac)
    priorSD <- sd(fg$PowFac)
    postMean <- rep(0, length(player$Season))
    postSD <- rep(0, length(player$Season))
    min(fg$PowFac)
    cols <- rgb(1:9/9, 1:9/18, 1 - 1:9/9, 0.35)
    # Plot the prior distribution
    datRange <- seq(min(fg$PowFac) - 0.25, max(fg$PowFac) + 0.25, length = 200)
    plot(datRange, dnorm(datRange, priorMean, priorSD)/max(dnorm(datRange, priorMean, 
        priorSD)), type = "l", col = cols[1], lwd = 3, xlab = "Power Factor", 
        ylab = "", main = paste("Prior and Yearly Posterior Distributions for \n ", 
            playerName, "'s Power Factor"), cex.main = 1.3, cex.lab = 1.5, ylim = c(0, 
            1.1))

    for (ind in 1:length(player$Season)) {
        yearMean <- player$PowFac[ind]
        yearSD <- priorSD/sqrt(player$H[ind])
        postMean[ind] <- (priorMean/priorSD^2 + yearMean/yearSD^2)/(1/priorSD^2 + 
            1/yearSD^2)
        postSD[ind] <- sqrt(1/(1/priorSD^2 + 1/yearSD^2))
        polygon(datRange, dnorm(datRange, postMean[ind], postSD[ind])/max(dnorm(datRange, 
            postMean[ind], postSD[ind])), col = cols[ind + 1])
        cat("The posterior for", playerName, "'s Power Factor in ", player$Season[ind], 
            " has a mean of ", postMean[ind], "with a standard deviation of", 
            postSD[ind], "\n")
    }

    legend("topleft", c("Prior", player$Season), col = cols, lty = 1, lwd = 3)
    plot(player$Season, postMean, pch = 19, cex = 1.4, main = paste("Posterior Power Factor vs Season For", 
        playerName), xlab = "Season", ylab = "Power Factor", cex.lab = 1.4, 
        ylim = c(min(fg$PowFac), max(fg$PowFac)))
    arrows(player$Season, postMean - 2 * postSD, player$Season, postMean + 2 * 
        postSD, angle = 90, code = 3)
}

plot of chunk unnamed-chunk-7

## The posterior for Hanley Ramirez 's Power Factor in  2006  has a mean of  1.643 with a standard deviation of 0.01607 
## The posterior for Hanley Ramirez 's Power Factor in  2007  has a mean of  1.692 with a standard deviation of 0.01502 
## The posterior for Hanley Ramirez 's Power Factor in  2008  has a mean of  1.793 with a standard deviation of 0.01643 
## The posterior for Hanley Ramirez 's Power Factor in  2009  has a mean of  1.588 with a standard deviation of 0.01558 
## The posterior for Hanley Ramirez 's Power Factor in  2010  has a mean of  1.583 with a standard deviation of 0.01712 
## The posterior for Hanley Ramirez 's Power Factor in  2011  has a mean of  1.56 with a standard deviation of 0.02406 
## The posterior for Hanley Ramirez 's Power Factor in  2012  has a mean of  1.7 with a standard deviation of 0.01755 
## The posterior for Hanley Ramirez 's Power Factor in  2013  has a mean of  1.847 with a standard deviation of 0.02129

plot of chunk unnamed-chunk-7 plot of chunk unnamed-chunk-7

## The posterior for Grady Sizemore 's Power Factor in  2006  has a mean of  1.837 with a standard deviation of 0.01586 
## The posterior for Grady Sizemore 's Power Factor in  2007  has a mean of  1.667 with a standard deviation of 0.01657 
## The posterior for Grady Sizemore 's Power Factor in  2008  has a mean of  1.871 with a standard deviation of 0.01676 
## The posterior for Grady Sizemore 's Power Factor in  2009  has a mean of  1.792 with a standard deviation of 0.021 
## The posterior for Grady Sizemore 's Power Factor in  2010  has a mean of  1.377 with a standard deviation of 0.04143 
## The posterior for Grady Sizemore 's Power Factor in  2011  has a mean of  1.879 with a standard deviation of 0.02807

plot of chunk unnamed-chunk-7 plot of chunk unnamed-chunk-7

## The posterior for Giancarlo Stanton 's Power Factor in  2010  has a mean of  1.953 with a standard deviation of 0.02261 
## The posterior for Giancarlo Stanton 's Power Factor in  2011  has a mean of  2.046 with a standard deviation of 0.0188 
## The posterior for Giancarlo Stanton 's Power Factor in  2012  has a mean of  2.093 with a standard deviation of 0.01915 
## The posterior for Giancarlo Stanton 's Power Factor in  2013  has a mean of  1.924 with a standard deviation of 0.02119

plot of chunk unnamed-chunk-7 plot of chunk unnamed-chunk-7

## The posterior for Jay Bruce 's Power Factor in  2008  has a mean of  1.781 with a standard deviation of 0.02129 
## The posterior for Jay Bruce 's Power Factor in  2009  has a mean of  2.101 with a standard deviation of 0.02482 
## The posterior for Jay Bruce 's Power Factor in  2010  has a mean of  1.753 with a standard deviation of 0.01827 
## The posterior for Jay Bruce 's Power Factor in  2011  has a mean of  1.85 with a standard deviation of 0.01784 
## The posterior for Jay Bruce 's Power Factor in  2012  has a mean of  2.036 with a standard deviation of 0.0184 
## The posterior for Jay Bruce 's Power Factor in  2013  has a mean of  1.823 with a standard deviation of 0.01707

plot of chunk unnamed-chunk-7 plot of chunk unnamed-chunk-7

## The posterior for Jose Bautista 's Power Factor in  2006  has a mean of  1.785 with a standard deviation of 0.02249 
## The posterior for Jose Bautista 's Power Factor in  2007  has a mean of  1.629 with a standard deviation of 0.0188 
## The posterior for Jose Bautista 's Power Factor in  2008  has a mean of  1.7 with a standard deviation of 0.02324 
## The posterior for Jose Bautista 's Power Factor in  2009  has a mean of  1.734 with a standard deviation of 0.02451 
## The posterior for Jose Bautista 's Power Factor in  2010  has a mean of  2.368 with a standard deviation of 0.01796 
## The posterior for Jose Bautista 's Power Factor in  2011  has a mean of  2.01 with a standard deviation of 0.01755 
## The posterior for Jose Bautista 's Power Factor in  2012  has a mean of  2.179 with a standard deviation of 0.02436 
## The posterior for Jose Bautista 's Power Factor in  2013  has a mean of  1.92 with a standard deviation of 0.02018

plot of chunk unnamed-chunk-7 plot of chunk unnamed-chunk-7

## The posterior for Elvis Andrus 's Power Factor in  2009  has a mean of  1.398 with a standard deviation of 0.0193 
## The posterior for Elvis Andrus 's Power Factor in  2010  has a mean of  1.139 with a standard deviation of 0.01749 
## The posterior for Elvis Andrus 's Power Factor in  2011  has a mean of  1.296 with a standard deviation of 0.01707 
## The posterior for Elvis Andrus 's Power Factor in  2012  has a mean of  1.323 with a standard deviation of 0.01629 
## The posterior for Elvis Andrus 's Power Factor in  2013  has a mean of  1.223 with a standard deviation of 0.01686

plot of chunk unnamed-chunk-7

One approach to projecting players performance is to use straightforward, unadjusted outcomes and rates on these. We can think of a players at-bat being a sequence of stochastic outcomes. The result of a plate appearance can be anything in the set: {K, BB, HBP, Ball in play}, which can be modeled as a multinomial random variable. Conditional on the result being a ball in play, the outcome can then be anything from another set: {single, double, triple, HR, out, sacrifice}, which can again be treated as a multinomial. The parametersfor these multinomials can be estimated from the players historical data with appropriate uncertainty applied. Once we have estimates for these quantities, we could do a Monte Carlo study to build a projection for a player given a number of plate appearances.

Lets try estimating these parameters for Miguel Cabrera from his 2013 numbers and then simulate some seasons using those estimated parameters as the true parameters:

par(mfrow = c(1, 3))
for (pName in c("Miguel Cabrera")) {
    player <- fg[fg$Name == pName & fg$Season == 2013, ]
    playerName = unique(as.character(player$Name))
    Krate <- player$SO/player$PA
    BBrate <- player$BB/player$PA
    HBPrate <- player$HBP/player$PA
    BIPrate <- 1 - Krate - BBrate - HBPrate
    BIPs <- BIPrate * player$PA
    singleRate <- player$Singles/BIPs
    doubleRate <- player$Doubles/BIPs
    tripleRate <- player$Triples/BIPs
    hrRate <- player$HR/BIPs
    sacRate <- (player$SF + player$SH)/BIPs
    outRate <- 1 - singleRate - doubleRate - tripleRate - hrRate - sacRate
    simSeasons <- data.frame()
    for (i in 1:5000) {
        simData <- t(rmultinom(1, size = player$PA, c(Krate, BBrate, HBPrate, 
            BIPrate)))
        simData2 <- t(rmultinom(1, size = simData[4], c(singleRate, doubleRate, 
            tripleRate, hrRate, sacRate, outRate)))
        simSeasons <- rbind(simSeasons, cbind(simData, simData2))
    }
    names(simSeasons) <- c("K", "BB", "HBP", "BIP", "Singles", "Doubles", "Triples", 
        "HR", "Sac", "Outs")
    print(head(simSeasons))
    hist(simSeasons$HR, breaks = 20, xlab = "HRs", ylab = "", main = paste("Simulated Number of Home Runs for \n Miguel Cabrera \n using rates from the 2013 season"), 
        cex.axis = 1.3, cex.main = 1.5, cex.lab = 1.5, freq = FALSE)
    hist((simSeasons$Singles + simSeasons$Doubles + simSeasons$Triples)/(simSeasons$BIP - 
        simSeasons$HR), breaks = 20, xlab = "BABIP", ylab = "", main = paste("Simulated BABIP for Miguel Cabrera \n using rates from the 2013 season"), 
        cex.axis = 1.3, cex.main = 1.5, cex.lab = 1.5, freq = FALSE)
    hist((4 * simSeasons$HR + 3 * simSeasons$Triples + 2 * simSeasons$Doubles + 
        simSeasons$Singles)/(simSeasons$HR + simSeasons$Triples + simSeasons$Doubles + 
        simSeasons$Singles), breaks = 20, xlab = "Power Factor", ylab = "", 
        main = paste("Simulated Power Factor for \n Miguel Cabrera \n using rates from the 2013 season"), 
        cex.axis = 1.3, cex.main = 1.5, cex.lab = 1.5, freq = FALSE)
}
##     K  BB HBP BIP Singles Doubles Triples HR Sac Outs
## 1  91  94   4 463     129      24       0 45   4  261
## 2  68  97   4 483     137      33       3 46   0  264
## 3 106  97   5 444     110      25       0 44   1  264
## 4 113  99   5 435     132      26       0 39   2  236
## 5  97 104   5 446     107      30       0 46   2  261
## 6  99  92   5 456     119      23       1 38   0  275

plot of chunk unnamed-chunk-8